From 749cf76c5a363e1383108a914ea09530bfa0bd43 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <c.dall@virtualopensystems.com>
Date: Sun, 20 Jan 2013 18:28:06 -0500
Subject: KVM: ARM: Initial skeleton to compile KVM support

Targets KVM support for Cortex A-15 processors.

Contains all the framework components, make files, header files, some
tracing functionality, and basic user space API.

Only supported core is Cortex-A15 for now.

Most functionality is in arch/arm/kvm/* or arch/arm/include/asm/kvm_*.h.

Reviewed-by: Will Deacon <will.deacon@arm.com>
Reviewed-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
---
 arch/arm/kvm/arm.c | 350 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 350 insertions(+)
 create mode 100644 arch/arm/kvm/arm.c

(limited to 'arch/arm/kvm/arm.c')

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
new file mode 100644
index 000000000000..d3506b4001aa
--- /dev/null
+++ b/arch/arm/kvm/arm.c
@@ -0,0 +1,350 @@
+/*
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <linux/mman.h>
+#include <linux/sched.h>
+#include <trace/events/kvm.h>
+
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
+#include <asm/unified.h>
+#include <asm/uaccess.h>
+#include <asm/ptrace.h>
+#include <asm/mman.h>
+#include <asm/cputype.h>
+
+#ifdef REQUIRES_VIRT
+__asm__(".arch_extension	virt");
+#endif
+
+int kvm_arch_hardware_enable(void *garbage)
+{
+	return 0;
+}
+
+int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
+{
+	return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
+}
+
+void kvm_arch_hardware_disable(void *garbage)
+{
+}
+
+int kvm_arch_hardware_setup(void)
+{
+	return 0;
+}
+
+void kvm_arch_hardware_unsetup(void)
+{
+}
+
+void kvm_arch_check_processor_compat(void *rtn)
+{
+	*(int *)rtn = 0;
+}
+
+void kvm_arch_sync_events(struct kvm *kvm)
+{
+}
+
+int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
+{
+	if (type)
+		return -EINVAL;
+
+	return 0;
+}
+
+int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
+{
+	return VM_FAULT_SIGBUS;
+}
+
+void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+			   struct kvm_memory_slot *dont)
+{
+}
+
+int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+{
+	return 0;
+}
+
+void kvm_arch_destroy_vm(struct kvm *kvm)
+{
+	int i;
+
+	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+		if (kvm->vcpus[i]) {
+			kvm_arch_vcpu_free(kvm->vcpus[i]);
+			kvm->vcpus[i] = NULL;
+		}
+	}
+}
+
+int kvm_dev_ioctl_check_extension(long ext)
+{
+	int r;
+	switch (ext) {
+	case KVM_CAP_USER_MEMORY:
+	case KVM_CAP_SYNC_MMU:
+	case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
+	case KVM_CAP_ONE_REG:
+		r = 1;
+		break;
+	case KVM_CAP_COALESCED_MMIO:
+		r = KVM_COALESCED_MMIO_PAGE_OFFSET;
+		break;
+	case KVM_CAP_NR_VCPUS:
+		r = num_online_cpus();
+		break;
+	case KVM_CAP_MAX_VCPUS:
+		r = KVM_MAX_VCPUS;
+		break;
+	default:
+		r = 0;
+		break;
+	}
+	return r;
+}
+
+long kvm_arch_dev_ioctl(struct file *filp,
+			unsigned int ioctl, unsigned long arg)
+{
+	return -EINVAL;
+}
+
+int kvm_arch_set_memory_region(struct kvm *kvm,
+			       struct kvm_userspace_memory_region *mem,
+			       struct kvm_memory_slot old,
+			       int user_alloc)
+{
+	return 0;
+}
+
+int kvm_arch_prepare_memory_region(struct kvm *kvm,
+				   struct kvm_memory_slot *memslot,
+				   struct kvm_memory_slot old,
+				   struct kvm_userspace_memory_region *mem,
+				   int user_alloc)
+{
+	return 0;
+}
+
+void kvm_arch_commit_memory_region(struct kvm *kvm,
+				   struct kvm_userspace_memory_region *mem,
+				   struct kvm_memory_slot old,
+				   int user_alloc)
+{
+}
+
+void kvm_arch_flush_shadow_all(struct kvm *kvm)
+{
+}
+
+void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+				   struct kvm_memory_slot *slot)
+{
+}
+
+struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
+{
+	int err;
+	struct kvm_vcpu *vcpu;
+
+	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+	if (!vcpu) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	err = kvm_vcpu_init(vcpu, kvm, id);
+	if (err)
+		goto free_vcpu;
+
+	return vcpu;
+free_vcpu:
+	kmem_cache_free(kvm_vcpu_cache, vcpu);
+out:
+	return ERR_PTR(err);
+}
+
+int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
+{
+	return 0;
+}
+
+void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
+{
+}
+
+void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
+{
+	kvm_arch_vcpu_free(vcpu);
+}
+
+int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
+{
+	return 0;
+}
+
+int __attribute_const__ kvm_target_cpu(void)
+{
+	unsigned long implementor = read_cpuid_implementor();
+	unsigned long part_number = read_cpuid_part_number();
+
+	if (implementor != ARM_CPU_IMP_ARM)
+		return -EINVAL;
+
+	switch (part_number) {
+	case ARM_CPU_PART_CORTEX_A15:
+		return KVM_ARM_TARGET_CORTEX_A15;
+	default:
+		return -EINVAL;
+	}
+}
+
+int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
+{
+	return 0;
+}
+
+void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
+{
+}
+
+void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+}
+
+void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
+{
+}
+
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+					struct kvm_guest_debug *dbg)
+{
+	return -EINVAL;
+}
+
+
+int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
+				    struct kvm_mp_state *mp_state)
+{
+	return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
+				    struct kvm_mp_state *mp_state)
+{
+	return -EINVAL;
+}
+
+int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
+{
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	return -EINVAL;
+}
+
+long kvm_arch_vcpu_ioctl(struct file *filp,
+			 unsigned int ioctl, unsigned long arg)
+{
+	struct kvm_vcpu *vcpu = filp->private_data;
+	void __user *argp = (void __user *)arg;
+
+	switch (ioctl) {
+	case KVM_ARM_VCPU_INIT: {
+		struct kvm_vcpu_init init;
+
+		if (copy_from_user(&init, argp, sizeof(init)))
+			return -EFAULT;
+
+		return kvm_vcpu_set_target(vcpu, &init);
+
+	}
+	case KVM_SET_ONE_REG:
+	case KVM_GET_ONE_REG: {
+		struct kvm_one_reg reg;
+		if (copy_from_user(&reg, argp, sizeof(reg)))
+			return -EFAULT;
+		if (ioctl == KVM_SET_ONE_REG)
+			return kvm_arm_set_reg(vcpu, &reg);
+		else
+			return kvm_arm_get_reg(vcpu, &reg);
+	}
+	case KVM_GET_REG_LIST: {
+		struct kvm_reg_list __user *user_list = argp;
+		struct kvm_reg_list reg_list;
+		unsigned n;
+
+		if (copy_from_user(&reg_list, user_list, sizeof(reg_list)))
+			return -EFAULT;
+		n = reg_list.n;
+		reg_list.n = kvm_arm_num_regs(vcpu);
+		if (copy_to_user(user_list, &reg_list, sizeof(reg_list)))
+			return -EFAULT;
+		if (n < reg_list.n)
+			return -E2BIG;
+		return kvm_arm_copy_reg_indices(vcpu, user_list->reg);
+	}
+	default:
+		return -EINVAL;
+	}
+}
+
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
+{
+	return -EINVAL;
+}
+
+long kvm_arch_vm_ioctl(struct file *filp,
+		       unsigned int ioctl, unsigned long arg)
+{
+	return -EINVAL;
+}
+
+int kvm_arch_init(void *opaque)
+{
+	return 0;
+}
+
+/* NOP: Compiling as a module not supported */
+void kvm_arch_exit(void)
+{
+}
+
+static int arm_init(void)
+{
+	int rc = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
+	return rc;
+}
+
+module_init(arm_init);
-- 
cgit v1.2.3


From 342cd0ab0e6ca3fe7c88a78890352748b8e894a9 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <c.dall@virtualopensystems.com>
Date: Sun, 20 Jan 2013 18:28:06 -0500
Subject: KVM: ARM: Hypervisor initialization

Sets up KVM code to handle all exceptions taken to Hyp mode.

When the kernel is booted in Hyp mode, calling an hvc instruction with r0
pointing to the new vectors, the HVBAR is changed to the the vector pointers.
This allows subsystems (like KVM here) to execute code in Hyp-mode with the
MMU disabled.

We initialize other Hyp-mode registers and enables the MMU for Hyp-mode from
the id-mapped hyp initialization code. Afterwards, the HVBAR is changed to
point to KVM Hyp vectors used to catch guest faults and to switch to Hyp mode
to perform a world-switch into a KVM guest.

Also provides memory mapping code to map required code pages, data structures,
and I/O regions  accessed in Hyp mode at the same virtual address as the host
kernel virtual addresses, but which conforms to the architectural requirements
for translations in Hyp mode. This interface is added in arch/arm/kvm/arm_mmu.c
and comprises:
 - create_hyp_mappings(from, to);
 - create_hyp_io_mappings(from, to, phys_addr);
 - free_hyp_pmds();

Reviewed-by: Will Deacon <will.deacon@arm.com>
Reviewed-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
---
 arch/arm/include/asm/kvm_arm.h              | 124 ++++++++++++++
 arch/arm/include/asm/kvm_asm.h              |  20 +++
 arch/arm/include/asm/kvm_host.h             |   1 +
 arch/arm/include/asm/kvm_mmu.h              |  29 ++++
 arch/arm/include/asm/pgtable-3level-hwdef.h |   4 +
 arch/arm/kvm/arm.c                          | 172 +++++++++++++++++++
 arch/arm/kvm/init.S                         |  95 +++++++++++
 arch/arm/kvm/interrupts.S                   |  62 +++++++
 arch/arm/kvm/mmu.c                          | 248 ++++++++++++++++++++++++++++
 9 files changed, 755 insertions(+)
 create mode 100644 arch/arm/include/asm/kvm_mmu.h

(limited to 'arch/arm/kvm/arm.c')

diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index dc678e193417..8875b3f605a7 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -21,4 +21,128 @@
 
 #include <linux/types.h>
 
+/* Hyp Configuration Register (HCR) bits */
+#define HCR_TGE		(1 << 27)
+#define HCR_TVM		(1 << 26)
+#define HCR_TTLB	(1 << 25)
+#define HCR_TPU		(1 << 24)
+#define HCR_TPC		(1 << 23)
+#define HCR_TSW		(1 << 22)
+#define HCR_TAC		(1 << 21)
+#define HCR_TIDCP	(1 << 20)
+#define HCR_TSC		(1 << 19)
+#define HCR_TID3	(1 << 18)
+#define HCR_TID2	(1 << 17)
+#define HCR_TID1	(1 << 16)
+#define HCR_TID0	(1 << 15)
+#define HCR_TWE		(1 << 14)
+#define HCR_TWI		(1 << 13)
+#define HCR_DC		(1 << 12)
+#define HCR_BSU		(3 << 10)
+#define HCR_BSU_IS	(1 << 10)
+#define HCR_FB		(1 << 9)
+#define HCR_VA		(1 << 8)
+#define HCR_VI		(1 << 7)
+#define HCR_VF		(1 << 6)
+#define HCR_AMO		(1 << 5)
+#define HCR_IMO		(1 << 4)
+#define HCR_FMO		(1 << 3)
+#define HCR_PTW		(1 << 2)
+#define HCR_SWIO	(1 << 1)
+#define HCR_VM		1
+
+/*
+ * The bits we set in HCR:
+ * TAC:		Trap ACTLR
+ * TSC:		Trap SMC
+ * TSW:		Trap cache operations by set/way
+ * TWI:		Trap WFI
+ * TIDCP:	Trap L2CTLR/L2ECTLR
+ * BSU_IS:	Upgrade barriers to the inner shareable domain
+ * FB:		Force broadcast of all maintainance operations
+ * AMO:		Override CPSR.A and enable signaling with VA
+ * IMO:		Override CPSR.I and enable signaling with VI
+ * FMO:		Override CPSR.F and enable signaling with VF
+ * SWIO:	Turn set/way invalidates into set/way clean+invalidate
+ */
+#define HCR_GUEST_MASK (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \
+			HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \
+			HCR_SWIO | HCR_TIDCP)
+
+/* Hyp System Control Register (HSCTLR) bits */
+#define HSCTLR_TE	(1 << 30)
+#define HSCTLR_EE	(1 << 25)
+#define HSCTLR_FI	(1 << 21)
+#define HSCTLR_WXN	(1 << 19)
+#define HSCTLR_I	(1 << 12)
+#define HSCTLR_C	(1 << 2)
+#define HSCTLR_A	(1 << 1)
+#define HSCTLR_M	1
+#define HSCTLR_MASK	(HSCTLR_M | HSCTLR_A | HSCTLR_C | HSCTLR_I | \
+			 HSCTLR_WXN | HSCTLR_FI | HSCTLR_EE | HSCTLR_TE)
+
+/* TTBCR and HTCR Registers bits */
+#define TTBCR_EAE	(1 << 31)
+#define TTBCR_IMP	(1 << 30)
+#define TTBCR_SH1	(3 << 28)
+#define TTBCR_ORGN1	(3 << 26)
+#define TTBCR_IRGN1	(3 << 24)
+#define TTBCR_EPD1	(1 << 23)
+#define TTBCR_A1	(1 << 22)
+#define TTBCR_T1SZ	(3 << 16)
+#define TTBCR_SH0	(3 << 12)
+#define TTBCR_ORGN0	(3 << 10)
+#define TTBCR_IRGN0	(3 << 8)
+#define TTBCR_EPD0	(1 << 7)
+#define TTBCR_T0SZ	3
+#define HTCR_MASK	(TTBCR_T0SZ | TTBCR_IRGN0 | TTBCR_ORGN0 | TTBCR_SH0)
+
+/* Hyp Debug Configuration Register bits */
+#define HDCR_TDRA	(1 << 11)
+#define HDCR_TDOSA	(1 << 10)
+#define HDCR_TDA	(1 << 9)
+#define HDCR_TDE	(1 << 8)
+#define HDCR_HPME	(1 << 7)
+#define HDCR_TPM	(1 << 6)
+#define HDCR_TPMCR	(1 << 5)
+#define HDCR_HPMN_MASK	(0x1F)
+
+/*
+ * The architecture supports 40-bit IPA as input to the 2nd stage translations
+ * and PTRS_PER_S2_PGD becomes 1024, because each entry covers 1GB of address
+ * space.
+ */
+#define KVM_PHYS_SHIFT	(40)
+#define KVM_PHYS_SIZE	(1ULL << KVM_PHYS_SHIFT)
+#define KVM_PHYS_MASK	(KVM_PHYS_SIZE - 1ULL)
+#define PTRS_PER_S2_PGD	(1ULL << (KVM_PHYS_SHIFT - 30))
+#define S2_PGD_ORDER	get_order(PTRS_PER_S2_PGD * sizeof(pgd_t))
+#define S2_PGD_SIZE	(1 << S2_PGD_ORDER)
+
+/* Virtualization Translation Control Register (VTCR) bits */
+#define VTCR_SH0	(3 << 12)
+#define VTCR_ORGN0	(3 << 10)
+#define VTCR_IRGN0	(3 << 8)
+#define VTCR_SL0	(3 << 6)
+#define VTCR_S		(1 << 4)
+#define VTCR_T0SZ	(0xf)
+#define VTCR_MASK	(VTCR_SH0 | VTCR_ORGN0 | VTCR_IRGN0 | VTCR_SL0 | \
+			 VTCR_S | VTCR_T0SZ)
+#define VTCR_HTCR_SH	(VTCR_SH0 | VTCR_ORGN0 | VTCR_IRGN0)
+#define VTCR_SL_L2	(0 << 6)	/* Starting-level: 2 */
+#define VTCR_SL_L1	(1 << 6)	/* Starting-level: 1 */
+#define KVM_VTCR_SL0	VTCR_SL_L1
+/* stage-2 input address range defined as 2^(32-T0SZ) */
+#define KVM_T0SZ	(32 - KVM_PHYS_SHIFT)
+#define KVM_VTCR_T0SZ	(KVM_T0SZ & VTCR_T0SZ)
+#define KVM_VTCR_S	((KVM_VTCR_T0SZ << 1) & VTCR_S)
+
+/* Virtualization Translation Table Base Register (VTTBR) bits */
+#if KVM_VTCR_SL0 == VTCR_SL_L2	/* see ARM DDI 0406C: B4-1720 */
+#define VTTBR_X		(14 - KVM_T0SZ)
+#else
+#define VTTBR_X		(5 - KVM_T0SZ)
+#endif
+
+
 #endif /* __ARM_KVM_ARM_H__ */
diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index f9993e5fb695..81324e2eb3f9 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -54,5 +54,25 @@
 #define ARM_EXCEPTION_DATA_ABORT  4
 #define ARM_EXCEPTION_IRQ	  5
 #define ARM_EXCEPTION_FIQ	  6
+#define ARM_EXCEPTION_HVC	  7
+
+#ifndef __ASSEMBLY__
+struct kvm_vcpu;
+
+extern char __kvm_hyp_init[];
+extern char __kvm_hyp_init_end[];
+
+extern char __kvm_hyp_exit[];
+extern char __kvm_hyp_exit_end[];
+
+extern char __kvm_hyp_vector[];
+
+extern char __kvm_hyp_code_start[];
+extern char __kvm_hyp_code_end[];
+
+extern void __kvm_flush_vm_context(void);
+
+extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
+#endif
 
 #endif /* __ARM_KVM_ASM_H__ */
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 0d9938a20751..067ef2898c26 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -111,4 +111,5 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
 struct kvm_one_reg;
 int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
 int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
+u64 kvm_call_hyp(void *hypfn, ...);
 #endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
new file mode 100644
index 000000000000..e8679b317b0f
--- /dev/null
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __ARM_KVM_MMU_H__
+#define __ARM_KVM_MMU_H__
+
+int create_hyp_mappings(void *from, void *to);
+int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
+void free_hyp_pmds(void);
+
+phys_addr_t kvm_mmu_get_httbr(void);
+int kvm_mmu_init(void);
+void kvm_clear_hyp_idmap(void);
+#endif /* __ARM_KVM_MMU_H__ */
diff --git a/arch/arm/include/asm/pgtable-3level-hwdef.h b/arch/arm/include/asm/pgtable-3level-hwdef.h
index a2d404ed1ade..18f5cef82ad5 100644
--- a/arch/arm/include/asm/pgtable-3level-hwdef.h
+++ b/arch/arm/include/asm/pgtable-3level-hwdef.h
@@ -32,6 +32,9 @@
 #define PMD_TYPE_SECT		(_AT(pmdval_t, 1) << 0)
 #define PMD_BIT4		(_AT(pmdval_t, 0))
 #define PMD_DOMAIN(x)		(_AT(pmdval_t, 0))
+#define PMD_APTABLE_SHIFT	(61)
+#define PMD_APTABLE		(_AT(pgdval_t, 3) << PGD_APTABLE_SHIFT)
+#define PMD_PXNTABLE		(_AT(pgdval_t, 1) << 59)
 
 /*
  *   - section
@@ -41,6 +44,7 @@
 #define PMD_SECT_S		(_AT(pmdval_t, 3) << 8)
 #define PMD_SECT_AF		(_AT(pmdval_t, 1) << 10)
 #define PMD_SECT_nG		(_AT(pmdval_t, 1) << 11)
+#define PMD_SECT_PXN		(_AT(pmdval_t, 1) << 53)
 #define PMD_SECT_XN		(_AT(pmdval_t, 1) << 54)
 #define PMD_SECT_AP_WRITE	(_AT(pmdval_t, 0))
 #define PMD_SECT_AP_READ	(_AT(pmdval_t, 0))
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index d3506b4001aa..2c6b780e78a7 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -34,11 +34,21 @@
 #include <asm/ptrace.h>
 #include <asm/mman.h>
 #include <asm/cputype.h>
+#include <asm/tlbflush.h>
+#include <asm/virt.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_mmu.h>
 
 #ifdef REQUIRES_VIRT
 __asm__(".arch_extension	virt");
 #endif
 
+static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
+static struct vfp_hard_struct __percpu *kvm_host_vfp_state;
+static unsigned long hyp_default_vectors;
+
+
 int kvm_arch_hardware_enable(void *garbage)
 {
 	return 0;
@@ -331,9 +341,171 @@ long kvm_arch_vm_ioctl(struct file *filp,
 	return -EINVAL;
 }
 
+static void cpu_init_hyp_mode(void *vector)
+{
+	unsigned long long pgd_ptr;
+	unsigned long pgd_low, pgd_high;
+	unsigned long hyp_stack_ptr;
+	unsigned long stack_page;
+	unsigned long vector_ptr;
+
+	/* Switch from the HYP stub to our own HYP init vector */
+	__hyp_set_vectors((unsigned long)vector);
+
+	pgd_ptr = (unsigned long long)kvm_mmu_get_httbr();
+	pgd_low = (pgd_ptr & ((1ULL << 32) - 1));
+	pgd_high = (pgd_ptr >> 32ULL);
+	stack_page = __get_cpu_var(kvm_arm_hyp_stack_page);
+	hyp_stack_ptr = stack_page + PAGE_SIZE;
+	vector_ptr = (unsigned long)__kvm_hyp_vector;
+
+	/*
+	 * Call initialization code, and switch to the full blown
+	 * HYP code. The init code doesn't need to preserve these registers as
+	 * r1-r3 and r12 are already callee save according to the AAPCS.
+	 * Note that we slightly misuse the prototype by casing the pgd_low to
+	 * a void *.
+	 */
+	kvm_call_hyp((void *)pgd_low, pgd_high, hyp_stack_ptr, vector_ptr);
+}
+
+/**
+ * Inits Hyp-mode on all online CPUs
+ */
+static int init_hyp_mode(void)
+{
+	phys_addr_t init_phys_addr;
+	int cpu;
+	int err = 0;
+
+	/*
+	 * Allocate Hyp PGD and setup Hyp identity mapping
+	 */
+	err = kvm_mmu_init();
+	if (err)
+		goto out_err;
+
+	/*
+	 * It is probably enough to obtain the default on one
+	 * CPU. It's unlikely to be different on the others.
+	 */
+	hyp_default_vectors = __hyp_get_vectors();
+
+	/*
+	 * Allocate stack pages for Hypervisor-mode
+	 */
+	for_each_possible_cpu(cpu) {
+		unsigned long stack_page;
+
+		stack_page = __get_free_page(GFP_KERNEL);
+		if (!stack_page) {
+			err = -ENOMEM;
+			goto out_free_stack_pages;
+		}
+
+		per_cpu(kvm_arm_hyp_stack_page, cpu) = stack_page;
+	}
+
+	/*
+	 * Execute the init code on each CPU.
+	 *
+	 * Note: The stack is not mapped yet, so don't do anything else than
+	 * initializing the hypervisor mode on each CPU using a local stack
+	 * space for temporary storage.
+	 */
+	init_phys_addr = virt_to_phys(__kvm_hyp_init);
+	for_each_online_cpu(cpu) {
+		smp_call_function_single(cpu, cpu_init_hyp_mode,
+					 (void *)(long)init_phys_addr, 1);
+	}
+
+	/*
+	 * Unmap the identity mapping
+	 */
+	kvm_clear_hyp_idmap();
+
+	/*
+	 * Map the Hyp-code called directly from the host
+	 */
+	err = create_hyp_mappings(__kvm_hyp_code_start, __kvm_hyp_code_end);
+	if (err) {
+		kvm_err("Cannot map world-switch code\n");
+		goto out_free_mappings;
+	}
+
+	/*
+	 * Map the Hyp stack pages
+	 */
+	for_each_possible_cpu(cpu) {
+		char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu);
+		err = create_hyp_mappings(stack_page, stack_page + PAGE_SIZE);
+
+		if (err) {
+			kvm_err("Cannot map hyp stack\n");
+			goto out_free_mappings;
+		}
+	}
+
+	/*
+	 * Map the host VFP structures
+	 */
+	kvm_host_vfp_state = alloc_percpu(struct vfp_hard_struct);
+	if (!kvm_host_vfp_state) {
+		err = -ENOMEM;
+		kvm_err("Cannot allocate host VFP state\n");
+		goto out_free_mappings;
+	}
+
+	for_each_possible_cpu(cpu) {
+		struct vfp_hard_struct *vfp;
+
+		vfp = per_cpu_ptr(kvm_host_vfp_state, cpu);
+		err = create_hyp_mappings(vfp, vfp + 1);
+
+		if (err) {
+			kvm_err("Cannot map host VFP state: %d\n", err);
+			goto out_free_vfp;
+		}
+	}
+
+	kvm_info("Hyp mode initialized successfully\n");
+	return 0;
+out_free_vfp:
+	free_percpu(kvm_host_vfp_state);
+out_free_mappings:
+	free_hyp_pmds();
+out_free_stack_pages:
+	for_each_possible_cpu(cpu)
+		free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
+out_err:
+	kvm_err("error initializing Hyp mode: %d\n", err);
+	return err;
+}
+
+/**
+ * Initialize Hyp-mode and memory mappings on all CPUs.
+ */
 int kvm_arch_init(void *opaque)
 {
+	int err;
+
+	if (!is_hyp_mode_available()) {
+		kvm_err("HYP mode not available\n");
+		return -ENODEV;
+	}
+
+	if (kvm_target_cpu() < 0) {
+		kvm_err("Target CPU not supported!\n");
+		return -ENODEV;
+	}
+
+	err = init_hyp_mode();
+	if (err)
+		goto out_err;
+
 	return 0;
+out_err:
+	return err;
 }
 
 /* NOP: Compiling as a module not supported */
diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S
index 1dc8926e26d2..9f37a79b880b 100644
--- a/arch/arm/kvm/init.S
+++ b/arch/arm/kvm/init.S
@@ -15,5 +15,100 @@
  * along with this program; if not, write to the Free Software
  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  */
+
+#include <linux/linkage.h>
+#include <asm/unified.h>
 #include <asm/asm-offsets.h>
 #include <asm/kvm_asm.h>
+#include <asm/kvm_arm.h>
+
+/********************************************************************
+ * Hypervisor initialization
+ *   - should be called with:
+ *       r0,r1 = Hypervisor pgd pointer
+ *       r2 = top of Hyp stack (kernel VA)
+ *       r3 = pointer to hyp vectors
+ */
+
+	.text
+	.pushsection    .hyp.idmap.text,"ax"
+	.align 5
+__kvm_hyp_init:
+	.globl __kvm_hyp_init
+
+	@ Hyp-mode exception vector
+	W(b)	.
+	W(b)	.
+	W(b)	.
+	W(b)	.
+	W(b)	.
+	W(b)	__do_hyp_init
+	W(b)	.
+	W(b)	.
+
+__do_hyp_init:
+	@ Set the HTTBR to point to the hypervisor PGD pointer passed
+	mcrr	p15, 4, r0, r1, c2
+
+	@ Set the HTCR and VTCR to the same shareability and cacheability
+	@ settings as the non-secure TTBCR and with T0SZ == 0.
+	mrc	p15, 4, r0, c2, c0, 2	@ HTCR
+	ldr	r12, =HTCR_MASK
+	bic	r0, r0, r12
+	mrc	p15, 0, r1, c2, c0, 2	@ TTBCR
+	and	r1, r1, #(HTCR_MASK & ~TTBCR_T0SZ)
+	orr	r0, r0, r1
+	mcr	p15, 4, r0, c2, c0, 2	@ HTCR
+
+	mrc	p15, 4, r1, c2, c1, 2	@ VTCR
+	ldr	r12, =VTCR_MASK
+	bic	r1, r1, r12
+	bic	r0, r0, #(~VTCR_HTCR_SH)	@ clear non-reusable HTCR bits
+	orr	r1, r0, r1
+	orr	r1, r1, #(KVM_VTCR_SL0 | KVM_VTCR_T0SZ | KVM_VTCR_S)
+	mcr	p15, 4, r1, c2, c1, 2	@ VTCR
+
+	@ Use the same memory attributes for hyp. accesses as the kernel
+	@ (copy MAIRx ro HMAIRx).
+	mrc	p15, 0, r0, c10, c2, 0
+	mcr	p15, 4, r0, c10, c2, 0
+	mrc	p15, 0, r0, c10, c2, 1
+	mcr	p15, 4, r0, c10, c2, 1
+
+	@ Set the HSCTLR to:
+	@  - ARM/THUMB exceptions: Kernel config (Thumb-2 kernel)
+	@  - Endianness: Kernel config
+	@  - Fast Interrupt Features: Kernel config
+	@  - Write permission implies XN: disabled
+	@  - Instruction cache: enabled
+	@  - Data/Unified cache: enabled
+	@  - Memory alignment checks: enabled
+	@  - MMU: enabled (this code must be run from an identity mapping)
+	mrc	p15, 4, r0, c1, c0, 0	@ HSCR
+	ldr	r12, =HSCTLR_MASK
+	bic	r0, r0, r12
+	mrc	p15, 0, r1, c1, c0, 0	@ SCTLR
+	ldr	r12, =(HSCTLR_EE | HSCTLR_FI | HSCTLR_I | HSCTLR_C)
+	and	r1, r1, r12
+ ARM(	ldr	r12, =(HSCTLR_M | HSCTLR_A)			)
+ THUMB(	ldr	r12, =(HSCTLR_M | HSCTLR_A | HSCTLR_TE)		)
+	orr	r1, r1, r12
+	orr	r0, r0, r1
+	isb
+	mcr	p15, 4, r0, c1, c0, 0	@ HSCR
+	isb
+
+	@ Set stack pointer and return to the kernel
+	mov	sp, r2
+
+	@ Set HVBAR to point to the HYP vectors
+	mcr	p15, 4, r3, c12, c0, 0	@ HVBAR
+
+	eret
+
+	.ltorg
+
+	.globl __kvm_hyp_init_end
+__kvm_hyp_init_end:
+
+	.popsection
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index 1dc8926e26d2..d10a8075409a 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -15,5 +15,67 @@
  * along with this program; if not, write to the Free Software
  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  */
+
+#include <linux/linkage.h>
+#include <linux/const.h>
+#include <asm/unified.h>
+#include <asm/page.h>
 #include <asm/asm-offsets.h>
 #include <asm/kvm_asm.h>
+#include <asm/kvm_arm.h>
+
+	.text
+
+__kvm_hyp_code_start:
+	.globl __kvm_hyp_code_start
+
+/********************************************************************
+ * Flush per-VMID TLBs
+ */
+ENTRY(__kvm_flush_vm_context)
+	bx	lr
+ENDPROC(__kvm_flush_vm_context)
+
+/********************************************************************
+ *  Hypervisor world-switch code
+ */
+ENTRY(__kvm_vcpu_run)
+	bx	lr
+
+/********************************************************************
+ *  Call function in Hyp mode
+ *
+ *
+ * u64 kvm_call_hyp(void *hypfn, ...);
+ *
+ * This is not really a variadic function in the classic C-way and care must
+ * be taken when calling this to ensure parameters are passed in registers
+ * only, since the stack will change between the caller and the callee.
+ *
+ * Call the function with the first argument containing a pointer to the
+ * function you wish to call in Hyp mode, and subsequent arguments will be
+ * passed as r0, r1, and r2 (a maximum of 3 arguments in addition to the
+ * function pointer can be passed).  The function being called must be mapped
+ * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c).  Return values are
+ * passed in r0 and r1.
+ *
+ * The calling convention follows the standard AAPCS:
+ *   r0 - r3: caller save
+ *   r12:     caller save
+ *   rest:    callee save
+ */
+ENTRY(kvm_call_hyp)
+	hvc	#0
+	bx	lr
+
+/********************************************************************
+ * Hypervisor exception vector and handlers
+ */
+
+	.align 5
+__kvm_hyp_vector:
+	.globl __kvm_hyp_vector
+	nop
+
+__kvm_hyp_code_end:
+	.globl	__kvm_hyp_code_end
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 10ed4643269f..4decdb618019 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -15,3 +15,251 @@
  * along with this program; if not, write to the Free Software
  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  */
+
+#include <linux/mman.h>
+#include <linux/kvm_host.h>
+#include <linux/io.h>
+#include <asm/idmap.h>
+#include <asm/pgalloc.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmu.h>
+#include <asm/mach/map.h>
+
+extern char  __hyp_idmap_text_start[], __hyp_idmap_text_end[];
+
+static DEFINE_MUTEX(kvm_hyp_pgd_mutex);
+
+static void kvm_set_pte(pte_t *pte, pte_t new_pte)
+{
+	pte_val(*pte) = new_pte;
+	/*
+	 * flush_pmd_entry just takes a void pointer and cleans the necessary
+	 * cache entries, so we can reuse the function for ptes.
+	 */
+	flush_pmd_entry(pte);
+}
+
+static void free_ptes(pmd_t *pmd, unsigned long addr)
+{
+	pte_t *pte;
+	unsigned int i;
+
+	for (i = 0; i < PTRS_PER_PMD; i++, addr += PMD_SIZE) {
+		if (!pmd_none(*pmd) && pmd_table(*pmd)) {
+			pte = pte_offset_kernel(pmd, addr);
+			pte_free_kernel(NULL, pte);
+		}
+		pmd++;
+	}
+}
+
+/**
+ * free_hyp_pmds - free a Hyp-mode level-2 tables and child level-3 tables
+ *
+ * Assumes this is a page table used strictly in Hyp-mode and therefore contains
+ * only mappings in the kernel memory area, which is above PAGE_OFFSET.
+ */
+void free_hyp_pmds(void)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	unsigned long addr;
+
+	mutex_lock(&kvm_hyp_pgd_mutex);
+	for (addr = PAGE_OFFSET; addr != 0; addr += PGDIR_SIZE) {
+		pgd = hyp_pgd + pgd_index(addr);
+		pud = pud_offset(pgd, addr);
+
+		if (pud_none(*pud))
+			continue;
+		BUG_ON(pud_bad(*pud));
+
+		pmd = pmd_offset(pud, addr);
+		free_ptes(pmd, addr);
+		pmd_free(NULL, pmd);
+		pud_clear(pud);
+	}
+	mutex_unlock(&kvm_hyp_pgd_mutex);
+}
+
+static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start,
+				    unsigned long end)
+{
+	pte_t *pte;
+	unsigned long addr;
+	struct page *page;
+
+	for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) {
+		pte = pte_offset_kernel(pmd, addr);
+		BUG_ON(!virt_addr_valid(addr));
+		page = virt_to_page(addr);
+		kvm_set_pte(pte, mk_pte(page, PAGE_HYP));
+	}
+}
+
+static void create_hyp_io_pte_mappings(pmd_t *pmd, unsigned long start,
+				       unsigned long end,
+				       unsigned long *pfn_base)
+{
+	pte_t *pte;
+	unsigned long addr;
+
+	for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) {
+		pte = pte_offset_kernel(pmd, addr);
+		BUG_ON(pfn_valid(*pfn_base));
+		kvm_set_pte(pte, pfn_pte(*pfn_base, PAGE_HYP_DEVICE));
+		(*pfn_base)++;
+	}
+}
+
+static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
+				   unsigned long end, unsigned long *pfn_base)
+{
+	pmd_t *pmd;
+	pte_t *pte;
+	unsigned long addr, next;
+
+	for (addr = start; addr < end; addr = next) {
+		pmd = pmd_offset(pud, addr);
+
+		BUG_ON(pmd_sect(*pmd));
+
+		if (pmd_none(*pmd)) {
+			pte = pte_alloc_one_kernel(NULL, addr);
+			if (!pte) {
+				kvm_err("Cannot allocate Hyp pte\n");
+				return -ENOMEM;
+			}
+			pmd_populate_kernel(NULL, pmd, pte);
+		}
+
+		next = pmd_addr_end(addr, end);
+
+		/*
+		 * If pfn_base is NULL, we map kernel pages into HYP with the
+		 * virtual address. Otherwise, this is considered an I/O
+		 * mapping and we map the physical region starting at
+		 * *pfn_base to [start, end[.
+		 */
+		if (!pfn_base)
+			create_hyp_pte_mappings(pmd, addr, next);
+		else
+			create_hyp_io_pte_mappings(pmd, addr, next, pfn_base);
+	}
+
+	return 0;
+}
+
+static int __create_hyp_mappings(void *from, void *to, unsigned long *pfn_base)
+{
+	unsigned long start = (unsigned long)from;
+	unsigned long end = (unsigned long)to;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	unsigned long addr, next;
+	int err = 0;
+
+	BUG_ON(start > end);
+	if (start < PAGE_OFFSET)
+		return -EINVAL;
+
+	mutex_lock(&kvm_hyp_pgd_mutex);
+	for (addr = start; addr < end; addr = next) {
+		pgd = hyp_pgd + pgd_index(addr);
+		pud = pud_offset(pgd, addr);
+
+		if (pud_none_or_clear_bad(pud)) {
+			pmd = pmd_alloc_one(NULL, addr);
+			if (!pmd) {
+				kvm_err("Cannot allocate Hyp pmd\n");
+				err = -ENOMEM;
+				goto out;
+			}
+			pud_populate(NULL, pud, pmd);
+		}
+
+		next = pgd_addr_end(addr, end);
+		err = create_hyp_pmd_mappings(pud, addr, next, pfn_base);
+		if (err)
+			goto out;
+	}
+out:
+	mutex_unlock(&kvm_hyp_pgd_mutex);
+	return err;
+}
+
+/**
+ * create_hyp_mappings - map a kernel virtual address range in Hyp mode
+ * @from:	The virtual kernel start address of the range
+ * @to:		The virtual kernel end address of the range (exclusive)
+ *
+ * The same virtual address as the kernel virtual address is also used in
+ * Hyp-mode mapping to the same underlying physical pages.
+ *
+ * Note: Wrapping around zero in the "to" address is not supported.
+ */
+int create_hyp_mappings(void *from, void *to)
+{
+	return __create_hyp_mappings(from, to, NULL);
+}
+
+/**
+ * create_hyp_io_mappings - map a physical IO range in Hyp mode
+ * @from:	The virtual HYP start address of the range
+ * @to:		The virtual HYP end address of the range (exclusive)
+ * @addr:	The physical start address which gets mapped
+ */
+int create_hyp_io_mappings(void *from, void *to, phys_addr_t addr)
+{
+	unsigned long pfn = __phys_to_pfn(addr);
+	return __create_hyp_mappings(from, to, &pfn);
+}
+
+int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	return -EINVAL;
+}
+
+phys_addr_t kvm_mmu_get_httbr(void)
+{
+	VM_BUG_ON(!virt_addr_valid(hyp_pgd));
+	return virt_to_phys(hyp_pgd);
+}
+
+int kvm_mmu_init(void)
+{
+	return hyp_pgd ? 0 : -ENOMEM;
+}
+
+/**
+ * kvm_clear_idmap - remove all idmaps from the hyp pgd
+ *
+ * Free the underlying pmds for all pgds in range and clear the pgds (but
+ * don't free them) afterwards.
+ */
+void kvm_clear_hyp_idmap(void)
+{
+	unsigned long addr, end;
+	unsigned long next;
+	pgd_t *pgd = hyp_pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	addr = virt_to_phys(__hyp_idmap_text_start);
+	end = virt_to_phys(__hyp_idmap_text_end);
+
+	pgd += pgd_index(addr);
+	do {
+		next = pgd_addr_end(addr, end);
+		if (pgd_none_or_clear_bad(pgd))
+			continue;
+		pud = pud_offset(pgd, addr);
+		pmd = pmd_offset(pud, addr);
+
+		pud_clear(pud);
+		clean_pmd_entry(pmd);
+		pmd_free(NULL, (pmd_t *)((unsigned long)pmd & PAGE_MASK));
+	} while (pgd++, addr = next, addr < end);
+}
-- 
cgit v1.2.3


From d5d8184d35c990b1324d9b30bcd0e4e8aa08f56d Mon Sep 17 00:00:00 2001
From: Christoffer Dall <c.dall@virtualopensystems.com>
Date: Sun, 20 Jan 2013 18:28:07 -0500
Subject: KVM: ARM: Memory virtualization setup

This commit introduces the framework for guest memory management
through the use of 2nd stage translation. Each VM has a pointer
to a level-1 table (the pgd field in struct kvm_arch) which is
used for the 2nd stage translations. Entries are added when handling
guest faults (later patch) and the table itself can be allocated and
freed through the following functions implemented in
arch/arm/kvm/arm_mmu.c:
 - kvm_alloc_stage2_pgd(struct kvm *kvm);
 - kvm_free_stage2_pgd(struct kvm *kvm);

Each entry in TLBs and caches are tagged with a VMID identifier in
addition to ASIDs. The VMIDs are assigned consecutively to VMs in the
order that VMs are executed, and caches and tlbs are invalidated when
the VMID space has been used to allow for more than 255 simultaenously
running guests.

The 2nd stage pgd is allocated in kvm_arch_init_vm(). The table is
freed in kvm_arch_destroy_vm(). Both functions are called from the main
KVM code.

We pre-allocate page table memory to be able to synchronize using a
spinlock and be called under rcu_read_lock from the MMU notifiers.  We
steal the mmu_memory_cache implementation from x86 and adapt for our
specific usage.

We support MMU notifiers (thanks to Marc Zyngier) through
kvm_unmap_hva and kvm_set_spte_hva.

Finally, define kvm_phys_addr_ioremap() to map a device at a guest IPA,
which is used by VGIC support to map the virtual CPU interface registers
to the guest. This support is added by Marc Zyngier.

Reviewed-by: Will Deacon <will.deacon@arm.com>
Reviewed-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
---
 arch/arm/include/asm/kvm_asm.h  |   2 +
 arch/arm/include/asm/kvm_host.h |  18 ++
 arch/arm/include/asm/kvm_mmu.h  |   9 +
 arch/arm/kvm/Kconfig            |   1 +
 arch/arm/kvm/arm.c              |  37 +++-
 arch/arm/kvm/interrupts.S       |   7 +
 arch/arm/kvm/mmu.c              | 370 +++++++++++++++++++++++++++++++++++++++-
 arch/arm/kvm/trace.h            |  46 +++++
 8 files changed, 488 insertions(+), 2 deletions(-)

(limited to 'arch/arm/kvm/arm.c')

diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index 81324e2eb3f9..f6652f6c5d84 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -57,6 +57,7 @@
 #define ARM_EXCEPTION_HVC	  7
 
 #ifndef __ASSEMBLY__
+struct kvm;
 struct kvm_vcpu;
 
 extern char __kvm_hyp_init[];
@@ -71,6 +72,7 @@ extern char __kvm_hyp_code_start[];
 extern char __kvm_hyp_code_end[];
 
 extern void __kvm_flush_vm_context(void);
+extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
 
 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
 #endif
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 067ef2898c26..3636c7ea4eb2 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -112,4 +112,22 @@ struct kvm_one_reg;
 int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
 int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
 u64 kvm_call_hyp(void *hypfn, ...);
+
+#define KVM_ARCH_WANT_MMU_NOTIFIER
+struct kvm;
+int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
+int kvm_unmap_hva_range(struct kvm *kvm,
+			unsigned long start, unsigned long end);
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
+
+/* We do not have shadow page tables, hence the empty hooks */
+static inline int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+{
+	return 0;
+}
+
+static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+{
+	return 0;
+}
 #endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index e8679b317b0f..499e7b0925ff 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -23,6 +23,15 @@ int create_hyp_mappings(void *from, void *to);
 int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
 void free_hyp_pmds(void);
 
+int kvm_alloc_stage2_pgd(struct kvm *kvm);
+void kvm_free_stage2_pgd(struct kvm *kvm);
+int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
+			  phys_addr_t pa, unsigned long size);
+
+int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run);
+
+void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu);
+
 phys_addr_t kvm_mmu_get_httbr(void);
 int kvm_mmu_init(void);
 void kvm_clear_hyp_idmap(void);
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 4a01b6fbf380..05227cb57a7b 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -36,6 +36,7 @@ config KVM_ARM_HOST
 	bool "KVM host support for ARM cpus."
 	depends on KVM
 	depends on MMU
+	select	MMU_NOTIFIER
 	---help---
 	  Provides host support for ARM processors.
 
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 2c6b780e78a7..d810afb6cb84 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -81,12 +81,33 @@ void kvm_arch_sync_events(struct kvm *kvm)
 {
 }
 
+/**
+ * kvm_arch_init_vm - initializes a VM data structure
+ * @kvm:	pointer to the KVM struct
+ */
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 {
+	int ret = 0;
+
 	if (type)
 		return -EINVAL;
 
-	return 0;
+	ret = kvm_alloc_stage2_pgd(kvm);
+	if (ret)
+		goto out_fail_alloc;
+
+	ret = create_hyp_mappings(kvm, kvm + 1);
+	if (ret)
+		goto out_free_stage2_pgd;
+
+	/* Mark the initial VMID generation invalid */
+	kvm->arch.vmid_gen = 0;
+
+	return ret;
+out_free_stage2_pgd:
+	kvm_free_stage2_pgd(kvm);
+out_fail_alloc:
+	return ret;
 }
 
 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
@@ -104,10 +125,16 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
 	return 0;
 }
 
+/**
+ * kvm_arch_destroy_vm - destroy the VM data structure
+ * @kvm:	pointer to the KVM struct
+ */
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
 	int i;
 
+	kvm_free_stage2_pgd(kvm);
+
 	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
 		if (kvm->vcpus[i]) {
 			kvm_arch_vcpu_free(kvm->vcpus[i]);
@@ -196,7 +223,13 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
 	if (err)
 		goto free_vcpu;
 
+	err = create_hyp_mappings(vcpu, vcpu + 1);
+	if (err)
+		goto vcpu_uninit;
+
 	return vcpu;
+vcpu_uninit:
+	kvm_vcpu_uninit(vcpu);
 free_vcpu:
 	kmem_cache_free(kvm_vcpu_cache, vcpu);
 out:
@@ -210,6 +243,8 @@ int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
 
 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 {
+	kvm_mmu_free_memory_caches(vcpu);
+	kmem_cache_free(kvm_vcpu_cache, vcpu);
 }
 
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index d10a8075409a..f701aff31e44 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -32,6 +32,13 @@ __kvm_hyp_code_start:
 /********************************************************************
  * Flush per-VMID TLBs
  */
+ENTRY(__kvm_tlb_flush_vmid)
+	bx	lr
+ENDPROC(__kvm_tlb_flush_vmid)
+
+/********************************************************************
+ * Flush TLBs and instruction caches of current CPU for all VMIDs
+ */
 ENTRY(__kvm_flush_vm_context)
 	bx	lr
 ENDPROC(__kvm_flush_vm_context)
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 4decdb618019..4347d68f052f 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -23,12 +23,21 @@
 #include <asm/pgalloc.h>
 #include <asm/kvm_arm.h>
 #include <asm/kvm_mmu.h>
+#include <asm/kvm_asm.h>
 #include <asm/mach/map.h>
+#include <trace/events/kvm.h>
+
+#include "trace.h"
 
 extern char  __hyp_idmap_text_start[], __hyp_idmap_text_end[];
 
 static DEFINE_MUTEX(kvm_hyp_pgd_mutex);
 
+static void kvm_tlb_flush_vmid(struct kvm *kvm)
+{
+	kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);
+}
+
 static void kvm_set_pte(pte_t *pte, pte_t new_pte)
 {
 	pte_val(*pte) = new_pte;
@@ -39,6 +48,38 @@ static void kvm_set_pte(pte_t *pte, pte_t new_pte)
 	flush_pmd_entry(pte);
 }
 
+static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
+				  int min, int max)
+{
+	void *page;
+
+	BUG_ON(max > KVM_NR_MEM_OBJS);
+	if (cache->nobjs >= min)
+		return 0;
+	while (cache->nobjs < max) {
+		page = (void *)__get_free_page(PGALLOC_GFP);
+		if (!page)
+			return -ENOMEM;
+		cache->objects[cache->nobjs++] = page;
+	}
+	return 0;
+}
+
+static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
+{
+	while (mc->nobjs)
+		free_page((unsigned long)mc->objects[--mc->nobjs]);
+}
+
+static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
+{
+	void *p;
+
+	BUG_ON(!mc || !mc->nobjs);
+	p = mc->objects[--mc->nobjs];
+	return p;
+}
+
 static void free_ptes(pmd_t *pmd, unsigned long addr)
 {
 	pte_t *pte;
@@ -217,11 +258,333 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t addr)
 	return __create_hyp_mappings(from, to, &pfn);
 }
 
+/**
+ * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
+ * @kvm:	The KVM struct pointer for the VM.
+ *
+ * Allocates the 1st level table only of size defined by S2_PGD_ORDER (can
+ * support either full 40-bit input addresses or limited to 32-bit input
+ * addresses). Clears the allocated pages.
+ *
+ * Note we don't need locking here as this is only called when the VM is
+ * created, which can only be done once.
+ */
+int kvm_alloc_stage2_pgd(struct kvm *kvm)
+{
+	pgd_t *pgd;
+
+	if (kvm->arch.pgd != NULL) {
+		kvm_err("kvm_arch already initialized?\n");
+		return -EINVAL;
+	}
+
+	pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, S2_PGD_ORDER);
+	if (!pgd)
+		return -ENOMEM;
+
+	/* stage-2 pgd must be aligned to its size */
+	VM_BUG_ON((unsigned long)pgd & (S2_PGD_SIZE - 1));
+
+	memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t));
+	clean_dcache_area(pgd, PTRS_PER_S2_PGD * sizeof(pgd_t));
+	kvm->arch.pgd = pgd;
+
+	return 0;
+}
+
+static void clear_pud_entry(pud_t *pud)
+{
+	pmd_t *pmd_table = pmd_offset(pud, 0);
+	pud_clear(pud);
+	pmd_free(NULL, pmd_table);
+	put_page(virt_to_page(pud));
+}
+
+static void clear_pmd_entry(pmd_t *pmd)
+{
+	pte_t *pte_table = pte_offset_kernel(pmd, 0);
+	pmd_clear(pmd);
+	pte_free_kernel(NULL, pte_table);
+	put_page(virt_to_page(pmd));
+}
+
+static bool pmd_empty(pmd_t *pmd)
+{
+	struct page *pmd_page = virt_to_page(pmd);
+	return page_count(pmd_page) == 1;
+}
+
+static void clear_pte_entry(pte_t *pte)
+{
+	if (pte_present(*pte)) {
+		kvm_set_pte(pte, __pte(0));
+		put_page(virt_to_page(pte));
+	}
+}
+
+static bool pte_empty(pte_t *pte)
+{
+	struct page *pte_page = virt_to_page(pte);
+	return page_count(pte_page) == 1;
+}
+
+/**
+ * unmap_stage2_range -- Clear stage2 page table entries to unmap a range
+ * @kvm:   The VM pointer
+ * @start: The intermediate physical base address of the range to unmap
+ * @size:  The size of the area to unmap
+ *
+ * Clear a range of stage-2 mappings, lowering the various ref-counts.  Must
+ * be called while holding mmu_lock (unless for freeing the stage2 pgd before
+ * destroying the VM), otherwise another faulting VCPU may come in and mess
+ * with things behind our backs.
+ */
+static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+	phys_addr_t addr = start, end = start + size;
+	u64 range;
+
+	while (addr < end) {
+		pgd = kvm->arch.pgd + pgd_index(addr);
+		pud = pud_offset(pgd, addr);
+		if (pud_none(*pud)) {
+			addr += PUD_SIZE;
+			continue;
+		}
+
+		pmd = pmd_offset(pud, addr);
+		if (pmd_none(*pmd)) {
+			addr += PMD_SIZE;
+			continue;
+		}
+
+		pte = pte_offset_kernel(pmd, addr);
+		clear_pte_entry(pte);
+		range = PAGE_SIZE;
+
+		/* If we emptied the pte, walk back up the ladder */
+		if (pte_empty(pte)) {
+			clear_pmd_entry(pmd);
+			range = PMD_SIZE;
+			if (pmd_empty(pmd)) {
+				clear_pud_entry(pud);
+				range = PUD_SIZE;
+			}
+		}
+
+		addr += range;
+	}
+}
+
+/**
+ * kvm_free_stage2_pgd - free all stage-2 tables
+ * @kvm:	The KVM struct pointer for the VM.
+ *
+ * Walks the level-1 page table pointed to by kvm->arch.pgd and frees all
+ * underlying level-2 and level-3 tables before freeing the actual level-1 table
+ * and setting the struct pointer to NULL.
+ *
+ * Note we don't need locking here as this is only called when the VM is
+ * destroyed, which can only be done once.
+ */
+void kvm_free_stage2_pgd(struct kvm *kvm)
+{
+	if (kvm->arch.pgd == NULL)
+		return;
+
+	unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
+	free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER);
+	kvm->arch.pgd = NULL;
+}
+
+
+static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+			  phys_addr_t addr, const pte_t *new_pte, bool iomap)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte, old_pte;
+
+	/* Create 2nd stage page table mapping - Level 1 */
+	pgd = kvm->arch.pgd + pgd_index(addr);
+	pud = pud_offset(pgd, addr);
+	if (pud_none(*pud)) {
+		if (!cache)
+			return 0; /* ignore calls from kvm_set_spte_hva */
+		pmd = mmu_memory_cache_alloc(cache);
+		pud_populate(NULL, pud, pmd);
+		pmd += pmd_index(addr);
+		get_page(virt_to_page(pud));
+	} else
+		pmd = pmd_offset(pud, addr);
+
+	/* Create 2nd stage page table mapping - Level 2 */
+	if (pmd_none(*pmd)) {
+		if (!cache)
+			return 0; /* ignore calls from kvm_set_spte_hva */
+		pte = mmu_memory_cache_alloc(cache);
+		clean_pte_table(pte);
+		pmd_populate_kernel(NULL, pmd, pte);
+		pte += pte_index(addr);
+		get_page(virt_to_page(pmd));
+	} else
+		pte = pte_offset_kernel(pmd, addr);
+
+	if (iomap && pte_present(*pte))
+		return -EFAULT;
+
+	/* Create 2nd stage page table mapping - Level 3 */
+	old_pte = *pte;
+	kvm_set_pte(pte, *new_pte);
+	if (pte_present(old_pte))
+		kvm_tlb_flush_vmid(kvm);
+	else
+		get_page(virt_to_page(pte));
+
+	return 0;
+}
+
+/**
+ * kvm_phys_addr_ioremap - map a device range to guest IPA
+ *
+ * @kvm:	The KVM pointer
+ * @guest_ipa:	The IPA at which to insert the mapping
+ * @pa:		The physical address of the device
+ * @size:	The size of the mapping
+ */
+int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
+			  phys_addr_t pa, unsigned long size)
+{
+	phys_addr_t addr, end;
+	int ret = 0;
+	unsigned long pfn;
+	struct kvm_mmu_memory_cache cache = { 0, };
+
+	end = (guest_ipa + size + PAGE_SIZE - 1) & PAGE_MASK;
+	pfn = __phys_to_pfn(pa);
+
+	for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) {
+		pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE | L_PTE_S2_RDWR);
+
+		ret = mmu_topup_memory_cache(&cache, 2, 2);
+		if (ret)
+			goto out;
+		spin_lock(&kvm->mmu_lock);
+		ret = stage2_set_pte(kvm, &cache, addr, &pte, true);
+		spin_unlock(&kvm->mmu_lock);
+		if (ret)
+			goto out;
+
+		pfn++;
+	}
+
+out:
+	mmu_free_memory_cache(&cache);
+	return ret;
+}
+
 int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
 	return -EINVAL;
 }
 
+static void handle_hva_to_gpa(struct kvm *kvm,
+			      unsigned long start,
+			      unsigned long end,
+			      void (*handler)(struct kvm *kvm,
+					      gpa_t gpa, void *data),
+			      void *data)
+{
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *memslot;
+
+	slots = kvm_memslots(kvm);
+
+	/* we only care about the pages that the guest sees */
+	kvm_for_each_memslot(memslot, slots) {
+		unsigned long hva_start, hva_end;
+		gfn_t gfn, gfn_end;
+
+		hva_start = max(start, memslot->userspace_addr);
+		hva_end = min(end, memslot->userspace_addr +
+					(memslot->npages << PAGE_SHIFT));
+		if (hva_start >= hva_end)
+			continue;
+
+		/*
+		 * {gfn(page) | page intersects with [hva_start, hva_end)} =
+		 * {gfn_start, gfn_start+1, ..., gfn_end-1}.
+		 */
+		gfn = hva_to_gfn_memslot(hva_start, memslot);
+		gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
+
+		for (; gfn < gfn_end; ++gfn) {
+			gpa_t gpa = gfn << PAGE_SHIFT;
+			handler(kvm, gpa, data);
+		}
+	}
+}
+
+static void kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
+{
+	unmap_stage2_range(kvm, gpa, PAGE_SIZE);
+	kvm_tlb_flush_vmid(kvm);
+}
+
+int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+{
+	unsigned long end = hva + PAGE_SIZE;
+
+	if (!kvm->arch.pgd)
+		return 0;
+
+	trace_kvm_unmap_hva(hva);
+	handle_hva_to_gpa(kvm, hva, end, &kvm_unmap_hva_handler, NULL);
+	return 0;
+}
+
+int kvm_unmap_hva_range(struct kvm *kvm,
+			unsigned long start, unsigned long end)
+{
+	if (!kvm->arch.pgd)
+		return 0;
+
+	trace_kvm_unmap_hva_range(start, end);
+	handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL);
+	return 0;
+}
+
+static void kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data)
+{
+	pte_t *pte = (pte_t *)data;
+
+	stage2_set_pte(kvm, NULL, gpa, pte, false);
+}
+
+
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
+{
+	unsigned long end = hva + PAGE_SIZE;
+	pte_t stage2_pte;
+
+	if (!kvm->arch.pgd)
+		return;
+
+	trace_kvm_set_spte_hva(hva);
+	stage2_pte = pfn_pte(pte_pfn(pte), PAGE_S2);
+	handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &stage2_pte);
+}
+
+void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
+{
+	mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
+}
+
 phys_addr_t kvm_mmu_get_httbr(void)
 {
 	VM_BUG_ON(!virt_addr_valid(hyp_pgd));
@@ -230,7 +593,12 @@ phys_addr_t kvm_mmu_get_httbr(void)
 
 int kvm_mmu_init(void)
 {
-	return hyp_pgd ? 0 : -ENOMEM;
+	if (!hyp_pgd) {
+		kvm_err("Hyp mode PGD not allocated\n");
+		return -ENOMEM;
+	}
+
+	return 0;
 }
 
 /**
diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h
index f8869c19c0a3..862b2cc12fbe 100644
--- a/arch/arm/kvm/trace.h
+++ b/arch/arm/kvm/trace.h
@@ -39,7 +39,53 @@ TRACE_EVENT(kvm_exit,
 	TP_printk("PC: 0x%08lx", __entry->vcpu_pc)
 );
 
+TRACE_EVENT(kvm_unmap_hva,
+	TP_PROTO(unsigned long hva),
+	TP_ARGS(hva),
 
+	TP_STRUCT__entry(
+		__field(	unsigned long,	hva		)
+	),
+
+	TP_fast_assign(
+		__entry->hva		= hva;
+	),
+
+	TP_printk("mmu notifier unmap hva: %#08lx", __entry->hva)
+);
+
+TRACE_EVENT(kvm_unmap_hva_range,
+	TP_PROTO(unsigned long start, unsigned long end),
+	TP_ARGS(start, end),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	start		)
+		__field(	unsigned long,	end		)
+	),
+
+	TP_fast_assign(
+		__entry->start		= start;
+		__entry->end		= end;
+	),
+
+	TP_printk("mmu notifier unmap range: %#08lx -- %#08lx",
+		  __entry->start, __entry->end)
+);
+
+TRACE_EVENT(kvm_set_spte_hva,
+	TP_PROTO(unsigned long hva),
+	TP_ARGS(hva),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	hva		)
+	),
+
+	TP_fast_assign(
+		__entry->hva		= hva;
+	),
+
+	TP_printk("mmu notifier set pte hva: %#08lx", __entry->hva)
+);
 
 #endif /* _TRACE_KVM_H */
 
-- 
cgit v1.2.3


From 86ce85352f0da7e1431ad8efcb04323819a620e7 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <cdall@cs.columbia.edu>
Date: Sun, 20 Jan 2013 18:28:08 -0500
Subject: KVM: ARM: Inject IRQs and FIQs from userspace
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

All interrupt injection is now based on the VM ioctl KVM_IRQ_LINE.  This
works semantically well for the GIC as we in fact raise/lower a line on
a machine component (the gic).  The IOCTL uses the follwing struct.

struct kvm_irq_level {
	union {
		__u32 irq;     /* GSI */
		__s32 status;  /* not used for KVM_IRQ_LEVEL */
	};
	__u32 level;           /* 0 or 1 */
};

ARM can signal an interrupt either at the CPU level, or at the in-kernel irqchip
(GIC), and for in-kernel irqchip can tell the GIC to use PPIs designated for
specific cpus.  The irq field is interpreted like this:

  bits:  | 31 ... 24 | 23  ... 16 | 15    ...    0 |
  field: | irq_type  | vcpu_index |   irq_number   |

The irq_type field has the following values:
- irq_type[0]: out-of-kernel GIC: irq_number 0 is IRQ, irq_number 1 is FIQ
- irq_type[1]: in-kernel GIC: SPI, irq_number between 32 and 1019 (incl.)
               (the vcpu_index field is ignored)
- irq_type[2]: in-kernel GIC: PPI, irq_number between 16 and 31 (incl.)

The irq_number thus corresponds to the irq ID in as in the GICv2 specs.

This is documented in Documentation/kvm/api.txt.

Reviewed-by: Will Deacon <will.deacon@arm.com>
Reviewed-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
---
 Documentation/virtual/kvm/api.txt | 25 ++++++++++++---
 arch/arm/include/asm/kvm_arm.h    |  1 +
 arch/arm/include/uapi/asm/kvm.h   | 21 +++++++++++++
 arch/arm/kvm/arm.c                | 65 +++++++++++++++++++++++++++++++++++++++
 arch/arm/kvm/trace.h              | 25 +++++++++++++++
 include/uapi/linux/kvm.h          |  1 +
 6 files changed, 134 insertions(+), 4 deletions(-)

(limited to 'arch/arm/kvm/arm.c')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 4237c27ea612..505049299298 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -615,15 +615,32 @@ created.
 4.25 KVM_IRQ_LINE
 
 Capability: KVM_CAP_IRQCHIP
-Architectures: x86, ia64
+Architectures: x86, ia64, arm
 Type: vm ioctl
 Parameters: struct kvm_irq_level
 Returns: 0 on success, -1 on error
 
 Sets the level of a GSI input to the interrupt controller model in the kernel.
-Requires that an interrupt controller model has been previously created with
-KVM_CREATE_IRQCHIP.  Note that edge-triggered interrupts require the level
-to be set to 1 and then back to 0.
+On some architectures it is required that an interrupt controller model has
+been previously created with KVM_CREATE_IRQCHIP.  Note that edge-triggered
+interrupts require the level to be set to 1 and then back to 0.
+
+ARM can signal an interrupt either at the CPU level, or at the in-kernel irqchip
+(GIC), and for in-kernel irqchip can tell the GIC to use PPIs designated for
+specific cpus.  The irq field is interpreted like this:
+
+  bits:  | 31 ... 24 | 23  ... 16 | 15    ...    0 |
+  field: | irq_type  | vcpu_index |     irq_id     |
+
+The irq_type field has the following values:
+- irq_type[0]: out-of-kernel GIC: irq_id 0 is IRQ, irq_id 1 is FIQ
+- irq_type[1]: in-kernel GIC: SPI, irq_id between 32 and 1019 (incl.)
+               (the vcpu_index field is ignored)
+- irq_type[2]: in-kernel GIC: PPI, irq_id between 16 and 31 (incl.)
+
+(The irq_id field thus corresponds nicely to the IRQ ID in the ARM GIC specs)
+
+In both cases, level is used to raise/lower the line.
 
 struct kvm_irq_level {
 	union {
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index 8875b3f605a7..d64b5250ad4e 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -68,6 +68,7 @@
 #define HCR_GUEST_MASK (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \
 			HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \
 			HCR_SWIO | HCR_TIDCP)
+#define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
 
 /* Hyp System Control Register (HSCTLR) bits */
 #define HSCTLR_TE	(1 << 30)
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index 1083327b5fcd..53f45f146875 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -23,6 +23,7 @@
 #include <asm/ptrace.h>
 
 #define __KVM_HAVE_GUEST_DEBUG
+#define __KVM_HAVE_IRQ_LINE
 
 #define KVM_REG_SIZE(id)						\
 	(1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
@@ -103,4 +104,24 @@ struct kvm_arch_memory_slot {
 #define KVM_REG_ARM_CORE		(0x0010 << KVM_REG_ARM_COPROC_SHIFT)
 #define KVM_REG_ARM_CORE_REG(name)	(offsetof(struct kvm_regs, name) / 4)
 
+/* KVM_IRQ_LINE irq field index values */
+#define KVM_ARM_IRQ_TYPE_SHIFT		24
+#define KVM_ARM_IRQ_TYPE_MASK		0xff
+#define KVM_ARM_IRQ_VCPU_SHIFT		16
+#define KVM_ARM_IRQ_VCPU_MASK		0xff
+#define KVM_ARM_IRQ_NUM_SHIFT		0
+#define KVM_ARM_IRQ_NUM_MASK		0xffff
+
+/* irq_type field */
+#define KVM_ARM_IRQ_TYPE_CPU		0
+#define KVM_ARM_IRQ_TYPE_SPI		1
+#define KVM_ARM_IRQ_TYPE_PPI		2
+
+/* out-of-kernel GIC cpu interrupt injection irq_number field */
+#define KVM_ARM_IRQ_CPU_IRQ		0
+#define KVM_ARM_IRQ_CPU_FIQ		1
+
+/* Highest supported SPI, from VGIC_NR_IRQS */
+#define KVM_ARM_IRQ_GIC_MAX		127
+
 #endif /* __ARM_KVM_H__ */
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index d810afb6cb84..2101152c3a4b 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -24,6 +24,7 @@
 #include <linux/fs.h>
 #include <linux/mman.h>
 #include <linux/sched.h>
+#include <linux/kvm.h>
 #include <trace/events/kvm.h>
 
 #define CREATE_TRACE_POINTS
@@ -284,6 +285,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
+	vcpu->cpu = cpu;
 }
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -319,6 +321,69 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	return -EINVAL;
 }
 
+static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level)
+{
+	int bit_index;
+	bool set;
+	unsigned long *ptr;
+
+	if (number == KVM_ARM_IRQ_CPU_IRQ)
+		bit_index = __ffs(HCR_VI);
+	else /* KVM_ARM_IRQ_CPU_FIQ */
+		bit_index = __ffs(HCR_VF);
+
+	ptr = (unsigned long *)&vcpu->arch.irq_lines;
+	if (level)
+		set = test_and_set_bit(bit_index, ptr);
+	else
+		set = test_and_clear_bit(bit_index, ptr);
+
+	/*
+	 * If we didn't change anything, no need to wake up or kick other CPUs
+	 */
+	if (set == level)
+		return 0;
+
+	/*
+	 * The vcpu irq_lines field was updated, wake up sleeping VCPUs and
+	 * trigger a world-switch round on the running physical CPU to set the
+	 * virtual IRQ/FIQ fields in the HCR appropriately.
+	 */
+	kvm_vcpu_kick(vcpu);
+
+	return 0;
+}
+
+int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level)
+{
+	u32 irq = irq_level->irq;
+	unsigned int irq_type, vcpu_idx, irq_num;
+	int nrcpus = atomic_read(&kvm->online_vcpus);
+	struct kvm_vcpu *vcpu = NULL;
+	bool level = irq_level->level;
+
+	irq_type = (irq >> KVM_ARM_IRQ_TYPE_SHIFT) & KVM_ARM_IRQ_TYPE_MASK;
+	vcpu_idx = (irq >> KVM_ARM_IRQ_VCPU_SHIFT) & KVM_ARM_IRQ_VCPU_MASK;
+	irq_num = (irq >> KVM_ARM_IRQ_NUM_SHIFT) & KVM_ARM_IRQ_NUM_MASK;
+
+	trace_kvm_irq_line(irq_type, vcpu_idx, irq_num, irq_level->level);
+
+	if (irq_type != KVM_ARM_IRQ_TYPE_CPU)
+		return -EINVAL;
+
+	if (vcpu_idx >= nrcpus)
+		return -EINVAL;
+
+	vcpu = kvm_get_vcpu(kvm, vcpu_idx);
+	if (!vcpu)
+		return -EINVAL;
+
+	if (irq_num > KVM_ARM_IRQ_CPU_FIQ)
+		return -EINVAL;
+
+	return vcpu_interrupt_line(vcpu, irq_num, level);
+}
+
 long kvm_arch_vcpu_ioctl(struct file *filp,
 			 unsigned int ioctl, unsigned long arg)
 {
diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h
index 862b2cc12fbe..105d1f79909a 100644
--- a/arch/arm/kvm/trace.h
+++ b/arch/arm/kvm/trace.h
@@ -39,6 +39,31 @@ TRACE_EVENT(kvm_exit,
 	TP_printk("PC: 0x%08lx", __entry->vcpu_pc)
 );
 
+TRACE_EVENT(kvm_irq_line,
+	TP_PROTO(unsigned int type, int vcpu_idx, int irq_num, int level),
+	TP_ARGS(type, vcpu_idx, irq_num, level),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	type		)
+		__field(	int,		vcpu_idx	)
+		__field(	int,		irq_num		)
+		__field(	int,		level		)
+	),
+
+	TP_fast_assign(
+		__entry->type		= type;
+		__entry->vcpu_idx	= vcpu_idx;
+		__entry->irq_num	= irq_num;
+		__entry->level		= level;
+	),
+
+	TP_printk("Inject %s interrupt (%d), vcpu->idx: %d, num: %d, level: %d",
+		  (__entry->type == KVM_ARM_IRQ_TYPE_CPU) ? "CPU" :
+		  (__entry->type == KVM_ARM_IRQ_TYPE_PPI) ? "VGIC PPI" :
+		  (__entry->type == KVM_ARM_IRQ_TYPE_SPI) ? "VGIC SPI" : "UNKNOWN",
+		  __entry->type, __entry->vcpu_idx, __entry->irq_num, __entry->level)
+);
+
 TRACE_EVENT(kvm_unmap_hva,
 	TP_PROTO(unsigned long hva),
 	TP_ARGS(hva),
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 24978d525c6e..dc63665e73ad 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -115,6 +115,7 @@ struct kvm_irq_level {
 	 * ACPI gsi notion of irq.
 	 * For IA-64 (APIC model) IOAPIC0: irq 0-23; IOAPIC1: irq 24-47..
 	 * For X86 (standard AT mode) PIC0/1: irq 0-15. IOAPIC0: 0-23..
+	 * For ARM: See Documentation/virtual/kvm/api.txt
 	 */
 	union {
 		__u32 irq;
-- 
cgit v1.2.3


From f7ed45be3ba524e06a6d933f0517dc7ad2d06703 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <c.dall@virtualopensystems.com>
Date: Sun, 20 Jan 2013 18:47:42 -0500
Subject: KVM: ARM: World-switch implementation

Provides complete world-switch implementation to switch to other guests
running in non-secure modes. Includes Hyp exception handlers that
capture necessary exception information and stores the information on
the VCPU and KVM structures.

The following Hyp-ABI is also documented in the code:

Hyp-ABI: Calling HYP-mode functions from host (in SVC mode):
   Switching to Hyp mode is done through a simple HVC #0 instruction. The
   exception vector code will check that the HVC comes from VMID==0 and if
   so will push the necessary state (SPSR, lr_usr) on the Hyp stack.
   - r0 contains a pointer to a HYP function
   - r1, r2, and r3 contain arguments to the above function.
   - The HYP function will be called with its arguments in r0, r1 and r2.
   On HYP function return, we return directly to SVC.

A call to a function executing in Hyp mode is performed like the following:

        <svc code>
        ldr     r0, =BSYM(my_hyp_fn)
        ldr     r1, =my_param
        hvc #0  ; Call my_hyp_fn(my_param) from HYP mode
        <svc code>

Otherwise, the world-switch is pretty straight-forward. All state that
can be modified by the guest is first backed up on the Hyp stack and the
VCPU values is loaded onto the hardware. State, which is not loaded, but
theoretically modifiable by the guest is protected through the
virtualiation features to generate a trap and cause software emulation.
Upon guest returns, all state is restored from hardware onto the VCPU
struct and the original state is restored from the Hyp-stack onto the
hardware.

SMP support using the VMPIDR calculated on the basis of the host MPIDR
and overriding the low bits with KVM vcpu_id contributed by Marc Zyngier.

Reuse of VMIDs has been implemented by Antonios Motakis and adapated from
a separate patch into the appropriate patches introducing the
functionality. Note that the VMIDs are stored per VM as required by the ARM
architecture reference manual.

To support VFP/NEON we trap those instructions using the HPCTR. When
we trap, we switch the FPU.  After a guest exit, the VFP state is
returned to the host.  When disabling access to floating point
instructions, we also mask FPEXC_EN in order to avoid the guest
receiving Undefined instruction exceptions before we have a chance to
switch back the floating point state.  We are reusing vfp_hard_struct,
so we depend on VFPv3 being enabled in the host kernel, if not, we still
trap cp10 and cp11 in order to inject an undefined instruction exception
whenever the guest tries to use VFP/NEON. VFP/NEON developed by
Antionios Motakis and Rusty Russell.

Aborts that are permission faults, and not stage-1 page table walk, do
not report the faulting address in the HPFAR.  We have to resolve the
IPA, and store it just like the HPFAR register on the VCPU struct. If
the IPA cannot be resolved, it means another CPU is playing with the
page tables, and we simply restart the guest.  This quirk was fixed by
Marc Zyngier.

Reviewed-by: Will Deacon <will.deacon@arm.com>
Reviewed-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Antonios Motakis <a.motakis@virtualopensystems.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
---
 arch/arm/include/asm/kvm_arm.h  |  51 +++++
 arch/arm/include/asm/kvm_host.h |  13 ++
 arch/arm/kernel/asm-offsets.c   |  25 +++
 arch/arm/kvm/arm.c              | 200 +++++++++++++++++-
 arch/arm/kvm/interrupts.S       | 396 +++++++++++++++++++++++++++++++++++-
 arch/arm/kvm/interrupts_head.S  | 441 ++++++++++++++++++++++++++++++++++++++++
 6 files changed, 1122 insertions(+), 4 deletions(-)
 create mode 100644 arch/arm/kvm/interrupts_head.S

(limited to 'arch/arm/kvm/arm.c')

diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index d64b5250ad4e..c69936b1fc53 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -98,6 +98,18 @@
 #define TTBCR_T0SZ	3
 #define HTCR_MASK	(TTBCR_T0SZ | TTBCR_IRGN0 | TTBCR_ORGN0 | TTBCR_SH0)
 
+/* Hyp System Trap Register */
+#define HSTR_T(x)	(1 << x)
+#define HSTR_TTEE	(1 << 16)
+#define HSTR_TJDBX	(1 << 17)
+
+/* Hyp Coprocessor Trap Register */
+#define HCPTR_TCP(x)	(1 << x)
+#define HCPTR_TCP_MASK	(0x3fff)
+#define HCPTR_TASE	(1 << 15)
+#define HCPTR_TTA	(1 << 20)
+#define HCPTR_TCPAC	(1 << 31)
+
 /* Hyp Debug Configuration Register bits */
 #define HDCR_TDRA	(1 << 11)
 #define HDCR_TDOSA	(1 << 10)
@@ -144,6 +156,45 @@
 #else
 #define VTTBR_X		(5 - KVM_T0SZ)
 #endif
+#define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
+#define VTTBR_BADDR_MASK  (((1LLU << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
+#define VTTBR_VMID_SHIFT  (48LLU)
+#define VTTBR_VMID_MASK	  (0xffLLU << VTTBR_VMID_SHIFT)
+
+/* Hyp Syndrome Register (HSR) bits */
+#define HSR_EC_SHIFT	(26)
+#define HSR_EC		(0x3fU << HSR_EC_SHIFT)
+#define HSR_IL		(1U << 25)
+#define HSR_ISS		(HSR_IL - 1)
+#define HSR_ISV_SHIFT	(24)
+#define HSR_ISV		(1U << HSR_ISV_SHIFT)
+#define HSR_FSC		(0x3f)
+#define HSR_FSC_TYPE	(0x3c)
+#define HSR_WNR		(1 << 6)
+
+#define FSC_FAULT	(0x04)
+#define FSC_PERM	(0x0c)
+
+/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
+#define HPFAR_MASK	(~0xf)
 
+#define HSR_EC_UNKNOWN	(0x00)
+#define HSR_EC_WFI	(0x01)
+#define HSR_EC_CP15_32	(0x03)
+#define HSR_EC_CP15_64	(0x04)
+#define HSR_EC_CP14_MR	(0x05)
+#define HSR_EC_CP14_LS	(0x06)
+#define HSR_EC_CP_0_13	(0x07)
+#define HSR_EC_CP10_ID	(0x08)
+#define HSR_EC_JAZELLE	(0x09)
+#define HSR_EC_BXJ	(0x0A)
+#define HSR_EC_CP14_64	(0x0C)
+#define HSR_EC_SVC_HYP	(0x11)
+#define HSR_EC_HVC	(0x12)
+#define HSR_EC_SMC	(0x13)
+#define HSR_EC_IABT	(0x20)
+#define HSR_EC_IABT_HYP	(0x21)
+#define HSR_EC_DABT	(0x24)
+#define HSR_EC_DABT_HYP	(0x25)
 
 #endif /* __ARM_KVM_ARM_H__ */
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 3636c7ea4eb2..7a121089c733 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -21,6 +21,7 @@
 
 #include <asm/kvm.h>
 #include <asm/kvm_asm.h>
+#include <asm/fpstate.h>
 
 #define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS
 #define KVM_MEMORY_SLOTS 32
@@ -85,6 +86,14 @@ struct kvm_vcpu_arch {
 	u32 hxfar;		/* Hyp Data/Inst Fault Address Register */
 	u32 hpfar;		/* Hyp IPA Fault Address Register */
 
+	/* Floating point registers (VFP and Advanced SIMD/NEON) */
+	struct vfp_hard_struct vfp_guest;
+	struct vfp_hard_struct *vfp_host;
+
+	/*
+	 * Anything that is not used directly from assembly code goes
+	 * here.
+	 */
 	/* Interrupt related fields */
 	u32 irq_lines;		/* IRQ and FIQ levels */
 
@@ -93,6 +102,9 @@ struct kvm_vcpu_arch {
 
 	/* Cache some mmu pages needed inside spinlock regions */
 	struct kvm_mmu_memory_cache mmu_page_cache;
+
+	/* Detect first run of a vcpu */
+	bool has_run_once;
 };
 
 struct kvm_vm_stat {
@@ -112,6 +124,7 @@ struct kvm_one_reg;
 int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
 int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
 u64 kvm_call_hyp(void *hypfn, ...);
+void force_vm_exit(const cpumask_t *mask);
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
 struct kvm;
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index c985b481192c..c8b3272dfed1 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -13,6 +13,9 @@
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/dma-mapping.h>
+#ifdef CONFIG_KVM_ARM_HOST
+#include <linux/kvm_host.h>
+#endif
 #include <asm/cacheflush.h>
 #include <asm/glue-df.h>
 #include <asm/glue-pf.h>
@@ -146,5 +149,27 @@ int main(void)
   DEFINE(DMA_BIDIRECTIONAL,	DMA_BIDIRECTIONAL);
   DEFINE(DMA_TO_DEVICE,		DMA_TO_DEVICE);
   DEFINE(DMA_FROM_DEVICE,	DMA_FROM_DEVICE);
+#ifdef CONFIG_KVM_ARM_HOST
+  DEFINE(VCPU_KVM,		offsetof(struct kvm_vcpu, kvm));
+  DEFINE(VCPU_MIDR,		offsetof(struct kvm_vcpu, arch.midr));
+  DEFINE(VCPU_CP15,		offsetof(struct kvm_vcpu, arch.cp15));
+  DEFINE(VCPU_VFP_GUEST,	offsetof(struct kvm_vcpu, arch.vfp_guest));
+  DEFINE(VCPU_VFP_HOST,		offsetof(struct kvm_vcpu, arch.vfp_host));
+  DEFINE(VCPU_REGS,		offsetof(struct kvm_vcpu, arch.regs));
+  DEFINE(VCPU_USR_REGS,		offsetof(struct kvm_vcpu, arch.regs.usr_regs));
+  DEFINE(VCPU_SVC_REGS,		offsetof(struct kvm_vcpu, arch.regs.svc_regs));
+  DEFINE(VCPU_ABT_REGS,		offsetof(struct kvm_vcpu, arch.regs.abt_regs));
+  DEFINE(VCPU_UND_REGS,		offsetof(struct kvm_vcpu, arch.regs.und_regs));
+  DEFINE(VCPU_IRQ_REGS,		offsetof(struct kvm_vcpu, arch.regs.irq_regs));
+  DEFINE(VCPU_FIQ_REGS,		offsetof(struct kvm_vcpu, arch.regs.fiq_regs));
+  DEFINE(VCPU_PC,		offsetof(struct kvm_vcpu, arch.regs.usr_regs.ARM_pc));
+  DEFINE(VCPU_CPSR,		offsetof(struct kvm_vcpu, arch.regs.usr_regs.ARM_cpsr));
+  DEFINE(VCPU_IRQ_LINES,	offsetof(struct kvm_vcpu, arch.irq_lines));
+  DEFINE(VCPU_HSR,		offsetof(struct kvm_vcpu, arch.hsr));
+  DEFINE(VCPU_HxFAR,		offsetof(struct kvm_vcpu, arch.hxfar));
+  DEFINE(VCPU_HPFAR,		offsetof(struct kvm_vcpu, arch.hpfar));
+  DEFINE(VCPU_HYP_PC,		offsetof(struct kvm_vcpu, arch.hyp_pc));
+  DEFINE(KVM_VTTBR,		offsetof(struct kvm, arch.vttbr));
+#endif
   return 0; 
 }
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 2101152c3a4b..9e9fa4477884 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -40,6 +40,7 @@
 #include <asm/kvm_arm.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmu.h>
+#include <asm/kvm_emulate.h>
 
 #ifdef REQUIRES_VIRT
 __asm__(".arch_extension	virt");
@@ -49,6 +50,10 @@ static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
 static struct vfp_hard_struct __percpu *kvm_host_vfp_state;
 static unsigned long hyp_default_vectors;
 
+/* The VMID used in the VTTBR */
+static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
+static u8 kvm_next_vmid;
+static DEFINE_SPINLOCK(kvm_vmid_lock);
 
 int kvm_arch_hardware_enable(void *garbage)
 {
@@ -276,6 +281,8 @@ int __attribute_const__ kvm_target_cpu(void)
 
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 {
+	/* Force users to call KVM_ARM_VCPU_INIT */
+	vcpu->arch.target = -1;
 	return 0;
 }
 
@@ -286,6 +293,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	vcpu->cpu = cpu;
+	vcpu->arch.vfp_host = this_cpu_ptr(kvm_host_vfp_state);
 }
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -316,9 +324,199 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
 	return 0;
 }
 
+/* Just ensure a guest exit from a particular CPU */
+static void exit_vm_noop(void *info)
+{
+}
+
+void force_vm_exit(const cpumask_t *mask)
+{
+	smp_call_function_many(mask, exit_vm_noop, NULL, true);
+}
+
+/**
+ * need_new_vmid_gen - check that the VMID is still valid
+ * @kvm: The VM's VMID to checkt
+ *
+ * return true if there is a new generation of VMIDs being used
+ *
+ * The hardware supports only 256 values with the value zero reserved for the
+ * host, so we check if an assigned value belongs to a previous generation,
+ * which which requires us to assign a new value. If we're the first to use a
+ * VMID for the new generation, we must flush necessary caches and TLBs on all
+ * CPUs.
+ */
+static bool need_new_vmid_gen(struct kvm *kvm)
+{
+	return unlikely(kvm->arch.vmid_gen != atomic64_read(&kvm_vmid_gen));
+}
+
+/**
+ * update_vttbr - Update the VTTBR with a valid VMID before the guest runs
+ * @kvm	The guest that we are about to run
+ *
+ * Called from kvm_arch_vcpu_ioctl_run before entering the guest to ensure the
+ * VM has a valid VMID, otherwise assigns a new one and flushes corresponding
+ * caches and TLBs.
+ */
+static void update_vttbr(struct kvm *kvm)
+{
+	phys_addr_t pgd_phys;
+	u64 vmid;
+
+	if (!need_new_vmid_gen(kvm))
+		return;
+
+	spin_lock(&kvm_vmid_lock);
+
+	/*
+	 * We need to re-check the vmid_gen here to ensure that if another vcpu
+	 * already allocated a valid vmid for this vm, then this vcpu should
+	 * use the same vmid.
+	 */
+	if (!need_new_vmid_gen(kvm)) {
+		spin_unlock(&kvm_vmid_lock);
+		return;
+	}
+
+	/* First user of a new VMID generation? */
+	if (unlikely(kvm_next_vmid == 0)) {
+		atomic64_inc(&kvm_vmid_gen);
+		kvm_next_vmid = 1;
+
+		/*
+		 * On SMP we know no other CPUs can use this CPU's or each
+		 * other's VMID after force_vm_exit returns since the
+		 * kvm_vmid_lock blocks them from reentry to the guest.
+		 */
+		force_vm_exit(cpu_all_mask);
+		/*
+		 * Now broadcast TLB + ICACHE invalidation over the inner
+		 * shareable domain to make sure all data structures are
+		 * clean.
+		 */
+		kvm_call_hyp(__kvm_flush_vm_context);
+	}
+
+	kvm->arch.vmid_gen = atomic64_read(&kvm_vmid_gen);
+	kvm->arch.vmid = kvm_next_vmid;
+	kvm_next_vmid++;
+
+	/* update vttbr to be used with the new vmid */
+	pgd_phys = virt_to_phys(kvm->arch.pgd);
+	vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK;
+	kvm->arch.vttbr = pgd_phys & VTTBR_BADDR_MASK;
+	kvm->arch.vttbr |= vmid;
+
+	spin_unlock(&kvm_vmid_lock);
+}
+
+/*
+ * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on
+ * proper exit to QEMU.
+ */
+static int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
+		       int exception_index)
+{
+	run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+	return 0;
+}
+
+static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
+{
+	if (likely(vcpu->arch.has_run_once))
+		return 0;
+
+	vcpu->arch.has_run_once = true;
+	return 0;
+}
+
+/**
+ * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code
+ * @vcpu:	The VCPU pointer
+ * @run:	The kvm_run structure pointer used for userspace state exchange
+ *
+ * This function is called through the VCPU_RUN ioctl called from user space. It
+ * will execute VM code in a loop until the time slice for the process is used
+ * or some emulation is needed from user space in which case the function will
+ * return with return value 0 and with the kvm_run structure filled in with the
+ * required data for the requested emulation.
+ */
 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
-	return -EINVAL;
+	int ret;
+	sigset_t sigsaved;
+
+	/* Make sure they initialize the vcpu with KVM_ARM_VCPU_INIT */
+	if (unlikely(vcpu->arch.target < 0))
+		return -ENOEXEC;
+
+	ret = kvm_vcpu_first_run_init(vcpu);
+	if (ret)
+		return ret;
+
+	if (vcpu->sigset_active)
+		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
+
+	ret = 1;
+	run->exit_reason = KVM_EXIT_UNKNOWN;
+	while (ret > 0) {
+		/*
+		 * Check conditions before entering the guest
+		 */
+		cond_resched();
+
+		update_vttbr(vcpu->kvm);
+
+		local_irq_disable();
+
+		/*
+		 * Re-check atomic conditions
+		 */
+		if (signal_pending(current)) {
+			ret = -EINTR;
+			run->exit_reason = KVM_EXIT_INTR;
+		}
+
+		if (ret <= 0 || need_new_vmid_gen(vcpu->kvm)) {
+			local_irq_enable();
+			continue;
+		}
+
+		/**************************************************************
+		 * Enter the guest
+		 */
+		trace_kvm_entry(*vcpu_pc(vcpu));
+		kvm_guest_enter();
+		vcpu->mode = IN_GUEST_MODE;
+
+		ret = kvm_call_hyp(__kvm_vcpu_run, vcpu);
+
+		vcpu->mode = OUTSIDE_GUEST_MODE;
+		kvm_guest_exit();
+		trace_kvm_exit(*vcpu_pc(vcpu));
+		/*
+		 * We may have taken a host interrupt in HYP mode (ie
+		 * while executing the guest). This interrupt is still
+		 * pending, as we haven't serviced it yet!
+		 *
+		 * We're now back in SVC mode, with interrupts
+		 * disabled.  Enabling the interrupts now will have
+		 * the effect of taking the interrupt again, in SVC
+		 * mode this time.
+		 */
+		local_irq_enable();
+
+		/*
+		 * Back from guest
+		 *************************************************************/
+
+		ret = handle_exit(vcpu, run, ret);
+	}
+
+	if (vcpu->sigset_active)
+		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+	return ret;
 }
 
 static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level)
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index f701aff31e44..c5400d2e97ca 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -20,9 +20,12 @@
 #include <linux/const.h>
 #include <asm/unified.h>
 #include <asm/page.h>
+#include <asm/ptrace.h>
 #include <asm/asm-offsets.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_arm.h>
+#include <asm/vfpmacros.h>
+#include "interrupts_head.S"
 
 	.text
 
@@ -31,23 +34,164 @@ __kvm_hyp_code_start:
 
 /********************************************************************
  * Flush per-VMID TLBs
+ *
+ * void __kvm_tlb_flush_vmid(struct kvm *kvm);
+ *
+ * We rely on the hardware to broadcast the TLB invalidation to all CPUs
+ * inside the inner-shareable domain (which is the case for all v7
+ * implementations).  If we come across a non-IS SMP implementation, we'll
+ * have to use an IPI based mechanism. Until then, we stick to the simple
+ * hardware assisted version.
  */
 ENTRY(__kvm_tlb_flush_vmid)
+	push	{r2, r3}
+
+	add	r0, r0, #KVM_VTTBR
+	ldrd	r2, r3, [r0]
+	mcrr	p15, 6, r2, r3, c2	@ Write VTTBR
+	isb
+	mcr     p15, 0, r0, c8, c3, 0	@ TLBIALLIS (rt ignored)
+	dsb
+	isb
+	mov	r2, #0
+	mov	r3, #0
+	mcrr	p15, 6, r2, r3, c2	@ Back to VMID #0
+	isb				@ Not necessary if followed by eret
+
+	pop	{r2, r3}
 	bx	lr
 ENDPROC(__kvm_tlb_flush_vmid)
 
 /********************************************************************
- * Flush TLBs and instruction caches of current CPU for all VMIDs
+ * Flush TLBs and instruction caches of all CPUs inside the inner-shareable
+ * domain, for all VMIDs
+ *
+ * void __kvm_flush_vm_context(void);
  */
 ENTRY(__kvm_flush_vm_context)
+	mov	r0, #0			@ rn parameter for c15 flushes is SBZ
+
+	/* Invalidate NS Non-Hyp TLB Inner Shareable (TLBIALLNSNHIS) */
+	mcr     p15, 4, r0, c8, c3, 4
+	/* Invalidate instruction caches Inner Shareable (ICIALLUIS) */
+	mcr     p15, 0, r0, c7, c1, 0
+	dsb
+	isb				@ Not necessary if followed by eret
+
 	bx	lr
 ENDPROC(__kvm_flush_vm_context)
 
+
 /********************************************************************
  *  Hypervisor world-switch code
+ *
+ *
+ * int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
  */
 ENTRY(__kvm_vcpu_run)
-	bx	lr
+	@ Save the vcpu pointer
+	mcr	p15, 4, vcpu, c13, c0, 2	@ HTPIDR
+
+	save_host_regs
+
+	@ Store hardware CP15 state and load guest state
+	read_cp15_state store_to_vcpu = 0
+	write_cp15_state read_from_vcpu = 1
+
+	@ If the host kernel has not been configured with VFPv3 support,
+	@ then it is safer if we deny guests from using it as well.
+#ifdef CONFIG_VFPv3
+	@ Set FPEXC_EN so the guest doesn't trap floating point instructions
+	VFPFMRX r2, FPEXC		@ VMRS
+	push	{r2}
+	orr	r2, r2, #FPEXC_EN
+	VFPFMXR FPEXC, r2		@ VMSR
+#endif
+
+	@ Configure Hyp-role
+	configure_hyp_role vmentry
+
+	@ Trap coprocessor CRx accesses
+	set_hstr vmentry
+	set_hcptr vmentry, (HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11))
+	set_hdcr vmentry
+
+	@ Write configured ID register into MIDR alias
+	ldr	r1, [vcpu, #VCPU_MIDR]
+	mcr	p15, 4, r1, c0, c0, 0
+
+	@ Write guest view of MPIDR into VMPIDR
+	ldr	r1, [vcpu, #CP15_OFFSET(c0_MPIDR)]
+	mcr	p15, 4, r1, c0, c0, 5
+
+	@ Set up guest memory translation
+	ldr	r1, [vcpu, #VCPU_KVM]
+	add	r1, r1, #KVM_VTTBR
+	ldrd	r2, r3, [r1]
+	mcrr	p15, 6, r2, r3, c2	@ Write VTTBR
+
+	@ We're all done, just restore the GPRs and go to the guest
+	restore_guest_regs
+	clrex				@ Clear exclusive monitor
+	eret
+
+__kvm_vcpu_return:
+	/*
+	 * return convention:
+	 * guest r0, r1, r2 saved on the stack
+	 * r0: vcpu pointer
+	 * r1: exception code
+	 */
+	save_guest_regs
+
+	@ Set VMID == 0
+	mov	r2, #0
+	mov	r3, #0
+	mcrr	p15, 6, r2, r3, c2	@ Write VTTBR
+
+	@ Don't trap coprocessor accesses for host kernel
+	set_hstr vmexit
+	set_hdcr vmexit
+	set_hcptr vmexit, (HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11))
+
+#ifdef CONFIG_VFPv3
+	@ Save floating point registers we if let guest use them.
+	tst	r2, #(HCPTR_TCP(10) | HCPTR_TCP(11))
+	bne	after_vfp_restore
+
+	@ Switch VFP/NEON hardware state to the host's
+	add	r7, vcpu, #VCPU_VFP_GUEST
+	store_vfp_state r7
+	add	r7, vcpu, #VCPU_VFP_HOST
+	ldr	r7, [r7]
+	restore_vfp_state r7
+
+after_vfp_restore:
+	@ Restore FPEXC_EN which we clobbered on entry
+	pop	{r2}
+	VFPFMXR FPEXC, r2
+#endif
+
+	@ Reset Hyp-role
+	configure_hyp_role vmexit
+
+	@ Let host read hardware MIDR
+	mrc	p15, 0, r2, c0, c0, 0
+	mcr	p15, 4, r2, c0, c0, 0
+
+	@ Back to hardware MPIDR
+	mrc	p15, 0, r2, c0, c0, 5
+	mcr	p15, 4, r2, c0, c0, 5
+
+	@ Store guest CP15 state and restore host state
+	read_cp15_state store_to_vcpu = 1
+	write_cp15_state read_from_vcpu = 0
+
+	restore_host_regs
+	clrex				@ Clear exclusive monitor
+	mov	r0, r1			@ Return the return code
+	mov	r1, #0			@ Clear upper bits in return value
+	bx	lr			@ return to IOCTL
 
 /********************************************************************
  *  Call function in Hyp mode
@@ -77,12 +221,258 @@ ENTRY(kvm_call_hyp)
 
 /********************************************************************
  * Hypervisor exception vector and handlers
+ *
+ *
+ * The KVM/ARM Hypervisor ABI is defined as follows:
+ *
+ * Entry to Hyp mode from the host kernel will happen _only_ when an HVC
+ * instruction is issued since all traps are disabled when running the host
+ * kernel as per the Hyp-mode initialization at boot time.
+ *
+ * HVC instructions cause a trap to the vector page + offset 0x18 (see hyp_hvc
+ * below) when the HVC instruction is called from SVC mode (i.e. a guest or the
+ * host kernel) and they cause a trap to the vector page + offset 0xc when HVC
+ * instructions are called from within Hyp-mode.
+ *
+ * Hyp-ABI: Calling HYP-mode functions from host (in SVC mode):
+ *    Switching to Hyp mode is done through a simple HVC #0 instruction. The
+ *    exception vector code will check that the HVC comes from VMID==0 and if
+ *    so will push the necessary state (SPSR, lr_usr) on the Hyp stack.
+ *    - r0 contains a pointer to a HYP function
+ *    - r1, r2, and r3 contain arguments to the above function.
+ *    - The HYP function will be called with its arguments in r0, r1 and r2.
+ *    On HYP function return, we return directly to SVC.
+ *
+ * Note that the above is used to execute code in Hyp-mode from a host-kernel
+ * point of view, and is a different concept from performing a world-switch and
+ * executing guest code SVC mode (with a VMID != 0).
  */
 
+/* Handle undef, svc, pabt, or dabt by crashing with a user notice */
+.macro bad_exception exception_code, panic_str
+	push	{r0-r2}
+	mrrc	p15, 6, r0, r1, c2	@ Read VTTBR
+	lsr	r1, r1, #16
+	ands	r1, r1, #0xff
+	beq	99f
+
+	load_vcpu			@ Load VCPU pointer
+	.if \exception_code == ARM_EXCEPTION_DATA_ABORT
+	mrc	p15, 4, r2, c5, c2, 0	@ HSR
+	mrc	p15, 4, r1, c6, c0, 0	@ HDFAR
+	str	r2, [vcpu, #VCPU_HSR]
+	str	r1, [vcpu, #VCPU_HxFAR]
+	.endif
+	.if \exception_code == ARM_EXCEPTION_PREF_ABORT
+	mrc	p15, 4, r2, c5, c2, 0	@ HSR
+	mrc	p15, 4, r1, c6, c0, 2	@ HIFAR
+	str	r2, [vcpu, #VCPU_HSR]
+	str	r1, [vcpu, #VCPU_HxFAR]
+	.endif
+	mov	r1, #\exception_code
+	b	__kvm_vcpu_return
+
+	@ We were in the host already. Let's craft a panic-ing return to SVC.
+99:	mrs	r2, cpsr
+	bic	r2, r2, #MODE_MASK
+	orr	r2, r2, #SVC_MODE
+THUMB(	orr	r2, r2, #PSR_T_BIT	)
+	msr	spsr_cxsf, r2
+	mrs	r1, ELR_hyp
+	ldr	r2, =BSYM(panic)
+	msr	ELR_hyp, r2
+	ldr	r0, =\panic_str
+	eret
+.endm
+
+	.text
+
 	.align 5
 __kvm_hyp_vector:
 	.globl __kvm_hyp_vector
-	nop
+
+	@ Hyp-mode exception vector
+	W(b)	hyp_reset
+	W(b)	hyp_undef
+	W(b)	hyp_svc
+	W(b)	hyp_pabt
+	W(b)	hyp_dabt
+	W(b)	hyp_hvc
+	W(b)	hyp_irq
+	W(b)	hyp_fiq
+
+	.align
+hyp_reset:
+	b	hyp_reset
+
+	.align
+hyp_undef:
+	bad_exception ARM_EXCEPTION_UNDEFINED, und_die_str
+
+	.align
+hyp_svc:
+	bad_exception ARM_EXCEPTION_HVC, svc_die_str
+
+	.align
+hyp_pabt:
+	bad_exception ARM_EXCEPTION_PREF_ABORT, pabt_die_str
+
+	.align
+hyp_dabt:
+	bad_exception ARM_EXCEPTION_DATA_ABORT, dabt_die_str
+
+	.align
+hyp_hvc:
+	/*
+	 * Getting here is either becuase of a trap from a guest or from calling
+	 * HVC from the host kernel, which means "switch to Hyp mode".
+	 */
+	push	{r0, r1, r2}
+
+	@ Check syndrome register
+	mrc	p15, 4, r1, c5, c2, 0	@ HSR
+	lsr	r0, r1, #HSR_EC_SHIFT
+#ifdef CONFIG_VFPv3
+	cmp	r0, #HSR_EC_CP_0_13
+	beq	switch_to_guest_vfp
+#endif
+	cmp	r0, #HSR_EC_HVC
+	bne	guest_trap		@ Not HVC instr.
+
+	/*
+	 * Let's check if the HVC came from VMID 0 and allow simple
+	 * switch to Hyp mode
+	 */
+	mrrc    p15, 6, r0, r2, c2
+	lsr     r2, r2, #16
+	and     r2, r2, #0xff
+	cmp     r2, #0
+	bne	guest_trap		@ Guest called HVC
+
+host_switch_to_hyp:
+	pop	{r0, r1, r2}
+
+	push	{lr}
+	mrs	lr, SPSR
+	push	{lr}
+
+	mov	lr, r0
+	mov	r0, r1
+	mov	r1, r2
+	mov	r2, r3
+
+THUMB(	orr	lr, #1)
+	blx	lr			@ Call the HYP function
+
+	pop	{lr}
+	msr	SPSR_csxf, lr
+	pop	{lr}
+	eret
+
+guest_trap:
+	load_vcpu			@ Load VCPU pointer to r0
+	str	r1, [vcpu, #VCPU_HSR]
+
+	@ Check if we need the fault information
+	lsr	r1, r1, #HSR_EC_SHIFT
+	cmp	r1, #HSR_EC_IABT
+	mrceq	p15, 4, r2, c6, c0, 2	@ HIFAR
+	beq	2f
+	cmp	r1, #HSR_EC_DABT
+	bne	1f
+	mrc	p15, 4, r2, c6, c0, 0	@ HDFAR
+
+2:	str	r2, [vcpu, #VCPU_HxFAR]
+
+	/*
+	 * B3.13.5 Reporting exceptions taken to the Non-secure PL2 mode:
+	 *
+	 * Abort on the stage 2 translation for a memory access from a
+	 * Non-secure PL1 or PL0 mode:
+	 *
+	 * For any Access flag fault or Translation fault, and also for any
+	 * Permission fault on the stage 2 translation of a memory access
+	 * made as part of a translation table walk for a stage 1 translation,
+	 * the HPFAR holds the IPA that caused the fault. Otherwise, the HPFAR
+	 * is UNKNOWN.
+	 */
+
+	/* Check for permission fault, and S1PTW */
+	mrc	p15, 4, r1, c5, c2, 0	@ HSR
+	and	r0, r1, #HSR_FSC_TYPE
+	cmp	r0, #FSC_PERM
+	tsteq	r1, #(1 << 7)		@ S1PTW
+	mrcne	p15, 4, r2, c6, c0, 4	@ HPFAR
+	bne	3f
+
+	/* Resolve IPA using the xFAR */
+	mcr	p15, 0, r2, c7, c8, 0	@ ATS1CPR
+	isb
+	mrrc	p15, 0, r0, r1, c7	@ PAR
+	tst	r0, #1
+	bne	4f			@ Failed translation
+	ubfx	r2, r0, #12, #20
+	lsl	r2, r2, #4
+	orr	r2, r2, r1, lsl #24
+
+3:	load_vcpu			@ Load VCPU pointer to r0
+	str	r2, [r0, #VCPU_HPFAR]
+
+1:	mov	r1, #ARM_EXCEPTION_HVC
+	b	__kvm_vcpu_return
+
+4:	pop	{r0, r1, r2}		@ Failed translation, return to guest
+	eret
+
+/*
+ * If VFPv3 support is not available, then we will not switch the VFP
+ * registers; however cp10 and cp11 accesses will still trap and fallback
+ * to the regular coprocessor emulation code, which currently will
+ * inject an undefined exception to the guest.
+ */
+#ifdef CONFIG_VFPv3
+switch_to_guest_vfp:
+	load_vcpu			@ Load VCPU pointer to r0
+	push	{r3-r7}
+
+	@ NEON/VFP used.  Turn on VFP access.
+	set_hcptr vmexit, (HCPTR_TCP(10) | HCPTR_TCP(11))
+
+	@ Switch VFP/NEON hardware state to the guest's
+	add	r7, r0, #VCPU_VFP_HOST
+	ldr	r7, [r7]
+	store_vfp_state r7
+	add	r7, r0, #VCPU_VFP_GUEST
+	restore_vfp_state r7
+
+	pop	{r3-r7}
+	pop	{r0-r2}
+	eret
+#endif
+
+	.align
+hyp_irq:
+	push	{r0, r1, r2}
+	mov	r1, #ARM_EXCEPTION_IRQ
+	load_vcpu			@ Load VCPU pointer to r0
+	b	__kvm_vcpu_return
+
+	.align
+hyp_fiq:
+	b	hyp_fiq
+
+	.ltorg
 
 __kvm_hyp_code_end:
 	.globl	__kvm_hyp_code_end
+
+	.section ".rodata"
+
+und_die_str:
+	.ascii	"unexpected undefined exception in Hyp mode at: %#08x"
+pabt_die_str:
+	.ascii	"unexpected prefetch abort in Hyp mode at: %#08x"
+dabt_die_str:
+	.ascii	"unexpected data abort in Hyp mode at: %#08x"
+svc_die_str:
+	.ascii	"unexpected HVC/SVC trap in Hyp mode at: %#08x"
diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
new file mode 100644
index 000000000000..6a95d341e9c5
--- /dev/null
+++ b/arch/arm/kvm/interrupts_head.S
@@ -0,0 +1,441 @@
+#define VCPU_USR_REG(_reg_nr)	(VCPU_USR_REGS + (_reg_nr * 4))
+#define VCPU_USR_SP		(VCPU_USR_REG(13))
+#define VCPU_USR_LR		(VCPU_USR_REG(14))
+#define CP15_OFFSET(_cp15_reg_idx) (VCPU_CP15 + (_cp15_reg_idx * 4))
+
+/*
+ * Many of these macros need to access the VCPU structure, which is always
+ * held in r0. These macros should never clobber r1, as it is used to hold the
+ * exception code on the return path (except of course the macro that switches
+ * all the registers before the final jump to the VM).
+ */
+vcpu	.req	r0		@ vcpu pointer always in r0
+
+/* Clobbers {r2-r6} */
+.macro store_vfp_state vfp_base
+	@ The VFPFMRX and VFPFMXR macros are the VMRS and VMSR instructions
+	VFPFMRX	r2, FPEXC
+	@ Make sure VFP is enabled so we can touch the registers.
+	orr	r6, r2, #FPEXC_EN
+	VFPFMXR	FPEXC, r6
+
+	VFPFMRX	r3, FPSCR
+	tst	r2, #FPEXC_EX		@ Check for VFP Subarchitecture
+	beq	1f
+	@ If FPEXC_EX is 0, then FPINST/FPINST2 reads are upredictable, so
+	@ we only need to save them if FPEXC_EX is set.
+	VFPFMRX r4, FPINST
+	tst	r2, #FPEXC_FP2V
+	VFPFMRX r5, FPINST2, ne		@ vmrsne
+	bic	r6, r2, #FPEXC_EX	@ FPEXC_EX disable
+	VFPFMXR	FPEXC, r6
+1:
+	VFPFSTMIA \vfp_base, r6		@ Save VFP registers
+	stm	\vfp_base, {r2-r5}	@ Save FPEXC, FPSCR, FPINST, FPINST2
+.endm
+
+/* Assume FPEXC_EN is on and FPEXC_EX is off, clobbers {r2-r6} */
+.macro restore_vfp_state vfp_base
+	VFPFLDMIA \vfp_base, r6		@ Load VFP registers
+	ldm	\vfp_base, {r2-r5}	@ Load FPEXC, FPSCR, FPINST, FPINST2
+
+	VFPFMXR FPSCR, r3
+	tst	r2, #FPEXC_EX		@ Check for VFP Subarchitecture
+	beq	1f
+	VFPFMXR FPINST, r4
+	tst	r2, #FPEXC_FP2V
+	VFPFMXR FPINST2, r5, ne
+1:
+	VFPFMXR FPEXC, r2	@ FPEXC	(last, in case !EN)
+.endm
+
+/* These are simply for the macros to work - value don't have meaning */
+.equ usr, 0
+.equ svc, 1
+.equ abt, 2
+.equ und, 3
+.equ irq, 4
+.equ fiq, 5
+
+.macro push_host_regs_mode mode
+	mrs	r2, SP_\mode
+	mrs	r3, LR_\mode
+	mrs	r4, SPSR_\mode
+	push	{r2, r3, r4}
+.endm
+
+/*
+ * Store all host persistent registers on the stack.
+ * Clobbers all registers, in all modes, except r0 and r1.
+ */
+.macro save_host_regs
+	/* Hyp regs. Only ELR_hyp (SPSR_hyp already saved) */
+	mrs	r2, ELR_hyp
+	push	{r2}
+
+	/* usr regs */
+	push	{r4-r12}	@ r0-r3 are always clobbered
+	mrs	r2, SP_usr
+	mov	r3, lr
+	push	{r2, r3}
+
+	push_host_regs_mode svc
+	push_host_regs_mode abt
+	push_host_regs_mode und
+	push_host_regs_mode irq
+
+	/* fiq regs */
+	mrs	r2, r8_fiq
+	mrs	r3, r9_fiq
+	mrs	r4, r10_fiq
+	mrs	r5, r11_fiq
+	mrs	r6, r12_fiq
+	mrs	r7, SP_fiq
+	mrs	r8, LR_fiq
+	mrs	r9, SPSR_fiq
+	push	{r2-r9}
+.endm
+
+.macro pop_host_regs_mode mode
+	pop	{r2, r3, r4}
+	msr	SP_\mode, r2
+	msr	LR_\mode, r3
+	msr	SPSR_\mode, r4
+.endm
+
+/*
+ * Restore all host registers from the stack.
+ * Clobbers all registers, in all modes, except r0 and r1.
+ */
+.macro restore_host_regs
+	pop	{r2-r9}
+	msr	r8_fiq, r2
+	msr	r9_fiq, r3
+	msr	r10_fiq, r4
+	msr	r11_fiq, r5
+	msr	r12_fiq, r6
+	msr	SP_fiq, r7
+	msr	LR_fiq, r8
+	msr	SPSR_fiq, r9
+
+	pop_host_regs_mode irq
+	pop_host_regs_mode und
+	pop_host_regs_mode abt
+	pop_host_regs_mode svc
+
+	pop	{r2, r3}
+	msr	SP_usr, r2
+	mov	lr, r3
+	pop	{r4-r12}
+
+	pop	{r2}
+	msr	ELR_hyp, r2
+.endm
+
+/*
+ * Restore SP, LR and SPSR for a given mode. offset is the offset of
+ * this mode's registers from the VCPU base.
+ *
+ * Assumes vcpu pointer in vcpu reg
+ *
+ * Clobbers r1, r2, r3, r4.
+ */
+.macro restore_guest_regs_mode mode, offset
+	add	r1, vcpu, \offset
+	ldm	r1, {r2, r3, r4}
+	msr	SP_\mode, r2
+	msr	LR_\mode, r3
+	msr	SPSR_\mode, r4
+.endm
+
+/*
+ * Restore all guest registers from the vcpu struct.
+ *
+ * Assumes vcpu pointer in vcpu reg
+ *
+ * Clobbers *all* registers.
+ */
+.macro restore_guest_regs
+	restore_guest_regs_mode svc, #VCPU_SVC_REGS
+	restore_guest_regs_mode abt, #VCPU_ABT_REGS
+	restore_guest_regs_mode und, #VCPU_UND_REGS
+	restore_guest_regs_mode irq, #VCPU_IRQ_REGS
+
+	add	r1, vcpu, #VCPU_FIQ_REGS
+	ldm	r1, {r2-r9}
+	msr	r8_fiq, r2
+	msr	r9_fiq, r3
+	msr	r10_fiq, r4
+	msr	r11_fiq, r5
+	msr	r12_fiq, r6
+	msr	SP_fiq, r7
+	msr	LR_fiq, r8
+	msr	SPSR_fiq, r9
+
+	@ Load return state
+	ldr	r2, [vcpu, #VCPU_PC]
+	ldr	r3, [vcpu, #VCPU_CPSR]
+	msr	ELR_hyp, r2
+	msr	SPSR_cxsf, r3
+
+	@ Load user registers
+	ldr	r2, [vcpu, #VCPU_USR_SP]
+	ldr	r3, [vcpu, #VCPU_USR_LR]
+	msr	SP_usr, r2
+	mov	lr, r3
+	add	vcpu, vcpu, #(VCPU_USR_REGS)
+	ldm	vcpu, {r0-r12}
+.endm
+
+/*
+ * Save SP, LR and SPSR for a given mode. offset is the offset of
+ * this mode's registers from the VCPU base.
+ *
+ * Assumes vcpu pointer in vcpu reg
+ *
+ * Clobbers r2, r3, r4, r5.
+ */
+.macro save_guest_regs_mode mode, offset
+	add	r2, vcpu, \offset
+	mrs	r3, SP_\mode
+	mrs	r4, LR_\mode
+	mrs	r5, SPSR_\mode
+	stm	r2, {r3, r4, r5}
+.endm
+
+/*
+ * Save all guest registers to the vcpu struct
+ * Expects guest's r0, r1, r2 on the stack.
+ *
+ * Assumes vcpu pointer in vcpu reg
+ *
+ * Clobbers r2, r3, r4, r5.
+ */
+.macro save_guest_regs
+	@ Store usr registers
+	add	r2, vcpu, #VCPU_USR_REG(3)
+	stm	r2, {r3-r12}
+	add	r2, vcpu, #VCPU_USR_REG(0)
+	pop	{r3, r4, r5}		@ r0, r1, r2
+	stm	r2, {r3, r4, r5}
+	mrs	r2, SP_usr
+	mov	r3, lr
+	str	r2, [vcpu, #VCPU_USR_SP]
+	str	r3, [vcpu, #VCPU_USR_LR]
+
+	@ Store return state
+	mrs	r2, ELR_hyp
+	mrs	r3, spsr
+	str	r2, [vcpu, #VCPU_PC]
+	str	r3, [vcpu, #VCPU_CPSR]
+
+	@ Store other guest registers
+	save_guest_regs_mode svc, #VCPU_SVC_REGS
+	save_guest_regs_mode abt, #VCPU_ABT_REGS
+	save_guest_regs_mode und, #VCPU_UND_REGS
+	save_guest_regs_mode irq, #VCPU_IRQ_REGS
+.endm
+
+/* Reads cp15 registers from hardware and stores them in memory
+ * @store_to_vcpu: If 0, registers are written in-order to the stack,
+ * 		   otherwise to the VCPU struct pointed to by vcpup
+ *
+ * Assumes vcpu pointer in vcpu reg
+ *
+ * Clobbers r2 - r12
+ */
+.macro read_cp15_state store_to_vcpu
+	mrc	p15, 0, r2, c1, c0, 0	@ SCTLR
+	mrc	p15, 0, r3, c1, c0, 2	@ CPACR
+	mrc	p15, 0, r4, c2, c0, 2	@ TTBCR
+	mrc	p15, 0, r5, c3, c0, 0	@ DACR
+	mrrc	p15, 0, r6, r7, c2	@ TTBR 0
+	mrrc	p15, 1, r8, r9, c2	@ TTBR 1
+	mrc	p15, 0, r10, c10, c2, 0	@ PRRR
+	mrc	p15, 0, r11, c10, c2, 1	@ NMRR
+	mrc	p15, 2, r12, c0, c0, 0	@ CSSELR
+
+	.if \store_to_vcpu == 0
+	push	{r2-r12}		@ Push CP15 registers
+	.else
+	str	r2, [vcpu, #CP15_OFFSET(c1_SCTLR)]
+	str	r3, [vcpu, #CP15_OFFSET(c1_CPACR)]
+	str	r4, [vcpu, #CP15_OFFSET(c2_TTBCR)]
+	str	r5, [vcpu, #CP15_OFFSET(c3_DACR)]
+	add	r2, vcpu, #CP15_OFFSET(c2_TTBR0)
+	strd	r6, r7, [r2]
+	add	r2, vcpu, #CP15_OFFSET(c2_TTBR1)
+	strd	r8, r9, [r2]
+	str	r10, [vcpu, #CP15_OFFSET(c10_PRRR)]
+	str	r11, [vcpu, #CP15_OFFSET(c10_NMRR)]
+	str	r12, [vcpu, #CP15_OFFSET(c0_CSSELR)]
+	.endif
+
+	mrc	p15, 0, r2, c13, c0, 1	@ CID
+	mrc	p15, 0, r3, c13, c0, 2	@ TID_URW
+	mrc	p15, 0, r4, c13, c0, 3	@ TID_URO
+	mrc	p15, 0, r5, c13, c0, 4	@ TID_PRIV
+	mrc	p15, 0, r6, c5, c0, 0	@ DFSR
+	mrc	p15, 0, r7, c5, c0, 1	@ IFSR
+	mrc	p15, 0, r8, c5, c1, 0	@ ADFSR
+	mrc	p15, 0, r9, c5, c1, 1	@ AIFSR
+	mrc	p15, 0, r10, c6, c0, 0	@ DFAR
+	mrc	p15, 0, r11, c6, c0, 2	@ IFAR
+	mrc	p15, 0, r12, c12, c0, 0	@ VBAR
+
+	.if \store_to_vcpu == 0
+	push	{r2-r12}		@ Push CP15 registers
+	.else
+	str	r2, [vcpu, #CP15_OFFSET(c13_CID)]
+	str	r3, [vcpu, #CP15_OFFSET(c13_TID_URW)]
+	str	r4, [vcpu, #CP15_OFFSET(c13_TID_URO)]
+	str	r5, [vcpu, #CP15_OFFSET(c13_TID_PRIV)]
+	str	r6, [vcpu, #CP15_OFFSET(c5_DFSR)]
+	str	r7, [vcpu, #CP15_OFFSET(c5_IFSR)]
+	str	r8, [vcpu, #CP15_OFFSET(c5_ADFSR)]
+	str	r9, [vcpu, #CP15_OFFSET(c5_AIFSR)]
+	str	r10, [vcpu, #CP15_OFFSET(c6_DFAR)]
+	str	r11, [vcpu, #CP15_OFFSET(c6_IFAR)]
+	str	r12, [vcpu, #CP15_OFFSET(c12_VBAR)]
+	.endif
+.endm
+
+/*
+ * Reads cp15 registers from memory and writes them to hardware
+ * @read_from_vcpu: If 0, registers are read in-order from the stack,
+ *		    otherwise from the VCPU struct pointed to by vcpup
+ *
+ * Assumes vcpu pointer in vcpu reg
+ */
+.macro write_cp15_state read_from_vcpu
+	.if \read_from_vcpu == 0
+	pop	{r2-r12}
+	.else
+	ldr	r2, [vcpu, #CP15_OFFSET(c13_CID)]
+	ldr	r3, [vcpu, #CP15_OFFSET(c13_TID_URW)]
+	ldr	r4, [vcpu, #CP15_OFFSET(c13_TID_URO)]
+	ldr	r5, [vcpu, #CP15_OFFSET(c13_TID_PRIV)]
+	ldr	r6, [vcpu, #CP15_OFFSET(c5_DFSR)]
+	ldr	r7, [vcpu, #CP15_OFFSET(c5_IFSR)]
+	ldr	r8, [vcpu, #CP15_OFFSET(c5_ADFSR)]
+	ldr	r9, [vcpu, #CP15_OFFSET(c5_AIFSR)]
+	ldr	r10, [vcpu, #CP15_OFFSET(c6_DFAR)]
+	ldr	r11, [vcpu, #CP15_OFFSET(c6_IFAR)]
+	ldr	r12, [vcpu, #CP15_OFFSET(c12_VBAR)]
+	.endif
+
+	mcr	p15, 0, r2, c13, c0, 1	@ CID
+	mcr	p15, 0, r3, c13, c0, 2	@ TID_URW
+	mcr	p15, 0, r4, c13, c0, 3	@ TID_URO
+	mcr	p15, 0, r5, c13, c0, 4	@ TID_PRIV
+	mcr	p15, 0, r6, c5, c0, 0	@ DFSR
+	mcr	p15, 0, r7, c5, c0, 1	@ IFSR
+	mcr	p15, 0, r8, c5, c1, 0	@ ADFSR
+	mcr	p15, 0, r9, c5, c1, 1	@ AIFSR
+	mcr	p15, 0, r10, c6, c0, 0	@ DFAR
+	mcr	p15, 0, r11, c6, c0, 2	@ IFAR
+	mcr	p15, 0, r12, c12, c0, 0	@ VBAR
+
+	.if \read_from_vcpu == 0
+	pop	{r2-r12}
+	.else
+	ldr	r2, [vcpu, #CP15_OFFSET(c1_SCTLR)]
+	ldr	r3, [vcpu, #CP15_OFFSET(c1_CPACR)]
+	ldr	r4, [vcpu, #CP15_OFFSET(c2_TTBCR)]
+	ldr	r5, [vcpu, #CP15_OFFSET(c3_DACR)]
+	add	r12, vcpu, #CP15_OFFSET(c2_TTBR0)
+	ldrd	r6, r7, [r12]
+	add	r12, vcpu, #CP15_OFFSET(c2_TTBR1)
+	ldrd	r8, r9, [r12]
+	ldr	r10, [vcpu, #CP15_OFFSET(c10_PRRR)]
+	ldr	r11, [vcpu, #CP15_OFFSET(c10_NMRR)]
+	ldr	r12, [vcpu, #CP15_OFFSET(c0_CSSELR)]
+	.endif
+
+	mcr	p15, 0, r2, c1, c0, 0	@ SCTLR
+	mcr	p15, 0, r3, c1, c0, 2	@ CPACR
+	mcr	p15, 0, r4, c2, c0, 2	@ TTBCR
+	mcr	p15, 0, r5, c3, c0, 0	@ DACR
+	mcrr	p15, 0, r6, r7, c2	@ TTBR 0
+	mcrr	p15, 1, r8, r9, c2	@ TTBR 1
+	mcr	p15, 0, r10, c10, c2, 0	@ PRRR
+	mcr	p15, 0, r11, c10, c2, 1	@ NMRR
+	mcr	p15, 2, r12, c0, c0, 0	@ CSSELR
+.endm
+
+/*
+ * Save the VGIC CPU state into memory
+ *
+ * Assumes vcpu pointer in vcpu reg
+ */
+.macro save_vgic_state
+.endm
+
+/*
+ * Restore the VGIC CPU state from memory
+ *
+ * Assumes vcpu pointer in vcpu reg
+ */
+.macro restore_vgic_state
+.endm
+
+.equ vmentry,	0
+.equ vmexit,	1
+
+/* Configures the HSTR (Hyp System Trap Register) on entry/return
+ * (hardware reset value is 0) */
+.macro set_hstr operation
+	mrc	p15, 4, r2, c1, c1, 3
+	ldr	r3, =HSTR_T(15)
+	.if \operation == vmentry
+	orr	r2, r2, r3		@ Trap CR{15}
+	.else
+	bic	r2, r2, r3		@ Don't trap any CRx accesses
+	.endif
+	mcr	p15, 4, r2, c1, c1, 3
+.endm
+
+/* Configures the HCPTR (Hyp Coprocessor Trap Register) on entry/return
+ * (hardware reset value is 0). Keep previous value in r2. */
+.macro set_hcptr operation, mask
+	mrc	p15, 4, r2, c1, c1, 2
+	ldr	r3, =\mask
+	.if \operation == vmentry
+	orr	r3, r2, r3		@ Trap coproc-accesses defined in mask
+	.else
+	bic	r3, r2, r3		@ Don't trap defined coproc-accesses
+	.endif
+	mcr	p15, 4, r3, c1, c1, 2
+.endm
+
+/* Configures the HDCR (Hyp Debug Configuration Register) on entry/return
+ * (hardware reset value is 0) */
+.macro set_hdcr operation
+	mrc	p15, 4, r2, c1, c1, 1
+	ldr	r3, =(HDCR_TPM|HDCR_TPMCR)
+	.if \operation == vmentry
+	orr	r2, r2, r3		@ Trap some perfmon accesses
+	.else
+	bic	r2, r2, r3		@ Don't trap any perfmon accesses
+	.endif
+	mcr	p15, 4, r2, c1, c1, 1
+.endm
+
+/* Enable/Disable: stage-2 trans., trap interrupts, trap wfi, trap smc */
+.macro configure_hyp_role operation
+	mrc	p15, 4, r2, c1, c1, 0	@ HCR
+	bic	r2, r2, #HCR_VIRT_EXCP_MASK
+	ldr	r3, =HCR_GUEST_MASK
+	.if \operation == vmentry
+	orr	r2, r2, r3
+	ldr	r3, [vcpu, #VCPU_IRQ_LINES]
+	orr	r2, r2, r3
+	.else
+	bic	r2, r2, r3
+	.endif
+	mcr	p15, 4, r2, c1, c1, 0
+.endm
+
+.macro load_vcpu
+	mrc	p15, 4, vcpu, c13, c0, 2	@ HTPIDR
+.endm
-- 
cgit v1.2.3


From 5b3e5e5bf230f56309706dfc05fc0cb173cc83aa Mon Sep 17 00:00:00 2001
From: Christoffer Dall <c.dall@virtualopensystems.com>
Date: Sun, 20 Jan 2013 18:28:09 -0500
Subject: KVM: ARM: Emulation framework and CP15 emulation

Adds a new important function in the main KVM/ARM code called
handle_exit() which is called from kvm_arch_vcpu_ioctl_run() on returns
from guest execution. This function examines the Hyp-Syndrome-Register
(HSR), which contains information telling KVM what caused the exit from
the guest.

Some of the reasons for an exit are CP15 accesses, which are
not allowed from the guest and this commit handles these exits by
emulating the intended operation in software and skipping the guest
instruction.

Minor notes about the coproc register reset:
1) We reserve a value of 0 as an invalid cp15 offset, to catch bugs in our
   table, at cost of 4 bytes per vcpu.

2) Added comments on the table indicating how we handle each register, for
   simplicity of understanding.

Reviewed-by: Will Deacon <will.deacon@arm.com>
Reviewed-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
---
 arch/arm/include/asm/kvm_arm.h     |  11 ++
 arch/arm/include/asm/kvm_coproc.h  |  14 ++
 arch/arm/include/asm/kvm_emulate.h |   6 +
 arch/arm/include/asm/kvm_host.h    |   4 +
 arch/arm/kvm/Makefile              |   2 +-
 arch/arm/kvm/arm.c                 | 169 ++++++++++++++++-
 arch/arm/kvm/coproc.c              | 360 +++++++++++++++++++++++++++++++++++++
 arch/arm/kvm/coproc.h              | 153 ++++++++++++++++
 arch/arm/kvm/coproc_a15.c          | 162 +++++++++++++++++
 arch/arm/kvm/emulate.c             | 218 ++++++++++++++++++++++
 arch/arm/kvm/trace.h               |  65 +++++++
 11 files changed, 1160 insertions(+), 4 deletions(-)
 create mode 100644 arch/arm/kvm/coproc.h
 create mode 100644 arch/arm/kvm/coproc_a15.c

(limited to 'arch/arm/kvm/arm.c')

diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index c69936b1fc53..9a34c20d41ec 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -70,6 +70,11 @@
 			HCR_SWIO | HCR_TIDCP)
 #define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
 
+/* System Control Register (SCTLR) bits */
+#define SCTLR_TE	(1 << 30)
+#define SCTLR_EE	(1 << 25)
+#define SCTLR_V		(1 << 13)
+
 /* Hyp System Control Register (HSCTLR) bits */
 #define HSCTLR_TE	(1 << 30)
 #define HSCTLR_EE	(1 << 25)
@@ -171,6 +176,10 @@
 #define HSR_FSC		(0x3f)
 #define HSR_FSC_TYPE	(0x3c)
 #define HSR_WNR		(1 << 6)
+#define HSR_CV_SHIFT	(24)
+#define HSR_CV		(1U << HSR_CV_SHIFT)
+#define HSR_COND_SHIFT	(20)
+#define HSR_COND	(0xfU << HSR_COND_SHIFT)
 
 #define FSC_FAULT	(0x04)
 #define FSC_PERM	(0x0c)
@@ -197,4 +206,6 @@
 #define HSR_EC_DABT	(0x24)
 #define HSR_EC_DABT_HYP	(0x25)
 
+#define HSR_HVC_IMM_MASK	((1UL << 16) - 1)
+
 #endif /* __ARM_KVM_ARM_H__ */
diff --git a/arch/arm/include/asm/kvm_coproc.h b/arch/arm/include/asm/kvm_coproc.h
index b6d023deb426..bd1ace030495 100644
--- a/arch/arm/include/asm/kvm_coproc.h
+++ b/arch/arm/include/asm/kvm_coproc.h
@@ -21,4 +21,18 @@
 
 void kvm_reset_coprocs(struct kvm_vcpu *vcpu);
 
+struct kvm_coproc_target_table {
+	unsigned target;
+	const struct coproc_reg *table;
+	size_t num;
+};
+void kvm_register_target_coproc_table(struct kvm_coproc_target_table *table);
+
+int kvm_handle_cp10_id(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp_0_13_access(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run);
+void kvm_coproc_table_init(void);
 #endif /* __ARM_KVM_COPROC_H__ */
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 17dad674b90f..01a755b80632 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -25,6 +25,12 @@
 u32 *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num);
 u32 *vcpu_spsr(struct kvm_vcpu *vcpu);
 
+int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run);
+void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr);
+void kvm_inject_undefined(struct kvm_vcpu *vcpu);
+void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
+void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
+
 static inline u32 *vcpu_pc(struct kvm_vcpu *vcpu)
 {
 	return (u32 *)&vcpu->arch.regs.usr_regs.ARM_pc;
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 7a121089c733..e1d4168d4f19 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -94,6 +94,10 @@ struct kvm_vcpu_arch {
 	 * Anything that is not used directly from assembly code goes
 	 * here.
 	 */
+	/* dcache set/way operation pending */
+	int last_pcpu;
+	cpumask_t require_dcache_flush;
+
 	/* Interrupt related fields */
 	u32 irq_lines;		/* IRQ and FIQ levels */
 
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index dfc293f277b3..88edce6c97d4 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -18,4 +18,4 @@ kvm-arm-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
 
 obj-y += kvm-arm.o init.o interrupts.o
 obj-y += arm.o guest.o mmu.o emulate.o reset.o
-obj-y += coproc.o
+obj-y += coproc.o coproc_a15.o
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 9e9fa4477884..be06c5de51e3 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -36,11 +36,14 @@
 #include <asm/mman.h>
 #include <asm/cputype.h>
 #include <asm/tlbflush.h>
+#include <asm/cacheflush.h>
 #include <asm/virt.h>
 #include <asm/kvm_arm.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmu.h>
 #include <asm/kvm_emulate.h>
+#include <asm/kvm_coproc.h>
+#include <asm/opcodes.h>
 
 #ifdef REQUIRES_VIRT
 __asm__(".arch_extension	virt");
@@ -294,6 +297,15 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	vcpu->cpu = cpu;
 	vcpu->arch.vfp_host = this_cpu_ptr(kvm_host_vfp_state);
+
+	/*
+	 * Check whether this vcpu requires the cache to be flushed on
+	 * this physical CPU. This is a consequence of doing dcache
+	 * operations by set/way on this vcpu. We do it here to be in
+	 * a non-preemptible section.
+	 */
+	if (cpumask_test_and_clear_cpu(cpu, &vcpu->arch.require_dcache_flush))
+		flush_cache_all(); /* We'd really want v7_flush_dcache_all() */
 }
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -319,9 +331,16 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 	return -EINVAL;
 }
 
+/**
+ * kvm_arch_vcpu_runnable - determine if the vcpu can be scheduled
+ * @v:		The VCPU pointer
+ *
+ * If the guest CPU is not waiting for interrupts or an interrupt line is
+ * asserted, the CPU is by definition runnable.
+ */
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
 {
-	return 0;
+	return !!v->arch.irq_lines;
 }
 
 /* Just ensure a guest exit from a particular CPU */
@@ -411,6 +430,110 @@ static void update_vttbr(struct kvm *kvm)
 	spin_unlock(&kvm_vmid_lock);
 }
 
+static int handle_svc_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	/* SVC called from Hyp mode should never get here */
+	kvm_debug("SVC called from Hyp mode shouldn't go here\n");
+	BUG();
+	return -EINVAL; /* Squash warning */
+}
+
+static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	trace_kvm_hvc(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0),
+		      vcpu->arch.hsr & HSR_HVC_IMM_MASK);
+
+	kvm_inject_undefined(vcpu);
+	return 1;
+}
+
+static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	/* We don't support SMC; don't do that. */
+	kvm_debug("smc: at %08x", *vcpu_pc(vcpu));
+	kvm_inject_undefined(vcpu);
+	return 1;
+}
+
+static int handle_pabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	/* The hypervisor should never cause aborts */
+	kvm_err("Prefetch Abort taken from Hyp mode at %#08x (HSR: %#08x)\n",
+		vcpu->arch.hxfar, vcpu->arch.hsr);
+	return -EFAULT;
+}
+
+static int handle_dabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	/* This is either an error in the ws. code or an external abort */
+	kvm_err("Data Abort taken from Hyp mode at %#08x (HSR: %#08x)\n",
+		vcpu->arch.hxfar, vcpu->arch.hsr);
+	return -EFAULT;
+}
+
+typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
+static exit_handle_fn arm_exit_handlers[] = {
+	[HSR_EC_WFI]		= kvm_handle_wfi,
+	[HSR_EC_CP15_32]	= kvm_handle_cp15_32,
+	[HSR_EC_CP15_64]	= kvm_handle_cp15_64,
+	[HSR_EC_CP14_MR]	= kvm_handle_cp14_access,
+	[HSR_EC_CP14_LS]	= kvm_handle_cp14_load_store,
+	[HSR_EC_CP14_64]	= kvm_handle_cp14_access,
+	[HSR_EC_CP_0_13]	= kvm_handle_cp_0_13_access,
+	[HSR_EC_CP10_ID]	= kvm_handle_cp10_id,
+	[HSR_EC_SVC_HYP]	= handle_svc_hyp,
+	[HSR_EC_HVC]		= handle_hvc,
+	[HSR_EC_SMC]		= handle_smc,
+	[HSR_EC_IABT]		= kvm_handle_guest_abort,
+	[HSR_EC_IABT_HYP]	= handle_pabt_hyp,
+	[HSR_EC_DABT]		= kvm_handle_guest_abort,
+	[HSR_EC_DABT_HYP]	= handle_dabt_hyp,
+};
+
+/*
+ * A conditional instruction is allowed to trap, even though it
+ * wouldn't be executed.  So let's re-implement the hardware, in
+ * software!
+ */
+static bool kvm_condition_valid(struct kvm_vcpu *vcpu)
+{
+	unsigned long cpsr, cond, insn;
+
+	/*
+	 * Exception Code 0 can only happen if we set HCR.TGE to 1, to
+	 * catch undefined instructions, and then we won't get past
+	 * the arm_exit_handlers test anyway.
+	 */
+	BUG_ON(((vcpu->arch.hsr & HSR_EC) >> HSR_EC_SHIFT) == 0);
+
+	/* Top two bits non-zero?  Unconditional. */
+	if (vcpu->arch.hsr >> 30)
+		return true;
+
+	cpsr = *vcpu_cpsr(vcpu);
+
+	/* Is condition field valid? */
+	if ((vcpu->arch.hsr & HSR_CV) >> HSR_CV_SHIFT)
+		cond = (vcpu->arch.hsr & HSR_COND) >> HSR_COND_SHIFT;
+	else {
+		/* This can happen in Thumb mode: examine IT state. */
+		unsigned long it;
+
+		it = ((cpsr >> 8) & 0xFC) | ((cpsr >> 25) & 0x3);
+
+		/* it == 0 => unconditional. */
+		if (it == 0)
+			return true;
+
+		/* The cond for this insn works out as the top 4 bits. */
+		cond = (it >> 4);
+	}
+
+	/* Shift makes it look like an ARM-mode instruction */
+	insn = cond << 28;
+	return arm_check_condition(insn, cpsr) != ARM_OPCODE_CONDTEST_FAIL;
+}
+
 /*
  * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on
  * proper exit to QEMU.
@@ -418,8 +541,46 @@ static void update_vttbr(struct kvm *kvm)
 static int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		       int exception_index)
 {
-	run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
-	return 0;
+	unsigned long hsr_ec;
+
+	switch (exception_index) {
+	case ARM_EXCEPTION_IRQ:
+		return 1;
+	case ARM_EXCEPTION_UNDEFINED:
+		kvm_err("Undefined exception in Hyp mode at: %#08x\n",
+			vcpu->arch.hyp_pc);
+		BUG();
+		panic("KVM: Hypervisor undefined exception!\n");
+	case ARM_EXCEPTION_DATA_ABORT:
+	case ARM_EXCEPTION_PREF_ABORT:
+	case ARM_EXCEPTION_HVC:
+		hsr_ec = (vcpu->arch.hsr & HSR_EC) >> HSR_EC_SHIFT;
+
+		if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers)
+		    || !arm_exit_handlers[hsr_ec]) {
+			kvm_err("Unkown exception class: %#08lx, "
+				"hsr: %#08x\n", hsr_ec,
+				(unsigned int)vcpu->arch.hsr);
+			BUG();
+		}
+
+		/*
+		 * See ARM ARM B1.14.1: "Hyp traps on instructions
+		 * that fail their condition code check"
+		 */
+		if (!kvm_condition_valid(vcpu)) {
+			bool is_wide = vcpu->arch.hsr & HSR_IL;
+			kvm_skip_instr(vcpu, is_wide);
+			return 1;
+		}
+
+		return arm_exit_handlers[hsr_ec](vcpu, run);
+	default:
+		kvm_pr_unimpl("Unsupported exception type: %d",
+			      exception_index);
+		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		return 0;
+	}
 }
 
 static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
@@ -493,6 +654,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		ret = kvm_call_hyp(__kvm_vcpu_run, vcpu);
 
 		vcpu->mode = OUTSIDE_GUEST_MODE;
+		vcpu->arch.last_pcpu = smp_processor_id();
 		kvm_guest_exit();
 		trace_kvm_exit(*vcpu_pc(vcpu));
 		/*
@@ -801,6 +963,7 @@ int kvm_arch_init(void *opaque)
 	if (err)
 		goto out_err;
 
+	kvm_coproc_table_init();
 	return 0;
 out_err:
 	return err;
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index 0c433558591c..722efe3b1675 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -16,8 +16,368 @@
  * along with this program; if not, write to the Free Software
  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  */
+#include <linux/mm.h>
 #include <linux/kvm_host.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_host.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_coproc.h>
+#include <asm/cacheflush.h>
+#include <asm/cputype.h>
+#include <trace/events/kvm.h>
 
+#include "trace.h"
+#include "coproc.h"
+
+
+/******************************************************************************
+ * Co-processor emulation
+ *****************************************************************************/
+
+int kvm_handle_cp10_id(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	kvm_inject_undefined(vcpu);
+	return 1;
+}
+
+int kvm_handle_cp_0_13_access(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	/*
+	 * We can get here, if the host has been built without VFPv3 support,
+	 * but the guest attempted a floating point operation.
+	 */
+	kvm_inject_undefined(vcpu);
+	return 1;
+}
+
+int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	kvm_inject_undefined(vcpu);
+	return 1;
+}
+
+int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	kvm_inject_undefined(vcpu);
+	return 1;
+}
+
+/* See note at ARM ARM B1.14.4 */
+static bool access_dcsw(struct kvm_vcpu *vcpu,
+			const struct coproc_params *p,
+			const struct coproc_reg *r)
+{
+	u32 val;
+	int cpu;
+
+	cpu = get_cpu();
+
+	if (!p->is_write)
+		return read_from_write_only(vcpu, p);
+
+	cpumask_setall(&vcpu->arch.require_dcache_flush);
+	cpumask_clear_cpu(cpu, &vcpu->arch.require_dcache_flush);
+
+	/* If we were already preempted, take the long way around */
+	if (cpu != vcpu->arch.last_pcpu) {
+		flush_cache_all();
+		goto done;
+	}
+
+	val = *vcpu_reg(vcpu, p->Rt1);
+
+	switch (p->CRm) {
+	case 6:			/* Upgrade DCISW to DCCISW, as per HCR.SWIO */
+	case 14:		/* DCCISW */
+		asm volatile("mcr p15, 0, %0, c7, c14, 2" : : "r" (val));
+		break;
+
+	case 10:		/* DCCSW */
+		asm volatile("mcr p15, 0, %0, c7, c10, 2" : : "r" (val));
+		break;
+	}
+
+done:
+	put_cpu();
+
+	return true;
+}
+
+/*
+ * We could trap ID_DFR0 and tell the guest we don't support performance
+ * monitoring.  Unfortunately the patch to make the kernel check ID_DFR0 was
+ * NAKed, so it will read the PMCR anyway.
+ *
+ * Therefore we tell the guest we have 0 counters.  Unfortunately, we
+ * must always support PMCCNTR (the cycle counter): we just RAZ/WI for
+ * all PM registers, which doesn't crash the guest kernel at least.
+ */
+static bool pm_fake(struct kvm_vcpu *vcpu,
+		    const struct coproc_params *p,
+		    const struct coproc_reg *r)
+{
+	if (p->is_write)
+		return ignore_write(vcpu, p);
+	else
+		return read_zero(vcpu, p);
+}
+
+#define access_pmcr pm_fake
+#define access_pmcntenset pm_fake
+#define access_pmcntenclr pm_fake
+#define access_pmovsr pm_fake
+#define access_pmselr pm_fake
+#define access_pmceid0 pm_fake
+#define access_pmceid1 pm_fake
+#define access_pmccntr pm_fake
+#define access_pmxevtyper pm_fake
+#define access_pmxevcntr pm_fake
+#define access_pmuserenr pm_fake
+#define access_pmintenset pm_fake
+#define access_pmintenclr pm_fake
+
+/* Architected CP15 registers.
+ * Important: Must be sorted ascending by CRn, CRM, Op1, Op2
+ */
+static const struct coproc_reg cp15_regs[] = {
+	/* CSSELR: swapped by interrupt.S. */
+	{ CRn( 0), CRm( 0), Op1( 2), Op2( 0), is32,
+			NULL, reset_unknown, c0_CSSELR },
+
+	/* TTBR0/TTBR1: swapped by interrupt.S. */
+	{ CRm( 2), Op1( 0), is64, NULL, reset_unknown64, c2_TTBR0 },
+	{ CRm( 2), Op1( 1), is64, NULL, reset_unknown64, c2_TTBR1 },
+
+	/* TTBCR: swapped by interrupt.S. */
+	{ CRn( 2), CRm( 0), Op1( 0), Op2( 2), is32,
+			NULL, reset_val, c2_TTBCR, 0x00000000 },
+
+	/* DACR: swapped by interrupt.S. */
+	{ CRn( 3), CRm( 0), Op1( 0), Op2( 0), is32,
+			NULL, reset_unknown, c3_DACR },
+
+	/* DFSR/IFSR/ADFSR/AIFSR: swapped by interrupt.S. */
+	{ CRn( 5), CRm( 0), Op1( 0), Op2( 0), is32,
+			NULL, reset_unknown, c5_DFSR },
+	{ CRn( 5), CRm( 0), Op1( 0), Op2( 1), is32,
+			NULL, reset_unknown, c5_IFSR },
+	{ CRn( 5), CRm( 1), Op1( 0), Op2( 0), is32,
+			NULL, reset_unknown, c5_ADFSR },
+	{ CRn( 5), CRm( 1), Op1( 0), Op2( 1), is32,
+			NULL, reset_unknown, c5_AIFSR },
+
+	/* DFAR/IFAR: swapped by interrupt.S. */
+	{ CRn( 6), CRm( 0), Op1( 0), Op2( 0), is32,
+			NULL, reset_unknown, c6_DFAR },
+	{ CRn( 6), CRm( 0), Op1( 0), Op2( 2), is32,
+			NULL, reset_unknown, c6_IFAR },
+	/*
+	 * DC{C,I,CI}SW operations:
+	 */
+	{ CRn( 7), CRm( 6), Op1( 0), Op2( 2), is32, access_dcsw},
+	{ CRn( 7), CRm(10), Op1( 0), Op2( 2), is32, access_dcsw},
+	{ CRn( 7), CRm(14), Op1( 0), Op2( 2), is32, access_dcsw},
+	/*
+	 * Dummy performance monitor implementation.
+	 */
+	{ CRn( 9), CRm(12), Op1( 0), Op2( 0), is32, access_pmcr},
+	{ CRn( 9), CRm(12), Op1( 0), Op2( 1), is32, access_pmcntenset},
+	{ CRn( 9), CRm(12), Op1( 0), Op2( 2), is32, access_pmcntenclr},
+	{ CRn( 9), CRm(12), Op1( 0), Op2( 3), is32, access_pmovsr},
+	{ CRn( 9), CRm(12), Op1( 0), Op2( 5), is32, access_pmselr},
+	{ CRn( 9), CRm(12), Op1( 0), Op2( 6), is32, access_pmceid0},
+	{ CRn( 9), CRm(12), Op1( 0), Op2( 7), is32, access_pmceid1},
+	{ CRn( 9), CRm(13), Op1( 0), Op2( 0), is32, access_pmccntr},
+	{ CRn( 9), CRm(13), Op1( 0), Op2( 1), is32, access_pmxevtyper},
+	{ CRn( 9), CRm(13), Op1( 0), Op2( 2), is32, access_pmxevcntr},
+	{ CRn( 9), CRm(14), Op1( 0), Op2( 0), is32, access_pmuserenr},
+	{ CRn( 9), CRm(14), Op1( 0), Op2( 1), is32, access_pmintenset},
+	{ CRn( 9), CRm(14), Op1( 0), Op2( 2), is32, access_pmintenclr},
+
+	/* PRRR/NMRR (aka MAIR0/MAIR1): swapped by interrupt.S. */
+	{ CRn(10), CRm( 2), Op1( 0), Op2( 0), is32,
+			NULL, reset_unknown, c10_PRRR},
+	{ CRn(10), CRm( 2), Op1( 0), Op2( 1), is32,
+			NULL, reset_unknown, c10_NMRR},
+
+	/* VBAR: swapped by interrupt.S. */
+	{ CRn(12), CRm( 0), Op1( 0), Op2( 0), is32,
+			NULL, reset_val, c12_VBAR, 0x00000000 },
+
+	/* CONTEXTIDR/TPIDRURW/TPIDRURO/TPIDRPRW: swapped by interrupt.S. */
+	{ CRn(13), CRm( 0), Op1( 0), Op2( 1), is32,
+			NULL, reset_val, c13_CID, 0x00000000 },
+	{ CRn(13), CRm( 0), Op1( 0), Op2( 2), is32,
+			NULL, reset_unknown, c13_TID_URW },
+	{ CRn(13), CRm( 0), Op1( 0), Op2( 3), is32,
+			NULL, reset_unknown, c13_TID_URO },
+	{ CRn(13), CRm( 0), Op1( 0), Op2( 4), is32,
+			NULL, reset_unknown, c13_TID_PRIV },
+};
+
+/* Target specific emulation tables */
+static struct kvm_coproc_target_table *target_tables[KVM_ARM_NUM_TARGETS];
+
+void kvm_register_target_coproc_table(struct kvm_coproc_target_table *table)
+{
+	target_tables[table->target] = table;
+}
+
+/* Get specific register table for this target. */
+static const struct coproc_reg *get_target_table(unsigned target, size_t *num)
+{
+	struct kvm_coproc_target_table *table;
+
+	table = target_tables[target];
+	*num = table->num;
+	return table->table;
+}
+
+static const struct coproc_reg *find_reg(const struct coproc_params *params,
+					 const struct coproc_reg table[],
+					 unsigned int num)
+{
+	unsigned int i;
+
+	for (i = 0; i < num; i++) {
+		const struct coproc_reg *r = &table[i];
+
+		if (params->is_64bit != r->is_64)
+			continue;
+		if (params->CRn != r->CRn)
+			continue;
+		if (params->CRm != r->CRm)
+			continue;
+		if (params->Op1 != r->Op1)
+			continue;
+		if (params->Op2 != r->Op2)
+			continue;
+
+		return r;
+	}
+	return NULL;
+}
+
+static int emulate_cp15(struct kvm_vcpu *vcpu,
+			const struct coproc_params *params)
+{
+	size_t num;
+	const struct coproc_reg *table, *r;
+
+	trace_kvm_emulate_cp15_imp(params->Op1, params->Rt1, params->CRn,
+				   params->CRm, params->Op2, params->is_write);
+
+	table = get_target_table(vcpu->arch.target, &num);
+
+	/* Search target-specific then generic table. */
+	r = find_reg(params, table, num);
+	if (!r)
+		r = find_reg(params, cp15_regs, ARRAY_SIZE(cp15_regs));
+
+	if (likely(r)) {
+		/* If we don't have an accessor, we should never get here! */
+		BUG_ON(!r->access);
+
+		if (likely(r->access(vcpu, params, r))) {
+			/* Skip instruction, since it was emulated */
+			kvm_skip_instr(vcpu, (vcpu->arch.hsr >> 25) & 1);
+			return 1;
+		}
+		/* If access function fails, it should complain. */
+	} else {
+		kvm_err("Unsupported guest CP15 access at: %08x\n",
+			*vcpu_pc(vcpu));
+		print_cp_instr(params);
+	}
+	kvm_inject_undefined(vcpu);
+	return 1;
+}
+
+/**
+ * kvm_handle_cp15_64 -- handles a mrrc/mcrr trap on a guest CP15 access
+ * @vcpu: The VCPU pointer
+ * @run:  The kvm_run struct
+ */
+int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	struct coproc_params params;
+
+	params.CRm = (vcpu->arch.hsr >> 1) & 0xf;
+	params.Rt1 = (vcpu->arch.hsr >> 5) & 0xf;
+	params.is_write = ((vcpu->arch.hsr & 1) == 0);
+	params.is_64bit = true;
+
+	params.Op1 = (vcpu->arch.hsr >> 16) & 0xf;
+	params.Op2 = 0;
+	params.Rt2 = (vcpu->arch.hsr >> 10) & 0xf;
+	params.CRn = 0;
+
+	return emulate_cp15(vcpu, &params);
+}
+
+static void reset_coproc_regs(struct kvm_vcpu *vcpu,
+			      const struct coproc_reg *table, size_t num)
+{
+	unsigned long i;
+
+	for (i = 0; i < num; i++)
+		if (table[i].reset)
+			table[i].reset(vcpu, &table[i]);
+}
+
+/**
+ * kvm_handle_cp15_32 -- handles a mrc/mcr trap on a guest CP15 access
+ * @vcpu: The VCPU pointer
+ * @run:  The kvm_run struct
+ */
+int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	struct coproc_params params;
+
+	params.CRm = (vcpu->arch.hsr >> 1) & 0xf;
+	params.Rt1 = (vcpu->arch.hsr >> 5) & 0xf;
+	params.is_write = ((vcpu->arch.hsr & 1) == 0);
+	params.is_64bit = false;
+
+	params.CRn = (vcpu->arch.hsr >> 10) & 0xf;
+	params.Op1 = (vcpu->arch.hsr >> 14) & 0x7;
+	params.Op2 = (vcpu->arch.hsr >> 17) & 0x7;
+	params.Rt2 = 0;
+
+	return emulate_cp15(vcpu, &params);
+}
+
+void kvm_coproc_table_init(void)
+{
+	unsigned int i;
+
+	/* Make sure tables are unique and in order. */
+	for (i = 1; i < ARRAY_SIZE(cp15_regs); i++)
+		BUG_ON(cmp_reg(&cp15_regs[i-1], &cp15_regs[i]) >= 0);
+}
+
+/**
+ * kvm_reset_coprocs - sets cp15 registers to reset value
+ * @vcpu: The VCPU pointer
+ *
+ * This function finds the right table above and sets the registers on the
+ * virtual CPU struct to their architecturally defined reset values.
+ */
 void kvm_reset_coprocs(struct kvm_vcpu *vcpu)
 {
+	size_t num;
+	const struct coproc_reg *table;
+
+	/* Catch someone adding a register without putting in reset entry. */
+	memset(vcpu->arch.cp15, 0x42, sizeof(vcpu->arch.cp15));
+
+	/* Generic chip reset first (so target could override). */
+	reset_coproc_regs(vcpu, cp15_regs, ARRAY_SIZE(cp15_regs));
+
+	table = get_target_table(vcpu->arch.target, &num);
+	reset_coproc_regs(vcpu, table, num);
+
+	for (num = 1; num < NR_CP15_REGS; num++)
+		if (vcpu->arch.cp15[num] == 0x42424242)
+			panic("Didn't reset vcpu->arch.cp15[%zi]", num);
 }
diff --git a/arch/arm/kvm/coproc.h b/arch/arm/kvm/coproc.h
new file mode 100644
index 000000000000..992adfafa2ff
--- /dev/null
+++ b/arch/arm/kvm/coproc.h
@@ -0,0 +1,153 @@
+/*
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Authors: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __ARM_KVM_COPROC_LOCAL_H__
+#define __ARM_KVM_COPROC_LOCAL_H__
+
+struct coproc_params {
+	unsigned long CRn;
+	unsigned long CRm;
+	unsigned long Op1;
+	unsigned long Op2;
+	unsigned long Rt1;
+	unsigned long Rt2;
+	bool is_64bit;
+	bool is_write;
+};
+
+struct coproc_reg {
+	/* MRC/MCR/MRRC/MCRR instruction which accesses it. */
+	unsigned long CRn;
+	unsigned long CRm;
+	unsigned long Op1;
+	unsigned long Op2;
+
+	bool is_64;
+
+	/* Trapped access from guest, if non-NULL. */
+	bool (*access)(struct kvm_vcpu *,
+		       const struct coproc_params *,
+		       const struct coproc_reg *);
+
+	/* Initialization for vcpu. */
+	void (*reset)(struct kvm_vcpu *, const struct coproc_reg *);
+
+	/* Index into vcpu->arch.cp15[], or 0 if we don't need to save it. */
+	unsigned long reg;
+
+	/* Value (usually reset value) */
+	u64 val;
+};
+
+static inline void print_cp_instr(const struct coproc_params *p)
+{
+	/* Look, we even formatted it for you to paste into the table! */
+	if (p->is_64bit) {
+		kvm_pr_unimpl(" { CRm(%2lu), Op1(%2lu), is64, func_%s },\n",
+			      p->CRm, p->Op1, p->is_write ? "write" : "read");
+	} else {
+		kvm_pr_unimpl(" { CRn(%2lu), CRm(%2lu), Op1(%2lu), Op2(%2lu), is32,"
+			      " func_%s },\n",
+			      p->CRn, p->CRm, p->Op1, p->Op2,
+			      p->is_write ? "write" : "read");
+	}
+}
+
+static inline bool ignore_write(struct kvm_vcpu *vcpu,
+				const struct coproc_params *p)
+{
+	return true;
+}
+
+static inline bool read_zero(struct kvm_vcpu *vcpu,
+			     const struct coproc_params *p)
+{
+	*vcpu_reg(vcpu, p->Rt1) = 0;
+	return true;
+}
+
+static inline bool write_to_read_only(struct kvm_vcpu *vcpu,
+				      const struct coproc_params *params)
+{
+	kvm_debug("CP15 write to read-only register at: %08x\n",
+		  *vcpu_pc(vcpu));
+	print_cp_instr(params);
+	return false;
+}
+
+static inline bool read_from_write_only(struct kvm_vcpu *vcpu,
+					const struct coproc_params *params)
+{
+	kvm_debug("CP15 read to write-only register at: %08x\n",
+		  *vcpu_pc(vcpu));
+	print_cp_instr(params);
+	return false;
+}
+
+/* Reset functions */
+static inline void reset_unknown(struct kvm_vcpu *vcpu,
+				 const struct coproc_reg *r)
+{
+	BUG_ON(!r->reg);
+	BUG_ON(r->reg >= ARRAY_SIZE(vcpu->arch.cp15));
+	vcpu->arch.cp15[r->reg] = 0xdecafbad;
+}
+
+static inline void reset_val(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
+{
+	BUG_ON(!r->reg);
+	BUG_ON(r->reg >= ARRAY_SIZE(vcpu->arch.cp15));
+	vcpu->arch.cp15[r->reg] = r->val;
+}
+
+static inline void reset_unknown64(struct kvm_vcpu *vcpu,
+				   const struct coproc_reg *r)
+{
+	BUG_ON(!r->reg);
+	BUG_ON(r->reg + 1 >= ARRAY_SIZE(vcpu->arch.cp15));
+
+	vcpu->arch.cp15[r->reg] = 0xdecafbad;
+	vcpu->arch.cp15[r->reg+1] = 0xd0c0ffee;
+}
+
+static inline int cmp_reg(const struct coproc_reg *i1,
+			  const struct coproc_reg *i2)
+{
+	BUG_ON(i1 == i2);
+	if (!i1)
+		return 1;
+	else if (!i2)
+		return -1;
+	if (i1->CRn != i2->CRn)
+		return i1->CRn - i2->CRn;
+	if (i1->CRm != i2->CRm)
+		return i1->CRm - i2->CRm;
+	if (i1->Op1 != i2->Op1)
+		return i1->Op1 - i2->Op1;
+	return i1->Op2 - i2->Op2;
+}
+
+
+#define CRn(_x)		.CRn = _x
+#define CRm(_x) 	.CRm = _x
+#define Op1(_x) 	.Op1 = _x
+#define Op2(_x) 	.Op2 = _x
+#define is64		.is_64 = true
+#define is32		.is_64 = false
+
+#endif /* __ARM_KVM_COPROC_LOCAL_H__ */
diff --git a/arch/arm/kvm/coproc_a15.c b/arch/arm/kvm/coproc_a15.c
new file mode 100644
index 000000000000..685063a6d0cf
--- /dev/null
+++ b/arch/arm/kvm/coproc_a15.c
@@ -0,0 +1,162 @@
+/*
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Authors: Rusty Russell <rusty@rustcorp.au>
+ *          Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/kvm_host.h>
+#include <asm/cputype.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_host.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_coproc.h>
+#include <linux/init.h>
+
+static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
+{
+	/*
+	 * Compute guest MPIDR:
+	 * (Even if we present only one VCPU to the guest on an SMP
+	 * host we don't set the U bit in the MPIDR, or vice versa, as
+	 * revealing the underlying hardware properties is likely to
+	 * be the best choice).
+	 */
+	vcpu->arch.cp15[c0_MPIDR] = (read_cpuid_mpidr() & ~MPIDR_LEVEL_MASK)
+		| (vcpu->vcpu_id & MPIDR_LEVEL_MASK);
+}
+
+#include "coproc.h"
+
+/* A15 TRM 4.3.28: RO WI */
+static bool access_actlr(struct kvm_vcpu *vcpu,
+			 const struct coproc_params *p,
+			 const struct coproc_reg *r)
+{
+	if (p->is_write)
+		return ignore_write(vcpu, p);
+
+	*vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c1_ACTLR];
+	return true;
+}
+
+/* A15 TRM 4.3.60: R/O. */
+static bool access_cbar(struct kvm_vcpu *vcpu,
+			const struct coproc_params *p,
+			const struct coproc_reg *r)
+{
+	if (p->is_write)
+		return write_to_read_only(vcpu, p);
+	return read_zero(vcpu, p);
+}
+
+/* A15 TRM 4.3.48: R/O WI. */
+static bool access_l2ctlr(struct kvm_vcpu *vcpu,
+			  const struct coproc_params *p,
+			  const struct coproc_reg *r)
+{
+	if (p->is_write)
+		return ignore_write(vcpu, p);
+
+	*vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c9_L2CTLR];
+	return true;
+}
+
+static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
+{
+	u32 l2ctlr, ncores;
+
+	asm volatile("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr));
+	l2ctlr &= ~(3 << 24);
+	ncores = atomic_read(&vcpu->kvm->online_vcpus) - 1;
+	l2ctlr |= (ncores & 3) << 24;
+
+	vcpu->arch.cp15[c9_L2CTLR] = l2ctlr;
+}
+
+static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
+{
+	u32 actlr;
+
+	/* ACTLR contains SMP bit: make sure you create all cpus first! */
+	asm volatile("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr));
+	/* Make the SMP bit consistent with the guest configuration */
+	if (atomic_read(&vcpu->kvm->online_vcpus) > 1)
+		actlr |= 1U << 6;
+	else
+		actlr &= ~(1U << 6);
+
+	vcpu->arch.cp15[c1_ACTLR] = actlr;
+}
+
+/* A15 TRM 4.3.49: R/O WI (even if NSACR.NS_L2ERR, a write of 1 is ignored). */
+static bool access_l2ectlr(struct kvm_vcpu *vcpu,
+			   const struct coproc_params *p,
+			   const struct coproc_reg *r)
+{
+	if (p->is_write)
+		return ignore_write(vcpu, p);
+
+	*vcpu_reg(vcpu, p->Rt1) = 0;
+	return true;
+}
+
+/*
+ * A15-specific CP15 registers.
+ * Important: Must be sorted ascending by CRn, CRM, Op1, Op2
+ */
+static const struct coproc_reg a15_regs[] = {
+	/* MPIDR: we use VMPIDR for guest access. */
+	{ CRn( 0), CRm( 0), Op1( 0), Op2( 5), is32,
+			NULL, reset_mpidr, c0_MPIDR },
+
+	/* SCTLR: swapped by interrupt.S. */
+	{ CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32,
+			NULL, reset_val, c1_SCTLR, 0x00C50078 },
+	/* ACTLR: trapped by HCR.TAC bit. */
+	{ CRn( 1), CRm( 0), Op1( 0), Op2( 1), is32,
+			access_actlr, reset_actlr, c1_ACTLR },
+	/* CPACR: swapped by interrupt.S. */
+	{ CRn( 1), CRm( 0), Op1( 0), Op2( 2), is32,
+			NULL, reset_val, c1_CPACR, 0x00000000 },
+
+	/*
+	 * L2CTLR access (guest wants to know #CPUs).
+	 */
+	{ CRn( 9), CRm( 0), Op1( 1), Op2( 2), is32,
+			access_l2ctlr, reset_l2ctlr, c9_L2CTLR },
+	{ CRn( 9), CRm( 0), Op1( 1), Op2( 3), is32, access_l2ectlr},
+
+	/* The Configuration Base Address Register. */
+	{ CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar},
+};
+
+static struct kvm_coproc_target_table a15_target_table = {
+	.target = KVM_ARM_TARGET_CORTEX_A15,
+	.table = a15_regs,
+	.num = ARRAY_SIZE(a15_regs),
+};
+
+static int __init coproc_a15_init(void)
+{
+	unsigned int i;
+
+	for (i = 1; i < ARRAY_SIZE(a15_regs); i++)
+		BUG_ON(cmp_reg(&a15_regs[i-1],
+			       &a15_regs[i]) >= 0);
+
+	kvm_register_target_coproc_table(&a15_target_table);
+	return 0;
+}
+late_initcall(coproc_a15_init);
diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c
index 3eadc25e95de..d61450ac6665 100644
--- a/arch/arm/kvm/emulate.c
+++ b/arch/arm/kvm/emulate.c
@@ -16,7 +16,13 @@
  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  */
 
+#include <linux/mm.h>
+#include <linux/kvm_host.h>
+#include <asm/kvm_arm.h>
 #include <asm/kvm_emulate.h>
+#include <trace/events/kvm.h>
+
+#include "trace.h"
 
 #define VCPU_NR_MODES		6
 #define VCPU_REG_OFFSET_USR	0
@@ -153,3 +159,215 @@ u32 *vcpu_spsr(struct kvm_vcpu *vcpu)
 		BUG();
 	}
 }
+
+/**
+ * kvm_handle_wfi - handle a wait-for-interrupts instruction executed by a guest
+ * @vcpu:	the vcpu pointer
+ * @run:	the kvm_run structure pointer
+ *
+ * Simply sets the wait_for_interrupts flag on the vcpu structure, which will
+ * halt execution of world-switches and schedule other host processes until
+ * there is an incoming IRQ or FIQ to the VM.
+ */
+int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	trace_kvm_wfi(*vcpu_pc(vcpu));
+	kvm_vcpu_block(vcpu);
+	return 1;
+}
+
+/**
+ * adjust_itstate - adjust ITSTATE when emulating instructions in IT-block
+ * @vcpu:	The VCPU pointer
+ *
+ * When exceptions occur while instructions are executed in Thumb IF-THEN
+ * blocks, the ITSTATE field of the CPSR is not advanved (updated), so we have
+ * to do this little bit of work manually. The fields map like this:
+ *
+ * IT[7:0] -> CPSR[26:25],CPSR[15:10]
+ */
+static void kvm_adjust_itstate(struct kvm_vcpu *vcpu)
+{
+	unsigned long itbits, cond;
+	unsigned long cpsr = *vcpu_cpsr(vcpu);
+	bool is_arm = !(cpsr & PSR_T_BIT);
+
+	BUG_ON(is_arm && (cpsr & PSR_IT_MASK));
+
+	if (!(cpsr & PSR_IT_MASK))
+		return;
+
+	cond = (cpsr & 0xe000) >> 13;
+	itbits = (cpsr & 0x1c00) >> (10 - 2);
+	itbits |= (cpsr & (0x3 << 25)) >> 25;
+
+	/* Perform ITAdvance (see page A-52 in ARM DDI 0406C) */
+	if ((itbits & 0x7) == 0)
+		itbits = cond = 0;
+	else
+		itbits = (itbits << 1) & 0x1f;
+
+	cpsr &= ~PSR_IT_MASK;
+	cpsr |= cond << 13;
+	cpsr |= (itbits & 0x1c) << (10 - 2);
+	cpsr |= (itbits & 0x3) << 25;
+	*vcpu_cpsr(vcpu) = cpsr;
+}
+
+/**
+ * kvm_skip_instr - skip a trapped instruction and proceed to the next
+ * @vcpu: The vcpu pointer
+ */
+void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr)
+{
+	bool is_thumb;
+
+	is_thumb = !!(*vcpu_cpsr(vcpu) & PSR_T_BIT);
+	if (is_thumb && !is_wide_instr)
+		*vcpu_pc(vcpu) += 2;
+	else
+		*vcpu_pc(vcpu) += 4;
+	kvm_adjust_itstate(vcpu);
+}
+
+
+/******************************************************************************
+ * Inject exceptions into the guest
+ */
+
+static u32 exc_vector_base(struct kvm_vcpu *vcpu)
+{
+	u32 sctlr = vcpu->arch.cp15[c1_SCTLR];
+	u32 vbar = vcpu->arch.cp15[c12_VBAR];
+
+	if (sctlr & SCTLR_V)
+		return 0xffff0000;
+	else /* always have security exceptions */
+		return vbar;
+}
+
+/**
+ * kvm_inject_undefined - inject an undefined exception into the guest
+ * @vcpu: The VCPU to receive the undefined exception
+ *
+ * It is assumed that this code is called from the VCPU thread and that the
+ * VCPU therefore is not currently executing guest code.
+ *
+ * Modelled after TakeUndefInstrException() pseudocode.
+ */
+void kvm_inject_undefined(struct kvm_vcpu *vcpu)
+{
+	u32 new_lr_value;
+	u32 new_spsr_value;
+	u32 cpsr = *vcpu_cpsr(vcpu);
+	u32 sctlr = vcpu->arch.cp15[c1_SCTLR];
+	bool is_thumb = (cpsr & PSR_T_BIT);
+	u32 vect_offset = 4;
+	u32 return_offset = (is_thumb) ? 2 : 4;
+
+	new_spsr_value = cpsr;
+	new_lr_value = *vcpu_pc(vcpu) - return_offset;
+
+	*vcpu_cpsr(vcpu) = (cpsr & ~MODE_MASK) | UND_MODE;
+	*vcpu_cpsr(vcpu) |= PSR_I_BIT;
+	*vcpu_cpsr(vcpu) &= ~(PSR_IT_MASK | PSR_J_BIT | PSR_E_BIT | PSR_T_BIT);
+
+	if (sctlr & SCTLR_TE)
+		*vcpu_cpsr(vcpu) |= PSR_T_BIT;
+	if (sctlr & SCTLR_EE)
+		*vcpu_cpsr(vcpu) |= PSR_E_BIT;
+
+	/* Note: These now point to UND banked copies */
+	*vcpu_spsr(vcpu) = cpsr;
+	*vcpu_reg(vcpu, 14) = new_lr_value;
+
+	/* Branch to exception vector */
+	*vcpu_pc(vcpu) = exc_vector_base(vcpu) + vect_offset;
+}
+
+/*
+ * Modelled after TakeDataAbortException() and TakePrefetchAbortException
+ * pseudocode.
+ */
+static void inject_abt(struct kvm_vcpu *vcpu, bool is_pabt, unsigned long addr)
+{
+	u32 new_lr_value;
+	u32 new_spsr_value;
+	u32 cpsr = *vcpu_cpsr(vcpu);
+	u32 sctlr = vcpu->arch.cp15[c1_SCTLR];
+	bool is_thumb = (cpsr & PSR_T_BIT);
+	u32 vect_offset;
+	u32 return_offset = (is_thumb) ? 4 : 0;
+	bool is_lpae;
+
+	new_spsr_value = cpsr;
+	new_lr_value = *vcpu_pc(vcpu) + return_offset;
+
+	*vcpu_cpsr(vcpu) = (cpsr & ~MODE_MASK) | ABT_MODE;
+	*vcpu_cpsr(vcpu) |= PSR_I_BIT | PSR_A_BIT;
+	*vcpu_cpsr(vcpu) &= ~(PSR_IT_MASK | PSR_J_BIT | PSR_E_BIT | PSR_T_BIT);
+
+	if (sctlr & SCTLR_TE)
+		*vcpu_cpsr(vcpu) |= PSR_T_BIT;
+	if (sctlr & SCTLR_EE)
+		*vcpu_cpsr(vcpu) |= PSR_E_BIT;
+
+	/* Note: These now point to ABT banked copies */
+	*vcpu_spsr(vcpu) = cpsr;
+	*vcpu_reg(vcpu, 14) = new_lr_value;
+
+	if (is_pabt)
+		vect_offset = 12;
+	else
+		vect_offset = 16;
+
+	/* Branch to exception vector */
+	*vcpu_pc(vcpu) = exc_vector_base(vcpu) + vect_offset;
+
+	if (is_pabt) {
+		/* Set DFAR and DFSR */
+		vcpu->arch.cp15[c6_IFAR] = addr;
+		is_lpae = (vcpu->arch.cp15[c2_TTBCR] >> 31);
+		/* Always give debug fault for now - should give guest a clue */
+		if (is_lpae)
+			vcpu->arch.cp15[c5_IFSR] = 1 << 9 | 0x22;
+		else
+			vcpu->arch.cp15[c5_IFSR] = 2;
+	} else { /* !iabt */
+		/* Set DFAR and DFSR */
+		vcpu->arch.cp15[c6_DFAR] = addr;
+		is_lpae = (vcpu->arch.cp15[c2_TTBCR] >> 31);
+		/* Always give debug fault for now - should give guest a clue */
+		if (is_lpae)
+			vcpu->arch.cp15[c5_DFSR] = 1 << 9 | 0x22;
+		else
+			vcpu->arch.cp15[c5_DFSR] = 2;
+	}
+
+}
+
+/**
+ * kvm_inject_dabt - inject a data abort into the guest
+ * @vcpu: The VCPU to receive the undefined exception
+ * @addr: The address to report in the DFAR
+ *
+ * It is assumed that this code is called from the VCPU thread and that the
+ * VCPU therefore is not currently executing guest code.
+ */
+void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr)
+{
+	inject_abt(vcpu, false, addr);
+}
+
+/**
+ * kvm_inject_pabt - inject a prefetch abort into the guest
+ * @vcpu: The VCPU to receive the undefined exception
+ * @addr: The address to report in the DFAR
+ *
+ * It is assumed that this code is called from the VCPU thread and that the
+ * VCPU therefore is not currently executing guest code.
+ */
+void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr)
+{
+	inject_abt(vcpu, true, addr);
+}
diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h
index 105d1f79909a..022305b38c27 100644
--- a/arch/arm/kvm/trace.h
+++ b/arch/arm/kvm/trace.h
@@ -64,6 +64,51 @@ TRACE_EVENT(kvm_irq_line,
 		  __entry->type, __entry->vcpu_idx, __entry->irq_num, __entry->level)
 );
 
+/* Architecturally implementation defined CP15 register access */
+TRACE_EVENT(kvm_emulate_cp15_imp,
+	TP_PROTO(unsigned long Op1, unsigned long Rt1, unsigned long CRn,
+		 unsigned long CRm, unsigned long Op2, bool is_write),
+	TP_ARGS(Op1, Rt1, CRn, CRm, Op2, is_write),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	Op1		)
+		__field(	unsigned int,	Rt1		)
+		__field(	unsigned int,	CRn		)
+		__field(	unsigned int,	CRm		)
+		__field(	unsigned int,	Op2		)
+		__field(	bool,		is_write	)
+	),
+
+	TP_fast_assign(
+		__entry->is_write		= is_write;
+		__entry->Op1			= Op1;
+		__entry->Rt1			= Rt1;
+		__entry->CRn			= CRn;
+		__entry->CRm			= CRm;
+		__entry->Op2			= Op2;
+	),
+
+	TP_printk("Implementation defined CP15: %s\tp15, %u, r%u, c%u, c%u, %u",
+			(__entry->is_write) ? "mcr" : "mrc",
+			__entry->Op1, __entry->Rt1, __entry->CRn,
+			__entry->CRm, __entry->Op2)
+);
+
+TRACE_EVENT(kvm_wfi,
+	TP_PROTO(unsigned long vcpu_pc),
+	TP_ARGS(vcpu_pc),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	vcpu_pc		)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_pc		= vcpu_pc;
+	),
+
+	TP_printk("guest executed wfi at: 0x%08lx", __entry->vcpu_pc)
+);
+
 TRACE_EVENT(kvm_unmap_hva,
 	TP_PROTO(unsigned long hva),
 	TP_ARGS(hva),
@@ -112,6 +157,26 @@ TRACE_EVENT(kvm_set_spte_hva,
 	TP_printk("mmu notifier set pte hva: %#08lx", __entry->hva)
 );
 
+TRACE_EVENT(kvm_hvc,
+	TP_PROTO(unsigned long vcpu_pc, unsigned long r0, unsigned long imm),
+	TP_ARGS(vcpu_pc, r0, imm),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	vcpu_pc		)
+		__field(	unsigned long,	r0		)
+		__field(	unsigned long,	imm		)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_pc		= vcpu_pc;
+		__entry->r0		= r0;
+		__entry->imm		= imm;
+	),
+
+	TP_printk("HVC at 0x%08lx (r0: 0x%08lx, imm: 0x%lx",
+		  __entry->vcpu_pc, __entry->r0, __entry->imm)
+);
+
 #endif /* _TRACE_KVM_H */
 
 #undef TRACE_INCLUDE_PATH
-- 
cgit v1.2.3


From 45e96ea6b369539a37040a8df9c59a39f073d9d6 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <c.dall@virtualopensystems.com>
Date: Sun, 20 Jan 2013 18:43:58 -0500
Subject: KVM: ARM: Handle I/O aborts

When the guest accesses I/O memory this will create data abort
exceptions and they are handled by decoding the HSR information
(physical address, read/write, length, register) and forwarding reads
and writes to QEMU which performs the device emulation.

Certain classes of load/store operations do not support the syndrome
information provided in the HSR.  We don't support decoding these (patches
are available elsewhere), so we report an error to user space in this case.

This requires changing the general flow somewhat since new calls to run
the VCPU must check if there's a pending MMIO load and perform the write
after userspace has made the data available.

Reviewed-by: Will Deacon <will.deacon@arm.com>
Reviewed-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
---
 arch/arm/include/asm/kvm_arm.h     |   3 +
 arch/arm/include/asm/kvm_emulate.h |   6 ++
 arch/arm/include/asm/kvm_host.h    |   4 +
 arch/arm/include/asm/kvm_mmio.h    |  56 ++++++++++++++
 arch/arm/kvm/Makefile              |   2 +-
 arch/arm/kvm/arm.c                 |   6 ++
 arch/arm/kvm/mmio.c                | 153 +++++++++++++++++++++++++++++++++++++
 arch/arm/kvm/mmu.c                 |   7 +-
 arch/arm/kvm/trace.h               |  21 +++++
 9 files changed, 255 insertions(+), 3 deletions(-)
 create mode 100644 arch/arm/include/asm/kvm_mmio.h
 create mode 100644 arch/arm/kvm/mmio.c

(limited to 'arch/arm/kvm/arm.c')

diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index 9a34c20d41ec..7c3d813e15df 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -173,8 +173,11 @@
 #define HSR_ISS		(HSR_IL - 1)
 #define HSR_ISV_SHIFT	(24)
 #define HSR_ISV		(1U << HSR_ISV_SHIFT)
+#define HSR_SRT_SHIFT	(16)
+#define HSR_SRT_MASK	(0xf << HSR_SRT_SHIFT)
 #define HSR_FSC		(0x3f)
 #define HSR_FSC_TYPE	(0x3c)
+#define HSR_SSE		(1 << 21)
 #define HSR_WNR		(1 << 6)
 #define HSR_CV_SHIFT	(24)
 #define HSR_CV		(1U << HSR_CV_SHIFT)
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 01a755b80632..4c1a073280be 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -21,6 +21,7 @@
 
 #include <linux/kvm_host.h>
 #include <asm/kvm_asm.h>
+#include <asm/kvm_mmio.h>
 
 u32 *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num);
 u32 *vcpu_spsr(struct kvm_vcpu *vcpu);
@@ -53,4 +54,9 @@ static inline bool vcpu_mode_priv(struct kvm_vcpu *vcpu)
 	return cpsr_mode > USR_MODE;;
 }
 
+static inline bool kvm_vcpu_reg_is_pc(struct kvm_vcpu *vcpu, int reg)
+{
+	return reg == 15;
+}
+
 #endif /* __ARM_KVM_EMULATE_H__ */
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index ed79043d7921..e65fc967a71d 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -21,6 +21,7 @@
 
 #include <asm/kvm.h>
 #include <asm/kvm_asm.h>
+#include <asm/kvm_mmio.h>
 #include <asm/fpstate.h>
 
 #define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS
@@ -99,6 +100,9 @@ struct kvm_vcpu_arch {
 	int last_pcpu;
 	cpumask_t require_dcache_flush;
 
+	/* IO related fields */
+	struct kvm_decode mmio_decode;
+
 	/* Interrupt related fields */
 	u32 irq_lines;		/* IRQ and FIQ levels */
 
diff --git a/arch/arm/include/asm/kvm_mmio.h b/arch/arm/include/asm/kvm_mmio.h
new file mode 100644
index 000000000000..adcc0d7d3175
--- /dev/null
+++ b/arch/arm/include/asm/kvm_mmio.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __ARM_KVM_MMIO_H__
+#define __ARM_KVM_MMIO_H__
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_arm.h>
+
+struct kvm_decode {
+	unsigned long rt;
+	bool sign_extend;
+};
+
+/*
+ * The in-kernel MMIO emulation code wants to use a copy of run->mmio,
+ * which is an anonymous type. Use our own type instead.
+ */
+struct kvm_exit_mmio {
+	phys_addr_t	phys_addr;
+	u8		data[8];
+	u32		len;
+	bool		is_write;
+};
+
+static inline void kvm_prepare_mmio(struct kvm_run *run,
+				    struct kvm_exit_mmio *mmio)
+{
+	run->mmio.phys_addr	= mmio->phys_addr;
+	run->mmio.len		= mmio->len;
+	run->mmio.is_write	= mmio->is_write;
+	memcpy(run->mmio.data, mmio->data, mmio->len);
+	run->exit_reason	= KVM_EXIT_MMIO;
+}
+
+int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
+		 phys_addr_t fault_ipa);
+
+#endif	/* __ARM_KVM_MMIO_H__ */
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index 88edce6c97d4..1e45cd97a7fc 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -18,4 +18,4 @@ kvm-arm-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
 
 obj-y += kvm-arm.o init.o interrupts.o
 obj-y += arm.o guest.o mmu.o emulate.o reset.o
-obj-y += coproc.o coproc_a15.o
+obj-y += coproc.o coproc_a15.o mmio.o
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index be06c5de51e3..8680b9ffd2ae 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -616,6 +616,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	if (ret)
 		return ret;
 
+	if (run->exit_reason == KVM_EXIT_MMIO) {
+		ret = kvm_handle_mmio_return(vcpu, vcpu->run);
+		if (ret)
+			return ret;
+	}
+
 	if (vcpu->sigset_active)
 		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
 
diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
new file mode 100644
index 000000000000..0144baf82904
--- /dev/null
+++ b/arch/arm/kvm/mmio.c
@@ -0,0 +1,153 @@
+/*
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_mmio.h>
+#include <asm/kvm_emulate.h>
+#include <trace/events/kvm.h>
+
+#include "trace.h"
+
+/**
+ * kvm_handle_mmio_return -- Handle MMIO loads after user space emulation
+ * @vcpu: The VCPU pointer
+ * @run:  The VCPU run struct containing the mmio data
+ *
+ * This should only be called after returning from userspace for MMIO load
+ * emulation.
+ */
+int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	__u32 *dest;
+	unsigned int len;
+	int mask;
+
+	if (!run->mmio.is_write) {
+		dest = vcpu_reg(vcpu, vcpu->arch.mmio_decode.rt);
+		memset(dest, 0, sizeof(int));
+
+		len = run->mmio.len;
+		if (len > 4)
+			return -EINVAL;
+
+		memcpy(dest, run->mmio.data, len);
+
+		trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
+				*((u64 *)run->mmio.data));
+
+		if (vcpu->arch.mmio_decode.sign_extend && len < 4) {
+			mask = 1U << ((len * 8) - 1);
+			*dest = (*dest ^ mask) - mask;
+		}
+	}
+
+	return 0;
+}
+
+static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
+		      struct kvm_exit_mmio *mmio)
+{
+	unsigned long rt, len;
+	bool is_write, sign_extend;
+
+	if ((vcpu->arch.hsr >> 8) & 1) {
+		/* cache operation on I/O addr, tell guest unsupported */
+		kvm_inject_dabt(vcpu, vcpu->arch.hxfar);
+		return 1;
+	}
+
+	if ((vcpu->arch.hsr >> 7) & 1) {
+		/* page table accesses IO mem: tell guest to fix its TTBR */
+		kvm_inject_dabt(vcpu, vcpu->arch.hxfar);
+		return 1;
+	}
+
+	switch ((vcpu->arch.hsr >> 22) & 0x3) {
+	case 0:
+		len = 1;
+		break;
+	case 1:
+		len = 2;
+		break;
+	case 2:
+		len = 4;
+		break;
+	default:
+		kvm_err("Hardware is weird: SAS 0b11 is reserved\n");
+		return -EFAULT;
+	}
+
+	is_write = vcpu->arch.hsr & HSR_WNR;
+	sign_extend = vcpu->arch.hsr & HSR_SSE;
+	rt = (vcpu->arch.hsr & HSR_SRT_MASK) >> HSR_SRT_SHIFT;
+
+	if (kvm_vcpu_reg_is_pc(vcpu, rt)) {
+		/* IO memory trying to read/write pc */
+		kvm_inject_pabt(vcpu, vcpu->arch.hxfar);
+		return 1;
+	}
+
+	mmio->is_write = is_write;
+	mmio->phys_addr = fault_ipa;
+	mmio->len = len;
+	vcpu->arch.mmio_decode.sign_extend = sign_extend;
+	vcpu->arch.mmio_decode.rt = rt;
+
+	/*
+	 * The MMIO instruction is emulated and should not be re-executed
+	 * in the guest.
+	 */
+	kvm_skip_instr(vcpu, (vcpu->arch.hsr >> 25) & 1);
+	return 0;
+}
+
+int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
+		 phys_addr_t fault_ipa)
+{
+	struct kvm_exit_mmio mmio;
+	unsigned long rt;
+	int ret;
+
+	/*
+	 * Prepare MMIO operation. First stash it in a private
+	 * structure that we can use for in-kernel emulation. If the
+	 * kernel can't handle it, copy it into run->mmio and let user
+	 * space do its magic.
+	 */
+
+	if (vcpu->arch.hsr & HSR_ISV) {
+		ret = decode_hsr(vcpu, fault_ipa, &mmio);
+		if (ret)
+			return ret;
+	} else {
+		kvm_err("load/store instruction decoding not implemented\n");
+		return -ENOSYS;
+	}
+
+	rt = vcpu->arch.mmio_decode.rt;
+	trace_kvm_mmio((mmio.is_write) ? KVM_TRACE_MMIO_WRITE :
+					 KVM_TRACE_MMIO_READ_UNSATISFIED,
+			mmio.len, fault_ipa,
+			(mmio.is_write) ? *vcpu_reg(vcpu, rt) : 0);
+
+	if (mmio.is_write)
+		memcpy(mmio.data, vcpu_reg(vcpu, rt), mmio.len);
+
+	kvm_prepare_mmio(run, &mmio);
+	return 0;
+}
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index a4b7b0f900e5..f30e13163a96 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -19,11 +19,13 @@
 #include <linux/mman.h>
 #include <linux/kvm_host.h>
 #include <linux/io.h>
+#include <trace/events/kvm.h>
 #include <asm/idmap.h>
 #include <asm/pgalloc.h>
 #include <asm/cacheflush.h>
 #include <asm/kvm_arm.h>
 #include <asm/kvm_mmu.h>
+#include <asm/kvm_mmio.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_emulate.h>
 #include <asm/mach/map.h>
@@ -624,8 +626,9 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 			goto out_unlock;
 		}
 
-		kvm_pr_unimpl("I/O address abort...");
-		ret = 0;
+		/* Adjust page offset */
+		fault_ipa |= vcpu->arch.hxfar & ~PAGE_MASK;
+		ret = io_mem_abort(vcpu, run, fault_ipa);
 		goto out_unlock;
 	}
 
diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h
index 624b5a4e8fad..a8e73ed5ad5b 100644
--- a/arch/arm/kvm/trace.h
+++ b/arch/arm/kvm/trace.h
@@ -90,6 +90,27 @@ TRACE_EVENT(kvm_irq_line,
 		  __entry->type, __entry->vcpu_idx, __entry->irq_num, __entry->level)
 );
 
+TRACE_EVENT(kvm_mmio_emulate,
+	TP_PROTO(unsigned long vcpu_pc, unsigned long instr,
+		 unsigned long cpsr),
+	TP_ARGS(vcpu_pc, instr, cpsr),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	vcpu_pc		)
+		__field(	unsigned long,	instr		)
+		__field(	unsigned long,	cpsr		)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_pc		= vcpu_pc;
+		__entry->instr			= instr;
+		__entry->cpsr			= cpsr;
+	),
+
+	TP_printk("Emulate MMIO at: 0x%08lx (instr: %08lx, cpsr: %08lx)",
+		  __entry->vcpu_pc, __entry->instr, __entry->cpsr)
+);
+
 /* Architecturally implementation defined CP15 register access */
 TRACE_EVENT(kvm_emulate_cp15_imp,
 	TP_PROTO(unsigned long Op1, unsigned long Rt1, unsigned long CRn,
-- 
cgit v1.2.3


From aa024c2f35a07cc32e48c5f62a5807be01c09249 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Sun, 20 Jan 2013 18:28:13 -0500
Subject: KVM: ARM: Power State Coordination Interface implementation

Implement the PSCI specification (ARM DEN 0022A) to control
virtual CPUs being "powered" on or off.

PSCI/KVM is detected using the KVM_CAP_ARM_PSCI capability.

A virtual CPU can now be initialized in a "powered off" state,
using the KVM_ARM_VCPU_POWER_OFF feature flag.

The guest can use either SMC or HVC to execute a PSCI function.

Reviewed-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
---
 Documentation/virtual/kvm/api.txt  |   4 ++
 arch/arm/include/asm/kvm_emulate.h |  10 ++++
 arch/arm/include/asm/kvm_host.h    |   5 +-
 arch/arm/include/asm/kvm_psci.h    |  23 ++++++++
 arch/arm/include/uapi/asm/kvm.h    |  16 ++++++
 arch/arm/kvm/Makefile              |   2 +-
 arch/arm/kvm/arm.c                 |  30 ++++++++++-
 arch/arm/kvm/psci.c                | 108 +++++++++++++++++++++++++++++++++++++
 include/uapi/linux/kvm.h           |   1 +
 9 files changed, 195 insertions(+), 4 deletions(-)
 create mode 100644 arch/arm/include/asm/kvm_psci.h
 create mode 100644 arch/arm/kvm/psci.c

(limited to 'arch/arm/kvm/arm.c')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 38066a7a74e1..c25439a58274 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2185,6 +2185,10 @@ return ENOEXEC for that vcpu.
 Note that because some registers reflect machine topology, all vcpus
 should be created before this ioctl is invoked.
 
+Possible features:
+	- KVM_ARM_VCPU_POWER_OFF: Starts the CPU in a power-off state.
+	  Depends on KVM_CAP_ARM_PSCI.
+
 
 4.78 KVM_GET_REG_LIST
 
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 4c1a073280be..fd611996bfb5 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -32,6 +32,11 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu);
 void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
 void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
 
+static inline bool vcpu_mode_is_32bit(struct kvm_vcpu *vcpu)
+{
+	return 1;
+}
+
 static inline u32 *vcpu_pc(struct kvm_vcpu *vcpu)
 {
 	return (u32 *)&vcpu->arch.regs.usr_regs.ARM_pc;
@@ -42,6 +47,11 @@ static inline u32 *vcpu_cpsr(struct kvm_vcpu *vcpu)
 	return (u32 *)&vcpu->arch.regs.usr_regs.ARM_cpsr;
 }
 
+static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu)
+{
+	*vcpu_cpsr(vcpu) |= PSR_T_BIT;
+}
+
 static inline bool mode_has_spsr(struct kvm_vcpu *vcpu)
 {
 	unsigned long cpsr_mode = vcpu->arch.regs.usr_regs.ARM_cpsr & MODE_MASK;
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index e65fc967a71d..98b4d1a72923 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -30,7 +30,7 @@
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 #define KVM_HAVE_ONE_REG
 
-#define KVM_VCPU_MAX_FEATURES 0
+#define KVM_VCPU_MAX_FEATURES 1
 
 /* We don't currently support large pages. */
 #define KVM_HPAGE_GFN_SHIFT(x)	0
@@ -100,6 +100,9 @@ struct kvm_vcpu_arch {
 	int last_pcpu;
 	cpumask_t require_dcache_flush;
 
+	/* Don't run the guest on this vcpu */
+	bool pause;
+
 	/* IO related fields */
 	struct kvm_decode mmio_decode;
 
diff --git a/arch/arm/include/asm/kvm_psci.h b/arch/arm/include/asm/kvm_psci.h
new file mode 100644
index 000000000000..9a83d98bf170
--- /dev/null
+++ b/arch/arm/include/asm/kvm_psci.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2012 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM_KVM_PSCI_H__
+#define __ARM_KVM_PSCI_H__
+
+bool kvm_psci_call(struct kvm_vcpu *vcpu);
+
+#endif /* __ARM_KVM_PSCI_H__ */
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index bbb6b2328004..3303ff5adbf3 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -65,6 +65,8 @@ struct kvm_regs {
 #define KVM_ARM_TARGET_CORTEX_A15	0
 #define KVM_ARM_NUM_TARGETS		1
 
+#define KVM_ARM_VCPU_POWER_OFF		0 /* CPU is started in OFF state */
+
 struct kvm_vcpu_init {
 	__u32 target;
 	__u32 features[7];
@@ -145,4 +147,18 @@ struct kvm_arch_memory_slot {
 /* Highest supported SPI, from VGIC_NR_IRQS */
 #define KVM_ARM_IRQ_GIC_MAX		127
 
+/* PSCI interface */
+#define KVM_PSCI_FN_BASE		0x95c1ba5e
+#define KVM_PSCI_FN(n)			(KVM_PSCI_FN_BASE + (n))
+
+#define KVM_PSCI_FN_CPU_SUSPEND		KVM_PSCI_FN(0)
+#define KVM_PSCI_FN_CPU_OFF		KVM_PSCI_FN(1)
+#define KVM_PSCI_FN_CPU_ON		KVM_PSCI_FN(2)
+#define KVM_PSCI_FN_MIGRATE		KVM_PSCI_FN(3)
+
+#define KVM_PSCI_RET_SUCCESS		0
+#define KVM_PSCI_RET_NI			((unsigned long)-1)
+#define KVM_PSCI_RET_INVAL		((unsigned long)-2)
+#define KVM_PSCI_RET_DENIED		((unsigned long)-3)
+
 #endif /* __ARM_KVM_H__ */
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index 1e45cd97a7fc..ea27987bd07f 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -18,4 +18,4 @@ kvm-arm-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
 
 obj-y += kvm-arm.o init.o interrupts.o
 obj-y += arm.o guest.o mmu.o emulate.o reset.o
-obj-y += coproc.o coproc_a15.o mmio.o
+obj-y += coproc.o coproc_a15.o mmio.o psci.o
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 8680b9ffd2ae..2d30e3afdaf9 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -43,6 +43,7 @@
 #include <asm/kvm_mmu.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_coproc.h>
+#include <asm/kvm_psci.h>
 #include <asm/opcodes.h>
 
 #ifdef REQUIRES_VIRT
@@ -160,6 +161,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_SYNC_MMU:
 	case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
 	case KVM_CAP_ONE_REG:
+	case KVM_CAP_ARM_PSCI:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
@@ -443,14 +445,18 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	trace_kvm_hvc(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0),
 		      vcpu->arch.hsr & HSR_HVC_IMM_MASK);
 
+	if (kvm_psci_call(vcpu))
+		return 1;
+
 	kvm_inject_undefined(vcpu);
 	return 1;
 }
 
 static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
-	/* We don't support SMC; don't do that. */
-	kvm_debug("smc: at %08x", *vcpu_pc(vcpu));
+	if (kvm_psci_call(vcpu))
+		return 1;
+
 	kvm_inject_undefined(vcpu);
 	return 1;
 }
@@ -589,9 +595,26 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
 		return 0;
 
 	vcpu->arch.has_run_once = true;
+
+	/*
+	 * Handle the "start in power-off" case by calling into the
+	 * PSCI code.
+	 */
+	if (test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) {
+		*vcpu_reg(vcpu, 0) = KVM_PSCI_FN_CPU_OFF;
+		kvm_psci_call(vcpu);
+	}
+
 	return 0;
 }
 
+static void vcpu_pause(struct kvm_vcpu *vcpu)
+{
+	wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu);
+
+	wait_event_interruptible(*wq, !vcpu->arch.pause);
+}
+
 /**
  * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code
  * @vcpu:	The VCPU pointer
@@ -635,6 +658,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
 		update_vttbr(vcpu->kvm);
 
+		if (vcpu->arch.pause)
+			vcpu_pause(vcpu);
+
 		local_irq_disable();
 
 		/*
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
new file mode 100644
index 000000000000..7ee5bb7a3667
--- /dev/null
+++ b/arch/arm/kvm/psci.c
@@ -0,0 +1,108 @@
+/*
+ * Copyright (C) 2012 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/wait.h>
+
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_psci.h>
+
+/*
+ * This is an implementation of the Power State Coordination Interface
+ * as described in ARM document number ARM DEN 0022A.
+ */
+
+static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.pause = true;
+}
+
+static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
+{
+	struct kvm *kvm = source_vcpu->kvm;
+	struct kvm_vcpu *vcpu;
+	wait_queue_head_t *wq;
+	unsigned long cpu_id;
+	phys_addr_t target_pc;
+
+	cpu_id = *vcpu_reg(source_vcpu, 1);
+	if (vcpu_mode_is_32bit(source_vcpu))
+		cpu_id &= ~((u32) 0);
+
+	if (cpu_id >= atomic_read(&kvm->online_vcpus))
+		return KVM_PSCI_RET_INVAL;
+
+	target_pc = *vcpu_reg(source_vcpu, 2);
+
+	vcpu = kvm_get_vcpu(kvm, cpu_id);
+
+	wq = kvm_arch_vcpu_wq(vcpu);
+	if (!waitqueue_active(wq))
+		return KVM_PSCI_RET_INVAL;
+
+	kvm_reset_vcpu(vcpu);
+
+	/* Gracefully handle Thumb2 entry point */
+	if (vcpu_mode_is_32bit(vcpu) && (target_pc & 1)) {
+		target_pc &= ~((phys_addr_t) 1);
+		vcpu_set_thumb(vcpu);
+	}
+
+	*vcpu_pc(vcpu) = target_pc;
+	vcpu->arch.pause = false;
+	smp_mb();		/* Make sure the above is visible */
+
+	wake_up_interruptible(wq);
+
+	return KVM_PSCI_RET_SUCCESS;
+}
+
+/**
+ * kvm_psci_call - handle PSCI call if r0 value is in range
+ * @vcpu: Pointer to the VCPU struct
+ *
+ * Handle PSCI calls from guests through traps from HVC or SMC instructions.
+ * The calling convention is similar to SMC calls to the secure world where
+ * the function number is placed in r0 and this function returns true if the
+ * function number specified in r0 is withing the PSCI range, and false
+ * otherwise.
+ */
+bool kvm_psci_call(struct kvm_vcpu *vcpu)
+{
+	unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0);
+	unsigned long val;
+
+	switch (psci_fn) {
+	case KVM_PSCI_FN_CPU_OFF:
+		kvm_psci_vcpu_off(vcpu);
+		val = KVM_PSCI_RET_SUCCESS;
+		break;
+	case KVM_PSCI_FN_CPU_ON:
+		val = kvm_psci_vcpu_on(vcpu);
+		break;
+	case KVM_PSCI_FN_CPU_SUSPEND:
+	case KVM_PSCI_FN_MIGRATE:
+		val = KVM_PSCI_RET_NI;
+		break;
+
+	default:
+		return false;
+	}
+
+	*vcpu_reg(vcpu, 0) = val;
+	return true;
+}
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index dc63665e73ad..7f2360a46fc2 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -636,6 +636,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_IRQFD_RESAMPLE 82
 #define KVM_CAP_PPC_BOOKE_WATCHDOG 83
 #define KVM_CAP_PPC_HTAB_FD 84
+#define KVM_CAP_ARM_PSCI 87
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
cgit v1.2.3


From 3401d54696f992edf036f00f46c8c399d1b75c2a Mon Sep 17 00:00:00 2001
From: Christoffer Dall <c.dall@virtualopensystems.com>
Date: Wed, 23 Jan 2013 13:18:04 -0500
Subject: KVM: ARM: Introduce KVM_ARM_SET_DEVICE_ADDR ioctl

On ARM some bits are specific to the model being emulated for the guest and
user space needs a way to tell the kernel about those bits.  An example is mmio
device base addresses, where KVM must know the base address for a given device
to properly emulate mmio accesses within a certain address range or directly
map a device with virtualiation extensions into the guest address space.

We make this API ARM-specific as we haven't yet reached a consensus for a
generic API for all KVM architectures that will allow us to do something like
this.

Reviewed-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 Documentation/virtual/kvm/api.txt | 37 +++++++++++++++++++++++++++++++++++++
 arch/arm/include/uapi/asm/kvm.h   | 13 +++++++++++++
 arch/arm/kvm/arm.c                | 23 ++++++++++++++++++++++-
 include/uapi/linux/kvm.h          |  8 ++++++++
 4 files changed, 80 insertions(+), 1 deletion(-)

(limited to 'arch/arm/kvm/arm.c')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index c25439a58274..4505f869e450 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2210,6 +2210,43 @@ This ioctl returns the guest registers that are supported for the
 KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.
 
 
+4.80 KVM_ARM_SET_DEVICE_ADDR
+
+Capability: KVM_CAP_ARM_SET_DEVICE_ADDR
+Architectures: arm
+Type: vm ioctl
+Parameters: struct kvm_arm_device_address (in)
+Returns: 0 on success, -1 on error
+Errors:
+  ENODEV: The device id is unknown
+  ENXIO:  Device not supported on current system
+  EEXIST: Address already set
+  E2BIG:  Address outside guest physical address space
+
+struct kvm_arm_device_addr {
+	__u64 id;
+	__u64 addr;
+};
+
+Specify a device address in the guest's physical address space where guests
+can access emulated or directly exposed devices, which the host kernel needs
+to know about. The id field is an architecture specific identifier for a
+specific device.
+
+ARM divides the id field into two parts, a device id and an address type id
+specific to the individual device.
+
+  bits:  | 63        ...       32 | 31    ...    16 | 15    ...    0 |
+  field: |        0x00000000      |     device id   |  addr type id  |
+
+ARM currently only require this when using the in-kernel GIC support for the
+hardware VGIC features, using KVM_ARM_DEVICE_VGIC_V2 as the device id.  When
+setting the base address for the guest's mapping of the VGIC virtual CPU
+and distributor interface, the ioctl must be called after calling
+KVM_CREATE_IRQCHIP, but before calling KVM_RUN on any of the VCPUs.  Calling
+this ioctl twice for any of the base addresses will return -EEXIST.
+
+
 5. The kvm_run structure
 ------------------------
 
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index 3303ff5adbf3..346ac3f4a2b8 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -65,6 +65,19 @@ struct kvm_regs {
 #define KVM_ARM_TARGET_CORTEX_A15	0
 #define KVM_ARM_NUM_TARGETS		1
 
+/* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */
+#define KVM_ARM_DEVICE_TYPE_SHIFT	0
+#define KVM_ARM_DEVICE_TYPE_MASK	(0xffff << KVM_ARM_DEVICE_TYPE_SHIFT)
+#define KVM_ARM_DEVICE_ID_SHIFT		16
+#define KVM_ARM_DEVICE_ID_MASK		(0xffff << KVM_ARM_DEVICE_ID_SHIFT)
+
+/* Supported device IDs */
+#define KVM_ARM_DEVICE_VGIC_V2		0
+
+/* Supported VGIC address types  */
+#define KVM_VGIC_V2_ADDR_TYPE_DIST	0
+#define KVM_VGIC_V2_ADDR_TYPE_CPU	1
+
 #define KVM_ARM_VCPU_POWER_OFF		0 /* CPU is started in OFF state */
 
 struct kvm_vcpu_init {
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 2d30e3afdaf9..523f77a44e44 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -167,6 +167,8 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_COALESCED_MMIO:
 		r = KVM_COALESCED_MMIO_PAGE_OFFSET;
 		break;
+	case KVM_CAP_ARM_SET_DEVICE_ADDR:
+		r = 1;
 	case KVM_CAP_NR_VCPUS:
 		r = num_online_cpus();
 		break;
@@ -827,10 +829,29 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
 	return -EINVAL;
 }
 
+static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
+					struct kvm_arm_device_addr *dev_addr)
+{
+	return -ENODEV;
+}
+
 long kvm_arch_vm_ioctl(struct file *filp,
 		       unsigned int ioctl, unsigned long arg)
 {
-	return -EINVAL;
+	struct kvm *kvm = filp->private_data;
+	void __user *argp = (void __user *)arg;
+
+	switch (ioctl) {
+	case KVM_ARM_SET_DEVICE_ADDR: {
+		struct kvm_arm_device_addr dev_addr;
+
+		if (copy_from_user(&dev_addr, argp, sizeof(dev_addr)))
+			return -EFAULT;
+		return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr);
+	}
+	default:
+		return -EINVAL;
+	}
 }
 
 static void cpu_init_hyp_mode(void *vector)
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 7f2360a46fc2..c70577cf67bc 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -637,6 +637,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_PPC_BOOKE_WATCHDOG 83
 #define KVM_CAP_PPC_HTAB_FD 84
 #define KVM_CAP_ARM_PSCI 87
+#define KVM_CAP_ARM_SET_DEVICE_ADDR 88
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -784,6 +785,11 @@ struct kvm_msi {
 	__u8  pad[16];
 };
 
+struct kvm_arm_device_addr {
+	__u64 id;
+	__u64 addr;
+};
+
 /*
  * ioctls for VM fds
  */
@@ -869,6 +875,8 @@ struct kvm_s390_ucas_mapping {
 #define KVM_ALLOCATE_RMA	  _IOR(KVMIO,  0xa9, struct kvm_allocate_rma)
 /* Available with KVM_CAP_PPC_HTAB_FD */
 #define KVM_PPC_GET_HTAB_FD	  _IOW(KVMIO,  0xaa, struct kvm_get_htab_fd)
+/* Available with KVM_CAP_ARM_SET_DEVICE_ADDR */
+#define KVM_ARM_SET_DEVICE_ADDR	  _IOW(KVMIO,  0xab, struct kvm_arm_device_addr)
 
 /*
  * ioctls for vcpu fds
-- 
cgit v1.2.3


From 1638a12d4ee519ed397f4b9e6c088faed155d164 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 21 Jan 2013 19:36:11 -0500
Subject: ARM: KVM: Keep track of currently running vcpus

When an interrupt occurs for the guest, it is sometimes necessary
to find out which vcpu was running at that point.

Keep track of which vcpu is being run in kvm_arch_vcpu_ioctl_run(),
and allow the data to be retrieved using either:
- kvm_arm_get_running_vcpu(): returns the vcpu running at this point
  on the current CPU. Can only be used in a non-preemptible context.
- kvm_arm_get_running_vcpus(): returns the per-CPU variable holding
  the running vcpus, usable for per-CPU interrupts.

Reviewed-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm/include/asm/kvm_host.h | 10 ++++++++++
 arch/arm/kvm/arm.c              | 30 ++++++++++++++++++++++++++++++
 2 files changed, 40 insertions(+)

(limited to 'arch/arm/kvm/arm.c')

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 98b4d1a72923..fc161de21ea2 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -158,4 +158,14 @@ static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
 {
 	return 0;
 }
+
+struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
+struct kvm_vcpu __percpu **kvm_get_running_vcpus(void);
+
+int kvm_arm_copy_coproc_indices(struct kvm_vcpu *vcpu, u64 __user *uindices);
+unsigned long kvm_arm_num_coproc_regs(struct kvm_vcpu *vcpu);
+struct kvm_one_reg;
+int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
+int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
+
 #endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 523f77a44e44..0cf144f95945 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -54,11 +54,38 @@ static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
 static struct vfp_hard_struct __percpu *kvm_host_vfp_state;
 static unsigned long hyp_default_vectors;
 
+/* Per-CPU variable containing the currently running vcpu. */
+static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu);
+
 /* The VMID used in the VTTBR */
 static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
 static u8 kvm_next_vmid;
 static DEFINE_SPINLOCK(kvm_vmid_lock);
 
+static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)
+{
+	BUG_ON(preemptible());
+	__get_cpu_var(kvm_arm_running_vcpu) = vcpu;
+}
+
+/**
+ * kvm_arm_get_running_vcpu - get the vcpu running on the current CPU.
+ * Must be called from non-preemptible context
+ */
+struct kvm_vcpu *kvm_arm_get_running_vcpu(void)
+{
+	BUG_ON(preemptible());
+	return __get_cpu_var(kvm_arm_running_vcpu);
+}
+
+/**
+ * kvm_arm_get_running_vcpus - get the per-CPU array of currently running vcpus.
+ */
+struct kvm_vcpu __percpu **kvm_get_running_vcpus(void)
+{
+	return &kvm_arm_running_vcpu;
+}
+
 int kvm_arch_hardware_enable(void *garbage)
 {
 	return 0;
@@ -310,10 +337,13 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	 */
 	if (cpumask_test_and_clear_cpu(cpu, &vcpu->arch.require_dcache_flush))
 		flush_cache_all(); /* We'd really want v7_flush_dcache_all() */
+
+	kvm_arm_set_running_vcpu(vcpu);
 }
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
+	kvm_arm_set_running_vcpu(NULL);
 }
 
 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
-- 
cgit v1.2.3


From 1a89dd9113badd7487313410a5f2e09b2944f92b Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 21 Jan 2013 19:36:12 -0500
Subject: ARM: KVM: Initial VGIC infrastructure code

Wire the basic framework code for VGIC support and the initial in-kernel
MMIO support code for the VGIC, used for the distributor emulation.

Reviewed-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm/include/asm/kvm_host.h |   8 +++
 arch/arm/include/asm/kvm_vgic.h |  80 +++++++++++++++++++++
 arch/arm/kvm/Makefile           |   1 +
 arch/arm/kvm/arm.c              |  27 ++++++-
 arch/arm/kvm/interrupts.S       |   4 ++
 arch/arm/kvm/mmio.c             |   3 +
 arch/arm/kvm/vgic.c             | 153 ++++++++++++++++++++++++++++++++++++++++
 7 files changed, 275 insertions(+), 1 deletion(-)
 create mode 100644 arch/arm/include/asm/kvm_vgic.h
 create mode 100644 arch/arm/kvm/vgic.c

(limited to 'arch/arm/kvm/arm.c')

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index fc161de21ea2..6791c888b9f4 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -37,6 +37,8 @@
 #define KVM_NR_PAGE_SIZES	1
 #define KVM_PAGES_PER_HPAGE(x)	(1UL<<31)
 
+#include <asm/kvm_vgic.h>
+
 struct kvm_vcpu;
 u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
 int kvm_target_cpu(void);
@@ -58,6 +60,9 @@ struct kvm_arch {
 
 	/* Stage-2 page table */
 	pgd_t *pgd;
+
+	/* Interrupt controller */
+	struct vgic_dist	vgic;
 };
 
 #define KVM_NR_MEM_OBJS     40
@@ -92,6 +97,9 @@ struct kvm_vcpu_arch {
 	struct vfp_hard_struct vfp_guest;
 	struct vfp_hard_struct *vfp_host;
 
+	/* VGIC state */
+	struct vgic_cpu vgic_cpu;
+
 	/*
 	 * Anything that is not used directly from assembly code goes
 	 * here.
diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h
new file mode 100644
index 000000000000..8f44799b8db1
--- /dev/null
+++ b/arch/arm/include/asm/kvm_vgic.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __ASM_ARM_KVM_VGIC_H
+#define __ASM_ARM_KVM_VGIC_H
+
+#include <linux/irqchip/arm-gic.h>
+
+struct vgic_dist {
+};
+
+struct vgic_cpu {
+};
+
+struct kvm;
+struct kvm_vcpu;
+struct kvm_run;
+struct kvm_exit_mmio;
+
+#ifdef CONFIG_KVM_ARM_VGIC
+bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
+		      struct kvm_exit_mmio *mmio);
+
+#else
+static inline int kvm_vgic_hyp_init(void)
+{
+	return 0;
+}
+
+static inline int kvm_vgic_init(struct kvm *kvm)
+{
+	return 0;
+}
+
+static inline int kvm_vgic_create(struct kvm *kvm)
+{
+	return 0;
+}
+
+static inline int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
+{
+	return 0;
+}
+
+static inline void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) {}
+static inline void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) {}
+
+static inline int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
+{
+	return 0;
+}
+
+static inline bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
+				    struct kvm_exit_mmio *mmio)
+{
+	return false;
+}
+
+static inline int irqchip_in_kernel(struct kvm *kvm)
+{
+	return 0;
+}
+#endif
+
+#endif
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index ea27987bd07f..dece8edc2e4a 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -19,3 +19,4 @@ kvm-arm-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
 obj-y += kvm-arm.o init.o interrupts.o
 obj-y += arm.o guest.o mmu.o emulate.o reset.o
 obj-y += coproc.o coproc_a15.o mmio.o psci.o
+obj-$(CONFIG_KVM_ARM_VGIC) += vgic.o
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 0cf144f95945..7305aef28d0e 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -62,6 +62,8 @@ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
 static u8 kvm_next_vmid;
 static DEFINE_SPINLOCK(kvm_vmid_lock);
 
+static bool vgic_present;
+
 static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)
 {
 	BUG_ON(preemptible());
@@ -184,6 +186,9 @@ int kvm_dev_ioctl_check_extension(long ext)
 {
 	int r;
 	switch (ext) {
+	case KVM_CAP_IRQCHIP:
+		r = vgic_present;
+		break;
 	case KVM_CAP_USER_MEMORY:
 	case KVM_CAP_SYNC_MMU:
 	case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
@@ -315,8 +320,16 @@ int __attribute_const__ kvm_target_cpu(void)
 
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 {
+	int ret;
+
 	/* Force users to call KVM_ARM_VCPU_INIT */
 	vcpu->arch.target = -1;
+
+	/* Set up VGIC */
+	ret = kvm_vgic_vcpu_init(vcpu);
+	if (ret)
+		return ret;
+
 	return 0;
 }
 
@@ -374,7 +387,7 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
  */
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
 {
-	return !!v->arch.irq_lines;
+	return !!v->arch.irq_lines || kvm_vgic_vcpu_pending_irq(v);
 }
 
 /* Just ensure a guest exit from a particular CPU */
@@ -693,6 +706,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		if (vcpu->arch.pause)
 			vcpu_pause(vcpu);
 
+		kvm_vgic_flush_hwstate(vcpu);
+
 		local_irq_disable();
 
 		/*
@@ -705,6 +720,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
 		if (ret <= 0 || need_new_vmid_gen(vcpu->kvm)) {
 			local_irq_enable();
+			kvm_vgic_sync_hwstate(vcpu);
 			continue;
 		}
 
@@ -737,6 +753,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		 * Back from guest
 		 *************************************************************/
 
+		kvm_vgic_sync_hwstate(vcpu);
+
 		ret = handle_exit(vcpu, run, ret);
 	}
 
@@ -1011,6 +1029,13 @@ static int init_hyp_mode(void)
 		}
 	}
 
+	/*
+	 * Init HYP view of VGIC
+	 */
+	err = kvm_vgic_hyp_init();
+	if (err)
+		goto out_free_vfp;
+
 	kvm_info("Hyp mode initialized successfully\n");
 	return 0;
 out_free_vfp:
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index c5400d2e97ca..5f113bedfaee 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -94,6 +94,8 @@ ENTRY(__kvm_vcpu_run)
 
 	save_host_regs
 
+	restore_vgic_state
+
 	@ Store hardware CP15 state and load guest state
 	read_cp15_state store_to_vcpu = 0
 	write_cp15_state read_from_vcpu = 1
@@ -187,6 +189,8 @@ after_vfp_restore:
 	read_cp15_state store_to_vcpu = 1
 	write_cp15_state read_from_vcpu = 0
 
+	save_vgic_state
+
 	restore_host_regs
 	clrex				@ Clear exclusive monitor
 	mov	r0, r1			@ Return the return code
diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index 0144baf82904..98a870ff1a5c 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -148,6 +148,9 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
 	if (mmio.is_write)
 		memcpy(mmio.data, vcpu_reg(vcpu, rt), mmio.len);
 
+	if (vgic_handle_mmio(vcpu, run, &mmio))
+		return 1;
+
 	kvm_prepare_mmio(run, &mmio);
 	return 0;
 }
diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c
new file mode 100644
index 000000000000..c400661409ab
--- /dev/null
+++ b/arch/arm/kvm/vgic.c
@@ -0,0 +1,153 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <asm/kvm_emulate.h>
+
+#define ACCESS_READ_VALUE	(1 << 0)
+#define ACCESS_READ_RAZ		(0 << 0)
+#define ACCESS_READ_MASK(x)	((x) & (1 << 0))
+#define ACCESS_WRITE_IGNORED	(0 << 1)
+#define ACCESS_WRITE_SETBIT	(1 << 1)
+#define ACCESS_WRITE_CLEARBIT	(2 << 1)
+#define ACCESS_WRITE_VALUE	(3 << 1)
+#define ACCESS_WRITE_MASK(x)	((x) & (3 << 1))
+
+static u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask)
+{
+	return *((u32 *)mmio->data) & mask;
+}
+
+static void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value)
+{
+	*((u32 *)mmio->data) = value & mask;
+}
+
+/**
+ * vgic_reg_access - access vgic register
+ * @mmio:   pointer to the data describing the mmio access
+ * @reg:    pointer to the virtual backing of vgic distributor data
+ * @offset: least significant 2 bits used for word offset
+ * @mode:   ACCESS_ mode (see defines above)
+ *
+ * Helper to make vgic register access easier using one of the access
+ * modes defined for vgic register access
+ * (read,raz,write-ignored,setbit,clearbit,write)
+ */
+static void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg,
+			    phys_addr_t offset, int mode)
+{
+	int word_offset = (offset & 3) * 8;
+	u32 mask = (1UL << (mmio->len * 8)) - 1;
+	u32 regval;
+
+	/*
+	 * Any alignment fault should have been delivered to the guest
+	 * directly (ARM ARM B3.12.7 "Prioritization of aborts").
+	 */
+
+	if (reg) {
+		regval = *reg;
+	} else {
+		BUG_ON(mode != (ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED));
+		regval = 0;
+	}
+
+	if (mmio->is_write) {
+		u32 data = mmio_data_read(mmio, mask) << word_offset;
+		switch (ACCESS_WRITE_MASK(mode)) {
+		case ACCESS_WRITE_IGNORED:
+			return;
+
+		case ACCESS_WRITE_SETBIT:
+			regval |= data;
+			break;
+
+		case ACCESS_WRITE_CLEARBIT:
+			regval &= ~data;
+			break;
+
+		case ACCESS_WRITE_VALUE:
+			regval = (regval & ~(mask << word_offset)) | data;
+			break;
+		}
+		*reg = regval;
+	} else {
+		switch (ACCESS_READ_MASK(mode)) {
+		case ACCESS_READ_RAZ:
+			regval = 0;
+			/* fall through */
+
+		case ACCESS_READ_VALUE:
+			mmio_data_write(mmio, mask, regval >> word_offset);
+		}
+	}
+}
+
+/*
+ * I would have liked to use the kvm_bus_io_*() API instead, but it
+ * cannot cope with banked registers (only the VM pointer is passed
+ * around, and we need the vcpu). One of these days, someone please
+ * fix it!
+ */
+struct mmio_range {
+	phys_addr_t base;
+	unsigned long len;
+	bool (*handle_mmio)(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio,
+			    phys_addr_t offset);
+};
+
+static const struct mmio_range vgic_ranges[] = {
+	{}
+};
+
+static const
+struct mmio_range *find_matching_range(const struct mmio_range *ranges,
+				       struct kvm_exit_mmio *mmio,
+				       phys_addr_t base)
+{
+	const struct mmio_range *r = ranges;
+	phys_addr_t addr = mmio->phys_addr - base;
+
+	while (r->len) {
+		if (addr >= r->base &&
+		    (addr + mmio->len) <= (r->base + r->len))
+			return r;
+		r++;
+	}
+
+	return NULL;
+}
+
+/**
+ * vgic_handle_mmio - handle an in-kernel MMIO access
+ * @vcpu:	pointer to the vcpu performing the access
+ * @run:	pointer to the kvm_run structure
+ * @mmio:	pointer to the data describing the access
+ *
+ * returns true if the MMIO access has been performed in kernel space,
+ * and false if it needs to be emulated in user space.
+ */
+bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
+		      struct kvm_exit_mmio *mmio)
+{
+	return KVM_EXIT_MMIO;
+}
-- 
cgit v1.2.3


From 330690cdceba06b60afcfe50a65f72fab7f4f970 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <c.dall@virtualopensystems.com>
Date: Mon, 21 Jan 2013 19:36:13 -0500
Subject: ARM: KVM: VGIC accept vcpu and dist base addresses from user space

User space defines the model to emulate to a guest and should therefore
decide which addresses are used for both the virtual CPU interface
directly mapped in the guest physical address space and for the emulated
distributor interface, which is mapped in software by the in-kernel VGIC
support.

Reviewed-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 Documentation/virtual/kvm/api.txt |  1 +
 arch/arm/include/asm/kvm_vgic.h   |  9 ++++++
 arch/arm/include/uapi/asm/kvm.h   |  3 ++
 arch/arm/kvm/arm.c                | 16 +++++++++-
 arch/arm/kvm/vgic.c               | 62 +++++++++++++++++++++++++++++++++++++++
 5 files changed, 90 insertions(+), 1 deletion(-)

(limited to 'arch/arm/kvm/arm.c')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 4505f869e450..e0fa0ea2b187 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2222,6 +2222,7 @@ Errors:
   ENXIO:  Device not supported on current system
   EEXIST: Address already set
   E2BIG:  Address outside guest physical address space
+  EBUSY:  Address overlaps with other device range
 
 struct kvm_arm_device_addr {
 	__u64 id;
diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h
index 8f44799b8db1..b56fcf3c3575 100644
--- a/arch/arm/include/asm/kvm_vgic.h
+++ b/arch/arm/include/asm/kvm_vgic.h
@@ -22,6 +22,9 @@
 #include <linux/irqchip/arm-gic.h>
 
 struct vgic_dist {
+	/* Distributor and vcpu interface mapping in the guest */
+	phys_addr_t		vgic_dist_base;
+	phys_addr_t		vgic_cpu_base;
 };
 
 struct vgic_cpu {
@@ -33,6 +36,7 @@ struct kvm_run;
 struct kvm_exit_mmio;
 
 #ifdef CONFIG_KVM_ARM_VGIC
+int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr);
 bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		      struct kvm_exit_mmio *mmio);
 
@@ -42,6 +46,11 @@ static inline int kvm_vgic_hyp_init(void)
 	return 0;
 }
 
+static inline int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr)
+{
+	return 0;
+}
+
 static inline int kvm_vgic_init(struct kvm *kvm)
 {
 	return 0;
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index 346ac3f4a2b8..023bfeb367bf 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -78,6 +78,9 @@ struct kvm_regs {
 #define KVM_VGIC_V2_ADDR_TYPE_DIST	0
 #define KVM_VGIC_V2_ADDR_TYPE_CPU	1
 
+#define KVM_VGIC_V2_DIST_SIZE		0x1000
+#define KVM_VGIC_V2_CPU_SIZE		0x2000
+
 #define KVM_ARM_VCPU_POWER_OFF		0 /* CPU is started in OFF state */
 
 struct kvm_vcpu_init {
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 7305aef28d0e..c327fd9d8ec3 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -880,7 +880,21 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
 static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
 					struct kvm_arm_device_addr *dev_addr)
 {
-	return -ENODEV;
+	unsigned long dev_id, type;
+
+	dev_id = (dev_addr->id & KVM_ARM_DEVICE_ID_MASK) >>
+		KVM_ARM_DEVICE_ID_SHIFT;
+	type = (dev_addr->id & KVM_ARM_DEVICE_TYPE_MASK) >>
+		KVM_ARM_DEVICE_TYPE_SHIFT;
+
+	switch (dev_id) {
+	case KVM_ARM_DEVICE_VGIC_V2:
+		if (!vgic_present)
+			return -ENXIO;
+		return kvm_vgic_set_addr(kvm, type, dev_addr->addr);
+	default:
+		return -ENODEV;
+	}
 }
 
 long kvm_arch_vm_ioctl(struct file *filp,
diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c
index c400661409ab..b333b58de4cb 100644
--- a/arch/arm/kvm/vgic.c
+++ b/arch/arm/kvm/vgic.c
@@ -22,6 +22,9 @@
 #include <linux/io.h>
 #include <asm/kvm_emulate.h>
 
+#define VGIC_ADDR_UNDEF		(-1)
+#define IS_VGIC_ADDR_UNDEF(_x)  ((_x) == VGIC_ADDR_UNDEF)
+
 #define ACCESS_READ_VALUE	(1 << 0)
 #define ACCESS_READ_RAZ		(0 << 0)
 #define ACCESS_READ_MASK(x)	((x) & (1 << 0))
@@ -151,3 +154,62 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
 {
 	return KVM_EXIT_MMIO;
 }
+
+static bool vgic_ioaddr_overlap(struct kvm *kvm)
+{
+	phys_addr_t dist = kvm->arch.vgic.vgic_dist_base;
+	phys_addr_t cpu = kvm->arch.vgic.vgic_cpu_base;
+
+	if (IS_VGIC_ADDR_UNDEF(dist) || IS_VGIC_ADDR_UNDEF(cpu))
+		return 0;
+	if ((dist <= cpu && dist + KVM_VGIC_V2_DIST_SIZE > cpu) ||
+	    (cpu <= dist && cpu + KVM_VGIC_V2_CPU_SIZE > dist))
+		return -EBUSY;
+	return 0;
+}
+
+static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr,
+			      phys_addr_t addr, phys_addr_t size)
+{
+	int ret;
+
+	if (!IS_VGIC_ADDR_UNDEF(*ioaddr))
+		return -EEXIST;
+	if (addr + size < addr)
+		return -EINVAL;
+
+	ret = vgic_ioaddr_overlap(kvm);
+	if (ret)
+		return ret;
+	*ioaddr = addr;
+	return ret;
+}
+
+int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr)
+{
+	int r = 0;
+	struct vgic_dist *vgic = &kvm->arch.vgic;
+
+	if (addr & ~KVM_PHYS_MASK)
+		return -E2BIG;
+
+	if (addr & ~PAGE_MASK)
+		return -EINVAL;
+
+	mutex_lock(&kvm->lock);
+	switch (type) {
+	case KVM_VGIC_V2_ADDR_TYPE_DIST:
+		r = vgic_ioaddr_assign(kvm, &vgic->vgic_dist_base,
+				       addr, KVM_VGIC_V2_DIST_SIZE);
+		break;
+	case KVM_VGIC_V2_ADDR_TYPE_CPU:
+		r = vgic_ioaddr_assign(kvm, &vgic->vgic_cpu_base,
+				       addr, KVM_VGIC_V2_CPU_SIZE);
+		break;
+	default:
+		r = -ENODEV;
+	}
+
+	mutex_unlock(&kvm->lock);
+	return r;
+}
-- 
cgit v1.2.3


From 5863c2ce7269a7b24d60006430aa79a750b226ec Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 21 Jan 2013 19:36:15 -0500
Subject: ARM: KVM: VGIC interrupt injection

Plug the interrupt injection code. Interrupts can now be generated
from user space.

Reviewed-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm/include/asm/kvm_vgic.h |   8 +++
 arch/arm/kvm/arm.c              |  55 +++++++++++++++----
 arch/arm/kvm/vgic.c             | 117 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 170 insertions(+), 10 deletions(-)

(limited to 'arch/arm/kvm/arm.c')

diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h
index c2dc8574ea3a..11a6d90899f7 100644
--- a/arch/arm/include/asm/kvm_vgic.h
+++ b/arch/arm/include/asm/kvm_vgic.h
@@ -147,6 +147,8 @@ struct kvm_exit_mmio;
 int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr);
 void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
 void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
+int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
+			bool level);
 int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
 bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		      struct kvm_exit_mmio *mmio);
@@ -182,6 +184,12 @@ static inline int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 static inline void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) {}
 static inline void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) {}
 
+static inline int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid,
+				      unsigned int irq_num, bool level)
+{
+	return 0;
+}
+
 static inline int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
 {
 	return 0;
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index c327fd9d8ec3..3c8d6a9be123 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -810,20 +810,49 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level)
 
 	trace_kvm_irq_line(irq_type, vcpu_idx, irq_num, irq_level->level);
 
-	if (irq_type != KVM_ARM_IRQ_TYPE_CPU)
-		return -EINVAL;
+	switch (irq_type) {
+	case KVM_ARM_IRQ_TYPE_CPU:
+		if (irqchip_in_kernel(kvm))
+			return -ENXIO;
 
-	if (vcpu_idx >= nrcpus)
-		return -EINVAL;
+		if (vcpu_idx >= nrcpus)
+			return -EINVAL;
 
-	vcpu = kvm_get_vcpu(kvm, vcpu_idx);
-	if (!vcpu)
-		return -EINVAL;
+		vcpu = kvm_get_vcpu(kvm, vcpu_idx);
+		if (!vcpu)
+			return -EINVAL;
 
-	if (irq_num > KVM_ARM_IRQ_CPU_FIQ)
-		return -EINVAL;
+		if (irq_num > KVM_ARM_IRQ_CPU_FIQ)
+			return -EINVAL;
+
+		return vcpu_interrupt_line(vcpu, irq_num, level);
+	case KVM_ARM_IRQ_TYPE_PPI:
+		if (!irqchip_in_kernel(kvm))
+			return -ENXIO;
+
+		if (vcpu_idx >= nrcpus)
+			return -EINVAL;
+
+		vcpu = kvm_get_vcpu(kvm, vcpu_idx);
+		if (!vcpu)
+			return -EINVAL;
+
+		if (irq_num < VGIC_NR_SGIS || irq_num >= VGIC_NR_PRIVATE_IRQS)
+			return -EINVAL;
 
-	return vcpu_interrupt_line(vcpu, irq_num, level);
+		return kvm_vgic_inject_irq(kvm, vcpu->vcpu_id, irq_num, level);
+	case KVM_ARM_IRQ_TYPE_SPI:
+		if (!irqchip_in_kernel(kvm))
+			return -ENXIO;
+
+		if (irq_num < VGIC_NR_PRIVATE_IRQS ||
+		    irq_num > KVM_ARM_IRQ_GIC_MAX)
+			return -EINVAL;
+
+		return kvm_vgic_inject_irq(kvm, 0, irq_num, level);
+	}
+
+	return -EINVAL;
 }
 
 long kvm_arch_vcpu_ioctl(struct file *filp,
@@ -904,6 +933,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
 	void __user *argp = (void __user *)arg;
 
 	switch (ioctl) {
+	case KVM_CREATE_IRQCHIP: {
+		if (vgic_present)
+			return kvm_vgic_create(kvm);
+		else
+			return -ENXIO;
+	}
 	case KVM_ARM_SET_DEVICE_ADDR: {
 		struct kvm_arm_device_addr dev_addr;
 
diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c
index 2d5e29f1c28f..2e6a585c23e5 100644
--- a/arch/arm/kvm/vgic.c
+++ b/arch/arm/kvm/vgic.c
@@ -73,6 +73,7 @@
 
 static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
 static void vgic_update_state(struct kvm *kvm);
+static void vgic_kick_vcpus(struct kvm *kvm);
 static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg);
 
 static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x,
@@ -708,6 +709,9 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
 	kvm_prepare_mmio(run, mmio);
 	kvm_handle_mmio_return(vcpu, run);
 
+	if (updated_state)
+		vgic_kick_vcpus(vcpu->kvm);
+
 	return true;
 }
 
@@ -1104,6 +1108,119 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
 	return test_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu);
 }
 
+static void vgic_kick_vcpus(struct kvm *kvm)
+{
+	struct kvm_vcpu *vcpu;
+	int c;
+
+	/*
+	 * We've injected an interrupt, time to find out who deserves
+	 * a good kick...
+	 */
+	kvm_for_each_vcpu(c, vcpu, kvm) {
+		if (kvm_vgic_vcpu_pending_irq(vcpu))
+			kvm_vcpu_kick(vcpu);
+	}
+}
+
+static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
+{
+	int is_edge = vgic_irq_is_edge(vcpu, irq);
+	int state = vgic_dist_irq_is_pending(vcpu, irq);
+
+	/*
+	 * Only inject an interrupt if:
+	 * - edge triggered and we have a rising edge
+	 * - level triggered and we change level
+	 */
+	if (is_edge)
+		return level > state;
+	else
+		return level != state;
+}
+
+static bool vgic_update_irq_state(struct kvm *kvm, int cpuid,
+				  unsigned int irq_num, bool level)
+{
+	struct vgic_dist *dist = &kvm->arch.vgic;
+	struct kvm_vcpu *vcpu;
+	int is_edge, is_level;
+	int enabled;
+	bool ret = true;
+
+	spin_lock(&dist->lock);
+
+	vcpu = kvm_get_vcpu(kvm, cpuid);
+	is_edge = vgic_irq_is_edge(vcpu, irq_num);
+	is_level = !is_edge;
+
+	if (!vgic_validate_injection(vcpu, irq_num, level)) {
+		ret = false;
+		goto out;
+	}
+
+	if (irq_num >= VGIC_NR_PRIVATE_IRQS) {
+		cpuid = dist->irq_spi_cpu[irq_num - VGIC_NR_PRIVATE_IRQS];
+		vcpu = kvm_get_vcpu(kvm, cpuid);
+	}
+
+	kvm_debug("Inject IRQ%d level %d CPU%d\n", irq_num, level, cpuid);
+
+	if (level)
+		vgic_dist_irq_set(vcpu, irq_num);
+	else
+		vgic_dist_irq_clear(vcpu, irq_num);
+
+	enabled = vgic_irq_is_enabled(vcpu, irq_num);
+
+	if (!enabled) {
+		ret = false;
+		goto out;
+	}
+
+	if (is_level && vgic_irq_is_active(vcpu, irq_num)) {
+		/*
+		 * Level interrupt in progress, will be picked up
+		 * when EOId.
+		 */
+		ret = false;
+		goto out;
+	}
+
+	if (level) {
+		vgic_cpu_irq_set(vcpu, irq_num);
+		set_bit(cpuid, &dist->irq_pending_on_cpu);
+	}
+
+out:
+	spin_unlock(&dist->lock);
+
+	return ret;
+}
+
+/**
+ * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
+ * @kvm:     The VM structure pointer
+ * @cpuid:   The CPU for PPIs
+ * @irq_num: The IRQ number that is assigned to the device
+ * @level:   Edge-triggered:  true:  to trigger the interrupt
+ *			      false: to ignore the call
+ *	     Level-sensitive  true:  activates an interrupt
+ *			      false: deactivates an interrupt
+ *
+ * The GIC is not concerned with devices being active-LOW or active-HIGH for
+ * level-sensitive interrupts.  You can think of the level parameter as 1
+ * being HIGH and 0 being LOW and all devices being active-HIGH.
+ */
+int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
+			bool level)
+{
+	if (vgic_update_irq_state(kvm, cpuid, irq_num, level))
+		vgic_kick_vcpus(kvm);
+
+	return 0;
+}
+
 static bool vgic_ioaddr_overlap(struct kvm *kvm)
 {
 	phys_addr_t dist = kvm->arch.vgic.vgic_dist_base;
-- 
cgit v1.2.3


From 01ac5e342f3b87a9b83b991230d96c22c4167ec9 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 21 Jan 2013 19:36:16 -0500
Subject: ARM: KVM: VGIC initialisation code

Add the init code for the hypervisor, the virtual machine, and
the virtual CPUs.

An interrupt handler is also wired to allow the VGIC maintenance
interrupts, used to deal with level triggered interrupts and LR
underflows.

A CPU hotplug notifier is registered to disable/enable the interrupt
as requested.

Reviewed-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm/include/asm/kvm_vgic.h |  11 ++
 arch/arm/kvm/arm.c              |  15 +++
 arch/arm/kvm/vgic.c             | 224 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 250 insertions(+)

(limited to 'arch/arm/kvm/arm.c')

diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h
index 11a6d90899f7..ab97207d9cd3 100644
--- a/arch/arm/include/asm/kvm_vgic.h
+++ b/arch/arm/include/asm/kvm_vgic.h
@@ -72,6 +72,7 @@ struct vgic_bytemap {
 struct vgic_dist {
 #ifdef CONFIG_KVM_ARM_VGIC
 	spinlock_t		lock;
+	bool			ready;
 
 	/* Virtual control interface mapping */
 	void __iomem		*vctrl_base;
@@ -145,6 +146,10 @@ struct kvm_exit_mmio;
 
 #ifdef CONFIG_KVM_ARM_VGIC
 int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr);
+int kvm_vgic_hyp_init(void);
+int kvm_vgic_init(struct kvm *kvm);
+int kvm_vgic_create(struct kvm *kvm);
+int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu);
 void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
 void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
@@ -154,6 +159,7 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		      struct kvm_exit_mmio *mmio);
 
 #define irqchip_in_kernel(k)	(!!((k)->arch.vgic.vctrl_base))
+#define vgic_initialized(k)	((k)->arch.vgic.ready)
 
 #else
 static inline int kvm_vgic_hyp_init(void)
@@ -205,6 +211,11 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
 {
 	return 0;
 }
+
+static inline bool vgic_initialized(struct kvm *kvm)
+{
+	return true;
+}
 #endif
 
 #endif
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 3c8d6a9be123..ea7383293ed0 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -641,6 +641,17 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
 
 	vcpu->arch.has_run_once = true;
 
+	/*
+	 * Initialize the VGIC before running a vcpu the first time on
+	 * this VM.
+	 */
+	if (irqchip_in_kernel(vcpu->kvm) &&
+	    unlikely(!vgic_initialized(vcpu->kvm))) {
+		int ret = kvm_vgic_init(vcpu->kvm);
+		if (ret)
+			return ret;
+	}
+
 	/*
 	 * Handle the "start in power-off" case by calling into the
 	 * PSCI code.
@@ -1085,6 +1096,10 @@ static int init_hyp_mode(void)
 	if (err)
 		goto out_free_vfp;
 
+#ifdef CONFIG_KVM_ARM_VGIC
+		vgic_present = true;
+#endif
+
 	kvm_info("Hyp mode initialized successfully\n");
 	return 0;
 out_free_vfp:
diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c
index 2e6a585c23e5..6d8407672dec 100644
--- a/arch/arm/kvm/vgic.c
+++ b/arch/arm/kvm/vgic.c
@@ -16,11 +16,20 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
+#include <linux/cpu.h>
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+
+#include <linux/irqchip/arm-gic.h>
+
 #include <asm/kvm_emulate.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmu.h>
 
 /*
  * How the whole thing works (courtesy of Christoffer Dall):
@@ -62,6 +71,14 @@
 #define VGIC_ADDR_UNDEF		(-1)
 #define IS_VGIC_ADDR_UNDEF(_x)  ((_x) == VGIC_ADDR_UNDEF)
 
+/* Physical address of vgic virtual cpu interface */
+static phys_addr_t vgic_vcpu_base;
+
+/* Virtual control interface base address */
+static void __iomem *vgic_vctrl_base;
+
+static struct device_node *vgic_node;
+
 #define ACCESS_READ_VALUE	(1 << 0)
 #define ACCESS_READ_RAZ		(0 << 0)
 #define ACCESS_READ_MASK(x)	((x) & (1 << 0))
@@ -75,6 +92,9 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
 static void vgic_update_state(struct kvm *kvm);
 static void vgic_kick_vcpus(struct kvm *kvm);
 static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg);
+static u32 vgic_nr_lr;
+
+static unsigned int vgic_maint_irq;
 
 static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x,
 				int cpuid, u32 offset)
@@ -1221,6 +1241,210 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
 	return 0;
 }
 
+static irqreturn_t vgic_maintenance_handler(int irq, void *data)
+{
+	/*
+	 * We cannot rely on the vgic maintenance interrupt to be
+	 * delivered synchronously. This means we can only use it to
+	 * exit the VM, and we perform the handling of EOIed
+	 * interrupts on the exit path (see vgic_process_maintenance).
+	 */
+	return IRQ_HANDLED;
+}
+
+int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
+{
+	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+	int i;
+
+	if (!irqchip_in_kernel(vcpu->kvm))
+		return 0;
+
+	if (vcpu->vcpu_id >= VGIC_MAX_CPUS)
+		return -EBUSY;
+
+	for (i = 0; i < VGIC_NR_IRQS; i++) {
+		if (i < VGIC_NR_PPIS)
+			vgic_bitmap_set_irq_val(&dist->irq_enabled,
+						vcpu->vcpu_id, i, 1);
+		if (i < VGIC_NR_PRIVATE_IRQS)
+			vgic_bitmap_set_irq_val(&dist->irq_cfg,
+						vcpu->vcpu_id, i, VGIC_CFG_EDGE);
+
+		vgic_cpu->vgic_irq_lr_map[i] = LR_EMPTY;
+	}
+
+	/*
+	 * By forcing VMCR to zero, the GIC will restore the binary
+	 * points to their reset values. Anything else resets to zero
+	 * anyway.
+	 */
+	vgic_cpu->vgic_vmcr = 0;
+
+	vgic_cpu->nr_lr = vgic_nr_lr;
+	vgic_cpu->vgic_hcr = GICH_HCR_EN; /* Get the show on the road... */
+
+	return 0;
+}
+
+static void vgic_init_maintenance_interrupt(void *info)
+{
+	enable_percpu_irq(vgic_maint_irq, 0);
+}
+
+static int vgic_cpu_notify(struct notifier_block *self,
+			   unsigned long action, void *cpu)
+{
+	switch (action) {
+	case CPU_STARTING:
+	case CPU_STARTING_FROZEN:
+		vgic_init_maintenance_interrupt(NULL);
+		break;
+	case CPU_DYING:
+	case CPU_DYING_FROZEN:
+		disable_percpu_irq(vgic_maint_irq);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block vgic_cpu_nb = {
+	.notifier_call = vgic_cpu_notify,
+};
+
+int kvm_vgic_hyp_init(void)
+{
+	int ret;
+	struct resource vctrl_res;
+	struct resource vcpu_res;
+
+	vgic_node = of_find_compatible_node(NULL, NULL, "arm,cortex-a15-gic");
+	if (!vgic_node) {
+		kvm_err("error: no compatible vgic node in DT\n");
+		return -ENODEV;
+	}
+
+	vgic_maint_irq = irq_of_parse_and_map(vgic_node, 0);
+	if (!vgic_maint_irq) {
+		kvm_err("error getting vgic maintenance irq from DT\n");
+		ret = -ENXIO;
+		goto out;
+	}
+
+	ret = request_percpu_irq(vgic_maint_irq, vgic_maintenance_handler,
+				 "vgic", kvm_get_running_vcpus());
+	if (ret) {
+		kvm_err("Cannot register interrupt %d\n", vgic_maint_irq);
+		goto out;
+	}
+
+	ret = register_cpu_notifier(&vgic_cpu_nb);
+	if (ret) {
+		kvm_err("Cannot register vgic CPU notifier\n");
+		goto out_free_irq;
+	}
+
+	ret = of_address_to_resource(vgic_node, 2, &vctrl_res);
+	if (ret) {
+		kvm_err("Cannot obtain VCTRL resource\n");
+		goto out_free_irq;
+	}
+
+	vgic_vctrl_base = of_iomap(vgic_node, 2);
+	if (!vgic_vctrl_base) {
+		kvm_err("Cannot ioremap VCTRL\n");
+		ret = -ENOMEM;
+		goto out_free_irq;
+	}
+
+	vgic_nr_lr = readl_relaxed(vgic_vctrl_base + GICH_VTR);
+	vgic_nr_lr = (vgic_nr_lr & 0x3f) + 1;
+
+	ret = create_hyp_io_mappings(vgic_vctrl_base,
+				     vgic_vctrl_base + resource_size(&vctrl_res),
+				     vctrl_res.start);
+	if (ret) {
+		kvm_err("Cannot map VCTRL into hyp\n");
+		goto out_unmap;
+	}
+
+	kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
+		 vctrl_res.start, vgic_maint_irq);
+	on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
+
+	if (of_address_to_resource(vgic_node, 3, &vcpu_res)) {
+		kvm_err("Cannot obtain VCPU resource\n");
+		ret = -ENXIO;
+		goto out_unmap;
+	}
+	vgic_vcpu_base = vcpu_res.start;
+
+	goto out;
+
+out_unmap:
+	iounmap(vgic_vctrl_base);
+out_free_irq:
+	free_percpu_irq(vgic_maint_irq, kvm_get_running_vcpus());
+out:
+	of_node_put(vgic_node);
+	return ret;
+}
+
+int kvm_vgic_init(struct kvm *kvm)
+{
+	int ret = 0, i;
+
+	mutex_lock(&kvm->lock);
+
+	if (vgic_initialized(kvm))
+		goto out;
+
+	if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) ||
+	    IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base)) {
+		kvm_err("Need to set vgic cpu and dist addresses first\n");
+		ret = -ENXIO;
+		goto out;
+	}
+
+	ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base,
+				    vgic_vcpu_base, KVM_VGIC_V2_CPU_SIZE);
+	if (ret) {
+		kvm_err("Unable to remap VGIC CPU to VCPU\n");
+		goto out;
+	}
+
+	for (i = VGIC_NR_PRIVATE_IRQS; i < VGIC_NR_IRQS; i += 4)
+		vgic_set_target_reg(kvm, 0, i);
+
+	kvm->arch.vgic.ready = true;
+out:
+	mutex_unlock(&kvm->lock);
+	return ret;
+}
+
+int kvm_vgic_create(struct kvm *kvm)
+{
+	int ret = 0;
+
+	mutex_lock(&kvm->lock);
+
+	if (atomic_read(&kvm->online_vcpus) || kvm->arch.vgic.vctrl_base) {
+		ret = -EEXIST;
+		goto out;
+	}
+
+	spin_lock_init(&kvm->arch.vgic.lock);
+	kvm->arch.vgic.vctrl_base = vgic_vctrl_base;
+	kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
+	kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
+
+out:
+	mutex_unlock(&kvm->lock);
+	return ret;
+}
+
 static bool vgic_ioaddr_overlap(struct kvm *kvm)
 {
 	phys_addr_t dist = kvm->arch.vgic.vgic_dist_base;
-- 
cgit v1.2.3


From c7e3ba64ba16eddfbfc66ec099860f40e808e124 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Wed, 23 Jan 2013 13:21:59 -0500
Subject: ARM: KVM: arch_timers: Add timer world switch

Do the necessary save/restore dance for the timers in the world
switch code. In the process, allow the guest to read the physical
counter, which is useful for its own clock_event_device.

Reviewed-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm/include/asm/kvm_asm.h |  3 ++-
 arch/arm/kernel/asm-offsets.c  |  6 +++++
 arch/arm/kvm/arm.c             |  3 +++
 arch/arm/kvm/coproc.c          |  4 +++
 arch/arm/kvm/interrupts_head.S | 59 ++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 74 insertions(+), 1 deletion(-)

(limited to 'arch/arm/kvm/arm.c')

diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index 5e06e8177784..e4956f4e23e1 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -45,7 +45,8 @@
 #define c13_TID_URW	23	/* Thread ID, User R/W */
 #define c13_TID_URO	24	/* Thread ID, User R/O */
 #define c13_TID_PRIV	25	/* Thread ID, Privileged */
-#define NR_CP15_REGS	26	/* Number of regs (incl. invalid) */
+#define c14_CNTKCTL	26	/* Timer Control Register (PL1) */
+#define NR_CP15_REGS	27	/* Number of regs (incl. invalid) */
 
 #define ARM_EXCEPTION_RESET	  0
 #define ARM_EXCEPTION_UNDEFINED   1
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index 17cea2e78d88..5ce738b43508 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -179,6 +179,12 @@ int main(void)
   DEFINE(VGIC_CPU_APR,		offsetof(struct vgic_cpu, vgic_apr));
   DEFINE(VGIC_CPU_LR,		offsetof(struct vgic_cpu, vgic_lr));
   DEFINE(VGIC_CPU_NR_LR,	offsetof(struct vgic_cpu, nr_lr));
+#ifdef CONFIG_KVM_ARM_TIMER
+  DEFINE(VCPU_TIMER_CNTV_CTL,	offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl));
+  DEFINE(VCPU_TIMER_CNTV_CVAL,	offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval));
+  DEFINE(KVM_TIMER_CNTVOFF,	offsetof(struct kvm, arch.timer.cntvoff));
+  DEFINE(KVM_TIMER_ENABLED,	offsetof(struct kvm, arch.timer.enabled));
+#endif
   DEFINE(KVM_VGIC_VCTRL,	offsetof(struct kvm, arch.vgic.vctrl_base));
 #endif
   DEFINE(KVM_VTTBR,		offsetof(struct kvm, arch.vttbr));
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index ea7383293ed0..800b2cd804d3 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -718,6 +718,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 			vcpu_pause(vcpu);
 
 		kvm_vgic_flush_hwstate(vcpu);
+		kvm_timer_flush_hwstate(vcpu);
 
 		local_irq_disable();
 
@@ -731,6 +732,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
 		if (ret <= 0 || need_new_vmid_gen(vcpu->kvm)) {
 			local_irq_enable();
+			kvm_timer_sync_hwstate(vcpu);
 			kvm_vgic_sync_hwstate(vcpu);
 			continue;
 		}
@@ -764,6 +766,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		 * Back from guest
 		 *************************************************************/
 
+		kvm_timer_sync_hwstate(vcpu);
 		kvm_vgic_sync_hwstate(vcpu);
 
 		ret = handle_exit(vcpu, run, ret);
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index d782638c7ec0..4ea9a982269c 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -222,6 +222,10 @@ static const struct coproc_reg cp15_regs[] = {
 			NULL, reset_unknown, c13_TID_URO },
 	{ CRn(13), CRm( 0), Op1( 0), Op2( 4), is32,
 			NULL, reset_unknown, c13_TID_PRIV },
+
+	/* CNTKCTL: swapped by interrupt.S. */
+	{ CRn(14), CRm( 1), Op1( 0), Op2( 0), is32,
+			NULL, reset_val, c14_CNTKCTL, 0x00000000 },
 };
 
 /* Target specific emulation tables */
diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
index 06f251395bec..3c8f2f0b4c5e 100644
--- a/arch/arm/kvm/interrupts_head.S
+++ b/arch/arm/kvm/interrupts_head.S
@@ -300,6 +300,14 @@ vcpu	.req	r0		@ vcpu pointer always in r0
 	str	r11, [vcpu, #CP15_OFFSET(c6_IFAR)]
 	str	r12, [vcpu, #CP15_OFFSET(c12_VBAR)]
 	.endif
+
+	mrc	p15, 0, r2, c14, c1, 0	@ CNTKCTL
+
+	.if \store_to_vcpu == 0
+	push	{r2}
+	.else
+	str	r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)]
+	.endif
 .endm
 
 /*
@@ -310,6 +318,14 @@ vcpu	.req	r0		@ vcpu pointer always in r0
  * Assumes vcpu pointer in vcpu reg
  */
 .macro write_cp15_state read_from_vcpu
+	.if \read_from_vcpu == 0
+	pop	{r2}
+	.else
+	ldr	r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)]
+	.endif
+
+	mcr	p15, 0, r2, c14, c1, 0	@ CNTKCTL
+
 	.if \read_from_vcpu == 0
 	pop	{r2-r12}
 	.else
@@ -461,8 +477,28 @@ vcpu	.req	r0		@ vcpu pointer always in r0
  * for the host.
  *
  * Assumes vcpu pointer in vcpu reg
+ * Clobbers r2-r5
  */
 .macro save_timer_state
+#ifdef CONFIG_KVM_ARM_TIMER
+	ldr	r4, [vcpu, #VCPU_KVM]
+	ldr	r2, [r4, #KVM_TIMER_ENABLED]
+	cmp	r2, #0
+	beq	1f
+
+	mrc	p15, 0, r2, c14, c3, 1	@ CNTV_CTL
+	str	r2, [vcpu, #VCPU_TIMER_CNTV_CTL]
+	bic	r2, #1			@ Clear ENABLE
+	mcr	p15, 0, r2, c14, c3, 1	@ CNTV_CTL
+	isb
+
+	mrrc	p15, 3, r2, r3, c14	@ CNTV_CVAL
+	ldr	r4, =VCPU_TIMER_CNTV_CVAL
+	add	r5, vcpu, r4
+	strd	r2, r3, [r5]
+
+1:
+#endif
 	@ Allow physical timer/counter access for the host
 	mrc	p15, 4, r2, c14, c1, 0	@ CNTHCTL
 	orr	r2, r2, #(CNTHCTL_PL1PCEN | CNTHCTL_PL1PCTEN)
@@ -474,6 +510,7 @@ vcpu	.req	r0		@ vcpu pointer always in r0
  * for the host.
  *
  * Assumes vcpu pointer in vcpu reg
+ * Clobbers r2-r5
  */
 .macro restore_timer_state
 	@ Disallow physical timer access for the guest
@@ -482,6 +519,28 @@ vcpu	.req	r0		@ vcpu pointer always in r0
 	orr	r2, r2, #CNTHCTL_PL1PCTEN
 	bic	r2, r2, #CNTHCTL_PL1PCEN
 	mcr	p15, 4, r2, c14, c1, 0	@ CNTHCTL
+
+#ifdef CONFIG_KVM_ARM_TIMER
+	ldr	r4, [vcpu, #VCPU_KVM]
+	ldr	r2, [r4, #KVM_TIMER_ENABLED]
+	cmp	r2, #0
+	beq	1f
+
+	ldr	r2, [r4, #KVM_TIMER_CNTVOFF]
+	ldr	r3, [r4, #(KVM_TIMER_CNTVOFF + 4)]
+	mcrr	p15, 4, r2, r3, c14	@ CNTVOFF
+
+	ldr	r4, =VCPU_TIMER_CNTV_CVAL
+	add	r5, vcpu, r4
+	ldrd	r2, r3, [r5]
+	mcrr	p15, 3, r2, r3, c14	@ CNTV_CVAL
+	isb
+
+	ldr	r2, [vcpu, #VCPU_TIMER_CNTV_CTL]
+	and	r2, r2, #3
+	mcr	p15, 0, r2, c14, c3, 1	@ CNTV_CTL
+1:
+#endif
 .endm
 
 .equ vmentry,	0
-- 
cgit v1.2.3


From 967f84275ba74eac696f798ce1a780285170b5e7 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Wed, 23 Jan 2013 13:21:59 -0500
Subject: ARM: KVM: arch_timers: Wire the init code and config option

It is now possible to select CONFIG_KVM_ARM_TIMER to enable the
KVM architected timer support.

Reviewed-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm/kvm/Kconfig  |  8 ++++++++
 arch/arm/kvm/Makefile |  1 +
 arch/arm/kvm/arm.c    | 11 +++++++++++
 arch/arm/kvm/vgic.c   |  1 +
 4 files changed, 21 insertions(+)

(limited to 'arch/arm/kvm/arm.c')

diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index d8126f2b9442..49dd64e579c2 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -59,6 +59,14 @@ config KVM_ARM_VGIC
 	---help---
 	  Adds support for a hardware assisted, in-kernel GIC emulation.
 
+config KVM_ARM_TIMER
+	bool "KVM support for Architected Timers"
+	depends on KVM_ARM_VGIC && ARM_ARCH_TIMER
+	select HAVE_KVM_IRQCHIP
+	default y
+	---help---
+	  Adds support for the Architected Timers in virtual machines
+
 source drivers/virtio/Kconfig
 
 endif # VIRTUALIZATION
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index dece8edc2e4a..fc96ce6f2357 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -20,3 +20,4 @@ obj-y += kvm-arm.o init.o interrupts.o
 obj-y += arm.o guest.o mmu.o emulate.o reset.o
 obj-y += coproc.o coproc_a15.o mmio.o psci.o
 obj-$(CONFIG_KVM_ARM_VGIC) += vgic.o
+obj-$(CONFIG_KVM_ARM_TIMER) += arch_timer.o
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 800b2cd804d3..9ada5549216d 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -289,6 +289,7 @@ int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 {
 	kvm_mmu_free_memory_caches(vcpu);
+	kvm_timer_vcpu_terminate(vcpu);
 	kmem_cache_free(kvm_vcpu_cache, vcpu);
 }
 
@@ -330,6 +331,9 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	if (ret)
 		return ret;
 
+	/* Set up the timer */
+	kvm_timer_vcpu_init(vcpu);
+
 	return 0;
 }
 
@@ -1103,6 +1107,13 @@ static int init_hyp_mode(void)
 		vgic_present = true;
 #endif
 
+	/*
+	 * Init HYP architected timer support
+	 */
+	err = kvm_timer_hyp_init();
+	if (err)
+		goto out_free_mappings;
+
 	kvm_info("Hyp mode initialized successfully\n");
 	return 0;
 out_free_vfp:
diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c
index 6d8407672dec..c9a17316e9fe 100644
--- a/arch/arm/kvm/vgic.c
+++ b/arch/arm/kvm/vgic.c
@@ -1418,6 +1418,7 @@ int kvm_vgic_init(struct kvm *kvm)
 	for (i = VGIC_NR_PRIVATE_IRQS; i < VGIC_NR_IRQS; i += 4)
 		vgic_set_target_reg(kvm, 0, i);
 
+	kvm_timer_init(kvm);
 	kvm->arch.vgic.ready = true;
 out:
 	mutex_unlock(&kvm->lock);
-- 
cgit v1.2.3


From bef103aa7dd75121f66848d91f6b6179d40c1a71 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 15 Feb 2013 19:20:06 +0000
Subject: ARM: KVM: fix kvm_arch_{prepare,commit}_memory_region

Commit f82a8cfe9 (KVM: struct kvm_memory_slot.user_alloc -> bool)
broke the ARM KVM port by changing the prototype of two global
functions.

Apply the same change to fix the compilation breakage.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/arm/kvm/arm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/arm/kvm/arm.c')

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 9ada5549216d..5a936988eb24 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -232,7 +232,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				   struct kvm_memory_slot *memslot,
 				   struct kvm_memory_slot old,
 				   struct kvm_userspace_memory_region *mem,
-				   int user_alloc)
+				   bool user_alloc)
 {
 	return 0;
 }
@@ -240,7 +240,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 void kvm_arch_commit_memory_region(struct kvm *kvm,
 				   struct kvm_userspace_memory_region *mem,
 				   struct kvm_memory_slot old,
-				   int user_alloc)
+				   bool user_alloc)
 {
 }
 
-- 
cgit v1.2.3