149 files changed, 4226 insertions, 1860 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index c6148166a7b4..4426e9687d89 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -841,6 +841,16 @@ config REFCOUNT_FULL
 	  against various use-after-free conditions that can be used in
 	  security flaw exploits.
 
+config HAVE_ARCH_PREL32_RELOCATIONS
+	bool
+	help
+	  May be selected by an architecture if it supports place-relative
+	  32-bit relocations, both in the toolchain and in the module loader,
+	  in which case relative references can be used in special sections
+	  for PCI fixup, initcalls etc which are only half the size on 64 bit
+	  architectures, and don't require runtime relocation on relocatable
+	  kernels.
+
 source "kernel/gcov/Kconfig"
 
 source "scripts/gcc-plugins/Kconfig"
diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig
index 85b2369d6b20..27ea6dfcf2f2 100644
--- a/arch/arm/configs/exynos_defconfig
+++ b/arch/arm/configs/exynos_defconfig
@@ -155,8 +155,8 @@ CONFIG_THERMAL_EMULATION=y
 CONFIG_WATCHDOG=y
 CONFIG_S3C2410_WATCHDOG=y
 CONFIG_MFD_CROS_EC=y
-CONFIG_MFD_CROS_EC_I2C=y
-CONFIG_MFD_CROS_EC_SPI=y
+CONFIG_CROS_EC_I2C=y
+CONFIG_CROS_EC_SPI=y
 CONFIG_MFD_MAX14577=y
 CONFIG_MFD_MAX77686=y
 CONFIG_MFD_MAX77693=y
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index 8f6be1982545..be732f382418 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -490,8 +490,8 @@ CONFIG_MFD_AC100=y
 CONFIG_MFD_AXP20X_I2C=y
 CONFIG_MFD_AXP20X_RSB=y
 CONFIG_MFD_CROS_EC=m
-CONFIG_MFD_CROS_EC_I2C=m
-CONFIG_MFD_CROS_EC_SPI=m
+CONFIG_CROS_EC_I2C=m
+CONFIG_CROS_EC_SPI=m
 CONFIG_MFD_DA9063=m
 CONFIG_MFD_MAX14577=y
 CONFIG_MFD_MAX77686=y
diff --git a/arch/arm/configs/pxa_defconfig b/arch/arm/configs/pxa_defconfig
index 5655a1cee87d..6bb506edb1f5 100644
--- a/arch/arm/configs/pxa_defconfig
+++ b/arch/arm/configs/pxa_defconfig
@@ -398,8 +398,8 @@ CONFIG_MFD_AS3711=y
 CONFIG_MFD_BCM590XX=m
 CONFIG_MFD_AXP20X=y
 CONFIG_MFD_CROS_EC=m
-CONFIG_MFD_CROS_EC_I2C=m
-CONFIG_MFD_CROS_EC_SPI=m
+CONFIG_CROS_EC_I2C=m
+CONFIG_CROS_EC_SPI=m
 CONFIG_MFD_ASIC3=y
 CONFIG_PMIC_DA903X=y
 CONFIG_HTC_EGPIO=y
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index fe2fb1ddd771..77121b713bef 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -107,9 +107,19 @@ static inline unsigned long *vcpu_hcr(const struct kvm_vcpu *vcpu)
 	return (unsigned long *)&vcpu->arch.hcr;
 }
 
+static inline void vcpu_clear_wfe_traps(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.hcr &= ~HCR_TWE;
+}
+
+static inline void vcpu_set_wfe_traps(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.hcr |= HCR_TWE;
+}
+
 static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu)
 {
-	return 1;
+	return true;
 }
 
 static inline unsigned long *vcpu_pc(struct kvm_vcpu *vcpu)
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 1f1fe4109b02..79906cecb091 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -216,6 +216,11 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
 int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
 unsigned long kvm_call_hyp(void *hypfn, ...);
 void force_vm_exit(const cpumask_t *mask);
+int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
+			      struct kvm_vcpu_events *events);
+
+int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
+			      struct kvm_vcpu_events *events);
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 8553d68b7c8a..265ea9cf7df7 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -75,17 +75,9 @@ phys_addr_t kvm_get_idmap_vector(void);
 int kvm_mmu_init(void);
 void kvm_clear_hyp_idmap(void);
 
-static inline void kvm_set_pmd(pmd_t *pmd, pmd_t new_pmd)
-{
-	*pmd = new_pmd;
-	dsb(ishst);
-}
-
-static inline void kvm_set_pte(pte_t *pte, pte_t new_pte)
-{
-	*pte = new_pte;
-	dsb(ishst);
-}
+#define kvm_mk_pmd(ptep)	__pmd(__pa(ptep) | PMD_TYPE_TABLE)
+#define kvm_mk_pud(pmdp)	__pud(__pa(pmdp) | PMD_TYPE_TABLE)
+#define kvm_mk_pgd(pudp)	({ BUILD_BUG(); 0; })
 
 static inline pte_t kvm_s2pte_mkwrite(pte_t pte)
 {
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index 16e006f708ca..4602464ebdfb 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -27,6 +27,7 @@
 #define __KVM_HAVE_GUEST_DEBUG
 #define __KVM_HAVE_IRQ_LINE
 #define __KVM_HAVE_READONLY_MEM
+#define __KVM_HAVE_VCPU_EVENTS
 
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 
@@ -125,6 +126,18 @@ struct kvm_sync_regs {
 struct kvm_arch_memory_slot {
 };
 
+/* for KVM_GET/SET_VCPU_EVENTS */
+struct kvm_vcpu_events {
+	struct {
+		__u8 serror_pending;
+		__u8 serror_has_esr;
+		/* Align it to 8 bytes */
+		__u8 pad[6];
+		__u64 serror_esr;
+	} exception;
+	__u32 reserved[12];
+};
+
 /* If you need to interpret the index values, here is the key: */
 #define KVM_REG_ARM_COPROC_MASK		0x000000000FFF0000
 #define KVM_REG_ARM_COPROC_SHIFT	16
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index 974d8d7d1bcd..3968d6c22455 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -38,25 +38,14 @@
 #error Sorry, your compiler targets APCS-26 but this kernel requires APCS-32
 #endif
 /*
- * GCC 3.0, 3.1: general bad code generation.
- * GCC 3.2.0: incorrect function argument offset calculation.
- * GCC 3.2.x: miscompiles NEW_AUX_ENT in fs/binfmt_elf.c
- *            (http://gcc.gnu.org/PR8896) and incorrect structure
- *	      initialisation in fs/jffs2/erase.c
  * GCC 4.8.0-4.8.2: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58854
  *	      miscompiles find_get_entry(), and can result in EXT3 and EXT4
  *	      filesystem corruption (possibly other FS too).
  */
-#ifdef __GNUC__
-#if (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
-#error Your compiler is too buggy; it is known to miscompile kernels.
-#error    Known good compilers: 3.3, 4.x
-#endif
-#if GCC_VERSION >= 40800 && GCC_VERSION < 40803
+#if defined(GCC_VERSION) && GCC_VERSION >= 40800 && GCC_VERSION < 40803
 #error Your compiler is too buggy; it is known to miscompile kernels
 #error and result in filesystem corruption and oopses.
 #endif
-#endif
 
 int main(void)
 {
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index d9c299133111..82ab015bf42b 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -330,16 +330,15 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
  * atomic helpers. Insert it into the gate_vma so that it is visible
  * through ptrace and /proc/<pid>/mem.
  */
-static struct vm_area_struct gate_vma = {
-	.vm_start	= 0xffff0000,
-	.vm_end		= 0xffff0000 + PAGE_SIZE,
-	.vm_flags	= VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC,
-};
+static struct vm_area_struct gate_vma;
 
 static int __init gate_vma_init(void)
 {
 	vma_init(&gate_vma, NULL);
 	gate_vma.vm_page_prot = PAGE_READONLY_EXEC;
+	gate_vma.vm_start = 0xffff0000;
+	gate_vma.vm_end	= 0xffff0000 + PAGE_SIZE;
+	gate_vma.vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC;
 	return 0;
 }
 arch_initcall(gate_vma_init);
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index 3a02e76699a6..450c7a4fbc8a 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -246,6 +246,7 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu,
 			   const struct coproc_reg *r)
 {
 	u64 reg;
+	bool g1;
 
 	if (!p->is_write)
 		return read_from_write_only(vcpu, p);
@@ -253,7 +254,25 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu,
 	reg = (u64)*vcpu_reg(vcpu, p->Rt2) << 32;
 	reg |= *vcpu_reg(vcpu, p->Rt1) ;
 
-	vgic_v3_dispatch_sgi(vcpu, reg);
+	/*
+	 * In a system where GICD_CTLR.DS=1, a ICC_SGI0R access generates
+	 * Group0 SGIs only, while ICC_SGI1R can generate either group,
+	 * depending on the SGI configuration. ICC_ASGI1R is effectively
+	 * equivalent to ICC_SGI0R, as there is no "alternative" secure
+	 * group.
+	 */
+	switch (p->Op1) {
+	default:		/* Keep GCC quiet */
+	case 0:			/* ICC_SGI1R */
+		g1 = true;
+		break;
+	case 1:			/* ICC_ASGI1R */
+	case 2:			/* ICC_SGI0R */
+		g1 = false;
+		break;
+	}
+
+	vgic_v3_dispatch_sgi(vcpu, reg, g1);
 
 	return true;
 }
@@ -459,6 +478,10 @@ static const struct coproc_reg cp15_regs[] = {
 
 	/* ICC_SGI1R */
 	{ CRm64(12), Op1( 0), is64, access_gic_sgi},
+	/* ICC_ASGI1R */
+	{ CRm64(12), Op1( 1), is64, access_gic_sgi},
+	/* ICC_SGI0R */
+	{ CRm64(12), Op1( 2), is64, access_gic_sgi},
 
 	/* VBAR: swapped by interrupt.S. */
 	{ CRn(12), CRm( 0), Op1( 0), Op2( 0), is32,
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index a18f33edc471..2b8de885b2bf 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -261,6 +261,29 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 	return -EINVAL;
 }
 
+
+int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
+			      struct kvm_vcpu_events *events)
+{
+	events->exception.serror_pending = !!(*vcpu_hcr(vcpu) & HCR_VA);
+
+	return 0;
+}
+
+int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
+			      struct kvm_vcpu_events *events)
+{
+	bool serror_pending = events->exception.serror_pending;
+	bool has_esr = events->exception.serror_has_esr;
+
+	if (serror_pending && has_esr)
+		return -EINVAL;
+	else if (serror_pending)
+		kvm_inject_vabt(vcpu);
+
+	return 0;
+}
+
 int __attribute_const__ kvm_target_cpu(void)
 {
 	switch (read_cpuid_part()) {
diff --git a/arch/arm/probes/uprobes/core.c b/arch/arm/probes/uprobes/core.c
index d1329f1ba4e4..bf992264060e 100644
--- a/arch/arm/probes/uprobes/core.c
+++ b/arch/arm/probes/uprobes/core.c
@@ -32,7 +32,7 @@ bool is_swbp_insn(uprobe_opcode_t *insn)
 int set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm,
 	     unsigned long vaddr)
 {
-	return uprobe_write_opcode(mm, vaddr,
+	return uprobe_write_opcode(auprobe, mm, vaddr,
 		   __opcode_to_mem_arm(auprobe->bpinsn));
 }
 
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index d0a53cc6293a..29e75b47becd 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -108,6 +108,7 @@ config ARM64
 	select HAVE_ARCH_KGDB
 	select HAVE_ARCH_MMAP_RND_BITS
 	select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
+	select HAVE_ARCH_PREL32_RELOCATIONS
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_STACKLEAK
 	select HAVE_ARCH_THREAD_STRUCT_WHITELIST
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 2c07e233012b..514787d45dee 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -373,8 +373,8 @@ CONFIG_UNIPHIER_WATCHDOG=y
 CONFIG_BCM2835_WDT=y
 CONFIG_MFD_AXP20X_RSB=y
 CONFIG_MFD_CROS_EC=y
-CONFIG_MFD_CROS_EC_I2C=y
-CONFIG_MFD_CROS_EC_SPI=y
+CONFIG_CROS_EC_I2C=y
+CONFIG_CROS_EC_SPI=y
 CONFIG_MFD_CROS_EC_CHARDEV=m
 CONFIG_MFD_EXYNOS_LPASS=m
 CONFIG_MFD_HI6421_PMIC=y
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index be3bf3d08916..ae1f70450fb2 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -50,7 +50,8 @@
 #define ARM64_HW_DBM				29
 #define ARM64_SSBD				30
 #define ARM64_MISMATCHED_CACHE_TYPE		31
+#define ARM64_HAS_STAGE2_FWB			32
 
-#define ARM64_NCAPS				32
+#define ARM64_NCAPS				33
 
 #endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 6dd285e979c9..aa45df752a16 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -23,6 +23,7 @@
 #include <asm/types.h>
 
 /* Hyp Configuration Register (HCR) bits */
+#define HCR_FWB		(UL(1) << 46)
 #define HCR_TEA		(UL(1) << 37)
 #define HCR_TERR	(UL(1) << 36)
 #define HCR_TLOR	(UL(1) << 35)
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 0c97e45d1dc3..6106a85ae0be 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -63,6 +63,8 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
 		/* trap error record accesses */
 		vcpu->arch.hcr_el2 |= HCR_TERR;
 	}
+	if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
+		vcpu->arch.hcr_el2 |= HCR_FWB;
 
 	if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features))
 		vcpu->arch.hcr_el2 &= ~HCR_RW;
@@ -81,6 +83,21 @@ static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu)
 	return (unsigned long *)&vcpu->arch.hcr_el2;
 }
 
+static inline void vcpu_clear_wfe_traps(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.hcr_el2 &= ~HCR_TWE;
+}
+
+static inline void vcpu_set_wfe_traps(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.hcr_el2 |= HCR_TWE;
+}
+
+static inline unsigned long vcpu_get_vsesr(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.vsesr_el2;
+}
+
 static inline void vcpu_set_vsesr(struct kvm_vcpu *vcpu, u64 vsesr)
 {
 	vcpu->arch.vsesr_el2 = vsesr;
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index fe8777b12f86..f26055f2306e 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -350,6 +350,11 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
 int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
 int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
+int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
+			      struct kvm_vcpu_events *events);
+
+int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
+			      struct kvm_vcpu_events *events);
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
@@ -378,16 +383,23 @@ void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run,
 int kvm_perf_init(void);
 int kvm_perf_teardown(void);
 
+void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome);
+
 struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
 
-void __kvm_set_tpidr_el2(u64 tpidr_el2);
 DECLARE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state);
 
 static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
 				       unsigned long hyp_stack_ptr,
 				       unsigned long vector_ptr)
 {
-	u64 tpidr_el2;
+	/*
+	 * Calculate the raw per-cpu offset without a translation from the
+	 * kernel's mapping to the linear mapping, and store it in tpidr_el2
+	 * so that we can use adr_l to access per-cpu variables in EL2.
+	 */
+	u64 tpidr_el2 = ((u64)this_cpu_ptr(&kvm_host_cpu_state) -
+			 (u64)kvm_ksym_ref(kvm_host_cpu_state));
 
 	/*
 	 * Call initialization code, and switch to the full blown HYP code.
@@ -396,17 +408,7 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
 	 * cpus_have_const_cap() wrapper.
 	 */
 	BUG_ON(!static_branch_likely(&arm64_const_caps_ready));
-	__kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr);
-
-	/*
-	 * Calculate the raw per-cpu offset without a translation from the
-	 * kernel's mapping to the linear mapping, and store it in tpidr_el2
-	 * so that we can use adr_l to access per-cpu variables in EL2.
-	 */
-	tpidr_el2 = (u64)this_cpu_ptr(&kvm_host_cpu_state)
-		- (u64)kvm_ksym_ref(kvm_host_cpu_state);
-
-	kvm_call_hyp(__kvm_set_tpidr_el2, tpidr_el2);
+	__kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr, tpidr_el2);
 }
 
 static inline bool kvm_arch_check_sve_has_vhe(void)
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index fb9a7127bb75..d6fff7de5539 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -169,8 +169,12 @@ phys_addr_t kvm_get_idmap_vector(void);
 int kvm_mmu_init(void);
 void kvm_clear_hyp_idmap(void);
 
-#define	kvm_set_pte(ptep, pte)		set_pte(ptep, pte)
-#define	kvm_set_pmd(pmdp, pmd)		set_pmd(pmdp, pmd)
+#define kvm_mk_pmd(ptep)					\
+	__pmd(__phys_to_pmd_val(__pa(ptep)) | PMD_TYPE_TABLE)
+#define kvm_mk_pud(pmdp)					\
+	__pud(__phys_to_pud_val(__pa(pmdp)) | PMD_TYPE_TABLE)
+#define kvm_mk_pgd(pudp)					\
+	__pgd(__phys_to_pgd_val(__pa(pudp)) | PUD_TYPE_TABLE)
 
 static inline pte_t kvm_s2pte_mkwrite(pte_t pte)
 {
@@ -267,6 +271,15 @@ static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
 {
 	void *va = page_address(pfn_to_page(pfn));
 
+	/*
+	 * With FWB, we ensure that the guest always accesses memory using
+	 * cacheable attributes, and we don't have to clean to PoC when
+	 * faulting in pages. Furthermore, FWB implies IDC, so cleaning to
+	 * PoU is not required either in this case.
+	 */
+	if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
+		return;
+
 	kvm_flush_dcache_to_poc(va, size);
 }
 
@@ -287,20 +300,26 @@ static inline void __invalidate_icache_guest_page(kvm_pfn_t pfn,
 
 static inline void __kvm_flush_dcache_pte(pte_t pte)
 {
-	struct page *page = pte_page(pte);
-	kvm_flush_dcache_to_poc(page_address(page), PAGE_SIZE);
+	if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) {
+		struct page *page = pte_page(pte);
+		kvm_flush_dcache_to_poc(page_address(page), PAGE_SIZE);
+	}
 }
 
 static inline void __kvm_flush_dcache_pmd(pmd_t pmd)
 {
-	struct page *page = pmd_page(pmd);
-	kvm_flush_dcache_to_poc(page_address(page), PMD_SIZE);
+	if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) {
+		struct page *page = pmd_page(pmd);
+		kvm_flush_dcache_to_poc(page_address(page), PMD_SIZE);
+	}
 }
 
 static inline void __kvm_flush_dcache_pud(pud_t pud)
 {
-	struct page *page = pud_page(pud);
-	kvm_flush_dcache_to_poc(page_address(page), PUD_SIZE);
+	if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) {
+		struct page *page = pud_page(pud);
+		kvm_flush_dcache_to_poc(page_address(page), PUD_SIZE);
+	}
 }
 
 #define kvm_virt_to_phys(x)		__pa_symbol(x)
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 49d99214f43c..b96442960aea 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -155,6 +155,13 @@
 #define MT_S2_NORMAL		0xf
 #define MT_S2_DEVICE_nGnRE	0x1
 
+/*
+ * Memory types for Stage-2 translation when ID_AA64MMFR2_EL1.FWB is 0001
+ * Stage-2 enforces Normal-WB and Device-nGnRE
+ */
+#define MT_S2_FWB_NORMAL	6
+#define MT_S2_FWB_DEVICE_nGnRE	1
+
 #ifdef CONFIG_ARM64_4K_PAGES
 #define IOREMAP_MAX_ORDER	(PUD_SHIFT)
 #else
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 108ecad7acc5..78b942c1bea4 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -67,8 +67,28 @@
 #define PAGE_HYP_RO		__pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY | PTE_HYP_XN)
 #define PAGE_HYP_DEVICE		__pgprot(PROT_DEVICE_nGnRE | PTE_HYP)
 
-#define PAGE_S2			__pgprot(_PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY | PTE_S2_XN)
-#define PAGE_S2_DEVICE		__pgprot(_PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_S2_XN)
+#define PAGE_S2_MEMATTR(attr)						\
+	({								\
+		u64 __val;						\
+		if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))		\
+			__val = PTE_S2_MEMATTR(MT_S2_FWB_ ## attr);	\
+		else							\
+			__val = PTE_S2_MEMATTR(MT_S2_ ## attr);		\
+		__val;							\
+	 })
+
+#define PAGE_S2_XN							\
+	({								\
+		u64 __val;						\
+		if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC))		\
+			__val = 0;					\
+		else							\
+			__val = PTE_S2_XN;				\
+		__val;							\
+	})
+
+#define PAGE_S2			__pgprot(_PROT_DEFAULT | PAGE_S2_MEMATTR(NORMAL) | PTE_S2_RDONLY | PAGE_S2_XN)
+#define PAGE_S2_DEVICE		__pgprot(_PROT_DEFAULT | PAGE_S2_MEMATTR(DEVICE_nGnRE) | PTE_S2_RDONLY | PAGE_S2_XN)
 
 #define PAGE_NONE		__pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
 #define PAGE_SHARED		__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index e205ec8489e9..c1470931b897 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -314,6 +314,8 @@
 #define SYS_ICC_DIR_EL1			sys_reg(3, 0, 12, 11, 1)
 #define SYS_ICC_RPR_EL1			sys_reg(3, 0, 12, 11, 3)
 #define SYS_ICC_SGI1R_EL1		sys_reg(3, 0, 12, 11, 5)
+#define SYS_ICC_ASGI1R_EL1		sys_reg(3, 0, 12, 11, 6)
+#define SYS_ICC_SGI0R_EL1		sys_reg(3, 0, 12, 11, 7)
 #define SYS_ICC_IAR1_EL1		sys_reg(3, 0, 12, 12, 0)
 #define SYS_ICC_EOIR1_EL1		sys_reg(3, 0, 12, 12, 1)
 #define SYS_ICC_HPPIR1_EL1		sys_reg(3, 0, 12, 12, 2)
@@ -579,6 +581,7 @@
 #define ID_AA64MMFR1_VMIDBITS_16	2
 
 /* id_aa64mmfr2 */
+#define ID_AA64MMFR2_FWB_SHIFT		40
 #define ID_AA64MMFR2_AT_SHIFT		32
 #define ID_AA64MMFR2_LVA_SHIFT		16
 #define ID_AA64MMFR2_IESB_SHIFT		12
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 4e76630dd655..97c3478ee6e7 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -39,6 +39,7 @@
 #define __KVM_HAVE_GUEST_DEBUG
 #define __KVM_HAVE_IRQ_LINE
 #define __KVM_HAVE_READONLY_MEM
+#define __KVM_HAVE_VCPU_EVENTS
 
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 
@@ -154,6 +155,18 @@ struct kvm_sync_regs {
 struct kvm_arch_memory_slot {
 };
 
+/* for KVM_GET/SET_VCPU_EVENTS */
+struct kvm_vcpu_events {
+	struct {
+		__u8 serror_pending;
+		__u8 serror_has_esr;
+		/* Align it to 8 bytes */
+		__u8 pad[6];
+		__u64 serror_esr;
+	} exception;
+	__u32 reserved[12];
+};
+
 /* If you need to interpret the index values, here is the key: */
 #define KVM_REG_ARM_COPROC_MASK		0x000000000FFF0000
 #define KVM_REG_ARM_COPROC_SHIFT	16
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 611e8921c3d4..e238b7932096 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -192,6 +192,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
 };
 
 static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_FWB_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_AT_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LVA_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_IESB_SHIFT, 4, 0),
@@ -1026,6 +1027,14 @@ static void cpu_copy_el2regs(const struct arm64_cpu_capabilities *__unused)
 }
 #endif
 
+static void cpu_has_fwb(const struct arm64_cpu_capabilities *__unused)
+{
+	u64 val = read_sysreg_s(SYS_CLIDR_EL1);
+
+	/* Check that CLIDR_EL1.LOU{U,IS} are both 0 */
+	WARN_ON(val & (7 << 27 | 7 << 21));
+}
+
 static const struct arm64_cpu_capabilities arm64_features[] = {
 	{
 		.desc = "GIC system register CPU interface",
@@ -1182,6 +1191,17 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
 		.matches = has_cache_dic,
 	},
+	{
+		.desc = "Stage-2 Force Write-Back",
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
+		.capability = ARM64_HAS_STAGE2_FWB,
+		.sys_reg = SYS_ID_AA64MMFR2_EL1,
+		.sign = FTR_UNSIGNED,
+		.field_pos = ID_AA64MMFR2_FWB_SHIFT,
+		.min_field_value = 1,
+		.matches = has_cpuid_feature,
+		.cpu_enable = cpu_has_fwb,
+	},
 #ifdef CONFIG_ARM64_HW_AFDBM
 	{
 		/*
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index cdd4d9d6d575..07256b08226c 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -289,6 +289,39 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 	return -EINVAL;
 }
 
+int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
+			      struct kvm_vcpu_events *events)
+{
+	events->exception.serror_pending = !!(vcpu->arch.hcr_el2 & HCR_VSE);
+	events->exception.serror_has_esr = cpus_have_const_cap(ARM64_HAS_RAS_EXTN);
+
+	if (events->exception.serror_pending && events->exception.serror_has_esr)
+		events->exception.serror_esr = vcpu_get_vsesr(vcpu);
+
+	return 0;
+}
+
+int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
+			      struct kvm_vcpu_events *events)
+{
+	bool serror_pending = events->exception.serror_pending;
+	bool has_esr = events->exception.serror_has_esr;
+
+	if (serror_pending && has_esr) {
+		if (!cpus_have_const_cap(ARM64_HAS_RAS_EXTN))
+			return -EINVAL;
+
+		if (!((events->exception.serror_esr) & ~ESR_ELx_ISS_MASK))
+			kvm_set_sei_esr(vcpu, events->exception.serror_esr);
+		else
+			return -EINVAL;
+	} else if (serror_pending) {
+		kvm_inject_vabt(vcpu);
+	}
+
+	return 0;
+}
+
 int __attribute_const__ kvm_target_cpu(void)
 {
 	unsigned long implementor = read_cpuid_implementor();
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index 6fd91b31a131..ea9225160786 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -57,6 +57,7 @@ __invalid:
 	 * x0: HYP pgd
 	 * x1: HYP stack
 	 * x2: HYP vectors
+	 * x3: per-CPU offset
 	 */
 __do_hyp_init:
 	/* Check for a stub HVC call */
@@ -119,9 +120,8 @@ CPU_BE(	orr	x4, x4, #SCTLR_ELx_EE)
 	mov	sp, x1
 	msr	vbar_el2, x2
 
-	/* copy tpidr_el1 into tpidr_el2 for use by HYP */
-	mrs	x1, tpidr_el1
-	msr	tpidr_el2, x1
+	/* Set tpidr_el2 for use by HYP */
+	msr	tpidr_el2, x3
 
 	/* Hello, World! */
 	eret
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
index 35bc16832efe..9ce223944983 100644
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -288,8 +288,3 @@ void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu)
 
 	vcpu->arch.sysregs_loaded_on_cpu = false;
 }
-
-void __hyp_text __kvm_set_tpidr_el2(u64 tpidr_el2)
-{
-	asm("msr tpidr_el2, %0": : "r" (tpidr_el2));
-}
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index d8e71659ba7e..a55e91dfcf8f 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -164,9 +164,9 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu)
 		inject_undef64(vcpu);
 }
 
-static void pend_guest_serror(struct kvm_vcpu *vcpu, u64 esr)
+void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 esr)
 {
-	vcpu_set_vsesr(vcpu, esr);
+	vcpu_set_vsesr(vcpu, esr & ESR_ELx_ISS_MASK);
 	*vcpu_hcr(vcpu) |= HCR_VSE;
 }
 
@@ -184,5 +184,5 @@ static void pend_guest_serror(struct kvm_vcpu *vcpu, u64 esr)
  */
 void kvm_inject_vabt(struct kvm_vcpu *vcpu)
 {
-	pend_guest_serror(vcpu, ESR_ELx_ISV);
+	kvm_set_sei_esr(vcpu, ESR_ELx_ISV);
 }
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 4e4aedaf7ab7..e37c78bbe1ca 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -77,8 +77,12 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_ARM_PMU_V3:
 		r = kvm_arm_support_pmu_v3();
 		break;
+	case KVM_CAP_ARM_INJECT_SERROR_ESR:
+		r = cpus_have_const_cap(ARM64_HAS_RAS_EXTN);
+		break;
 	case KVM_CAP_SET_GUEST_DEBUG:
 	case KVM_CAP_VCPU_ATTRIBUTES:
+	case KVM_CAP_VCPU_EVENTS:
 		r = 1;
 		break;
 	default:
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index a4363735d3f8..22fbbdbece3c 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -194,7 +194,16 @@ static bool access_dcsw(struct kvm_vcpu *vcpu,
 	if (!p->is_write)
 		return read_from_write_only(vcpu, p, r);
 
-	kvm_set_way_flush(vcpu);
+	/*
+	 * Only track S/W ops if we don't have FWB. It still indicates
+	 * that the guest is a bit broken (S/W operations should only
+	 * be done by firmware, knowing that there is only a single
+	 * CPU left in the system, and certainly not from non-secure
+	 * software).
+	 */
+	if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
+		kvm_set_way_flush(vcpu);
+
 	return true;
 }
 
@@ -243,10 +252,43 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu,
 			   struct sys_reg_params *p,
 			   const struct sys_reg_desc *r)
 {
+	bool g1;
+
 	if (!p->is_write)
 		return read_from_write_only(vcpu, p, r);
 
-	vgic_v3_dispatch_sgi(vcpu, p->regval);
+	/*
+	 * In a system where GICD_CTLR.DS=1, a ICC_SGI0R_EL1 access generates
+	 * Group0 SGIs only, while ICC_SGI1R_EL1 can generate either group,
+	 * depending on the SGI configuration. ICC_ASGI1R_EL1 is effectively
+	 * equivalent to ICC_SGI0R_EL1, as there is no "alternative" secure
+	 * group.
+	 */
+	if (p->is_aarch32) {
+		switch (p->Op1) {
+		default:		/* Keep GCC quiet */
+		case 0:			/* ICC_SGI1R */
+			g1 = true;
+			break;
+		case 1:			/* ICC_ASGI1R */
+		case 2:			/* ICC_SGI0R */
+			g1 = false;
+			break;
+		}
+	} else {
+		switch (p->Op2) {
+		default:		/* Keep GCC quiet */
+		case 5:			/* ICC_SGI1R_EL1 */
+			g1 = true;
+			break;
+		case 6:			/* ICC_ASGI1R_EL1 */
+		case 7:			/* ICC_SGI0R_EL1 */
+			g1 = false;
+			break;
+		}
+	}
+
+	vgic_v3_dispatch_sgi(vcpu, p->regval, g1);
 
 	return true;
 }
@@ -1303,6 +1345,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	{ SYS_DESC(SYS_ICC_DIR_EL1), read_from_write_only },
 	{ SYS_DESC(SYS_ICC_RPR_EL1), write_to_read_only },
 	{ SYS_DESC(SYS_ICC_SGI1R_EL1), access_gic_sgi },
+	{ SYS_DESC(SYS_ICC_ASGI1R_EL1), access_gic_sgi },
+	{ SYS_DESC(SYS_ICC_SGI0R_EL1), access_gic_sgi },
 	{ SYS_DESC(SYS_ICC_IAR1_EL1), write_to_read_only },
 	{ SYS_DESC(SYS_ICC_EOIR1_EL1), read_from_write_only },
 	{ SYS_DESC(SYS_ICC_HPPIR1_EL1), write_to_read_only },
@@ -1613,8 +1657,6 @@ static const struct sys_reg_desc cp14_64_regs[] = {
  * register).
  */
 static const struct sys_reg_desc cp15_regs[] = {
-	{ Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi },
-
 	{ Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, c1_SCTLR },
 	{ Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
 	{ Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 },
@@ -1737,8 +1779,10 @@ static const struct sys_reg_desc cp15_regs[] = {
 static const struct sys_reg_desc cp15_64_regs[] = {
 	{ Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
 	{ Op1( 0), CRn( 0), CRm( 9), Op2( 0), access_pmu_evcntr },
-	{ Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi },
+	{ Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI1R */
 	{ Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 },
+	{ Op1( 1), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_ASGI1R */
+	{ Op1( 2), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI0R */
 	{ Op1( 2), CRn( 0), CRm(14), Op2( 0), access_cntp_cval },
 };
 
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index 5e89d40be8cd..0b334b671e90 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -16,6 +16,7 @@ config H8300
 	select OF_IRQ
 	select OF_EARLY_FLATTREE
 	select HAVE_MEMBLOCK
+	select NO_BOOTMEM
 	select TIMER_OF
 	select H8300_TMR8
 	select HAVE_KERNEL_GZIP
diff --git a/arch/h8300/Makefile b/arch/h8300/Makefile
index e1c02ca230cb..cc12b162c222 100644
--- a/arch/h8300/Makefile
+++ b/arch/h8300/Makefile
@@ -8,6 +8,8 @@
 # (C) Copyright 2002-2015 Yoshinori Sato <ysato@users.sourceforge.jp>
 #
 
+KBUILD_DEFCONFIG := edosk2674_defconfig
+
 cflags-$(CONFIG_CPU_H8300H)	:= -mh
 aflags-$(CONFIG_CPU_H8300H)	:= -mh -Wa,--mach=h8300h
 ldflags-$(CONFIG_CPU_H8300H)	:= -mh8300helf_linux
@@ -22,6 +24,8 @@ KBUILD_CFLAGS += -DUTS_SYSNAME=\"uClinux\"
 KBUILD_AFLAGS += $(aflags-y)
 LDFLAGS += $(ldflags-y)
 
+CHECKFLAGS += -msize-long
+
 ifeq ($(CROSS_COMPILE),)
 CROSS_COMPILE := h8300-unknown-linux-
 endif
diff --git a/arch/h8300/boot/dts/h8300h_sim.dts b/arch/h8300/boot/dts/h8300h_sim.dts
index f1c31cecdec8..595398b9d018 100644
--- a/arch/h8300/boot/dts/h8300h_sim.dts
+++ b/arch/h8300/boot/dts/h8300h_sim.dts
@@ -73,7 +73,7 @@
 	timer16: timer@ffff68 {
 		compatible = "renesas,16bit-timer";
 		reg = <0xffff68 8>, <0xffff60 8>;
-		interrupts = <24 0>;
+		interrupts = <26 0>;
 		renesas,channel = <0>;
 		clocks = <&fclk>;
 		clock-names = "fck";
diff --git a/arch/h8300/include/asm/bitops.h b/arch/h8300/include/asm/bitops.h
index ea0cb0cf6a8b..647a83bd40b7 100644
--- a/arch/h8300/include/asm/bitops.h
+++ b/arch/h8300/include/asm/bitops.h
@@ -29,11 +29,11 @@ static inline unsigned long ffz(unsigned long word)
 
 	result = -1;
 	__asm__("1:\n\t"
-		"shlr.l %2\n\t"
+		"shlr.l %1\n\t"
 		"adds #1,%0\n\t"
 		"bcs 1b"
-		: "=r"(result)
-		: "0"(result), "r"(word));
+		: "=r"(result),"=r"(word)
+		: "0"(result), "1"(word));
 	return result;
 }
 
@@ -66,7 +66,7 @@ H8300_GEN_BITOP(change_bit, "bnot")
 
 #undef H8300_GEN_BITOP
 
-static inline int test_bit(int nr, const unsigned long *addr)
+static inline int test_bit(int nr, const volatile unsigned long *addr)
 {
 	int ret = 0;
 	unsigned char *b_addr;
@@ -162,11 +162,11 @@ static inline unsigned long __ffs(unsigned long word)
 
 	result = -1;
 	__asm__("1:\n\t"
-		"shlr.l %2\n\t"
+		"shlr.l %1\n\t"
 		"adds #1,%0\n\t"
 		"bcc 1b"
-		: "=r" (result)
-		: "0"(result), "r"(word));
+		: "=r" (result),"=r"(word)
+		: "0"(result), "1"(word));
 	return result;
 }
 
diff --git a/arch/h8300/include/asm/ptrace.h b/arch/h8300/include/asm/ptrace.h
index 313cafa85380..66d383848ff1 100644
--- a/arch/h8300/include/asm/ptrace.h
+++ b/arch/h8300/include/asm/ptrace.h
@@ -4,6 +4,8 @@
 
 #include <uapi/asm/ptrace.h>
 
+struct task_struct;
+
 #ifndef __ASSEMBLY__
 #ifndef PS_S
 #define PS_S  (0x10)
diff --git a/arch/h8300/kernel/kgdb.c b/arch/h8300/kernel/kgdb.c
index 602e478afbd5..1a1d30cb0609 100644
--- a/arch/h8300/kernel/kgdb.c
+++ b/arch/h8300/kernel/kgdb.c
@@ -129,7 +129,7 @@ void kgdb_arch_exit(void)
 	/* Nothing to do */
 }
 
-const struct kgdb_arch arch_kgdb_ops = {
+struct kgdb_arch arch_kgdb_ops = {
 	/* Breakpoint instruction: trapa #2 */
 	.gdb_bpt_instr = { 0x57, 0x20 },
 };
diff --git a/arch/h8300/kernel/setup.c b/arch/h8300/kernel/setup.c
index a4d0470c10a9..34e2df5c0d6d 100644
--- a/arch/h8300/kernel/setup.c
+++ b/arch/h8300/kernel/setup.c
@@ -23,7 +23,6 @@
 #include <linux/init.h>
 #include <linux/of.h>
 #include <linux/of_fdt.h>
-#include <linux/of_platform.h>
 #include <linux/of_address.h>
 #include <linux/clk-provider.h>
 #include <linux/memblock.h>
@@ -71,10 +70,6 @@ void __init h8300_fdt_init(void *fdt, char *bootargs)
 
 static void __init bootmem_init(void)
 {
-	int bootmap_size;
-	unsigned long ram_start_pfn;
-	unsigned long free_ram_start_pfn;
-	unsigned long ram_end_pfn;
 	struct memblock_region *region;
 
 	memory_end = memory_start = 0;
@@ -88,33 +83,17 @@ static void __init bootmem_init(void)
 	if (!memory_end)
 		panic("No memory!");
 
-	ram_start_pfn = PFN_UP(memory_start);
-	/* free_ram_start_pfn is first page after kernel */
-	free_ram_start_pfn = PFN_UP(__pa(_end));
-	ram_end_pfn = PFN_DOWN(memblock_end_of_DRAM());
+	/* setup bootmem globals (we use no_bootmem, but mm still depends on this) */
+	min_low_pfn = PFN_UP(memory_start);
+	max_low_pfn = PFN_DOWN(memblock_end_of_DRAM());
+	max_pfn = max_low_pfn;
 
-	max_pfn = ram_end_pfn;
+	memblock_reserve(__pa(_stext), _end - _stext);
 
-	/*
-	 * give all the memory to the bootmap allocator,  tell it to put the
-	 * boot mem_map at the start of memory
-	 */
-	bootmap_size = init_bootmem_node(NODE_DATA(0),
-					 free_ram_start_pfn,
-					 0,
-					 ram_end_pfn);
-	/*
-	 * free the usable memory,  we have to make sure we do not free
-	 * the bootmem bitmap so we then reserve it after freeing it :-)
-	 */
-	free_bootmem(PFN_PHYS(free_ram_start_pfn),
-		     (ram_end_pfn - free_ram_start_pfn) << PAGE_SHIFT);
-	reserve_bootmem(PFN_PHYS(free_ram_start_pfn), bootmap_size,
-			BOOTMEM_DEFAULT);
+	early_init_fdt_reserve_self();
+	early_init_fdt_scan_reserved_mem();
 
-	for_each_memblock(reserved, region) {
-		reserve_bootmem(region->base, region->size, BOOTMEM_DEFAULT);
-	}
+	memblock_dump_all();
 }
 
 void __init setup_arch(char **cmdline_p)
@@ -188,15 +167,6 @@ const struct seq_operations cpuinfo_op = {
 	.show	= show_cpuinfo,
 };
 
-static int __init device_probe(void)
-{
-	of_platform_populate(NULL, NULL, NULL, NULL);
-
-	return 0;
-}
-
-device_initcall(device_probe);
-
 #if defined(CONFIG_CPU_H8300H)
 #define get_wait(base, addr) ({		\
 	int baddr;			\
diff --git a/arch/h8300/kernel/sim-console.c b/arch/h8300/kernel/sim-console.c
index 46138f55a9ea..03aa35b1a08c 100644
--- a/arch/h8300/kernel/sim-console.c
+++ b/arch/h8300/kernel/sim-console.c
@@ -13,12 +13,13 @@
 
 static void sim_write(struct console *con, const char *s, unsigned n)
 {
-	register const int fd __asm__("er0") = 1; /* stdout */
 	register const char *_ptr __asm__("er1") = s;
 	register const unsigned _len __asm__("er2") = n;
 
-	__asm__(".byte 0x5e,0x00,0x00,0xc7\n\t" /* jsr @0xc7 (sys_write) */
-		: : "g"(fd), "g"(_ptr), "g"(_len));
+	__asm__("sub.l er0,er0\n\t"		/* er0 = 1 (stdout) */
+		"inc.l #1,er0\n\t"
+		".byte 0x5e,0x00,0x00,0xc7\n\t" /* jsr @0xc7 (sys_write) */
+		: : "g"(_ptr), "g"(_len):"er0");
 }
 
 static int __init sim_setup(struct earlycon_device *device, const char *opt)
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 2bf4ef792f2c..8b4a0c1748c0 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -28,6 +28,7 @@ config IA64
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_MEMBLOCK
 	select HAVE_MEMBLOCK_NODE_MAP
+	select NO_BOOTMEM
 	select HAVE_VIRT_CPU_ACCOUNTING
 	select ARCH_HAS_DMA_MARK_CLEAN
 	select ARCH_HAS_SG_CHAIN
diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
index ee5b652d320a..671ce1e3f6f2 100644
--- a/arch/ia64/hp/common/sba_iommu.c
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -1805,7 +1805,7 @@ static struct ioc_iommu ioc_iommu_info[] __initdata = {
 	{ SX2000_IOC_ID, "sx2000", NULL },
 };
 
-static void ioc_init(unsigned long hpa, struct ioc *ioc)
+static void __init ioc_init(unsigned long hpa, struct ioc *ioc)
 {
 	struct ioc_iommu *info;
 
@@ -2002,7 +2002,7 @@ sba_map_ioc_to_node(struct ioc *ioc, acpi_handle handle)
 #endif
 }
 
-static void acpi_sba_ioc_add(struct ioc *ioc)
+static void __init acpi_sba_ioc_add(struct ioc *ioc)
 {
 	acpi_handle handle = ioc->handle;
 	acpi_status status;
diff --git a/arch/ia64/include/asm/io.h b/arch/ia64/include/asm/io.h
index 6f952171abf9..1e6fef69bb01 100644
--- a/arch/ia64/include/asm/io.h
+++ b/arch/ia64/include/asm/io.h
@@ -454,6 +454,7 @@ extern void memset_io(volatile void __iomem *s, int c, long n);
 #define xlate_dev_kmem_ptr xlate_dev_kmem_ptr
 #define xlate_dev_mem_ptr xlate_dev_mem_ptr
 #include <asm-generic/io.h>
+#undef PCI_IOBASE
 
 # endif /* __KERNEL__ */
 
diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c
index f4db2168d1b8..00e8e2a1eb19 100644
--- a/arch/ia64/kernel/asm-offsets.c
+++ b/arch/ia64/kernel/asm-offsets.c
@@ -50,8 +50,7 @@ void foo(void)
 
 	DEFINE(IA64_TASK_BLOCKED_OFFSET,offsetof (struct task_struct, blocked));
 	DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid));
-	DEFINE(IA64_TASK_GROUP_LEADER_OFFSET, offsetof (struct task_struct, group_leader));
-	DEFINE(IA64_TASK_TGIDLINK_OFFSET, offsetof (struct task_struct, pids[PIDTYPE_PID].pid));
+	DEFINE(IA64_TASK_THREAD_PID_OFFSET,offsetof (struct task_struct, thread_pid));
 	DEFINE(IA64_PID_LEVEL_OFFSET, offsetof (struct pid, level));
 	DEFINE(IA64_PID_UPID_OFFSET, offsetof (struct pid, numbers[0]));
 	DEFINE(IA64_TASK_PENDING_OFFSET,offsetof (struct task_struct, pending));
@@ -68,6 +67,7 @@ void foo(void)
 	DEFINE(IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,offsetof (struct signal_struct,
 							     group_stop_count));
 	DEFINE(IA64_SIGNAL_SHARED_PENDING_OFFSET,offsetof (struct signal_struct, shared_pending));
+	DEFINE(IA64_SIGNAL_PIDS_TGID_OFFSET, offsetof (struct signal_struct, pids[PIDTYPE_TGID]));
 
 	BLANK();
 
diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S
index fe742ffafc7a..d80c99a5f55d 100644
--- a/arch/ia64/kernel/fsys.S
+++ b/arch/ia64/kernel/fsys.S
@@ -62,16 +62,16 @@ ENTRY(fsys_getpid)
 	.prologue
 	.altrp b6
 	.body
-	add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16
+	add r17=IA64_TASK_SIGNAL_OFFSET,r16
 	;;
-	ld8 r17=[r17]				// r17 = current->group_leader
+	ld8 r17=[r17]				// r17 = current->signal
 	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
 	;;
 	ld4 r9=[r9]
-	add r17=IA64_TASK_TGIDLINK_OFFSET,r17
+	add r17=IA64_SIGNAL_PIDS_TGID_OFFSET,r17
 	;;
 	and r9=TIF_ALLWORK_MASK,r9
-	ld8 r17=[r17]				// r17 = current->group_leader->pids[PIDTYPE_PID].pid
+	ld8 r17=[r17]				// r17 = current->signal->pids[PIDTYPE_TGID]
 	;;
 	add r8=IA64_PID_LEVEL_OFFSET,r17
 	;;
@@ -96,11 +96,11 @@ ENTRY(fsys_set_tid_address)
 	.altrp b6
 	.body
 	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
-	add r17=IA64_TASK_TGIDLINK_OFFSET,r16
+	add r17=IA64_TASK_THREAD_PID_OFFSET,r16
 	;;
 	ld4 r9=[r9]
 	tnat.z p6,p7=r32		// check argument register for being NaT
-	ld8 r17=[r17]				// r17 = current->pids[PIDTYPE_PID].pid
+	ld8 r17=[r17]				// r17 = current->thread_pid
 	;;
 	and r9=TIF_ALLWORK_MASK,r9
 	add r8=IA64_PID_LEVEL_OFFSET,r17
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index ad43cbf70628..0e6c2d9fb498 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -32,6 +32,7 @@
 #include <linux/delay.h>
 #include <linux/cpu.h>
 #include <linux/kernel.h>
+#include <linux/memblock.h>
 #include <linux/reboot.h>
 #include <linux/sched/mm.h>
 #include <linux/sched/clock.h>
@@ -383,8 +384,16 @@ reserve_memory (void)
 
 	sort_regions(rsvd_region, num_rsvd_regions);
 	num_rsvd_regions = merge_regions(rsvd_region, num_rsvd_regions);
-}
 
+	/* reserve all regions except the end of memory marker with memblock */
+	for (n = 0; n < num_rsvd_regions - 1; n++) {
+		struct rsvd_region *region = &rsvd_region[n];
+		phys_addr_t addr = __pa(region->start);
+		phys_addr_t size = region->end - region->start;
+
+		memblock_reserve(addr, size);
+	}
+}
 
 /**
  * find_initrd - get initrd parameters from the boot parameter structure
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index 7d64b30913d1..e2e40bbd391c 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -34,53 +34,6 @@ static unsigned long max_gap;
 /* physical address where the bootmem map is located */
 unsigned long bootmap_start;
 
-/**
- * find_bootmap_location - callback to find a memory area for the bootmap
- * @start: start of region
- * @end: end of region
- * @arg: unused callback data
- *
- * Find a place to put the bootmap and return its starting address in
- * bootmap_start.  This address must be page-aligned.
- */
-static int __init
-find_bootmap_location (u64 start, u64 end, void *arg)
-{
-	u64 needed = *(unsigned long *)arg;
-	u64 range_start, range_end, free_start;
-	int i;
-
-#if IGNORE_PFN0
-	if (start == PAGE_OFFSET) {
-		start += PAGE_SIZE;
-		if (start >= end)
-			return 0;
-	}
-#endif
-
-	free_start = PAGE_OFFSET;
-
-	for (i = 0; i < num_rsvd_regions; i++) {
-		range_start = max(start, free_start);
-		range_end   = min(end, rsvd_region[i].start & PAGE_MASK);
-
-		free_start = PAGE_ALIGN(rsvd_region[i].end);
-
-		if (range_end <= range_start)
-			continue; /* skip over empty range */
-
-		if (range_end - range_start >= needed) {
-			bootmap_start = __pa(range_start);
-			return -1;	/* done */
-		}
-
-		/* nothing more available in this segment */
-		if (range_end == end)
-			return 0;
-	}
-	return 0;
-}
-
 #ifdef CONFIG_SMP
 static void *cpu_data;
 /**
@@ -196,8 +149,6 @@ setup_per_cpu_areas(void)
 void __init
 find_memory (void)
 {
-	unsigned long bootmap_size;
-
 	reserve_memory();
 
 	/* first find highest page frame number */
@@ -205,21 +156,12 @@ find_memory (void)
 	max_low_pfn = 0;
 	efi_memmap_walk(find_max_min_low_pfn, NULL);
 	max_pfn = max_low_pfn;
-	/* how many bytes to cover all the pages */
-	bootmap_size = bootmem_bootmap_pages(max_pfn) << PAGE_SHIFT;
-
-	/* look for a location to hold the bootmap */
-	bootmap_start = ~0UL;
-	efi_memmap_walk(find_bootmap_location, &bootmap_size);
-	if (bootmap_start == ~0UL)
-		panic("Cannot find %ld bytes for bootmap\n", bootmap_size);
 
-	bootmap_size = init_bootmem_node(NODE_DATA(0),
-			(bootmap_start >> PAGE_SHIFT), 0, max_pfn);
-
-	/* Free all available memory, then mark bootmem-map as being in use. */
-	efi_memmap_walk(filter_rsvd_memory, free_bootmem);
-	reserve_bootmem(bootmap_start, bootmap_size, BOOTMEM_DEFAULT);
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+	efi_memmap_walk(filter_memory, register_active_ranges);
+#else
+	memblock_add_node(0, PFN_PHYS(max_low_pfn), 0);
+#endif
 
 	find_initrd();
 
@@ -244,11 +186,9 @@ paging_init (void)
 	max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
 
 #ifdef CONFIG_VIRTUAL_MEM_MAP
-	efi_memmap_walk(filter_memory, register_active_ranges);
 	efi_memmap_walk(find_largest_hole, (u64 *)&max_gap);
 	if (max_gap < LARGE_GAP) {
 		vmem_map = (struct page *) 0;
-		free_area_init_nodes(max_zone_pfns);
 	} else {
 		unsigned long map_size;
 
@@ -266,13 +206,10 @@ paging_init (void)
 		 */
 		NODE_DATA(0)->node_mem_map = vmem_map +
 			find_min_pfn_with_active_regions();
-		free_area_init_nodes(max_zone_pfns);
 
 		printk("Virtual mem_map starts at 0x%p\n", mem_map);
 	}
-#else /* !CONFIG_VIRTUAL_MEM_MAP */
-	memblock_add_node(0, PFN_PHYS(max_low_pfn), 0);
-	free_area_init_nodes(max_zone_pfns);
 #endif /* !CONFIG_VIRTUAL_MEM_MAP */
+	free_area_init_nodes(max_zone_pfns);
 	zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
 }
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index 7d9bd20319ff..1928d5719e41 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -20,6 +20,7 @@
 #include <linux/nmi.h>
 #include <linux/swap.h>
 #include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/acpi.h>
 #include <linux/efi.h>
 #include <linux/nodemask.h>
@@ -38,9 +39,6 @@ struct early_node_data {
 	struct ia64_node_data *node_data;
 	unsigned long pernode_addr;
 	unsigned long pernode_size;
-#ifdef CONFIG_ZONE_DMA32
-	unsigned long num_dma_physpages;
-#endif
 	unsigned long min_pfn;
 	unsigned long max_pfn;
 };
@@ -60,33 +58,31 @@ pg_data_t *pgdat_list[MAX_NUMNODES];
 	     (((node)*PERCPU_PAGE_SIZE) & (MAX_NODE_ALIGN_OFFSET - 1)))
 
 /**
- * build_node_maps - callback to setup bootmem structs for each node
+ * build_node_maps - callback to setup mem_data structs for each node
  * @start: physical start of range
  * @len: length of range
  * @node: node where this range resides
  *
- * We allocate a struct bootmem_data for each piece of memory that we wish to
+ * Detect extents of each piece of memory that we wish to
  * treat as a virtually contiguous block (i.e. each node). Each such block
  * must start on an %IA64_GRANULE_SIZE boundary, so we round the address down
  * if necessary.  Any non-existent pages will simply be part of the virtual
- * memmap.  We also update min_low_pfn and max_low_pfn here as we receive
- * memory ranges from the caller.
+ * memmap.
  */
 static int __init build_node_maps(unsigned long start, unsigned long len,
 				  int node)
 {
 	unsigned long spfn, epfn, end = start + len;
-	struct bootmem_data *bdp = &bootmem_node_data[node];
 
 	epfn = GRANULEROUNDUP(end) >> PAGE_SHIFT;
 	spfn = GRANULEROUNDDOWN(start) >> PAGE_SHIFT;
 
-	if (!bdp->node_low_pfn) {
-		bdp->node_min_pfn = spfn;
-		bdp->node_low_pfn = epfn;
+	if (!mem_data[node].min_pfn) {
+		mem_data[node].min_pfn = spfn;
+		mem_data[node].max_pfn = epfn;
 	} else {
-		bdp->node_min_pfn = min(spfn, bdp->node_min_pfn);
-		bdp->node_low_pfn = max(epfn, bdp->node_low_pfn);
+		mem_data[node].min_pfn = min(spfn, mem_data[node].min_pfn);
+		mem_data[node].max_pfn = max(epfn, mem_data[node].max_pfn);
 	}
 
 	return 0;
@@ -269,7 +265,6 @@ static void __init fill_pernode(int node, unsigned long pernode,
 {
 	void *cpu_data;
 	int cpus = early_nr_cpus_node(node);
-	struct bootmem_data *bdp = &bootmem_node_data[node];
 
 	mem_data[node].pernode_addr = pernode;
 	mem_data[node].pernode_size = pernodesize;
@@ -284,8 +279,6 @@ static void __init fill_pernode(int node, unsigned long pernode,
 
 	mem_data[node].node_data = __va(pernode);
 	pernode += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));
-
-	pgdat_list[node]->bdata = bdp;
 	pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));
 
 	cpu_data = per_cpu_node_setup(cpu_data, node);
@@ -325,20 +318,16 @@ static int __init find_pernode_space(unsigned long start, unsigned long len,
 				     int node)
 {
 	unsigned long spfn, epfn;
-	unsigned long pernodesize = 0, pernode, pages, mapsize;
-	struct bootmem_data *bdp = &bootmem_node_data[node];
+	unsigned long pernodesize = 0, pernode;
 
 	spfn = start >> PAGE_SHIFT;
 	epfn = (start + len) >> PAGE_SHIFT;
 
-	pages = bdp->node_low_pfn - bdp->node_min_pfn;
-	mapsize = bootmem_bootmap_pages(pages) << PAGE_SHIFT;
-
 	/*
 	 * Make sure this memory falls within this node's usable memory
 	 * since we may have thrown some away in build_maps().
 	 */
-	if (spfn < bdp->node_min_pfn || epfn > bdp->node_low_pfn)
+	if (spfn < mem_data[node].min_pfn || epfn > mem_data[node].max_pfn)
 		return 0;
 
 	/* Don't setup this node's local space twice... */
@@ -353,32 +342,13 @@ static int __init find_pernode_space(unsigned long start, unsigned long len,
 	pernode = NODEDATA_ALIGN(start, node);
 
 	/* Is this range big enough for what we want to store here? */
-	if (start + len > (pernode + pernodesize + mapsize))
+	if (start + len > (pernode + pernodesize))
 		fill_pernode(node, pernode, pernodesize);
 
 	return 0;
 }
 
 /**
- * free_node_bootmem - free bootmem allocator memory for use
- * @start: physical start of range
- * @len: length of range
- * @node: node where this range resides
- *
- * Simply calls the bootmem allocator to free the specified ranged from
- * the given pg_data_t's bdata struct.  After this function has been called
- * for all the entries in the EFI memory map, the bootmem allocator will
- * be ready to service allocation requests.
- */
-static int __init free_node_bootmem(unsigned long start, unsigned long len,
-				    int node)
-{
-	free_bootmem_node(pgdat_list[node], start, len);
-
-	return 0;
-}
-
-/**
  * reserve_pernode_space - reserve memory for per-node space
  *
  * Reserve the space used by the bootmem maps & per-node space in the boot
@@ -387,28 +357,17 @@ static int __init free_node_bootmem(unsigned long start, unsigned long len,
  */
 static void __init reserve_pernode_space(void)
 {
-	unsigned long base, size, pages;
-	struct bootmem_data *bdp;
+	unsigned long base, size;
 	int node;
 
 	for_each_online_node(node) {
-		pg_data_t *pdp = pgdat_list[node];
-
 		if (node_isset(node, memory_less_mask))
 			continue;
 
-		bdp = pdp->bdata;
-
-		/* First the bootmem_map itself */
-		pages = bdp->node_low_pfn - bdp->node_min_pfn;
-		size = bootmem_bootmap_pages(pages) << PAGE_SHIFT;
-		base = __pa(bdp->node_bootmem_map);
-		reserve_bootmem_node(pdp, base, size, BOOTMEM_DEFAULT);
-
 		/* Now the per-node space */
 		size = mem_data[node].pernode_size;
 		base = __pa(mem_data[node].pernode_addr);
-		reserve_bootmem_node(pdp, base, size, BOOTMEM_DEFAULT);
+		memblock_reserve(base, size);
 	}
 }
 
@@ -528,6 +487,7 @@ void __init find_memory(void)
 	int node;
 
 	reserve_memory();
+	efi_memmap_walk(filter_memory, register_active_ranges);
 
 	if (num_online_nodes() == 0) {
 		printk(KERN_ERR "node info missing!\n");
@@ -544,38 +504,8 @@ void __init find_memory(void)
 	efi_memmap_walk(find_max_min_low_pfn, NULL);
 
 	for_each_online_node(node)
-		if (bootmem_node_data[node].node_low_pfn) {
+		if (mem_data[node].min_pfn)
 			node_clear(node, memory_less_mask);
-			mem_data[node].min_pfn = ~0UL;
-		}
-
-	efi_memmap_walk(filter_memory, register_active_ranges);
-
-	/*
-	 * Initialize the boot memory maps in reverse order since that's
-	 * what the bootmem allocator expects
-	 */
-	for (node = MAX_NUMNODES - 1; node >= 0; node--) {
-		unsigned long pernode, pernodesize, map;
-		struct bootmem_data *bdp;
-
-		if (!node_online(node))
-			continue;
-		else if (node_isset(node, memory_less_mask))
-			continue;
-
-		bdp = &bootmem_node_data[node];
-		pernode = mem_data[node].pernode_addr;
-		pernodesize = mem_data[node].pernode_size;
-		map = pernode + pernodesize;
-
-		init_bootmem_node(pgdat_list[node],
-				  map>>PAGE_SHIFT,
-				  bdp->node_min_pfn,
-				  bdp->node_low_pfn);
-	}
-
-	efi_memmap_walk(filter_rsvd_memory, free_node_bootmem);
 
 	reserve_pernode_space();
 	memory_less_nodes();
@@ -655,36 +585,6 @@ void call_pernode_memory(unsigned long start, unsigned long len, void *arg)
 }
 
 /**
- * count_node_pages - callback to build per-node memory info structures
- * @start: physical start of range
- * @len: length of range
- * @node: node where this range resides
- *
- * Each node has it's own number of physical pages, DMAable pages, start, and
- * end page frame number.  This routine will be called by call_pernode_memory()
- * for each piece of usable memory and will setup these values for each node.
- * Very similar to build_maps().
- */
-static __init int count_node_pages(unsigned long start, unsigned long len, int node)
-{
-	unsigned long end = start + len;
-
-#ifdef CONFIG_ZONE_DMA32
-	if (start <= __pa(MAX_DMA_ADDRESS))
-		mem_data[node].num_dma_physpages +=
-			(min(end, __pa(MAX_DMA_ADDRESS)) - start) >>PAGE_SHIFT;
-#endif
-	start = GRANULEROUNDDOWN(start);
-	end = GRANULEROUNDUP(end);
-	mem_data[node].max_pfn = max(mem_data[node].max_pfn,
-				     end >> PAGE_SHIFT);
-	mem_data[node].min_pfn = min(mem_data[node].min_pfn,
-				     start >> PAGE_SHIFT);
-
-	return 0;
-}
-
-/**
  * paging_init - setup page tables
  *
  * paging_init() sets up the page tables for each node of the system and frees
@@ -700,8 +600,6 @@ void __init paging_init(void)
 
 	max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT;
 
-	efi_memmap_walk(filter_rsvd_memory, count_node_pages);
-
 	sparse_memory_present_with_active_regions(MAX_NUMNODES);
 	sparse_init();
 
diff --git a/arch/m68k/coldfire/clk.c b/arch/m68k/coldfire/clk.c
index 849cd208e2ed..7bc666e482eb 100644
--- a/arch/m68k/coldfire/clk.c
+++ b/arch/m68k/coldfire/clk.c
@@ -129,4 +129,33 @@ unsigned long clk_get_rate(struct clk *clk)
 }
 EXPORT_SYMBOL(clk_get_rate);
 
+/* dummy functions, should not be called */
+long clk_round_rate(struct clk *clk, unsigned long rate)
+{
+	WARN_ON(clk);
+	return 0;
+}
+EXPORT_SYMBOL(clk_round_rate);
+
+int clk_set_rate(struct clk *clk, unsigned long rate)
+{
+	WARN_ON(clk);
+	return 0;
+}
+EXPORT_SYMBOL(clk_set_rate);
+
+int clk_set_parent(struct clk *clk, struct clk *parent)
+{
+	WARN_ON(clk);
+	return 0;
+}
+EXPORT_SYMBOL(clk_set_parent);
+
+struct clk *clk_get_parent(struct clk *clk)
+{
+	WARN_ON(clk);
+	return NULL;
+}
+EXPORT_SYMBOL(clk_get_parent);
+
 /***************************************************************************/
diff --git a/arch/m68k/include/asm/dma.h b/arch/m68k/include/asm/dma.h
index b0978a23bad1..ae2021964e32 100644
--- a/arch/m68k/include/asm/dma.h
+++ b/arch/m68k/include/asm/dma.h
@@ -390,7 +390,7 @@ static __inline__ void set_dma_mode(unsigned int dmanr, char mode)
 
 #ifdef DEBUG_DMA
   printk("%s(%d): dmanr=%d DMR[%x]=%x DIR[%x]=%x\n", __FILE__, __LINE__,
-         dmanr, (int) &dmalp[MCFDMA_DMR], dmabp[MCFDMA_DMR],
+	 dmanr, (int) &dmalp[MCFDMA_DMR], dmalp[MCFDMA_DMR],
 	 (int) &dmawp[MCFDMA_DIR], dmawp[MCFDMA_DIR]);
 #endif
 }
@@ -421,7 +421,7 @@ static __inline__ void set_dma_addr(unsigned int dmanr, unsigned int a)
 
 #ifdef DEBUG_DMA
   printk("%s(%d): dmanr=%d DMR[%x]=%x SAR[%x]=%08x DAR[%x]=%08x\n",
-	__FILE__, __LINE__, dmanr, (int) &dmawp[MCFDMA_DMR], dmawp[MCFDMA_DMR],
+	__FILE__, __LINE__, dmanr, (int) &dmalp[MCFDMA_DMR], dmalp[MCFDMA_DMR],
 	(int) &dmalp[MCFDMA_DSAR], dmalp[MCFDMA_DSAR],
 	(int) &dmalp[MCFDMA_DDAR], dmalp[MCFDMA_DDAR]);
 #endif
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index 6163a39ddeb6..ace5c5bf1836 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -2,6 +2,8 @@ config MICROBLAZE
 	def_bool y
 	select ARCH_NO_SWAP
 	select ARCH_HAS_GCOV_PROFILE_ALL
+	select ARCH_HAS_SYNC_DMA_FOR_CPU
+	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
 	select ARCH_MIGHT_HAVE_PC_PARPORT
 	select ARCH_NO_COHERENT_DMA_MMAP if !MMU
 	select ARCH_WANT_IPC_PARSE_VERSION
@@ -9,6 +11,8 @@ config MICROBLAZE
 	select TIMER_OF
 	select CLONE_BACKWARDS3
 	select COMMON_CLK
+	select DMA_NONCOHERENT_OPS
+	select DMA_NONCOHERENT_MMAP
 	select GENERIC_ATOMIC64
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_CPU_DEVICES
diff --git a/arch/microblaze/Makefile b/arch/microblaze/Makefile
index d269dd4b8279..73330360a8e6 100644
--- a/arch/microblaze/Makefile
+++ b/arch/microblaze/Makefile
@@ -40,11 +40,11 @@ CPUFLAGS-$(CONFIG_XILINX_MICROBLAZE0_USE_PCMP_INSTR) += -mxl-pattern-compare
 ifdef CONFIG_CPU_BIG_ENDIAN
 KBUILD_CFLAGS += -mbig-endian
 KBUILD_AFLAGS += -mbig-endian
-LD += -EB
+LDFLAGS += -EB
 else
 KBUILD_CFLAGS += -mlittle-endian
 KBUILD_AFLAGS += -mlittle-endian
-LD += -EL
+LDFLAGS += -EL
 endif
 
 CPUFLAGS-1 += $(call cc-option,-mcpu=v$(CPU_VER))
diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index fe6a6c6e5003..569ba9e670c1 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -5,6 +5,7 @@ generic-y += bugs.h
 generic-y += compat.h
 generic-y += device.h
 generic-y += div64.h
+generic-y += dma-mapping.h
 generic-y += emergency-restart.h
 generic-y += exec.h
 generic-y += extable.h
diff --git a/arch/microblaze/include/asm/dma-mapping.h b/arch/microblaze/include/asm/dma-mapping.h
deleted file mode 100644
index add50c1373bf..000000000000
--- a/arch/microblaze/include/asm/dma-mapping.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Implements the generic device dma API for microblaze and the pci
- *
- * Copyright (C) 2009-2010 Michal Simek <monstr@monstr.eu>
- * Copyright (C) 2009-2010 PetaLogix
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License. See the file COPYING in the main directory of this
- * archive for more details.
- *
- * This file is base on powerpc and x86 dma-mapping.h versions
- * Copyright (C) 2004 IBM
- */
-
-#ifndef _ASM_MICROBLAZE_DMA_MAPPING_H
-#define _ASM_MICROBLAZE_DMA_MAPPING_H
-
-/*
- * Available generic sets of operations
- */
-extern const struct dma_map_ops dma_nommu_ops;
-
-static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
-{
-	return &dma_nommu_ops;
-}
-
-#endif	/* _ASM_MICROBLAZE_DMA_MAPPING_H */
diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h
index db8b1fa83452..7b650ab14fa0 100644
--- a/arch/microblaze/include/asm/pgtable.h
+++ b/arch/microblaze/include/asm/pgtable.h
@@ -553,11 +553,6 @@ void __init *early_get_page(void);
 
 extern unsigned long ioremap_bot, ioremap_base;
 
-void *consistent_alloc(gfp_t gfp, size_t size, dma_addr_t *dma_handle);
-void consistent_free(size_t size, void *vaddr);
-void consistent_sync(void *vaddr, size_t size, int direction);
-void consistent_sync_page(struct page *page, unsigned long offset,
-	size_t size, int direction);
 unsigned long consistent_virt_to_pfn(void *vaddr);
 
 void setup_memory(void);
diff --git a/arch/microblaze/kernel/dma.c b/arch/microblaze/kernel/dma.c
index 3145e7dc8ab1..71032cf64669 100644
--- a/arch/microblaze/kernel/dma.c
+++ b/arch/microblaze/kernel/dma.c
@@ -8,29 +8,15 @@
  */
 
 #include <linux/device.h>
-#include <linux/dma-mapping.h>
+#include <linux/dma-noncoherent.h>
 #include <linux/gfp.h>
 #include <linux/dma-debug.h>
 #include <linux/export.h>
 #include <linux/bug.h>
 #include <asm/cacheflush.h>
 
-static void *dma_nommu_alloc_coherent(struct device *dev, size_t size,
-				       dma_addr_t *dma_handle, gfp_t flag,
-				       unsigned long attrs)
-{
-	return consistent_alloc(flag, size, dma_handle);
-}
-
-static void dma_nommu_free_coherent(struct device *dev, size_t size,
-				     void *vaddr, dma_addr_t dma_handle,
-				     unsigned long attrs)
-{
-	consistent_free(size, vaddr);
-}
-
-static inline void __dma_sync(unsigned long paddr,
-			      size_t size, enum dma_data_direction direction)
+static void __dma_sync(struct device *dev, phys_addr_t paddr, size_t size,
+		enum dma_data_direction direction)
 {
 	switch (direction) {
 	case DMA_TO_DEVICE:
@@ -45,113 +31,21 @@ static inline void __dma_sync(unsigned long paddr,
 	}
 }
 
-static int dma_nommu_map_sg(struct device *dev, struct scatterlist *sgl,
-			     int nents, enum dma_data_direction direction,
-			     unsigned long attrs)
+void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
+		size_t size, enum dma_data_direction dir)
 {
-	struct scatterlist *sg;
-	int i;
-
-	/* FIXME this part of code is untested */
-	for_each_sg(sgl, sg, nents, i) {
-		sg->dma_address = sg_phys(sg);
-
-		if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
-			continue;
-
-		__dma_sync(sg_phys(sg), sg->length, direction);
-	}
-
-	return nents;
-}
-
-static inline dma_addr_t dma_nommu_map_page(struct device *dev,
-					     struct page *page,
-					     unsigned long offset,
-					     size_t size,
-					     enum dma_data_direction direction,
-					     unsigned long attrs)
-{
-	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-		__dma_sync(page_to_phys(page) + offset, size, direction);
-	return page_to_phys(page) + offset;
+	__dma_sync(dev, paddr, size, dir);
 }
 
-static inline void dma_nommu_unmap_page(struct device *dev,
-					 dma_addr_t dma_address,
-					 size_t size,
-					 enum dma_data_direction direction,
-					 unsigned long attrs)
+void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
+		size_t size, enum dma_data_direction dir)
 {
-/* There is not necessary to do cache cleanup
- *
- * phys_to_virt is here because in __dma_sync_page is __virt_to_phys and
- * dma_address is physical address
- */
-	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-		__dma_sync(dma_address, size, direction);
+	__dma_sync(dev, paddr, size, dir);
 }
 
-static inline void
-dma_nommu_sync_single_for_cpu(struct device *dev,
-			       dma_addr_t dma_handle, size_t size,
-			       enum dma_data_direction direction)
-{
-	/*
-	 * It's pointless to flush the cache as the memory segment
-	 * is given to the CPU
-	 */
-
-	if (direction == DMA_FROM_DEVICE)
-		__dma_sync(dma_handle, size, direction);
-}
-
-static inline void
-dma_nommu_sync_single_for_device(struct device *dev,
-				  dma_addr_t dma_handle, size_t size,
-				  enum dma_data_direction direction)
-{
-	/*
-	 * It's pointless to invalidate the cache if the device isn't
-	 * supposed to write to the relevant region
-	 */
-
-	if (direction == DMA_TO_DEVICE)
-		__dma_sync(dma_handle, size, direction);
-}
-
-static inline void
-dma_nommu_sync_sg_for_cpu(struct device *dev,
-			   struct scatterlist *sgl, int nents,
-			   enum dma_data_direction direction)
-{
-	struct scatterlist *sg;
-	int i;
-
-	/* FIXME this part of code is untested */
-	if (direction == DMA_FROM_DEVICE)
-		for_each_sg(sgl, sg, nents, i)
-			__dma_sync(sg->dma_address, sg->length, direction);
-}
-
-static inline void
-dma_nommu_sync_sg_for_device(struct device *dev,
-			      struct scatterlist *sgl, int nents,
-			      enum dma_data_direction direction)
-{
-	struct scatterlist *sg;
-	int i;
-
-	/* FIXME this part of code is untested */
-	if (direction == DMA_TO_DEVICE)
-		for_each_sg(sgl, sg, nents, i)
-			__dma_sync(sg->dma_address, sg->length, direction);
-}
-
-static
-int dma_nommu_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
-			     void *cpu_addr, dma_addr_t handle, size_t size,
-			     unsigned long attrs)
+int arch_dma_mmap(struct device *dev, struct vm_area_struct *vma,
+		void *cpu_addr, dma_addr_t handle, size_t size,
+		unsigned long attrs)
 {
 #ifdef CONFIG_MMU
 	unsigned long user_count = vma_pages(vma);
@@ -170,17 +64,3 @@ int dma_nommu_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
 	return -ENXIO;
 #endif
 }
-
-const struct dma_map_ops dma_nommu_ops = {
-	.alloc			= dma_nommu_alloc_coherent,
-	.free			= dma_nommu_free_coherent,
-	.mmap			= dma_nommu_mmap_coherent,
-	.map_sg			= dma_nommu_map_sg,
-	.map_page		= dma_nommu_map_page,
-	.unmap_page		= dma_nommu_unmap_page,
-	.sync_single_for_cpu	= dma_nommu_sync_single_for_cpu,
-	.sync_single_for_device	= dma_nommu_sync_single_for_device,
-	.sync_sg_for_cpu	= dma_nommu_sync_sg_for_cpu,
-	.sync_sg_for_device	= dma_nommu_sync_sg_for_device,
-};
-EXPORT_SYMBOL(dma_nommu_ops);
diff --git a/arch/microblaze/kernel/head.S b/arch/microblaze/kernel/head.S
index 4655ff342c64..f264fdcf152a 100644
--- a/arch/microblaze/kernel/head.S
+++ b/arch/microblaze/kernel/head.S
@@ -341,11 +341,6 @@ start_here:
 	/* Initialize r31 with current task address */
 	addik	r31, r0, init_task
 
-	/*
-	 * Call platform dependent initialize function.
-	 * Please see $(ARCH)/mach-$(SUBARCH)/setup.c for
-	 * the function.
-	 */
 	addik	r11, r0, machine_early_init
 	brald	r15, r11
 	nop
diff --git a/arch/microblaze/mm/consistent.c b/arch/microblaze/mm/consistent.c
index b06c3a7faf20..c9a278ac795a 100644
--- a/arch/microblaze/mm/consistent.c
+++ b/arch/microblaze/mm/consistent.c
@@ -33,6 +33,7 @@
 #include <linux/pci.h>
 #include <linux/interrupt.h>
 #include <linux/gfp.h>
+#include <linux/dma-noncoherent.h>
 
 #include <asm/pgalloc.h>
 #include <linux/io.h>
@@ -59,7 +60,8 @@
  * uncached region.  This will no doubt cause big problems if memory allocated
  * here is not also freed properly. -- JW
  */
-void *consistent_alloc(gfp_t gfp, size_t size, dma_addr_t *dma_handle)
+void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
+		gfp_t gfp, unsigned long attrs)
 {
 	unsigned long order, vaddr;
 	void *ret;
@@ -154,7 +156,6 @@ void *consistent_alloc(gfp_t gfp, size_t size, dma_addr_t *dma_handle)
 
 	return ret;
 }
-EXPORT_SYMBOL(consistent_alloc);
 
 #ifdef CONFIG_MMU
 static pte_t *consistent_virt_to_pte(void *vaddr)
@@ -178,7 +179,8 @@ unsigned long consistent_virt_to_pfn(void *vaddr)
 /*
  * free page(s) as defined by the above mapping.
  */
-void consistent_free(size_t size, void *vaddr)
+void arch_dma_free(struct device *dev, size_t size, void *vaddr,
+		dma_addr_t dma_addr, unsigned long attrs)
 {
 	struct page *page;
 
@@ -218,49 +220,3 @@ void consistent_free(size_t size, void *vaddr)
 	flush_tlb_all();
 #endif
 }
-EXPORT_SYMBOL(consistent_free);
-
-/*
- * make an area consistent.
- */
-void consistent_sync(void *vaddr, size_t size, int direction)
-{
-	unsigned long start;
-	unsigned long end;
-
-	start = (unsigned long)vaddr;
-
-	/* Convert start address back down to unshadowed memory region */
-#ifdef CONFIG_XILINX_UNCACHED_SHADOW
-	start &= ~UNCACHED_SHADOW_MASK;
-#endif
-	end = start + size;
-
-	switch (direction) {
-	case PCI_DMA_NONE:
-		BUG();
-	case PCI_DMA_FROMDEVICE:	/* invalidate only */
-		invalidate_dcache_range(start, end);
-		break;
-	case PCI_DMA_TODEVICE:		/* writeback only */
-		flush_dcache_range(start, end);
-		break;
-	case PCI_DMA_BIDIRECTIONAL:	/* writeback and invalidate */
-		flush_dcache_range(start, end);
-		break;
-	}
-}
-EXPORT_SYMBOL(consistent_sync);
-
-/*
- * consistent_sync_page makes memory consistent. identical
- * to consistent_sync, but takes a struct page instead of a
- * virtual address
- */
-void consistent_sync_page(struct page *page, unsigned long offset,
-	size_t size, int direction)
-{
-	unsigned long start = (unsigned long)page_address(page) + offset;
-	consistent_sync((void *)start, size, direction);
-}
-EXPORT_SYMBOL(consistent_sync_page);
diff --git a/arch/microblaze/pci/pci-common.c b/arch/microblaze/pci/pci-common.c
index f34346d56095..2ffd171af8b6 100644
--- a/arch/microblaze/pci/pci-common.c
+++ b/arch/microblaze/pci/pci-common.c
@@ -597,19 +597,6 @@ static void pcibios_fixup_resources(struct pci_dev *dev)
 }
 DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pcibios_fixup_resources);
 
-/*
- * We need to avoid collisions with `mirrored' VGA ports
- * and other strange ISA hardware, so we always want the
- * addresses to be allocated in the 0x000-0x0ff region
- * modulo 0x400.
- *
- * Why? Because some silly external IO cards only decode
- * the low 10 bits of the IO address. The 0x00-0xff region
- * is reserved for motherboard devices that decode all 16
- * bits, so it's ok to allocate at, say, 0x2800-0x28ff,
- * but we want to try to avoid allocating at 0x2900-0x2bff
- * which might have be mirrored at 0x0100-0x03ff..
- */
 int pcibios_add_device(struct pci_dev *dev)
 {
 	dev->irq = of_irq_parse_and_map_pci(dev, 0, 0);
diff --git a/arch/microblaze/pci/xilinx_pci.c b/arch/microblaze/pci/xilinx_pci.c
index 14c7da5fd039..b800909ddccf 100644
--- a/arch/microblaze/pci/xilinx_pci.c
+++ b/arch/microblaze/pci/xilinx_pci.c
@@ -157,6 +157,7 @@ void __init xilinx_pci_init(void)
 
 	/* Set the max bus number to 255, and bus/subbus no's to 0 */
 	pci_reg = of_iomap(pci_node, 0);
+	WARN_ON(!pci_reg);
 	out_be32(pci_reg + XPLB_PCI_BUS, 0x000000ff);
 	iounmap(pci_reg);
 
diff --git a/arch/mips/kernel/uprobes.c b/arch/mips/kernel/uprobes.c
index f7a0645ccb82..4aaff3b3175c 100644
--- a/arch/mips/kernel/uprobes.c
+++ b/arch/mips/kernel/uprobes.c
@@ -224,7 +224,7 @@ unsigned long arch_uretprobe_hijack_return_addr(
 int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm,
 	unsigned long vaddr)
 {
-	return uprobe_write_opcode(mm, vaddr, UPROBE_SWBP_INSN);
+	return uprobe_write_opcode(auprobe, mm, vaddr, UPROBE_SWBP_INSN);
 }
 
 void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
diff --git a/arch/parisc/include/asm/elf.h b/arch/parisc/include/asm/elf.h
index f019d3ec0c1c..d00973aab7f1 100644
--- a/arch/parisc/include/asm/elf.h
+++ b/arch/parisc/include/asm/elf.h
@@ -235,6 +235,7 @@ typedef unsigned long elf_greg_t;
 #define SET_PERSONALITY(ex) \
 ({	\
 	set_personality((current->personality & ~PER_MASK) | PER_LINUX); \
+	clear_thread_flag(TIF_32BIT); \
 	current->thread.map_base = DEFAULT_MAP_BASE; \
 	current->thread.task_size = DEFAULT_TASK_SIZE; \
  })
@@ -243,9 +244,11 @@ typedef unsigned long elf_greg_t;
 
 #define COMPAT_SET_PERSONALITY(ex) \
 ({	\
-	set_thread_flag(TIF_32BIT); \
-	current->thread.map_base = DEFAULT_MAP_BASE32; \
-	current->thread.task_size = DEFAULT_TASK_SIZE32; \
+	if ((ex).e_ident[EI_CLASS] == ELFCLASS32) { \
+		set_thread_flag(TIF_32BIT); \
+		current->thread.map_base = DEFAULT_MAP_BASE32; \
+		current->thread.task_size = DEFAULT_TASK_SIZE32; \
+	} else clear_thread_flag(TIF_32BIT); \
  })
 
 /*
diff --git a/arch/parisc/include/asm/linkage.h b/arch/parisc/include/asm/linkage.h
index 49f6f3d772cc..cd6fe4febead 100644
--- a/arch/parisc/include/asm/linkage.h
+++ b/arch/parisc/include/asm/linkage.h
@@ -22,15 +22,6 @@
 name:		ASM_NL\
 	.export name
 
-#ifdef CONFIG_64BIT
-#define ENDPROC(name) \
-	END(name)
-#else
-#define ENDPROC(name) \
-	.type name, @function !\
-	END(name)
-#endif
-
 #define ENTRY_CFI(name, ...) \
 	ENTRY(name)	ASM_NL\
 	.proc		ASM_NL\
diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h
index 2dbe5580a1a4..2bd5e695bdad 100644
--- a/arch/parisc/include/asm/processor.h
+++ b/arch/parisc/include/asm/processor.h
@@ -256,11 +256,7 @@ on downward growing arches, it looks like this:
  * it in here from the current->personality
  */
 
-#ifdef CONFIG_64BIT
-#define USER_WIDE_MODE	(!test_thread_flag(TIF_32BIT))
-#else
-#define USER_WIDE_MODE	0
-#endif
+#define USER_WIDE_MODE	(!is_32bit_task())
 
 #define start_thread(regs, new_pc, new_sp) do {		\
 	elf_addr_t *sp = (elf_addr_t *)new_sp;		\
diff --git a/arch/parisc/include/asm/traps.h b/arch/parisc/include/asm/traps.h
index e00013248907..8ecc1f0c0483 100644
--- a/arch/parisc/include/asm/traps.h
+++ b/arch/parisc/include/asm/traps.h
@@ -2,7 +2,9 @@
 #ifndef __ASM_TRAPS_H
 #define __ASM_TRAPS_H
 
-#ifdef __KERNEL__
+#define PARISC_ITLB_TRAP	6 /* defined by architecture. Do not change. */
+
+#if !defined(__ASSEMBLY__)
 struct pt_regs;
 
 /* traps.c */
diff --git a/arch/parisc/include/asm/unwind.h b/arch/parisc/include/asm/unwind.h
index f133b7efbebb..9547e5261a8b 100644
--- a/arch/parisc/include/asm/unwind.h
+++ b/arch/parisc/include/asm/unwind.h
@@ -73,8 +73,10 @@ unwind_table_remove(struct unwind_table *table);
 
 void unwind_frame_init(struct unwind_frame_info *info, struct task_struct *t, 
 		       struct pt_regs *regs);
-void unwind_frame_init_from_blocked_task(struct unwind_frame_info *info, struct task_struct *t);
-void unwind_frame_init_running(struct unwind_frame_info *info, struct pt_regs *regs);
+void unwind_frame_init_from_blocked_task(struct unwind_frame_info *info,
+			struct task_struct *t);
+void unwind_frame_init_task(struct unwind_frame_info *info,
+			struct task_struct *task, struct pt_regs *regs);
 int unwind_once(struct unwind_frame_info *info);
 int unwind_to_user(struct unwind_frame_info *info);
 
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index c7508f5717fb..242c5ab65611 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -36,6 +36,7 @@
 #include <asm/signal.h>
 #include <asm/unistd.h>
 #include <asm/ldcw.h>
+#include <asm/traps.h>
 #include <asm/thread_info.h>
 
 #include <linux/linkage.h>
@@ -692,7 +693,7 @@ ENTRY(fault_vector_20)
 	def		 3
 	extint		 4
 	def		 5
-	itlb_20		 6
+	itlb_20		 PARISC_ITLB_TRAP
 	def		 7
 	def		 8
 	def              9
@@ -735,7 +736,7 @@ ENTRY(fault_vector_11)
 	def		 3
 	extint		 4
 	def		 5
-	itlb_11		 6
+	itlb_11		 PARISC_ITLB_TRAP
 	def		 7
 	def		 8
 	def              9
@@ -776,7 +777,7 @@ END(fault_vector_11)
 	 * copy_thread moved args into task save area.
 	 */
 
-ENTRY_CFI(ret_from_kernel_thread)
+ENTRY(ret_from_kernel_thread)
 	/* Call schedule_tail first though */
 	BL	schedule_tail, %r2
 	nop
@@ -791,7 +792,7 @@ ENTRY_CFI(ret_from_kernel_thread)
 	copy	%r31, %r2
 	b	finish_child_return
 	nop
-ENDPROC_CFI(ret_from_kernel_thread)
+END(ret_from_kernel_thread)
 
 
 	/*
@@ -815,9 +816,8 @@ ENTRY_CFI(_switch_to)
 	LDREG	TASK_THREAD_INFO(%r25), %r25
 	bv	%r0(%r2)
 	mtctl   %r25,%cr30
-ENDPROC_CFI(_switch_to)
 
-ENTRY_CFI(_switch_to_ret)
+ENTRY(_switch_to_ret)
 	mtctl	%r0, %cr0		/* Needed for single stepping */
 	callee_rest
 	callee_rest_float
@@ -825,7 +825,7 @@ ENTRY_CFI(_switch_to_ret)
 	LDREG	-RP_OFFSET(%r30), %r2
 	bv	%r0(%r2)
 	copy	%r26, %r28
-ENDPROC_CFI(_switch_to_ret)
+ENDPROC_CFI(_switch_to)
 
 	/*
 	 * Common rfi return path for interruptions, kernel execve, and
@@ -886,14 +886,12 @@ ENTRY_CFI(syscall_exit_rfi)
 	STREG   %r19,PT_SR5(%r16)
 	STREG   %r19,PT_SR6(%r16)
 	STREG   %r19,PT_SR7(%r16)
-ENDPROC_CFI(syscall_exit_rfi)
 
-ENTRY_CFI(intr_return)
+ENTRY(intr_return)
 	/* check for reschedule */
 	mfctl   %cr30,%r1
 	LDREG   TI_FLAGS(%r1),%r19	/* sched.h: TIF_NEED_RESCHED */
 	bb,<,n	%r19,31-TIF_NEED_RESCHED,intr_do_resched /* forward */
-ENDPROC_CFI(intr_return)
 
 	.import do_notify_resume,code
 intr_check_sig:
@@ -1049,6 +1047,7 @@ intr_extint:
 
 	b	do_cpu_irq_mask
 	ldo	R%intr_return(%r2), %r2	/* return to intr_return, not here */
+ENDPROC_CFI(syscall_exit_rfi)
 
 
 	/* Generic interruptions (illegal insn, unaligned, page fault, etc) */
@@ -1068,21 +1067,12 @@ ENTRY_CFI(intr_save)		/* for os_hpmc */
 	save_specials	%r29
 
 	/* If this trap is a itlb miss, skip saving/adjusting isr/ior */
-
-	/*
-	 * FIXME: 1) Use a #define for the hardwired "6" below (and in
-	 *           traps.c.
-	 *        2) Once we start executing code above 4 Gb, we need
-	 *           to adjust iasq/iaoq here in the same way we
-	 *           adjust isr/ior below.
-	 */
-
-	cmpib,COND(=),n        6,%r26,skip_save_ior
+	cmpib,COND(=),n        PARISC_ITLB_TRAP,%r26,skip_save_ior
 
 
-	mfctl           %cr20, %r16 /* isr */
+	mfctl           %isr, %r16
 	nop		/* serialize mfctl on PA 2.0 to avoid 4 cycle penalty */
-	mfctl           %cr21, %r17 /* ior */
+	mfctl           %ior, %r17
 
 
 #ifdef CONFIG_64BIT
@@ -1094,22 +1084,34 @@ ENTRY_CFI(intr_save)		/* for os_hpmc */
 	extrd,u,*<>     %r8,PSW_W_BIT,1,%r0
 	depdi           0,1,2,%r17
 
-	/*
-	 * FIXME: This code has hardwired assumptions about the split
-	 *        between space bits and offset bits. This will change
-	 *        when we allow alternate page sizes.
-	 */
-
-	/* adjust isr/ior. */
-	extrd,u         %r16,63,SPACEID_SHIFT,%r1	/* get high bits from isr for ior */
-	depd            %r1,31,SPACEID_SHIFT,%r17	/* deposit them into ior */
-	depdi           0,63,SPACEID_SHIFT,%r16		/* clear them from isr */
+	/* adjust isr/ior: get high bits from isr and deposit in ior */
+	space_adjust	%r16,%r17,%r1
 #endif
 	STREG           %r16, PT_ISR(%r29)
 	STREG           %r17, PT_IOR(%r29)
 
+#if 0 && defined(CONFIG_64BIT)
+	/* Revisit when we have 64-bit code above 4Gb */
+	b,n		intr_save2
 
 skip_save_ior:
+	/* We have a itlb miss, and when executing code above 4 Gb on ILP64, we
+	 * need to adjust iasq/iaoq here in the same way we adjusted isr/ior
+	 * above.
+	 */
+	extrd,u,*	%r8,PSW_W_BIT,1,%r1
+	cmpib,COND(=),n	1,%r1,intr_save2
+	LDREG		PT_IASQ0(%r29), %r16
+	LDREG		PT_IAOQ0(%r29), %r17
+	/* adjust iasq/iaoq */
+	space_adjust	%r16,%r17,%r1
+	STREG           %r16, PT_IASQ0(%r29)
+	STREG           %r17, PT_IAOQ0(%r29)
+#else
+skip_save_ior:
+#endif
+
+intr_save2:
 	virt_map
 	save_general	%r29
 
@@ -1747,7 +1749,7 @@ fork_like fork
 fork_like vfork
 
 	/* Set the return value for the child */
-ENTRY_CFI(child_return)
+ENTRY(child_return)
 	BL	schedule_tail, %r2
 	nop
 finish_child_return:
@@ -1759,7 +1761,7 @@ finish_child_return:
 	reg_restore %r1
 	b	syscall_exit
 	copy	%r0,%r28
-ENDPROC_CFI(child_return)
+END(child_return)
 
 ENTRY_CFI(sys_rt_sigreturn_wrapper)
 	LDREG	TI_TASK-THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r26
@@ -1791,7 +1793,7 @@ ENTRY_CFI(sys_rt_sigreturn_wrapper)
 	LDREG	PT_GR28(%r1),%r28  /* reload original r28 for syscall_exit */
 ENDPROC_CFI(sys_rt_sigreturn_wrapper)
 
-ENTRY_CFI(syscall_exit)
+ENTRY(syscall_exit)
 	/* NOTE: Not all syscalls exit this way.  rt_sigreturn will exit
 	 * via syscall_exit_rfi if the signal was received while the process
 	 * was running.
@@ -1990,15 +1992,13 @@ syscall_do_resched:
 #else
 	nop
 #endif
-ENDPROC_CFI(syscall_exit)
+END(syscall_exit)
 
 
 #ifdef CONFIG_FUNCTION_TRACER
 
 	.import ftrace_function_trampoline,code
 	.align L1_CACHE_BYTES
-	.globl mcount
-	.type  mcount, @function
 ENTRY_CFI(mcount, caller)
 _mcount:
 	.export _mcount,data
@@ -2027,8 +2027,6 @@ ENDPROC_CFI(mcount)
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 	.align 8
-	.globl return_to_handler
-	.type  return_to_handler, @function
 ENTRY_CFI(return_to_handler, caller,frame=FRAME_SIZE)
 	.export parisc_return_to_handler,data
 parisc_return_to_handler:
@@ -2078,6 +2076,7 @@ ENDPROC_CFI(return_to_handler)
 /* void call_on_stack(unsigned long param1, void *func,
 		      unsigned long new_stack) */
 ENTRY_CFI(call_on_stack, FRAME=2*FRAME_SIZE,CALLS,SAVE_RP,SAVE_SP)
+ENTRY(_call_on_stack)
 	copy	%sp, %r1
 
 	/* Regarding the HPPA calling conventions for function pointers,
diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c
index 45cc65902fce..82bd0d0927ce 100644
--- a/arch/parisc/kernel/processor.c
+++ b/arch/parisc/kernel/processor.c
@@ -288,6 +288,8 @@ void __init collect_boot_cpu_data(void)
 		printk(KERN_INFO "model %s\n",
 			boot_cpu_data.pdc.sys_model_name);
 
+	dump_stack_set_arch_desc("%s", boot_cpu_data.pdc.sys_model_name);
+
 	boot_cpu_data.hversion =  boot_cpu_data.pdc.model.hversion;
 	boot_cpu_data.sversion =  boot_cpu_data.pdc.model.sversion;
 
diff --git a/arch/parisc/kernel/stacktrace.c b/arch/parisc/kernel/stacktrace.c
index 2fe914c5f533..ec5835e83a7a 100644
--- a/arch/parisc/kernel/stacktrace.c
+++ b/arch/parisc/kernel/stacktrace.c
@@ -16,20 +16,7 @@ static void dump_trace(struct task_struct *task, struct stack_trace *trace)
 {
 	struct unwind_frame_info info;
 
-	/* initialize unwind info */
-	if (task == current) {
-		unsigned long sp;
-		struct pt_regs r;
-HERE:
-		asm volatile ("copy %%r30, %0" : "=r"(sp));
-		memset(&r, 0, sizeof(struct pt_regs));
-		r.iaoq[0] = (unsigned long)&&HERE;
-		r.gr[2] = (unsigned long)__builtin_return_address(0);
-		r.gr[30] = sp;
-		unwind_frame_init(&info, task, &r);
-	} else {
-		unwind_frame_init_from_blocked_task(&info, task);
-	}
+	unwind_frame_init_task(&info, task, NULL);
 
 	/* unwind stack and save entries in stack_trace struct */
 	trace->nr_entries = 0;
diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c
index 43b308cfdf53..376ea0d1b275 100644
--- a/arch/parisc/kernel/sys_parisc.c
+++ b/arch/parisc/kernel/sys_parisc.c
@@ -156,11 +156,6 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 	int do_color_align, last_mmap;
 	struct vm_unmapped_area_info info;
 
-#ifdef CONFIG_64BIT
-	/* This should only ever run for 32-bit processes.  */
-	BUG_ON(!test_thread_flag(TIF_32BIT));
-#endif
-
 	/* requested length too big for entire address space */
 	if (len > TASK_SIZE)
 		return -ENOMEM;
diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S
index 5f7e57fcaeef..f453997a7b8f 100644
--- a/arch/parisc/kernel/syscall.S
+++ b/arch/parisc/kernel/syscall.S
@@ -108,12 +108,8 @@ linux_gateway_entry:
 	mtsp	%r0,%sr6			/* get kernel space into sr6 */
 
 #ifdef CONFIG_64BIT
-	/* for now we can *always* set the W bit on entry to the syscall
-	 * since we don't support wide userland processes.  We could
-	 * also save the current SM other than in r0 and restore it on
-	 * exit from the syscall, and also use that value to know
-	 * whether to do narrow or wide syscalls. -PB
-	 */
+	/* Store W bit on entry to the syscall in case it's a wide userland
+	 * process. */
 	ssm	PSW_SM_W, %r1
 	extrd,u	%r1,PSW_W_BIT,1,%r1
 	/* sp must be aligned on 4, so deposit the W bit setting into
@@ -227,8 +223,7 @@ linux_gateway_entry:
 	or,=	%r2,%r2,%r2
 	ldo	R%sys_call_table64(%r1), %r19
 #else
-	ldil	L%sys_call_table, %r1
-	ldo     R%sys_call_table(%r1), %r19
+	load32	sys_call_table, %r19
 #endif	
 	comiclr,>>	__NR_Linux_syscalls, %r20, %r0
 	b,n	.Lsyscall_nosys
@@ -331,8 +326,6 @@ tracesys_next:
 	 *  task->thread.regs.gr[20] above.
 	 */
 	copy	%ret0,%r20
-	ldil	L%sys_call_table,%r1
-	ldo     R%sys_call_table(%r1), %r19
 
 	ldo     -THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r1      /* get task ptr */
 	LDREG	TI_TASK(%r1), %r1
@@ -354,6 +347,23 @@ tracesys_next:
 	comiclr,>>	__NR_Linux_syscalls, %r20, %r0
 	b,n	.Ltracesys_nosys
 
+	/* Note!  We cannot use the syscall table that is mapped
+	nearby since the gateway page is mapped execute-only. */
+
+#ifdef CONFIG_64BIT
+	LDREG	TASK_PT_GR30(%r1), %r19		/* get users sp back */
+	extrd,u	%r19,63,1,%r2			/* W hidden in bottom bit */
+
+	ldil	L%sys_call_table, %r1
+	or,=	%r2,%r2,%r2
+	addil	L%(sys_call_table64-sys_call_table), %r1
+	ldo	R%sys_call_table(%r1), %r19
+	or,=	%r2,%r2,%r2
+	ldo	R%sys_call_table64(%r1), %r19
+#else
+	load32	sys_call_table, %r19
+#endif
+
 	LDREGX  %r20(%r19), %r19
 
 	/* If this is a sys_rt_sigreturn call, and the signal was received
@@ -464,16 +474,13 @@ tracesys_sigexit:
 lws_start:
 
 #ifdef CONFIG_64BIT
-	/* FIXME: If we are a 64-bit kernel just
-	 *        turn this on unconditionally.
-	 */
 	ssm	PSW_SM_W, %r1
 	extrd,u	%r1,PSW_W_BIT,1,%r1
 	/* sp must be aligned on 4, so deposit the W bit setting into
 	 * the bottom of sp temporarily */
 	or,ev	%r1,%r30,%r30
 
-	/* Clip LWS number to a 32-bit value always */
+	/* Clip LWS number to a 32-bit value for 32-bit processes */
 	depdi	0, 31, 32, %r20
 #endif	
 
diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c
index 318815212518..68f10f87073d 100644
--- a/arch/parisc/kernel/traps.c
+++ b/arch/parisc/kernel/traps.c
@@ -45,7 +45,7 @@
 
 #include "../math-emu/math-emu.h"	/* for handle_fpe() */
 
-static void parisc_show_stack(struct task_struct *task, unsigned long *sp,
+static void parisc_show_stack(struct task_struct *task,
 	struct pt_regs *regs);
 
 static int printbinary(char *buf, unsigned long x, int nbits)
@@ -152,7 +152,7 @@ void show_regs(struct pt_regs *regs)
 		printk("%s IAOQ[1]: %pS\n", level, (void *) regs->iaoq[1]);
 		printk("%s RP(r2): %pS\n", level, (void *) regs->gr[2]);
 
-		parisc_show_stack(current, NULL, regs);
+		parisc_show_stack(current, regs);
 	}
 }
 
@@ -185,44 +185,19 @@ static void do_show_stack(struct unwind_frame_info *info)
 	printk(KERN_CRIT "\n");
 }
 
-static void parisc_show_stack(struct task_struct *task, unsigned long *sp,
+static void parisc_show_stack(struct task_struct *task,
 	struct pt_regs *regs)
 {
 	struct unwind_frame_info info;
-	struct task_struct *t;
 
-	t = task ? task : current;
-	if (regs) {
-		unwind_frame_init(&info, t, regs);
-		goto show_stack;
-	}
-
-	if (t == current) {
-		unsigned long sp;
-
-HERE:
-		asm volatile ("copy %%r30, %0" : "=r"(sp));
-		{
-			struct pt_regs r;
-
-			memset(&r, 0, sizeof(struct pt_regs));
-			r.iaoq[0] = (unsigned long)&&HERE;
-			r.gr[2] = (unsigned long)__builtin_return_address(0);
-			r.gr[30] = sp;
-
-			unwind_frame_init(&info, current, &r);
-		}
-	} else {
-		unwind_frame_init_from_blocked_task(&info, t);
-	}
+	unwind_frame_init_task(&info, task, regs);
 
-show_stack:
 	do_show_stack(&info);
 }
 
 void show_stack(struct task_struct *t, unsigned long *sp)
 {
-	return parisc_show_stack(t, sp, NULL);
+	parisc_show_stack(t, NULL);
 }
 
 int is_valid_bugaddr(unsigned long iaoq)
@@ -557,7 +532,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
 		cpu_lpmc(5, regs);
 		return;
 
-	case  6:
+	case  PARISC_ITLB_TRAP:
 		/* Instruction TLB miss fault/Instruction page fault */
 		fault_address = regs->iaoq[0];
 		fault_space   = regs->iasq[0];
diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c
index 5cdf13069dd9..f329b466e68f 100644
--- a/arch/parisc/kernel/unwind.c
+++ b/arch/parisc/kernel/unwind.c
@@ -209,6 +209,8 @@ static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int
 	 * We have to use void * instead of a function pointer, because
 	 * function pointers aren't a pointer to the function on 64-bit.
 	 * Make them const so the compiler knows they live in .text
+	 * Note: We could use dereference_kernel_function_descriptor()
+	 * instead but we want to keep it simple here.
 	 */
 	extern void * const handle_interruption;
 	extern void * const ret_from_kernel_thread;
@@ -216,7 +218,7 @@ static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int
 	extern void * const intr_return;
 	extern void * const _switch_to_ret;
 #ifdef CONFIG_IRQSTACKS
-	extern void * const call_on_stack;
+	extern void * const _call_on_stack;
 #endif /* CONFIG_IRQSTACKS */
 
 	if (pc == (unsigned long) &handle_interruption) {
@@ -251,7 +253,7 @@ static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int
 	}
 
 #ifdef CONFIG_IRQSTACKS
-	if (pc == (unsigned long) &call_on_stack) {
+	if (pc == (unsigned long) &_call_on_stack) {
 		info->prev_sp = *(unsigned long *)(info->sp - FRAME_SIZE - REG_SZ);
 		info->prev_ip = *(unsigned long *)(info->sp - FRAME_SIZE - RP_OFFSET);
 		return 1;
@@ -403,9 +405,31 @@ void unwind_frame_init_from_blocked_task(struct unwind_frame_info *info, struct
 	kfree(r2);
 }
 
-void unwind_frame_init_running(struct unwind_frame_info *info, struct pt_regs *regs)
+#define get_parisc_stackpointer() ({ \
+	unsigned long sp; \
+	__asm__("copy %%r30, %0" : "=r"(sp)); \
+	(sp); \
+})
+
+void unwind_frame_init_task(struct unwind_frame_info *info,
+	struct task_struct *task, struct pt_regs *regs)
 {
-	unwind_frame_init(info, current, regs);
+	task = task ? task : current;
+
+	if (task == current) {
+		struct pt_regs r;
+
+		if (!regs) {
+			memset(&r, 0, sizeof(r));
+			r.iaoq[0] =  _THIS_IP_;
+			r.gr[2] = _RET_IP_;
+			r.gr[30] = get_parisc_stackpointer();
+			regs = &r;
+		}
+		unwind_frame_init(info, task, &r);
+	} else {
+		unwind_frame_init_from_blocked_task(info, task);
+	}
 }
 
 int unwind_once(struct unwind_frame_info *next_frame)
@@ -442,19 +466,12 @@ int unwind_to_user(struct unwind_frame_info *info)
 unsigned long return_address(unsigned int level)
 {
 	struct unwind_frame_info info;
-	struct pt_regs r;
-	unsigned long sp;
 
 	/* initialize unwind info */
-	asm volatile ("copy %%r30, %0" : "=r"(sp));
-	memset(&r, 0, sizeof(struct pt_regs));
-	r.iaoq[0] = _THIS_IP_;
-	r.gr[2] = _RET_IP_;
-	r.gr[30] = sp;
-	unwind_frame_init(&info, current, &r);
+	unwind_frame_init_task(&info, current, NULL);
 
 	/* unwind stack */
-	++level;
+	level += 2;
 	do {
 		if (unwind_once(&info) < 0 || info.ip == 0)
 			return 0;
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index a80669209155..db0b6eebbfa5 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -177,6 +177,7 @@ config PPC
 	select HAVE_ARCH_KGDB
 	select HAVE_ARCH_MMAP_RND_BITS
 	select HAVE_ARCH_MMAP_RND_COMPAT_BITS	if COMPAT
+	select HAVE_ARCH_PREL32_RELOCATIONS
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_CBPF_JIT			if !PPC64
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 1f345a0b6ba2..83a9aa3cf689 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -390,4 +390,51 @@ extern int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu);
 #define SPLIT_HACK_MASK			0xff000000
 #define SPLIT_HACK_OFFS			0xfb000000
 
+/*
+ * This packs a VCPU ID from the [0..KVM_MAX_VCPU_ID) space down to the
+ * [0..KVM_MAX_VCPUS) space, using knowledge of the guest's core stride
+ * (but not its actual threading mode, which is not available) to avoid
+ * collisions.
+ *
+ * The implementation leaves VCPU IDs from the range [0..KVM_MAX_VCPUS) (block
+ * 0) unchanged: if the guest is filling each VCORE completely then it will be
+ * using consecutive IDs and it will fill the space without any packing.
+ *
+ * For higher VCPU IDs, the packed ID is based on the VCPU ID modulo
+ * KVM_MAX_VCPUS (effectively masking off the top bits) and then an offset is
+ * added to avoid collisions.
+ *
+ * VCPU IDs in the range [KVM_MAX_VCPUS..(KVM_MAX_VCPUS*2)) (block 1) are only
+ * possible if the guest is leaving at least 1/2 of each VCORE empty, so IDs
+ * can be safely packed into the second half of each VCORE by adding an offset
+ * of (stride / 2).
+ *
+ * Similarly, if VCPU IDs in the range [(KVM_MAX_VCPUS*2)..(KVM_MAX_VCPUS*4))
+ * (blocks 2 and 3) are seen, the guest must be leaving at least 3/4 of each
+ * VCORE empty so packed IDs can be offset by (stride / 4) and (stride * 3 / 4).
+ *
+ * Finally, VCPU IDs from blocks 5..7 will only be seen if the guest is using a
+ * stride of 8 and 1 thread per core so the remaining offsets of 1, 5, 3 and 7
+ * must be free to use.
+ *
+ * (The offsets for each block are stored in block_offsets[], indexed by the
+ * block number if the stride is 8. For cases where the guest's stride is less
+ * than 8, we can re-use the block_offsets array by multiplying the block
+ * number by (MAX_SMT_THREADS / stride) to reach the correct entry.)
+ */
+static inline u32 kvmppc_pack_vcpu_id(struct kvm *kvm, u32 id)
+{
+	const int block_offsets[MAX_SMT_THREADS] = {0, 4, 2, 6, 1, 5, 3, 7};
+	int stride = kvm->arch.emul_smt_mode;
+	int block = (id / KVM_MAX_VCPUS) * (MAX_SMT_THREADS / stride);
+	u32 packed_id;
+
+	if (WARN_ONCE(block >= MAX_SMT_THREADS, "VCPU ID too large to pack"))
+		return 0;
+	packed_id = (id % KVM_MAX_VCPUS) + block_offsets[block];
+	if (WARN_ONCE(packed_id >= KVM_MAX_VCPUS, "VCPU ID packing failed"))
+		return 0;
+	return packed_id;
+}
+
 #endif /* __ASM_KVM_BOOK3S_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index fa4efa7e88f7..906bcbdfd2a1 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -42,7 +42,14 @@
 #define KVM_USER_MEM_SLOTS	512
 
 #include <asm/cputhreads.h>
-#define KVM_MAX_VCPU_ID                (threads_per_subcore * KVM_MAX_VCORES)
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+#include <asm/kvm_book3s_asm.h>		/* for MAX_SMT_THREADS */
+#define KVM_MAX_VCPU_ID		(MAX_SMT_THREADS * KVM_MAX_VCORES)
+
+#else
+#define KVM_MAX_VCPU_ID		KVM_MAX_VCPUS
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 
 #define __KVM_HAVE_ARCH_INTC_INITIALIZED
 
@@ -672,7 +679,7 @@ struct kvm_vcpu_arch {
 	gva_t vaddr_accessed;
 	pgd_t *pgdir;
 
-	u8 io_gpr; /* GPR used as IO source/target */
+	u16 io_gpr; /* GPR used as IO source/target */
 	u8 mmio_host_swabbed;
 	u8 mmio_sign_extend;
 	/* conversion between single and double precision */
@@ -688,7 +695,6 @@ struct kvm_vcpu_arch {
 	 */
 	u8 mmio_vsx_copy_nums;
 	u8 mmio_vsx_offset;
-	u8 mmio_vsx_tx_sx_enabled;
 	u8 mmio_vmx_copy_nums;
 	u8 mmio_vmx_offset;
 	u8 mmio_copy_type;
@@ -801,14 +807,14 @@ struct kvm_vcpu_arch {
 #define KVMPPC_VCPU_BUSY_IN_HOST	2
 
 /* Values for vcpu->arch.io_gpr */
-#define KVM_MMIO_REG_MASK	0x001f
-#define KVM_MMIO_REG_EXT_MASK	0xffe0
+#define KVM_MMIO_REG_MASK	0x003f
+#define KVM_MMIO_REG_EXT_MASK	0xffc0
 #define KVM_MMIO_REG_GPR	0x0000
-#define KVM_MMIO_REG_FPR	0x0020
-#define KVM_MMIO_REG_QPR	0x0040
-#define KVM_MMIO_REG_FQPR	0x0060
-#define KVM_MMIO_REG_VSX	0x0080
-#define KVM_MMIO_REG_VMX	0x00c0
+#define KVM_MMIO_REG_FPR	0x0040
+#define KVM_MMIO_REG_QPR	0x0080
+#define KVM_MMIO_REG_FQPR	0x00c0
+#define KVM_MMIO_REG_VSX	0x0100
+#define KVM_MMIO_REG_VMX	0x0180
 
 #define __KVM_HAVE_ARCH_WQP
 #define __KVM_HAVE_CREATE_DEVICE
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 486b7c83b8c5..e5b314ed054e 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -163,7 +163,7 @@
 #define PSSCR_ESL		0x00200000 /* Enable State Loss */
 #define PSSCR_SD		0x00400000 /* Status Disable */
 #define PSSCR_PLS	0xf000000000000000 /* Power-saving Level Status */
-#define PSSCR_GUEST_VIS	0xf0000000000003ff /* Guest-visible PSSCR fields */
+#define PSSCR_GUEST_VIS	0xf0000000000003ffUL /* Guest-visible PSSCR fields */
 #define PSSCR_FAKE_SUSPEND	0x00000400 /* Fake-suspend bit (P9 DD2.2) */
 #define PSSCR_FAKE_SUSPEND_LG	10	   /* Fake-suspend bit position */
 
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index e8afdd4f4814..9a3f2646ecc7 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -179,7 +179,7 @@ extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
 		if ((tbltmp->it_page_shift <= stt->page_shift) &&
 				(tbltmp->it_offset << tbltmp->it_page_shift ==
 				 stt->offset << stt->page_shift) &&
-				(tbltmp->it_size << tbltmp->it_page_shift ==
+				(tbltmp->it_size << tbltmp->it_page_shift >=
 				 stt->size << stt->page_shift)) {
 			/*
 			 * Reference the table to avoid races with
@@ -295,7 +295,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
 {
 	struct kvmppc_spapr_tce_table *stt = NULL;
 	struct kvmppc_spapr_tce_table *siter;
-	unsigned long npages, size;
+	unsigned long npages, size = args->size;
 	int ret = -ENOMEM;
 	int i;
 
@@ -303,7 +303,6 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
 		(args->offset + args->size > (ULLONG_MAX >> args->page_shift)))
 		return -EINVAL;
 
-	size = _ALIGN_UP(args->size, PAGE_SIZE >> 3);
 	npages = kvmppc_tce_pages(size);
 	ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true);
 	if (ret)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 96f1cc6c97b7..574fc1dcb2bf 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -127,14 +127,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
  * and SPURR count and should be set according to the number of
  * online threads in the vcore being run.
  */
-#define RWMR_RPA_P8_1THREAD	0x164520C62609AECA
-#define RWMR_RPA_P8_2THREAD	0x7FFF2908450D8DA9
-#define RWMR_RPA_P8_3THREAD	0x164520C62609AECA
-#define RWMR_RPA_P8_4THREAD	0x199A421245058DA9
-#define RWMR_RPA_P8_5THREAD	0x164520C62609AECA
-#define RWMR_RPA_P8_6THREAD	0x164520C62609AECA
-#define RWMR_RPA_P8_7THREAD	0x164520C62609AECA
-#define RWMR_RPA_P8_8THREAD	0x164520C62609AECA
+#define RWMR_RPA_P8_1THREAD	0x164520C62609AECAUL
+#define RWMR_RPA_P8_2THREAD	0x7FFF2908450D8DA9UL
+#define RWMR_RPA_P8_3THREAD	0x164520C62609AECAUL
+#define RWMR_RPA_P8_4THREAD	0x199A421245058DA9UL
+#define RWMR_RPA_P8_5THREAD	0x164520C62609AECAUL
+#define RWMR_RPA_P8_6THREAD	0x164520C62609AECAUL
+#define RWMR_RPA_P8_7THREAD	0x164520C62609AECAUL
+#define RWMR_RPA_P8_8THREAD	0x164520C62609AECAUL
 
 static unsigned long p8_rwmr_values[MAX_SMT_THREADS + 1] = {
 	RWMR_RPA_P8_1THREAD,
@@ -1807,7 +1807,7 @@ static int threads_per_vcore(struct kvm *kvm)
 	return threads_per_subcore;
 }
 
-static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
+static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int id)
 {
 	struct kvmppc_vcore *vcore;
 
@@ -1821,7 +1821,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
 	init_swait_queue_head(&vcore->wq);
 	vcore->preempt_tb = TB_NIL;
 	vcore->lpcr = kvm->arch.lpcr;
-	vcore->first_vcpuid = core * kvm->arch.smt_mode;
+	vcore->first_vcpuid = id;
 	vcore->kvm = kvm;
 	INIT_LIST_HEAD(&vcore->preempt_list);
 
@@ -2037,12 +2037,26 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
 	mutex_lock(&kvm->lock);
 	vcore = NULL;
 	err = -EINVAL;
-	core = id / kvm->arch.smt_mode;
+	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+		if (id >= (KVM_MAX_VCPUS * kvm->arch.emul_smt_mode)) {
+			pr_devel("KVM: VCPU ID too high\n");
+			core = KVM_MAX_VCORES;
+		} else {
+			BUG_ON(kvm->arch.smt_mode != 1);
+			core = kvmppc_pack_vcpu_id(kvm, id);
+		}
+	} else {
+		core = id / kvm->arch.smt_mode;
+	}
 	if (core < KVM_MAX_VCORES) {
 		vcore = kvm->arch.vcores[core];
-		if (!vcore) {
+		if (vcore && cpu_has_feature(CPU_FTR_ARCH_300)) {
+			pr_devel("KVM: collision on id %u", id);
+			vcore = NULL;
+		} else if (!vcore) {
 			err = -ENOMEM;
-			vcore = kvmppc_vcore_create(kvm, core);
+			vcore = kvmppc_vcore_create(kvm,
+					id & ~(kvm->arch.smt_mode - 1));
 			kvm->arch.vcores[core] = vcore;
 			kvm->arch.online_vcores++;
 		}
@@ -4550,6 +4564,8 @@ static int kvmppc_book3s_init_hv(void)
 			pr_err("KVM-HV: Cannot determine method for accessing XICS\n");
 			return -ENODEV;
 		}
+		/* presence of intc confirmed - node can be dropped again */
+		of_node_put(np);
 	}
 #endif
 
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index f9818d7d3381..126f02b3ffb8 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -317,6 +317,11 @@ static int xive_select_target(struct kvm *kvm, u32 *server, u8 prio)
 	return -EBUSY;
 }
 
+static u32 xive_vp(struct kvmppc_xive *xive, u32 server)
+{
+	return xive->vp_base + kvmppc_pack_vcpu_id(xive->kvm, server);
+}
+
 static u8 xive_lock_and_mask(struct kvmppc_xive *xive,
 			     struct kvmppc_xive_src_block *sb,
 			     struct kvmppc_xive_irq_state *state)
@@ -362,7 +367,7 @@ static u8 xive_lock_and_mask(struct kvmppc_xive *xive,
 	 */
 	if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) {
 		xive_native_configure_irq(hw_num,
-					  xive->vp_base + state->act_server,
+					  xive_vp(xive, state->act_server),
 					  MASKED, state->number);
 		/* set old_p so we can track if an H_EOI was done */
 		state->old_p = true;
@@ -418,7 +423,7 @@ static void xive_finish_unmask(struct kvmppc_xive *xive,
 	 */
 	if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) {
 		xive_native_configure_irq(hw_num,
-					  xive->vp_base + state->act_server,
+					  xive_vp(xive, state->act_server),
 					  state->act_priority, state->number);
 		/* If an EOI is needed, do it here */
 		if (!state->old_p)
@@ -495,7 +500,7 @@ static int xive_target_interrupt(struct kvm *kvm,
 	kvmppc_xive_select_irq(state, &hw_num, NULL);
 
 	return xive_native_configure_irq(hw_num,
-					 xive->vp_base + server,
+					 xive_vp(xive, server),
 					 prio, state->number);
 }
 
@@ -883,7 +888,7 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
 	 * which is fine for a never started interrupt.
 	 */
 	xive_native_configure_irq(hw_irq,
-				  xive->vp_base + state->act_server,
+				  xive_vp(xive, state->act_server),
 				  state->act_priority, state->number);
 
 	/*
@@ -959,7 +964,7 @@ int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
 
 	/* Reconfigure the IPI */
 	xive_native_configure_irq(state->ipi_number,
-				  xive->vp_base + state->act_server,
+				  xive_vp(xive, state->act_server),
 				  state->act_priority, state->number);
 
 	/*
@@ -1084,7 +1089,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
 		pr_devel("Duplicate !\n");
 		return -EEXIST;
 	}
-	if (cpu >= KVM_MAX_VCPUS) {
+	if (cpu >= (KVM_MAX_VCPUS * vcpu->kvm->arch.emul_smt_mode)) {
 		pr_devel("Out of bounds !\n");
 		return -EINVAL;
 	}
@@ -1098,7 +1103,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
 	xc->xive = xive;
 	xc->vcpu = vcpu;
 	xc->server_num = cpu;
-	xc->vp_id = xive->vp_base + cpu;
+	xc->vp_id = xive_vp(xive, cpu);
 	xc->mfrr = 0xff;
 	xc->valid = true;
 
diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c
index afde788be141..75dce1ef3bc8 100644
--- a/arch/powerpc/kvm/emulate_loadstore.c
+++ b/arch/powerpc/kvm/emulate_loadstore.c
@@ -106,7 +106,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
 	 * if mmio_vsx_tx_sx_enabled == 1, copy data between
 	 * VSR[32..63] and memory
 	 */
-	vcpu->arch.mmio_vsx_tx_sx_enabled = get_tx_or_sx(inst);
 	vcpu->arch.mmio_vsx_copy_nums = 0;
 	vcpu->arch.mmio_vsx_offset = 0;
 	vcpu->arch.mmio_copy_type = KVMPPC_VSX_COPY_NONE;
@@ -242,8 +241,8 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
 			}
 
 			emulated = kvmppc_handle_vsx_load(run, vcpu,
-					KVM_MMIO_REG_VSX | (op.reg & 0x1f),
-					io_size_each, 1, op.type & SIGNEXT);
+					KVM_MMIO_REG_VSX|op.reg, io_size_each,
+					1, op.type & SIGNEXT);
 			break;
 		}
 #endif
@@ -363,7 +362,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
 			}
 
 			emulated = kvmppc_handle_vsx_store(run, vcpu,
-					op.reg & 0x1f, io_size_each, 1);
+					op.reg, io_size_each, 1);
 			break;
 		}
 #endif
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 3ccc386b380d..eba5756d5b41 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -879,10 +879,10 @@ static inline void kvmppc_set_vsr_dword(struct kvm_vcpu *vcpu,
 	if (offset == -1)
 		return;
 
-	if (vcpu->arch.mmio_vsx_tx_sx_enabled) {
-		val.vval = VCPU_VSX_VR(vcpu, index);
+	if (index >= 32) {
+		val.vval = VCPU_VSX_VR(vcpu, index - 32);
 		val.vsxval[offset] = gpr;
-		VCPU_VSX_VR(vcpu, index) = val.vval;
+		VCPU_VSX_VR(vcpu, index - 32) = val.vval;
 	} else {
 		VCPU_VSX_FPR(vcpu, index, offset) = gpr;
 	}
@@ -894,11 +894,11 @@ static inline void kvmppc_set_vsr_dword_dump(struct kvm_vcpu *vcpu,
 	union kvmppc_one_reg val;
 	int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK;
 
-	if (vcpu->arch.mmio_vsx_tx_sx_enabled) {
-		val.vval = VCPU_VSX_VR(vcpu, index);
+	if (index >= 32) {
+		val.vval = VCPU_VSX_VR(vcpu, index - 32);
 		val.vsxval[0] = gpr;
 		val.vsxval[1] = gpr;
-		VCPU_VSX_VR(vcpu, index) = val.vval;
+		VCPU_VSX_VR(vcpu, index - 32) = val.vval;
 	} else {
 		VCPU_VSX_FPR(vcpu, index, 0) = gpr;
 		VCPU_VSX_FPR(vcpu, index, 1) = gpr;
@@ -911,12 +911,12 @@ static inline void kvmppc_set_vsr_word_dump(struct kvm_vcpu *vcpu,
 	union kvmppc_one_reg val;
 	int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK;
 
-	if (vcpu->arch.mmio_vsx_tx_sx_enabled) {
+	if (index >= 32) {
 		val.vsx32val[0] = gpr;
 		val.vsx32val[1] = gpr;
 		val.vsx32val[2] = gpr;
 		val.vsx32val[3] = gpr;
-		VCPU_VSX_VR(vcpu, index) = val.vval;
+		VCPU_VSX_VR(vcpu, index - 32) = val.vval;
 	} else {
 		val.vsx32val[0] = gpr;
 		val.vsx32val[1] = gpr;
@@ -936,10 +936,10 @@ static inline void kvmppc_set_vsr_word(struct kvm_vcpu *vcpu,
 	if (offset == -1)
 		return;
 
-	if (vcpu->arch.mmio_vsx_tx_sx_enabled) {
-		val.vval = VCPU_VSX_VR(vcpu, index);
+	if (index >= 32) {
+		val.vval = VCPU_VSX_VR(vcpu, index - 32);
 		val.vsx32val[offset] = gpr32;
-		VCPU_VSX_VR(vcpu, index) = val.vval;
+		VCPU_VSX_VR(vcpu, index - 32) = val.vval;
 	} else {
 		dword_offset = offset / 2;
 		word_offset = offset % 2;
@@ -1360,10 +1360,10 @@ static inline int kvmppc_get_vsr_data(struct kvm_vcpu *vcpu, int rs, u64 *val)
 			break;
 		}
 
-		if (!vcpu->arch.mmio_vsx_tx_sx_enabled) {
+		if (rs < 32) {
 			*val = VCPU_VSX_FPR(vcpu, rs, vsx_offset);
 		} else {
-			reg.vval = VCPU_VSX_VR(vcpu, rs);
+			reg.vval = VCPU_VSX_VR(vcpu, rs - 32);
 			*val = reg.vsxval[vsx_offset];
 		}
 		break;
@@ -1377,13 +1377,13 @@ static inline int kvmppc_get_vsr_data(struct kvm_vcpu *vcpu, int rs, u64 *val)
 			break;
 		}
 
-		if (!vcpu->arch.mmio_vsx_tx_sx_enabled) {
+		if (rs < 32) {
 			dword_offset = vsx_offset / 2;
 			word_offset = vsx_offset % 2;
 			reg.vsxval[0] = VCPU_VSX_FPR(vcpu, rs, dword_offset);
 			*val = reg.vsx32val[word_offset];
 		} else {
-			reg.vval = VCPU_VSX_VR(vcpu, rs);
+			reg.vval = VCPU_VSX_VR(vcpu, rs - 32);
 			*val = reg.vsx32val[vsx_offset];
 		}
 		break;
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index a6afa60074cb..054b29c9a533 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -135,6 +135,7 @@ config S390
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_DYNAMIC_FTRACE_WITH_REGS
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS
+	select HAVE_FENTRY
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_FUNCTION_TRACER
@@ -157,6 +158,7 @@ config S390
 	select HAVE_MEMBLOCK_NODE_MAP
 	select HAVE_MEMBLOCK_PHYS_MAP
 	select HAVE_MOD_ARCH_SPECIFIC
+	select HAVE_NOP_MCOUNT
 	select HAVE_OPROFILE
 	select HAVE_PERF_EVENTS
 	select HAVE_REGS_AND_STACK_ACCESS_API
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index eee6703093c3..ba6d122526fb 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -96,13 +96,15 @@ ifdef CONFIG_EXPOLINE
 endif
 
 ifdef CONFIG_FUNCTION_TRACER
-# make use of hotpatch feature if the compiler supports it
-cc_hotpatch	:= -mhotpatch=0,3
-ifeq ($(call cc-option-yn,$(cc_hotpatch)),y)
-CC_FLAGS_FTRACE := $(cc_hotpatch)
-KBUILD_AFLAGS	+= -DCC_USING_HOTPATCH
-KBUILD_CFLAGS	+= -DCC_USING_HOTPATCH
-endif
+  ifeq ($(call cc-option-yn,-mfentry -mnop-mcount),n)
+    # make use of hotpatch feature if the compiler supports it
+    cc_hotpatch	:= -mhotpatch=0,3
+    ifeq ($(call cc-option-yn,$(cc_hotpatch)),y)
+      CC_FLAGS_FTRACE := $(cc_hotpatch)
+      KBUILD_AFLAGS	+= -DCC_USING_HOTPATCH
+      KBUILD_CFLAGS	+= -DCC_USING_HOTPATCH
+    endif
+  endif
 endif
 
 # Test CFI features of binutils
diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
index cfccc0edd00d..8ea270fdc7fb 100644
--- a/arch/s390/include/asm/ftrace.h
+++ b/arch/s390/include/asm/ftrace.h
@@ -4,7 +4,7 @@
 
 #define ARCH_SUPPORTS_FTRACE_OPS 1
 
-#ifdef CC_USING_HOTPATCH
+#if defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT)
 #define MCOUNT_INSN_SIZE	6
 #else
 #define MCOUNT_INSN_SIZE	24
@@ -42,7 +42,7 @@ struct ftrace_insn {
 static inline void ftrace_generate_nop_insn(struct ftrace_insn *insn)
 {
 #ifdef CONFIG_FUNCTION_TRACER
-#ifdef CC_USING_HOTPATCH
+#if defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT)
 	/* brcl 0,0 */
 	insn->opc = 0xc004;
 	insn->disp = 0;
@@ -57,7 +57,7 @@ static inline void ftrace_generate_nop_insn(struct ftrace_insn *insn)
 static inline int is_ftrace_nop(struct ftrace_insn *insn)
 {
 #ifdef CONFIG_FUNCTION_TRACER
-#ifdef CC_USING_HOTPATCH
+#if defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT)
 	if (insn->disp == 0)
 		return 1;
 #else
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index a2188e309bd6..29c940bf8506 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -269,6 +269,7 @@ struct kvm_s390_sie_block {
 	__u8	reserved1c0[8];		/* 0x01c0 */
 #define ECD_HOSTREGMGMT	0x20000000
 #define ECD_MEF		0x08000000
+#define ECD_ETOKENF	0x02000000
 	__u32	ecd;			/* 0x01c8 */
 	__u8	reserved1cc[18];	/* 0x01cc */
 	__u64	pp;			/* 0x01de */
@@ -655,6 +656,7 @@ struct kvm_vcpu_arch {
 	seqcount_t cputm_seqcount;
 	__u64 cputm_start;
 	bool gs_enabled;
+	bool skey_enabled;
 };
 
 struct kvm_vm_stat {
@@ -793,12 +795,6 @@ struct kvm_s390_vsie {
 	struct page *pages[KVM_MAX_VCPUS];
 };
 
-struct kvm_s390_migration_state {
-	unsigned long bitmap_size;	/* in bits (number of guest pages) */
-	atomic64_t dirty_pages;		/* number of dirty pages */
-	unsigned long *pgste_bitmap;
-};
-
 struct kvm_arch{
 	void *sca;
 	int use_esca;
@@ -828,7 +824,8 @@ struct kvm_arch{
 	struct kvm_s390_vsie vsie;
 	u8 epdx;
 	u64 epoch;
-	struct kvm_s390_migration_state *migration_state;
+	int migration_mode;
+	atomic64_t cmma_dirty_pages;
 	/* subset of available cpu features enabled by user space */
 	DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
 	struct kvm_s390_gisa *gisa;
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 4cdaa55fabfe..9a50f02b9894 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -4,7 +4,7 @@
 /*
  * KVM s390 specific structures and definitions
  *
- * Copyright IBM Corp. 2008
+ * Copyright IBM Corp. 2008, 2018
  *
  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  *               Christian Borntraeger <borntraeger@de.ibm.com>
@@ -225,6 +225,7 @@ struct kvm_guest_debug_arch {
 #define KVM_SYNC_FPRS   (1UL << 8)
 #define KVM_SYNC_GSCB   (1UL << 9)
 #define KVM_SYNC_BPBC   (1UL << 10)
+#define KVM_SYNC_ETOKEN (1UL << 11)
 /* length and alignment of the sdnx as a power of two */
 #define SDNXC 8
 #define SDNXL (1UL << SDNXC)
@@ -258,6 +259,8 @@ struct kvm_sync_regs {
 		struct {
 			__u64 reserved1[2];
 			__u64 gscb[4];
+			__u64 etoken;
+			__u64 etoken_extension;
 		};
 	};
 };
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index dc76d813e420..84be7f02d0c2 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -61,7 +61,7 @@ unsigned long ftrace_plt;
 
 static inline void ftrace_generate_orig_insn(struct ftrace_insn *insn)
 {
-#ifdef CC_USING_HOTPATCH
+#if defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT)
 	/* brcl 0,0 */
 	insn->opc = 0xc004;
 	insn->disp = 0;
diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
index 27110f3294ed..e93fbf02490c 100644
--- a/arch/s390/kernel/mcount.S
+++ b/arch/s390/kernel/mcount.S
@@ -35,7 +35,7 @@ ENTRY(ftrace_caller)
 	.globl	ftrace_regs_caller
 	.set	ftrace_regs_caller,ftrace_caller
 	lgr	%r1,%r15
-#ifndef CC_USING_HOTPATCH
+#if !(defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT))
 	aghi	%r0,MCOUNT_RETURN_FIXUP
 #endif
 	aghi	%r15,-STACK_FRAME_SIZE
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index cb198d4a6dca..5c53e977be62 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -665,7 +665,7 @@ static void cpumsf_output_event_pid(struct perf_event *event,
 		goto out;
 
 	/* Update the process ID (see also kernel/events/core.c) */
-	data->tid_entry.pid = cpumsf_pid_type(event, pid, __PIDTYPE_TGID);
+	data->tid_entry.pid = cpumsf_pid_type(event, pid, PIDTYPE_TGID);
 	data->tid_entry.tid = cpumsf_pid_type(event, pid, PIDTYPE_PID);
 
 	perf_output_sample(&handle, &header, data, event);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index f9d90337e64a..91ad4a9425c0 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -906,54 +906,37 @@ static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
  */
 static int kvm_s390_vm_start_migration(struct kvm *kvm)
 {
-	struct kvm_s390_migration_state *mgs;
 	struct kvm_memory_slot *ms;
-	/* should be the only one */
 	struct kvm_memslots *slots;
-	unsigned long ram_pages;
+	unsigned long ram_pages = 0;
 	int slotnr;
 
 	/* migration mode already enabled */
-	if (kvm->arch.migration_state)
+	if (kvm->arch.migration_mode)
 		return 0;
-
 	slots = kvm_memslots(kvm);
 	if (!slots || !slots->used_slots)
 		return -EINVAL;
 
-	mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
-	if (!mgs)
-		return -ENOMEM;
-	kvm->arch.migration_state = mgs;
-
-	if (kvm->arch.use_cmma) {
+	if (!kvm->arch.use_cmma) {
+		kvm->arch.migration_mode = 1;
+		return 0;
+	}
+	/* mark all the pages in active slots as dirty */
+	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
+		ms = slots->memslots + slotnr;
 		/*
-		 * Get the first slot. They are reverse sorted by base_gfn, so
-		 * the first slot is also the one at the end of the address
-		 * space. We have verified above that at least one slot is
-		 * present.
+		 * The second half of the bitmap is only used on x86,
+		 * and would be wasted otherwise, so we put it to good
+		 * use here to keep track of the state of the storage
+		 * attributes.
 		 */
-		ms = slots->memslots;
-		/* round up so we only use full longs */
-		ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
-		/* allocate enough bytes to store all the bits */
-		mgs->pgste_bitmap = vmalloc(ram_pages / 8);
-		if (!mgs->pgste_bitmap) {
-			kfree(mgs);
-			kvm->arch.migration_state = NULL;
-			return -ENOMEM;
-		}
-
-		mgs->bitmap_size = ram_pages;
-		atomic64_set(&mgs->dirty_pages, ram_pages);
-		/* mark all the pages in active slots as dirty */
-		for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
-			ms = slots->memslots + slotnr;
-			bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
-		}
-
-		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
+		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
+		ram_pages += ms->npages;
 	}
+	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
+	kvm->arch.migration_mode = 1;
+	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
 	return 0;
 }
 
@@ -963,21 +946,12 @@ static int kvm_s390_vm_start_migration(struct kvm *kvm)
  */
 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
 {
-	struct kvm_s390_migration_state *mgs;
-
 	/* migration mode already disabled */
-	if (!kvm->arch.migration_state)
+	if (!kvm->arch.migration_mode)
 		return 0;
-	mgs = kvm->arch.migration_state;
-	kvm->arch.migration_state = NULL;
-
-	if (kvm->arch.use_cmma) {
+	kvm->arch.migration_mode = 0;
+	if (kvm->arch.use_cmma)
 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
-		/* We have to wait for the essa emulation to finish */
-		synchronize_srcu(&kvm->srcu);
-		vfree(mgs->pgste_bitmap);
-	}
-	kfree(mgs);
 	return 0;
 }
 
@@ -1005,7 +979,7 @@ static int kvm_s390_vm_set_migration(struct kvm *kvm,
 static int kvm_s390_vm_get_migration(struct kvm *kvm,
 				     struct kvm_device_attr *attr)
 {
-	u64 mig = (kvm->arch.migration_state != NULL);
+	u64 mig = kvm->arch.migration_mode;
 
 	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
 		return -ENXIO;
@@ -1653,6 +1627,134 @@ out:
 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
 
 /*
+ * Similar to gfn_to_memslot, but returns the index of a memslot also when the
+ * address falls in a hole. In that case the index of one of the memslots
+ * bordering the hole is returned.
+ */
+static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
+{
+	int start = 0, end = slots->used_slots;
+	int slot = atomic_read(&slots->lru_slot);
+	struct kvm_memory_slot *memslots = slots->memslots;
+
+	if (gfn >= memslots[slot].base_gfn &&
+	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
+		return slot;
+
+	while (start < end) {
+		slot = start + (end - start) / 2;
+
+		if (gfn >= memslots[slot].base_gfn)
+			end = slot;
+		else
+			start = slot + 1;
+	}
+
+	if (gfn >= memslots[start].base_gfn &&
+	    gfn < memslots[start].base_gfn + memslots[start].npages) {
+		atomic_set(&slots->lru_slot, start);
+	}
+
+	return start;
+}
+
+static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
+			      u8 *res, unsigned long bufsize)
+{
+	unsigned long pgstev, hva, cur_gfn = args->start_gfn;
+
+	args->count = 0;
+	while (args->count < bufsize) {
+		hva = gfn_to_hva(kvm, cur_gfn);
+		/*
+		 * We return an error if the first value was invalid, but we
+		 * return successfully if at least one value was copied.
+		 */
+		if (kvm_is_error_hva(hva))
+			return args->count ? 0 : -EFAULT;
+		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
+			pgstev = 0;
+		res[args->count++] = (pgstev >> 24) & 0x43;
+		cur_gfn++;
+	}
+
+	return 0;
+}
+
+static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
+					      unsigned long cur_gfn)
+{
+	int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
+	struct kvm_memory_slot *ms = slots->memslots + slotidx;
+	unsigned long ofs = cur_gfn - ms->base_gfn;
+
+	if (ms->base_gfn + ms->npages <= cur_gfn) {
+		slotidx--;
+		/* If we are above the highest slot, wrap around */
+		if (slotidx < 0)
+			slotidx = slots->used_slots - 1;
+
+		ms = slots->memslots + slotidx;
+		ofs = 0;
+	}
+	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
+	while ((slotidx > 0) && (ofs >= ms->npages)) {
+		slotidx--;
+		ms = slots->memslots + slotidx;
+		ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
+	}
+	return ms->base_gfn + ofs;
+}
+
+static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
+			     u8 *res, unsigned long bufsize)
+{
+	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
+	struct kvm_memslots *slots = kvm_memslots(kvm);
+	struct kvm_memory_slot *ms;
+
+	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
+	ms = gfn_to_memslot(kvm, cur_gfn);
+	args->count = 0;
+	args->start_gfn = cur_gfn;
+	if (!ms)
+		return 0;
+	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
+	mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
+
+	while (args->count < bufsize) {
+		hva = gfn_to_hva(kvm, cur_gfn);
+		if (kvm_is_error_hva(hva))
+			return 0;
+		/* Decrement only if we actually flipped the bit to 0 */
+		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
+			atomic64_dec(&kvm->arch.cmma_dirty_pages);
+		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
+			pgstev = 0;
+		/* Save the value */
+		res[args->count++] = (pgstev >> 24) & 0x43;
+		/* If the next bit is too far away, stop. */
+		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
+			return 0;
+		/* If we reached the previous "next", find the next one */
+		if (cur_gfn == next_gfn)
+			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
+		/* Reached the end of memory or of the buffer, stop */
+		if ((next_gfn >= mem_end) ||
+		    (next_gfn - args->start_gfn >= bufsize))
+			return 0;
+		cur_gfn++;
+		/* Reached the end of the current memslot, take the next one. */
+		if (cur_gfn - ms->base_gfn >= ms->npages) {
+			ms = gfn_to_memslot(kvm, cur_gfn);
+			if (!ms)
+				return 0;
+		}
+	}
+	return 0;
+}
+
+/*
  * This function searches for the next page with dirty CMMA attributes, and
  * saves the attributes in the buffer up to either the end of the buffer or
  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
@@ -1663,22 +1765,18 @@ out:
 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
 				  struct kvm_s390_cmma_log *args)
 {
-	struct kvm_s390_migration_state *s = kvm->arch.migration_state;
-	unsigned long bufsize, hva, pgstev, i, next, cur;
-	int srcu_idx, peek, r = 0, rr;
-	u8 *res;
-
-	cur = args->start_gfn;
-	i = next = pgstev = 0;
+	unsigned long bufsize;
+	int srcu_idx, peek, ret;
+	u8 *values;
 
-	if (unlikely(!kvm->arch.use_cmma))
+	if (!kvm->arch.use_cmma)
 		return -ENXIO;
 	/* Invalid/unsupported flags were specified */
 	if (args->flags & ~KVM_S390_CMMA_PEEK)
 		return -EINVAL;
 	/* Migration mode query, and we are not doing a migration */
 	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
-	if (!peek && !s)
+	if (!peek && !kvm->arch.migration_mode)
 		return -EINVAL;
 	/* CMMA is disabled or was not used, or the buffer has length zero */
 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
@@ -1686,74 +1784,35 @@ static int kvm_s390_get_cmma_bits(struct kvm *kvm,
 		memset(args, 0, sizeof(*args));
 		return 0;
 	}
-
-	if (!peek) {
-		/* We are not peeking, and there are no dirty pages */
-		if (!atomic64_read(&s->dirty_pages)) {
-			memset(args, 0, sizeof(*args));
-			return 0;
-		}
-		cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
-				    args->start_gfn);
-		if (cur >= s->bitmap_size)	/* nothing found, loop back */
-			cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
-		if (cur >= s->bitmap_size) {	/* again! (very unlikely) */
-			memset(args, 0, sizeof(*args));
-			return 0;
-		}
-		next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
+	/* We are not peeking, and there are no dirty pages */
+	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
+		memset(args, 0, sizeof(*args));
+		return 0;
 	}
 
-	res = vmalloc(bufsize);
-	if (!res)
+	values = vmalloc(bufsize);
+	if (!values)
 		return -ENOMEM;
 
-	args->start_gfn = cur;
-
 	down_read(&kvm->mm->mmap_sem);
 	srcu_idx = srcu_read_lock(&kvm->srcu);
-	while (i < bufsize) {
-		hva = gfn_to_hva(kvm, cur);
-		if (kvm_is_error_hva(hva)) {
-			r = -EFAULT;
-			break;
-		}
-		/* decrement only if we actually flipped the bit to 0 */
-		if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
-			atomic64_dec(&s->dirty_pages);
-		r = get_pgste(kvm->mm, hva, &pgstev);
-		if (r < 0)
-			pgstev = 0;
-		/* save the value */
-		res[i++] = (pgstev >> 24) & 0x43;
-		/*
-		 * if the next bit is too far away, stop.
-		 * if we reached the previous "next", find the next one
-		 */
-		if (!peek) {
-			if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
-				break;
-			if (cur == next)
-				next = find_next_bit(s->pgste_bitmap,
-						     s->bitmap_size, cur + 1);
-		/* reached the end of the bitmap or of the buffer, stop */
-			if ((next >= s->bitmap_size) ||
-			    (next >= args->start_gfn + bufsize))
-				break;
-		}
-		cur++;
-	}
+	if (peek)
+		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
+	else
+		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
 	srcu_read_unlock(&kvm->srcu, srcu_idx);
 	up_read(&kvm->mm->mmap_sem);
-	args->count = i;
-	args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
 
-	rr = copy_to_user((void __user *)args->values, res, args->count);
-	if (rr)
-		r = -EFAULT;
+	if (kvm->arch.migration_mode)
+		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
+	else
+		args->remaining = 0;
 
-	vfree(res);
-	return r;
+	if (copy_to_user((void __user *)args->values, values, args->count))
+		ret = -EFAULT;
+
+	vfree(values);
+	return ret;
 }
 
 /*
@@ -2192,10 +2251,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	kvm_s390_destroy_adapters(kvm);
 	kvm_s390_clear_float_irqs(kvm);
 	kvm_s390_vsie_destroy(kvm);
-	if (kvm->arch.migration_state) {
-		vfree(kvm->arch.migration_state->pgste_bitmap);
-		kfree(kvm->arch.migration_state);
-	}
 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
 }
 
@@ -2353,6 +2408,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
 	if (test_kvm_facility(vcpu->kvm, 133))
 		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
+	if (test_kvm_facility(vcpu->kvm, 156))
+		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
 	 */
@@ -2602,7 +2659,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	}
 	if (test_kvm_facility(vcpu->kvm, 139))
 		vcpu->arch.sie_block->ecd |= ECD_MEF;
-
+	if (test_kvm_facility(vcpu->kvm, 156))
+		vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
 	if (vcpu->arch.sie_block->gd) {
 		vcpu->arch.sie_block->eca |= ECA_AIV;
 		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
@@ -3520,6 +3578,7 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		}
 		preempt_enable();
 	}
+	/* SIE will load etoken directly from SDNX and therefore kvm_run */
 
 	kvm_run->kvm_dirty_regs = 0;
 }
@@ -3559,7 +3618,7 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 			__ctl_clear_bit(2, 4);
 		vcpu->arch.host_gscb = NULL;
 	}
-
+	/* SIE will save etoken directly into SDNX and therefore kvm_run */
 }
 
 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index cfc5a62329f6..d68f10441a16 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -205,13 +205,10 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
 int kvm_s390_skey_check_enable(struct kvm_vcpu *vcpu)
 {
 	int rc;
-	struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block;
 
 	trace_kvm_s390_skey_related_inst(vcpu);
 	/* Already enabled? */
-	if (vcpu->kvm->arch.use_skf &&
-	    !(sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)) &&
-	    !kvm_s390_test_cpuflags(vcpu, CPUSTAT_KSS))
+	if (vcpu->arch.skey_enabled)
 		return 0;
 
 	rc = s390_enable_skey();
@@ -222,9 +219,10 @@ int kvm_s390_skey_check_enable(struct kvm_vcpu *vcpu)
 	if (kvm_s390_test_cpuflags(vcpu, CPUSTAT_KSS))
 		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_KSS);
 	if (!vcpu->kvm->arch.use_skf)
-		sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
+		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
 	else
-		sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
+		vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
+	vcpu->arch.skey_enabled = true;
 	return 0;
 }
 
@@ -987,7 +985,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
 			return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 
 		if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
-			if (clear_user((void __user *)vmaddr, PAGE_SIZE))
+			if (kvm_clear_guest(vcpu->kvm, start, PAGE_SIZE))
 				return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 		}
 
@@ -1024,9 +1022,11 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
-static inline int do_essa(struct kvm_vcpu *vcpu, const int orc)
+/*
+ * Must be called with relevant read locks held (kvm->mm->mmap_sem, kvm->srcu)
+ */
+static inline int __do_essa(struct kvm_vcpu *vcpu, const int orc)
 {
-	struct kvm_s390_migration_state *ms = vcpu->kvm->arch.migration_state;
 	int r1, r2, nappended, entries;
 	unsigned long gfn, hva, res, pgstev, ptev;
 	unsigned long *cbrlo;
@@ -1076,10 +1076,12 @@ static inline int do_essa(struct kvm_vcpu *vcpu, const int orc)
 		cbrlo[entries] = gfn << PAGE_SHIFT;
 	}
 
-	if (orc && gfn < ms->bitmap_size) {
-		/* increment only if we are really flipping the bit to 1 */
-		if (!test_and_set_bit(gfn, ms->pgste_bitmap))
-			atomic64_inc(&ms->dirty_pages);
+	if (orc) {
+		struct kvm_memory_slot *ms = gfn_to_memslot(vcpu->kvm, gfn);
+
+		/* Increment only if we are really flipping the bit */
+		if (ms && !test_and_set_bit(gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
+			atomic64_inc(&vcpu->kvm->arch.cmma_dirty_pages);
 	}
 
 	return nappended;
@@ -1108,7 +1110,7 @@ static int handle_essa(struct kvm_vcpu *vcpu)
 						: ESSA_SET_STABLE_IF_RESIDENT))
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-	if (likely(!vcpu->kvm->arch.migration_state)) {
+	if (!vcpu->kvm->arch.migration_mode) {
 		/*
 		 * CMMA is enabled in the KVM settings, but is disabled in
 		 * the SIE block and in the mm_context, and we are not doing
@@ -1136,10 +1138,16 @@ static int handle_essa(struct kvm_vcpu *vcpu)
 		/* Retry the ESSA instruction */
 		kvm_s390_retry_instr(vcpu);
 	} else {
-		/* Account for the possible extra cbrl entry */
-		i = do_essa(vcpu, orc);
+		int srcu_idx;
+
+		down_read(&vcpu->kvm->mm->mmap_sem);
+		srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+		i = __do_essa(vcpu, orc);
+		srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
+		up_read(&vcpu->kvm->mm->mmap_sem);
 		if (i < 0)
 			return i;
+		/* Account for the possible extra cbrl entry */
 		entries += i;
 	}
 	vcpu->arch.sie_block->cbrlo &= PAGE_MASK;	/* reset nceo */
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 84c89cb9636f..63844b95c22c 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -2,7 +2,7 @@
 /*
  * kvm nested virtualization support for s390x
  *
- * Copyright IBM Corp. 2016
+ * Copyright IBM Corp. 2016, 2018
  *
  *    Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com>
  */
@@ -378,6 +378,10 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 	if (test_kvm_facility(vcpu->kvm, 139))
 		scb_s->ecd |= scb_o->ecd & ECD_MEF;
 
+	/* etoken */
+	if (test_kvm_facility(vcpu->kvm, 156))
+		scb_s->ecd |= scb_o->ecd & ECD_ETOKENF;
+
 	prepare_ibc(vcpu, vsie_page);
 	rc = shadow_crycb(vcpu, vsie_page);
 out:
@@ -627,7 +631,8 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 		vsie_page->riccbd_gpa = gpa;
 		scb_s->riccbd = hpa;
 	}
-	if ((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) {
+	if (((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) ||
+	    (scb_s->ecd & ECD_ETOKENF)) {
 		unsigned long sdnxc;
 
 		gpa = READ_ONCE(scb_o->sdnxo) & ~0xfUL;
@@ -818,6 +823,8 @@ static int handle_stfle(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
  *          - < 0 if an error occurred
  */
 static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+	__releases(vcpu->kvm->srcu)
+	__acquires(vcpu->kvm->srcu)
 {
 	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
 	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c
index 90a8c9e84ca6..0c85aedcf9b3 100644
--- a/arch/s390/tools/gen_facilities.c
+++ b/arch/s390/tools/gen_facilities.c
@@ -4,7 +4,7 @@
  * numbering scheme from the Princples of Operations: most significant bit
  * has bit number 0.
  *
- *    Copyright IBM Corp. 2015
+ *    Copyright IBM Corp. 2015, 2018
  *
  */
 
@@ -106,6 +106,7 @@ static struct facility_def facility_defs[] = {
 
 		.name = "FACILITIES_KVM_CPUMODEL",
 		.bits = (int[]){
+			156, /* etoken facility */
 			-1  /* END */
 		}
 	},
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 2d58c26bff9a..e6f2a38d2e61 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -45,9 +45,13 @@ config SPARC
 	select LOCKDEP_SMALL if LOCKDEP
 	select NEED_DMA_MAP_STATE
 	select NEED_SG_DMA_LENGTH
+	select HAVE_MEMBLOCK
+	select NO_BOOTMEM
 
 config SPARC32
 	def_bool !64BIT
+	select ARCH_HAS_SYNC_DMA_FOR_CPU
+	select DMA_NONCOHERENT_OPS
 	select GENERIC_ATOMIC64
 	select CLZ_TAB
 	select HAVE_UID16
@@ -60,7 +64,6 @@ config SPARC64
 	select HAVE_KRETPROBES
 	select HAVE_KPROBES
 	select HAVE_RCU_TABLE_FREE if SMP
-	select HAVE_MEMBLOCK
 	select HAVE_MEMBLOCK_NODE_MAP
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
 	select HAVE_DYNAMIC_FTRACE
@@ -79,7 +82,6 @@ config SPARC64
 	select IRQ_PREFLOW_FASTEOI
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select HAVE_C_RECORDMCOUNT
-	select NO_BOOTMEM
 	select HAVE_ARCH_AUDITSYSCALL
 	select ARCH_SUPPORTS_ATOMIC_RMW
 	select HAVE_NMI
diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile
index 966a13d2b127..e32ef20de567 100644
--- a/arch/sparc/Makefile
+++ b/arch/sparc/Makefile
@@ -9,10 +9,10 @@
 # Copyright (C) 1998 Jakub Jelinek (jj@ultra.linux.cz)
 
 # We are not yet configured - so test on arch
-ifeq ($(ARCH),sparc)
-        KBUILD_DEFCONFIG := sparc32_defconfig
-else
+ifeq ($(ARCH),sparc64)
         KBUILD_DEFCONFIG := sparc64_defconfig
+else
+        KBUILD_DEFCONFIG := sparc32_defconfig
 endif
 
 ifeq ($(CONFIG_SPARC32),y)
diff --git a/arch/sparc/include/asm/dma-mapping.h b/arch/sparc/include/asm/dma-mapping.h
index 12ae33daf52f..e17566376934 100644
--- a/arch/sparc/include/asm/dma-mapping.h
+++ b/arch/sparc/include/asm/dma-mapping.h
@@ -7,7 +7,6 @@
 #include <linux/dma-debug.h>
 
 extern const struct dma_map_ops *dma_ops;
-extern const struct dma_map_ops pci32_dma_ops;
 
 extern struct bus_type pci_bus_type;
 
@@ -15,11 +14,11 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
 {
 #ifdef CONFIG_SPARC_LEON
 	if (sparc_cpu_model == sparc_leon)
-		return &pci32_dma_ops;
+		return &dma_noncoherent_ops;
 #endif
 #if defined(CONFIG_SPARC32) && defined(CONFIG_PCI)
 	if (bus == &pci_bus_type)
-		return &pci32_dma_ops;
+		return &dma_noncoherent_ops;
 #endif
 	return dma_ops;
 }
diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c
index cca9134cfa7d..6799c93c9f27 100644
--- a/arch/sparc/kernel/ioport.c
+++ b/arch/sparc/kernel/ioport.c
@@ -38,6 +38,7 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/scatterlist.h>
+#include <linux/dma-noncoherent.h>
 #include <linux/of_device.h>
 
 #include <asm/io.h>
@@ -434,42 +435,41 @@ arch_initcall(sparc_register_ioport);
 /* Allocate and map kernel buffer using consistent mode DMA for a device.
  * hwdev should be valid struct pci_dev pointer for PCI devices.
  */
-static void *pci32_alloc_coherent(struct device *dev, size_t len,
-				  dma_addr_t *pba, gfp_t gfp,
-				  unsigned long attrs)
+void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
+		gfp_t gfp, unsigned long attrs)
 {
-	unsigned long len_total = PAGE_ALIGN(len);
+	unsigned long len_total = PAGE_ALIGN(size);
 	void *va;
 	struct resource *res;
 	int order;
 
-	if (len == 0) {
+	if (size == 0) {
 		return NULL;
 	}
-	if (len > 256*1024) {			/* __get_free_pages() limit */
+	if (size > 256*1024) {			/* __get_free_pages() limit */
 		return NULL;
 	}
 
 	order = get_order(len_total);
 	va = (void *) __get_free_pages(gfp, order);
 	if (va == NULL) {
-		printk("pci_alloc_consistent: no %ld pages\n", len_total>>PAGE_SHIFT);
+		printk("%s: no %ld pages\n", __func__, len_total>>PAGE_SHIFT);
 		goto err_nopages;
 	}
 
 	if ((res = kzalloc(sizeof(struct resource), GFP_KERNEL)) == NULL) {
-		printk("pci_alloc_consistent: no core\n");
+		printk("%s: no core\n", __func__);
 		goto err_nomem;
 	}
 
 	if (allocate_resource(&_sparc_dvma, res, len_total,
 	    _sparc_dvma.start, _sparc_dvma.end, PAGE_SIZE, NULL, NULL) != 0) {
-		printk("pci_alloc_consistent: cannot occupy 0x%lx", len_total);
+		printk("%s: cannot occupy 0x%lx", __func__, len_total);
 		goto err_nova;
 	}
 	srmmu_mapiorange(0, virt_to_phys(va), res->start, len_total);
 
-	*pba = virt_to_phys(va); /* equals virt_to_bus (R.I.P.) for us. */
+	*dma_handle = virt_to_phys(va);
 	return (void *) res->start;
 
 err_nova:
@@ -481,184 +481,53 @@ err_nopages:
 }
 
 /* Free and unmap a consistent DMA buffer.
- * cpu_addr is what was returned from pci_alloc_consistent,
- * size must be the same as what as passed into pci_alloc_consistent,
- * and likewise dma_addr must be the same as what *dma_addrp was set to.
+ * cpu_addr is what was returned arch_dma_alloc, size must be the same as what
+ * was passed into arch_dma_alloc, and likewise dma_addr must be the same as
+ * what *dma_ndler was set to.
  *
  * References to the memory and mappings associated with cpu_addr/dma_addr
  * past this call are illegal.
  */
-static void pci32_free_coherent(struct device *dev, size_t n, void *p,
-				dma_addr_t ba, unsigned long attrs)
+void arch_dma_free(struct device *dev, size_t size, void *cpu_addr,
+		dma_addr_t dma_addr, unsigned long attrs)
 {
 	struct resource *res;
 
 	if ((res = lookup_resource(&_sparc_dvma,
-	    (unsigned long)p)) == NULL) {
-		printk("pci_free_consistent: cannot free %p\n", p);
+	    (unsigned long)cpu_addr)) == NULL) {
+		printk("%s: cannot free %p\n", __func__, cpu_addr);
 		return;
 	}
 
-	if (((unsigned long)p & (PAGE_SIZE-1)) != 0) {
-		printk("pci_free_consistent: unaligned va %p\n", p);
+	if (((unsigned long)cpu_addr & (PAGE_SIZE-1)) != 0) {
+		printk("%s: unaligned va %p\n", __func__, cpu_addr);
 		return;
 	}
 
-	n = PAGE_ALIGN(n);
-	if (resource_size(res) != n) {
-		printk("pci_free_consistent: region 0x%lx asked 0x%lx\n",
-		    (long)resource_size(res), (long)n);
+	size = PAGE_ALIGN(size);
+	if (resource_size(res) != size) {
+		printk("%s: region 0x%lx asked 0x%zx\n", __func__,
+		    (long)resource_size(res), size);
 		return;
 	}
 
-	dma_make_coherent(ba, n);
-	srmmu_unmapiorange((unsigned long)p, n);
+	dma_make_coherent(dma_addr, size);
+	srmmu_unmapiorange((unsigned long)cpu_addr, size);
 
 	release_resource(res);
 	kfree(res);
-	free_pages((unsigned long)phys_to_virt(ba), get_order(n));
-}
-
-/*
- * Same as pci_map_single, but with pages.
- */
-static dma_addr_t pci32_map_page(struct device *dev, struct page *page,
-				 unsigned long offset, size_t size,
-				 enum dma_data_direction dir,
-				 unsigned long attrs)
-{
-	/* IIep is write-through, not flushing. */
-	return page_to_phys(page) + offset;
-}
-
-static void pci32_unmap_page(struct device *dev, dma_addr_t ba, size_t size,
-			     enum dma_data_direction dir, unsigned long attrs)
-{
-	if (dir != PCI_DMA_TODEVICE && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-		dma_make_coherent(ba, PAGE_ALIGN(size));
-}
-
-/* Map a set of buffers described by scatterlist in streaming
- * mode for DMA.  This is the scatter-gather version of the
- * above pci_map_single interface.  Here the scatter gather list
- * elements are each tagged with the appropriate dma address
- * and length.  They are obtained via sg_dma_{address,length}(SG).
- *
- * NOTE: An implementation may be able to use a smaller number of
- *       DMA address/length pairs than there are SG table elements.
- *       (for example via virtual mapping capabilities)
- *       The routine returns the number of addr/length pairs actually
- *       used, at most nents.
- *
- * Device ownership issues as mentioned above for pci_map_single are
- * the same here.
- */
-static int pci32_map_sg(struct device *device, struct scatterlist *sgl,
-			int nents, enum dma_data_direction dir,
-			unsigned long attrs)
-{
-	struct scatterlist *sg;
-	int n;
-
-	/* IIep is write-through, not flushing. */
-	for_each_sg(sgl, sg, nents, n) {
-		sg->dma_address = sg_phys(sg);
-		sg->dma_length = sg->length;
-	}
-	return nents;
-}
-
-/* Unmap a set of streaming mode DMA translations.
- * Again, cpu read rules concerning calls here are the same as for
- * pci_unmap_single() above.
- */
-static void pci32_unmap_sg(struct device *dev, struct scatterlist *sgl,
-			   int nents, enum dma_data_direction dir,
-			   unsigned long attrs)
-{
-	struct scatterlist *sg;
-	int n;
-
-	if (dir != PCI_DMA_TODEVICE && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
-		for_each_sg(sgl, sg, nents, n) {
-			dma_make_coherent(sg_phys(sg), PAGE_ALIGN(sg->length));
-		}
-	}
-}
-
-/* Make physical memory consistent for a single
- * streaming mode DMA translation before or after a transfer.
- *
- * If you perform a pci_map_single() but wish to interrogate the
- * buffer using the cpu, yet do not wish to teardown the PCI dma
- * mapping, you must call this function before doing so.  At the
- * next point you give the PCI dma address back to the card, you
- * must first perform a pci_dma_sync_for_device, and then the
- * device again owns the buffer.
- */
-static void pci32_sync_single_for_cpu(struct device *dev, dma_addr_t ba,
-				      size_t size, enum dma_data_direction dir)
-{
-	if (dir != PCI_DMA_TODEVICE) {
-		dma_make_coherent(ba, PAGE_ALIGN(size));
-	}
-}
-
-static void pci32_sync_single_for_device(struct device *dev, dma_addr_t ba,
-					 size_t size, enum dma_data_direction dir)
-{
-	if (dir != PCI_DMA_TODEVICE) {
-		dma_make_coherent(ba, PAGE_ALIGN(size));
-	}
+	free_pages((unsigned long)phys_to_virt(dma_addr), get_order(size));
 }
 
-/* Make physical memory consistent for a set of streaming
- * mode DMA translations after a transfer.
- *
- * The same as pci_dma_sync_single_* but for a scatter-gather list,
- * same rules and usage.
- */
-static void pci32_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl,
-				  int nents, enum dma_data_direction dir)
-{
-	struct scatterlist *sg;
-	int n;
-
-	if (dir != PCI_DMA_TODEVICE) {
-		for_each_sg(sgl, sg, nents, n) {
-			dma_make_coherent(sg_phys(sg), PAGE_ALIGN(sg->length));
-		}
-	}
-}
+/* IIep is write-through, not flushing on cpu to device transfer. */
 
-static void pci32_sync_sg_for_device(struct device *device, struct scatterlist *sgl,
-				     int nents, enum dma_data_direction dir)
+void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
+		size_t size, enum dma_data_direction dir)
 {
-	struct scatterlist *sg;
-	int n;
-
-	if (dir != PCI_DMA_TODEVICE) {
-		for_each_sg(sgl, sg, nents, n) {
-			dma_make_coherent(sg_phys(sg), PAGE_ALIGN(sg->length));
-		}
-	}
+	if (dir != PCI_DMA_TODEVICE)
+		dma_make_coherent(paddr, PAGE_ALIGN(size));
 }
 
-/* note: leon re-uses pci32_dma_ops */
-const struct dma_map_ops pci32_dma_ops = {
-	.alloc			= pci32_alloc_coherent,
-	.free			= pci32_free_coherent,
-	.map_page		= pci32_map_page,
-	.unmap_page		= pci32_unmap_page,
-	.map_sg			= pci32_map_sg,
-	.unmap_sg		= pci32_unmap_sg,
-	.sync_single_for_cpu	= pci32_sync_single_for_cpu,
-	.sync_single_for_device	= pci32_sync_single_for_device,
-	.sync_sg_for_cpu	= pci32_sync_sg_for_cpu,
-	.sync_sg_for_device	= pci32_sync_sg_for_device,
-};
-EXPORT_SYMBOL(pci32_dma_ops);
-
 const struct dma_map_ops *dma_ops = &sbus_dma_ops;
 EXPORT_SYMBOL(dma_ops);
 
diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c
index 95fe4f081ba3..92634d4e440c 100644
--- a/arch/sparc/mm/init_32.c
+++ b/arch/sparc/mm/init_32.c
@@ -23,6 +23,7 @@
 #include <linux/init.h>
 #include <linux/highmem.h>
 #include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/pagemap.h>
 #include <linux/poison.h>
 #include <linux/gfp.h>
@@ -101,13 +102,46 @@ static unsigned long calc_max_low_pfn(void)
 	return tmp;
 }
 
+static void __init find_ramdisk(unsigned long end_of_phys_memory)
+{
+#ifdef CONFIG_BLK_DEV_INITRD
+	unsigned long size;
+
+	/* Now have to check initial ramdisk, so that it won't pass
+	 * the end of memory
+	 */
+	if (sparc_ramdisk_image) {
+		if (sparc_ramdisk_image >= (unsigned long)&_end - 2 * PAGE_SIZE)
+			sparc_ramdisk_image -= KERNBASE;
+		initrd_start = sparc_ramdisk_image + phys_base;
+		initrd_end = initrd_start + sparc_ramdisk_size;
+		if (initrd_end > end_of_phys_memory) {
+			printk(KERN_CRIT "initrd extends beyond end of memory "
+			       "(0x%016lx > 0x%016lx)\ndisabling initrd\n",
+			       initrd_end, end_of_phys_memory);
+			initrd_start = 0;
+		} else {
+			/* Reserve the initrd image area. */
+			size = initrd_end - initrd_start;
+			memblock_reserve(initrd_start, size);
+
+			initrd_start = (initrd_start - phys_base) + PAGE_OFFSET;
+			initrd_end = (initrd_end - phys_base) + PAGE_OFFSET;
+		}
+	}
+#endif
+}
+
 unsigned long __init bootmem_init(unsigned long *pages_avail)
 {
-	unsigned long bootmap_size, start_pfn;
-	unsigned long end_of_phys_memory = 0UL;
-	unsigned long bootmap_pfn, bytes_avail, size;
+	unsigned long start_pfn, bytes_avail, size;
+	unsigned long end_of_phys_memory = 0;
+	unsigned long high_pages = 0;
 	int i;
 
+	memblock_set_bottom_up(true);
+	memblock_allow_resize();
+
 	bytes_avail = 0UL;
 	for (i = 0; sp_banks[i].num_bytes != 0; i++) {
 		end_of_phys_memory = sp_banks[i].base_addr +
@@ -124,24 +158,25 @@ unsigned long __init bootmem_init(unsigned long *pages_avail)
 				if (sp_banks[i].num_bytes == 0) {
 					sp_banks[i].base_addr = 0xdeadbeef;
 				} else {
+					memblock_add(sp_banks[i].base_addr,
+						     sp_banks[i].num_bytes);
 					sp_banks[i+1].num_bytes = 0;
 					sp_banks[i+1].base_addr = 0xdeadbeef;
 				}
 				break;
 			}
 		}
+		memblock_add(sp_banks[i].base_addr, sp_banks[i].num_bytes);
 	}
 
 	/* Start with page aligned address of last symbol in kernel
-	 * image.  
+	 * image.
 	 */
 	start_pfn  = (unsigned long)__pa(PAGE_ALIGN((unsigned long) &_end));
 
 	/* Now shift down to get the real physical page frame number. */
 	start_pfn >>= PAGE_SHIFT;
 
-	bootmap_pfn = start_pfn;
-
 	max_pfn = end_of_phys_memory >> PAGE_SHIFT;
 
 	max_low_pfn = max_pfn;
@@ -150,85 +185,19 @@ unsigned long __init bootmem_init(unsigned long *pages_avail)
 	if (max_low_pfn > pfn_base + (SRMMU_MAXMEM >> PAGE_SHIFT)) {
 		highstart_pfn = pfn_base + (SRMMU_MAXMEM >> PAGE_SHIFT);
 		max_low_pfn = calc_max_low_pfn();
+		high_pages = calc_highpages();
 		printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
-		    calc_highpages() >> (20 - PAGE_SHIFT));
+		    high_pages >> (20 - PAGE_SHIFT));
 	}
 
-#ifdef CONFIG_BLK_DEV_INITRD
-	/* Now have to check initial ramdisk, so that bootmap does not overwrite it */
-	if (sparc_ramdisk_image) {
-		if (sparc_ramdisk_image >= (unsigned long)&_end - 2 * PAGE_SIZE)
-			sparc_ramdisk_image -= KERNBASE;
-		initrd_start = sparc_ramdisk_image + phys_base;
-		initrd_end = initrd_start + sparc_ramdisk_size;
-		if (initrd_end > end_of_phys_memory) {
-			printk(KERN_CRIT "initrd extends beyond end of memory "
-		                 	 "(0x%016lx > 0x%016lx)\ndisabling initrd\n",
-			       initrd_end, end_of_phys_memory);
-			initrd_start = 0;
-		}
-		if (initrd_start) {
-			if (initrd_start >= (start_pfn << PAGE_SHIFT) &&
-			    initrd_start < (start_pfn << PAGE_SHIFT) + 2 * PAGE_SIZE)
-				bootmap_pfn = PAGE_ALIGN (initrd_end) >> PAGE_SHIFT;
-		}
-	}
-#endif	
-	/* Initialize the boot-time allocator. */
-	bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap_pfn, pfn_base,
-					 max_low_pfn);
-
-	/* Now register the available physical memory with the
-	 * allocator.
-	 */
-	*pages_avail = 0;
-	for (i = 0; sp_banks[i].num_bytes != 0; i++) {
-		unsigned long curr_pfn, last_pfn;
+	find_ramdisk(end_of_phys_memory);
 
-		curr_pfn = sp_banks[i].base_addr >> PAGE_SHIFT;
-		if (curr_pfn >= max_low_pfn)
-			break;
-
-		last_pfn = (sp_banks[i].base_addr + sp_banks[i].num_bytes) >> PAGE_SHIFT;
-		if (last_pfn > max_low_pfn)
-			last_pfn = max_low_pfn;
-
-		/*
-		 * .. finally, did all the rounding and playing
-		 * around just make the area go away?
-		 */
-		if (last_pfn <= curr_pfn)
-			continue;
-
-		size = (last_pfn - curr_pfn) << PAGE_SHIFT;
-		*pages_avail += last_pfn - curr_pfn;
-
-		free_bootmem(sp_banks[i].base_addr, size);
-	}
-
-#ifdef CONFIG_BLK_DEV_INITRD
-	if (initrd_start) {
-		/* Reserve the initrd image area. */
-		size = initrd_end - initrd_start;
-		reserve_bootmem(initrd_start, size, BOOTMEM_DEFAULT);
-		*pages_avail -= PAGE_ALIGN(size) >> PAGE_SHIFT;
-
-		initrd_start = (initrd_start - phys_base) + PAGE_OFFSET;
-		initrd_end = (initrd_end - phys_base) + PAGE_OFFSET;		
-	}
-#endif
 	/* Reserve the kernel text/data/bss. */
 	size = (start_pfn << PAGE_SHIFT) - phys_base;
-	reserve_bootmem(phys_base, size, BOOTMEM_DEFAULT);
-	*pages_avail -= PAGE_ALIGN(size) >> PAGE_SHIFT;
+	memblock_reserve(phys_base, size);
 
-	/* Reserve the bootmem map.   We do not account for it
-	 * in pages_avail because we will release that memory
-	 * in free_all_bootmem.
-	 */
-	size = bootmap_size;
-	reserve_bootmem((bootmap_pfn << PAGE_SHIFT), size, BOOTMEM_DEFAULT);
-	*pages_avail -= PAGE_ALIGN(size) >> PAGE_SHIFT;
+	size = memblock_phys_mem_size() - memblock_reserved_size();
+	*pages_avail = (size >> PAGE_SHIFT) - high_pages;
 
 	return max_pfn;
 }
@@ -322,7 +291,7 @@ void __init mem_init(void)
 
 		map_high_region(start_pfn, end_pfn);
 	}
-	
+
 	mem_init_print_info(NULL);
 }
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b0312f8947ce..512003f16889 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -124,6 +124,7 @@ config X86
 	select HAVE_ARCH_MMAP_RND_BITS		if MMU
 	select HAVE_ARCH_MMAP_RND_COMPAT_BITS	if MMU && COMPAT
 	select HAVE_ARCH_COMPAT_MMAP_BASES	if MMU && COMPAT
+	select HAVE_ARCH_PREL32_RELOCATIONS
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_THREAD_STRUCT_WHITELIST
 	select HAVE_ARCH_TRACEHOOK
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index 302517929932..d1e19f358b6e 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -23,11 +23,8 @@
  * _ctype[] in lib/ctype.c is needed by isspace() of linux/ctype.h.
  * While both lib/ctype.c and lib/cmdline.c will bring EXPORT_SYMBOL
  * which is meaningless and will cause compiling error in some cases.
- * So do not include linux/export.h and define EXPORT_SYMBOL(sym)
- * as empty.
  */
-#define _LINUX_EXPORT_H
-#define EXPORT_SYMBOL(sym)
+#define __DISABLE_EXPORTS
 
 #include "misc.h"
 #include "error.h"
diff --git a/arch/x86/hyperv/Makefile b/arch/x86/hyperv/Makefile
index b173d404e3df..b21ee65c4101 100644
--- a/arch/x86/hyperv/Makefile
+++ b/arch/x86/hyperv/Makefile
@@ -1,2 +1,2 @@
-obj-y			:= hv_init.o mmu.o
+obj-y			:= hv_init.o mmu.o nested.o
 obj-$(CONFIG_X86_64)	+= hv_apic.o
diff --git a/arch/x86/hyperv/nested.c b/arch/x86/hyperv/nested.c
new file mode 100644
index 000000000000..b8e60cc50461
--- /dev/null
+++ b/arch/x86/hyperv/nested.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Hyper-V nested virtualization code.
+ *
+ * Copyright (C) 2018, Microsoft, Inc.
+ *
+ * Author : Lan Tianyu <Tianyu.Lan@microsoft.com>
+ */
+
+
+#include <linux/types.h>
+#include <asm/hyperv-tlfs.h>
+#include <asm/mshyperv.h>
+#include <asm/tlbflush.h>
+
+#include <asm/trace/hyperv.h>
+
+int hyperv_flush_guest_mapping(u64 as)
+{
+	struct hv_guest_mapping_flush **flush_pcpu;
+	struct hv_guest_mapping_flush *flush;
+	u64 status;
+	unsigned long flags;
+	int ret = -ENOTSUPP;
+
+	if (!hv_hypercall_pg)
+		goto fault;
+
+	local_irq_save(flags);
+
+	flush_pcpu = (struct hv_guest_mapping_flush **)
+		this_cpu_ptr(hyperv_pcpu_input_arg);
+
+	flush = *flush_pcpu;
+
+	if (unlikely(!flush)) {
+		local_irq_restore(flags);
+		goto fault;
+	}
+
+	flush->address_space = as;
+	flush->flags = 0;
+
+	status = hv_do_hypercall(HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE,
+				 flush, NULL);
+	local_irq_restore(flags);
+
+	if (!(status & HV_HYPERCALL_RESULT_MASK))
+		ret = 0;
+
+fault:
+	trace_hyperv_nested_flush_guest_mapping(as, ret);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(hyperv_flush_guest_mapping);
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index de690c2d2e33..a0ab9ab61c75 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -8,5 +8,6 @@ generated-y += xen-hypercalls.h
 
 generic-y += dma-contiguous.h
 generic-y += early_ioremap.h
+generic-y += export.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
diff --git a/arch/x86/include/asm/export.h b/arch/x86/include/asm/export.h
deleted file mode 100644
index 2a51d66689c5..000000000000
--- a/arch/x86/include/asm/export.h
+++ /dev/null
@@ -1,5 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifdef CONFIG_64BIT
-#define KSYM_ALIGN 16
-#endif
-#include <asm-generic/export.h>
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 6ced78af48da..e977b6b3a538 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -310,6 +310,7 @@ struct ms_hyperv_tsc_page {
 #define HV_X64_MSR_REENLIGHTENMENT_CONTROL	0x40000106
 
 /* Nested features (CPUID 0x4000000A) EAX */
+#define HV_X64_NESTED_GUEST_MAPPING_FLUSH	BIT(18)
 #define HV_X64_NESTED_MSR_BITMAP		BIT(19)
 
 struct hv_reenlightenment_control {
@@ -351,6 +352,7 @@ struct hv_tsc_emulation_status {
 #define HVCALL_SEND_IPI_EX			0x0015
 #define HVCALL_POST_MESSAGE			0x005c
 #define HVCALL_SIGNAL_EVENT			0x005d
+#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
 
 #define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE	0x00000001
 #define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT	12
@@ -742,6 +744,12 @@ struct ipi_arg_ex {
 	struct hv_vpset vp_set;
 };
 
+/* HvFlushGuestPhysicalAddressSpace hypercalls */
+struct hv_guest_mapping_flush {
+	u64 address_space;
+	u64 flags;
+};
+
 /* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
 struct hv_tlb_flush {
 	u64 address_space;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index acebb808c4b5..00ddb0c9e612 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -55,6 +55,7 @@
 #define KVM_REQ_TRIPLE_FAULT		KVM_ARCH_REQ(2)
 #define KVM_REQ_MMU_SYNC		KVM_ARCH_REQ(3)
 #define KVM_REQ_CLOCK_UPDATE		KVM_ARCH_REQ(4)
+#define KVM_REQ_LOAD_CR3		KVM_ARCH_REQ(5)
 #define KVM_REQ_EVENT			KVM_ARCH_REQ(6)
 #define KVM_REQ_APF_HALT		KVM_ARCH_REQ(7)
 #define KVM_REQ_STEAL_UPDATE		KVM_ARCH_REQ(8)
@@ -76,13 +77,13 @@
 #define KVM_REQ_HV_EXIT			KVM_ARCH_REQ(21)
 #define KVM_REQ_HV_STIMER		KVM_ARCH_REQ(22)
 #define KVM_REQ_LOAD_EOI_EXITMAP	KVM_ARCH_REQ(23)
+#define KVM_REQ_GET_VMCS12_PAGES	KVM_ARCH_REQ(24)
 
 #define CR0_RESERVED_BITS                                               \
 	(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
 			  | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
 			  | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
 
-#define CR3_PCID_INVD		 BIT_64(63)
 #define CR4_RESERVED_BITS                                               \
 	(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
 			  | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE     \
@@ -326,6 +327,16 @@ struct rsvd_bits_validate {
 	u64 bad_mt_xwr;
 };
 
+struct kvm_mmu_root_info {
+	gpa_t cr3;
+	hpa_t hpa;
+};
+
+#define KVM_MMU_ROOT_INFO_INVALID \
+	((struct kvm_mmu_root_info) { .cr3 = INVALID_PAGE, .hpa = INVALID_PAGE })
+
+#define KVM_MMU_NUM_PREV_ROOTS 3
+
 /*
  * x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit,
  * and 2-level 32-bit).  The kvm_mmu structure abstracts the details of the
@@ -345,7 +356,7 @@ struct kvm_mmu {
 			       struct x86_exception *exception);
 	int (*sync_page)(struct kvm_vcpu *vcpu,
 			 struct kvm_mmu_page *sp);
-	void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva);
+	void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa);
 	void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 			   u64 *spte, const void *pte);
 	hpa_t root_hpa;
@@ -354,6 +365,7 @@ struct kvm_mmu {
 	u8 shadow_root_level;
 	u8 ept_ad;
 	bool direct_map;
+	struct kvm_mmu_root_info prev_roots[KVM_MMU_NUM_PREV_ROOTS];
 
 	/*
 	 * Bitmap; bit set = permission fault
@@ -978,6 +990,15 @@ struct kvm_x86_ops {
 	void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
 
 	void (*tlb_flush)(struct kvm_vcpu *vcpu, bool invalidate_gpa);
+	int  (*tlb_remote_flush)(struct kvm *kvm);
+
+	/*
+	 * Flush any TLB entries associated with the given GVA.
+	 * Does not need to flush GPA->HPA mappings.
+	 * Can potentially get non-canonical addresses through INVLPGs, which
+	 * the implementation may choose to ignore if appropriate.
+	 */
+	void (*tlb_flush_gva)(struct kvm_vcpu *vcpu, gva_t addr);
 
 	void (*run)(struct kvm_vcpu *vcpu);
 	int (*handle_exit)(struct kvm_vcpu *vcpu);
@@ -1090,6 +1111,14 @@ struct kvm_x86_ops {
 
 	void (*setup_mce)(struct kvm_vcpu *vcpu);
 
+	int (*get_nested_state)(struct kvm_vcpu *vcpu,
+				struct kvm_nested_state __user *user_kvm_nested_state,
+				unsigned user_data_size);
+	int (*set_nested_state)(struct kvm_vcpu *vcpu,
+				struct kvm_nested_state __user *user_kvm_nested_state,
+				struct kvm_nested_state *kvm_state);
+	void (*get_vmcs12_pages)(struct kvm_vcpu *vcpu);
+
 	int (*smi_allowed)(struct kvm_vcpu *vcpu);
 	int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
 	int (*pre_leave_smm)(struct kvm_vcpu *vcpu, u64 smbase);
@@ -1122,6 +1151,16 @@ static inline void kvm_arch_free_vm(struct kvm *kvm)
 	return kvm_x86_ops->vm_free(kvm);
 }
 
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
+static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
+{
+	if (kvm_x86_ops->tlb_remote_flush &&
+	    !kvm_x86_ops->tlb_remote_flush(kvm))
+		return 0;
+	else
+		return -ENOTSUPP;
+}
+
 int kvm_mmu_module_init(void);
 void kvm_mmu_module_exit(void);
 
@@ -1273,6 +1312,10 @@ static inline int __kvm_irq_line_state(unsigned long *irq_state,
 	return !!(*irq_state);
 }
 
+#define KVM_MMU_ROOT_CURRENT		BIT(0)
+#define KVM_MMU_ROOT_PREVIOUS(i)	BIT(1+i)
+#define KVM_MMU_ROOTS_ALL		(~0UL)
+
 int kvm_pic_set_irq(struct kvm_pic *pic, int irq, int irq_source_id, int level);
 void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id);
 
@@ -1284,7 +1327,7 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
 int kvm_mmu_load(struct kvm_vcpu *vcpu);
 void kvm_mmu_unload(struct kvm_vcpu *vcpu);
 void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
-void kvm_mmu_free_roots(struct kvm_vcpu *vcpu);
+void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, ulong roots_to_free);
 gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
 			   struct x86_exception *exception);
 gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
@@ -1303,7 +1346,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
 int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u64 error_code,
 		       void *insn, int insn_len);
 void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
-void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu);
+void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid);
+void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, bool skip_tlb_flush);
 
 void kvm_enable_tdp(void);
 void kvm_disable_tdp(void);
@@ -1418,6 +1462,10 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
 void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
 void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu);
 
+int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
+    		    unsigned long ipi_bitmap_high, int min,
+		    unsigned long icr, int op_64_bit);
+
 u64 kvm_get_arch_capabilities(void);
 void kvm_define_shared_msr(unsigned index, u32 msr);
 int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index e1771a1987dd..f37704497d8f 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -347,6 +347,7 @@ void hyperv_reenlightenment_intr(struct pt_regs *regs);
 void set_hv_tscchange_cb(void (*cb)(void));
 void clear_hv_tscchange_cb(void);
 void hyperv_stop_tsc_emulation(void);
+int hyperv_flush_guest_mapping(u64 as);
 
 #ifdef CONFIG_X86_64
 void hv_apic_init(void);
@@ -366,6 +367,7 @@ static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu)
 {
 	return NULL;
 }
+static inline int hyperv_flush_guest_mapping(u64 as) { return -1; }
 #endif /* CONFIG_HYPERV */
 
 #ifdef CONFIG_HYPERV_TSCPAGE
diff --git a/arch/x86/include/asm/trace/hyperv.h b/arch/x86/include/asm/trace/hyperv.h
index 9c0d4b588e3f..2e6245a023ef 100644
--- a/arch/x86/include/asm/trace/hyperv.h
+++ b/arch/x86/include/asm/trace/hyperv.h
@@ -28,6 +28,20 @@ TRACE_EVENT(hyperv_mmu_flush_tlb_others,
 		      __entry->addr, __entry->end)
 	);
 
+TRACE_EVENT(hyperv_nested_flush_guest_mapping,
+	    TP_PROTO(u64 as, int ret),
+	    TP_ARGS(as, ret),
+
+	    TP_STRUCT__entry(
+		    __field(u64, as)
+		    __field(int, ret)
+		    ),
+	    TP_fast_assign(__entry->as = as;
+			   __entry->ret = ret;
+		    ),
+	    TP_printk("address space %llx ret %d", __entry->as, __entry->ret)
+	);
+
 TRACE_EVENT(hyperv_send_ipi_mask,
 	    TP_PROTO(const struct cpumask *cpus,
 		     int vector),
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 95f9107449bf..9527ba5d62da 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -74,6 +74,7 @@
 #define SECONDARY_EXEC_ENABLE_INVPCID		0x00001000
 #define SECONDARY_EXEC_ENABLE_VMFUNC            0x00002000
 #define SECONDARY_EXEC_SHADOW_VMCS              0x00004000
+#define SECONDARY_EXEC_ENCLS_EXITING		0x00008000
 #define SECONDARY_EXEC_RDSEED_EXITING		0x00010000
 #define SECONDARY_EXEC_ENABLE_PML               0x00020000
 #define SECONDARY_EXEC_XSAVES			0x00100000
@@ -213,6 +214,8 @@ enum vmcs_field {
 	VMWRITE_BITMAP_HIGH             = 0x00002029,
 	XSS_EXIT_BITMAP                 = 0x0000202C,
 	XSS_EXIT_BITMAP_HIGH            = 0x0000202D,
+	ENCLS_EXITING_BITMAP		= 0x0000202E,
+	ENCLS_EXITING_BITMAP_HIGH	= 0x0000202F,
 	TSC_MULTIPLIER                  = 0x00002032,
 	TSC_MULTIPLIER_HIGH             = 0x00002033,
 	GUEST_PHYSICAL_ADDRESS          = 0x00002400,
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index c535c2fdea13..86299efa804a 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -378,4 +378,41 @@ struct kvm_sync_regs {
 #define KVM_X86_QUIRK_LINT0_REENABLED	(1 << 0)
 #define KVM_X86_QUIRK_CD_NW_CLEARED	(1 << 1)
 
+#define KVM_STATE_NESTED_GUEST_MODE	0x00000001
+#define KVM_STATE_NESTED_RUN_PENDING	0x00000002
+
+#define KVM_STATE_NESTED_SMM_GUEST_MODE	0x00000001
+#define KVM_STATE_NESTED_SMM_VMXON	0x00000002
+
+struct kvm_vmx_nested_state {
+	__u64 vmxon_pa;
+	__u64 vmcs_pa;
+
+	struct {
+		__u16 flags;
+	} smm;
+};
+
+/* for KVM_CAP_NESTED_STATE */
+struct kvm_nested_state {
+	/* KVM_STATE_* flags */
+	__u16 flags;
+
+	/* 0 for VMX, 1 for SVM.  */
+	__u16 format;
+
+	/* 128 for SVM, 128 + VMCS size for VMX.  */
+	__u32 size;
+
+	union {
+		/* VMXON, VMCS */
+		struct kvm_vmx_nested_state vmx;
+
+		/* Pad the header to 128 bytes.  */
+		__u8 pad[120];
+	};
+
+	__u8 data[0];
+};
+
 #endif /* _ASM_X86_KVM_H */
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 0ede697c3961..19980ec1a316 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -28,6 +28,7 @@
 #define KVM_FEATURE_PV_UNHALT		7
 #define KVM_FEATURE_PV_TLB_FLUSH	9
 #define KVM_FEATURE_ASYNC_PF_VMEXIT	10
+#define KVM_FEATURE_PV_SEND_IPI	11
 
 #define KVM_HINTS_REALTIME      0
 
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index dde437f5d14f..158ad1483c43 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -108,7 +108,7 @@ static long acpi_processor_ffh_cstate_probe_cpu(void *_cx)
 			cx->type);
 	}
 	snprintf(cx->desc,
-			ACPI_CX_DESC_LEN, "ACPI FFH INTEL MWAIT 0x%x",
+			ACPI_CX_DESC_LEN, "ACPI FFH MWAIT 0x%x",
 			cx->address);
 out:
 	return retval;
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 09aaabb2bbf1..0f471bd93417 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -444,6 +444,98 @@ static void __init sev_map_percpu_data(void)
 }
 
 #ifdef CONFIG_SMP
+#define KVM_IPI_CLUSTER_SIZE	(2 * BITS_PER_LONG)
+
+static void __send_ipi_mask(const struct cpumask *mask, int vector)
+{
+	unsigned long flags;
+	int cpu, apic_id, icr;
+	int min = 0, max = 0;
+#ifdef CONFIG_X86_64
+	__uint128_t ipi_bitmap = 0;
+#else
+	u64 ipi_bitmap = 0;
+#endif
+
+	if (cpumask_empty(mask))
+		return;
+
+	local_irq_save(flags);
+
+	switch (vector) {
+	default:
+		icr = APIC_DM_FIXED | vector;
+		break;
+	case NMI_VECTOR:
+		icr = APIC_DM_NMI;
+		break;
+	}
+
+	for_each_cpu(cpu, mask) {
+		apic_id = per_cpu(x86_cpu_to_apicid, cpu);
+		if (!ipi_bitmap) {
+			min = max = apic_id;
+		} else if (apic_id < min && max - apic_id < KVM_IPI_CLUSTER_SIZE) {
+			ipi_bitmap <<= min - apic_id;
+			min = apic_id;
+		} else if (apic_id < min + KVM_IPI_CLUSTER_SIZE) {
+			max = apic_id < max ? max : apic_id;
+		} else {
+			kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
+				(unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
+			min = max = apic_id;
+			ipi_bitmap = 0;
+		}
+		__set_bit(apic_id - min, (unsigned long *)&ipi_bitmap);
+	}
+
+	if (ipi_bitmap) {
+		kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
+			(unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
+	}
+
+	local_irq_restore(flags);
+}
+
+static void kvm_send_ipi_mask(const struct cpumask *mask, int vector)
+{
+	__send_ipi_mask(mask, vector);
+}
+
+static void kvm_send_ipi_mask_allbutself(const struct cpumask *mask, int vector)
+{
+	unsigned int this_cpu = smp_processor_id();
+	struct cpumask new_mask;
+	const struct cpumask *local_mask;
+
+	cpumask_copy(&new_mask, mask);
+	cpumask_clear_cpu(this_cpu, &new_mask);
+	local_mask = &new_mask;
+	__send_ipi_mask(local_mask, vector);
+}
+
+static void kvm_send_ipi_allbutself(int vector)
+{
+	kvm_send_ipi_mask_allbutself(cpu_online_mask, vector);
+}
+
+static void kvm_send_ipi_all(int vector)
+{
+	__send_ipi_mask(cpu_online_mask, vector);
+}
+
+/*
+ * Set the IPI entry points
+ */
+static void kvm_setup_pv_ipi(void)
+{
+	apic->send_IPI_mask = kvm_send_ipi_mask;
+	apic->send_IPI_mask_allbutself = kvm_send_ipi_mask_allbutself;
+	apic->send_IPI_allbutself = kvm_send_ipi_allbutself;
+	apic->send_IPI_all = kvm_send_ipi_all;
+	pr_info("KVM setup pv IPIs\n");
+}
+
 static void __init kvm_smp_prepare_cpus(unsigned int max_cpus)
 {
 	native_smp_prepare_cpus(max_cpus);
@@ -611,13 +703,27 @@ static uint32_t __init kvm_detect(void)
 	return kvm_cpuid_base();
 }
 
+static void __init kvm_apic_init(void)
+{
+#if defined(CONFIG_SMP)
+	if (kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI))
+		kvm_setup_pv_ipi();
+#endif
+}
+
+static void __init kvm_init_platform(void)
+{
+	kvmclock_init();
+	x86_platform.apic_post_init = kvm_apic_init;
+}
+
 const __initconst struct hypervisor_x86 x86_hyper_kvm = {
 	.name			= "KVM",
 	.detect			= kvm_detect,
 	.type			= X86_HYPER_KVM,
-	.init.init_platform	= kvmclock_init,
 	.init.guest_late_init	= kvm_guest_init,
 	.init.x2apic_available	= kvm_para_available,
+	.init.init_platform	= kvm_init_platform,
 };
 
 static __init int activate_jump_labels(void)
@@ -736,6 +842,10 @@ void __init kvm_spinlock_init(void)
 	if (kvm_para_has_hint(KVM_HINTS_REALTIME))
 		return;
 
+	/* Don't use the pvqspinlock code if there is only 1 vCPU. */
+	if (num_possible_cpus() == 1)
+		return;
+
 	__pv_init_lock_hash();
 	pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
 	pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock);
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 7e042e3d47fd..7bcfa61375c0 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -621,7 +621,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 			     (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) |
 			     (1 << KVM_FEATURE_PV_UNHALT) |
 			     (1 << KVM_FEATURE_PV_TLB_FLUSH) |
-			     (1 << KVM_FEATURE_ASYNC_PF_VMEXIT);
+			     (1 << KVM_FEATURE_ASYNC_PF_VMEXIT) |
+			     (1 << KVM_FEATURE_PV_SEND_IPI);
 
 		if (sched_info_on())
 			entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 4c4f4263420c..106482da6388 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -4191,7 +4191,7 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt)
 				maxphyaddr = 36;
 			rsvd = rsvd_bits(maxphyaddr, 63);
 			if (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_PCIDE)
-				rsvd &= ~CR3_PCID_INVD;
+				rsvd &= ~X86_CR3_PCID_NOFLUSH;
 		}
 
 		if (new_val & rsvd)
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index af8caf965baa..01d209ab5481 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -235,7 +235,7 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
 	struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
 	int ret;
 
-	if (!synic->active)
+	if (!synic->active && !host)
 		return 1;
 
 	trace_kvm_hv_synic_set_msr(vcpu->vcpu_id, msr, data, host);
@@ -295,11 +295,12 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
 	return ret;
 }
 
-static int synic_get_msr(struct kvm_vcpu_hv_synic *synic, u32 msr, u64 *pdata)
+static int synic_get_msr(struct kvm_vcpu_hv_synic *synic, u32 msr, u64 *pdata,
+			 bool host)
 {
 	int ret;
 
-	if (!synic->active)
+	if (!synic->active && !host)
 		return 1;
 
 	ret = 0;
@@ -1014,6 +1015,11 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
 	case HV_X64_MSR_TSC_EMULATION_STATUS:
 		hv->hv_tsc_emulation_status = data;
 		break;
+	case HV_X64_MSR_TIME_REF_COUNT:
+		/* read-only, but still ignore it if host-initiated */
+		if (!host)
+			return 1;
+		break;
 	default:
 		vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n",
 			    msr, data);
@@ -1101,6 +1107,12 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
 		return stimer_set_count(vcpu_to_stimer(vcpu, timer_index),
 					data, host);
 	}
+	case HV_X64_MSR_TSC_FREQUENCY:
+	case HV_X64_MSR_APIC_FREQUENCY:
+		/* read-only, but still ignore it if host-initiated */
+		if (!host)
+			return 1;
+		break;
 	default:
 		vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n",
 			    msr, data);
@@ -1156,7 +1168,8 @@ static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	return 0;
 }
 
-static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata,
+			  bool host)
 {
 	u64 data = 0;
 	struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
@@ -1183,7 +1196,7 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	case HV_X64_MSR_SIMP:
 	case HV_X64_MSR_EOM:
 	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
-		return synic_get_msr(vcpu_to_synic(vcpu), msr, pdata);
+		return synic_get_msr(vcpu_to_synic(vcpu), msr, pdata, host);
 	case HV_X64_MSR_STIMER0_CONFIG:
 	case HV_X64_MSR_STIMER1_CONFIG:
 	case HV_X64_MSR_STIMER2_CONFIG:
@@ -1229,7 +1242,7 @@ int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
 		return kvm_hv_set_msr(vcpu, msr, data, host);
 }
 
-int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
 {
 	if (kvm_hv_msr_partition_wide(msr)) {
 		int r;
@@ -1239,7 +1252,7 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 		mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock);
 		return r;
 	} else
-		return kvm_hv_get_msr(vcpu, msr, pdata);
+		return kvm_hv_get_msr(vcpu, msr, pdata, host);
 }
 
 static __always_inline int get_sparse_bank_no(u64 valid_bank_mask, int bank_no)
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
index 837465d69c6d..d6aa969e20f1 100644
--- a/arch/x86/kvm/hyperv.h
+++ b/arch/x86/kvm/hyperv.h
@@ -48,7 +48,7 @@ static inline struct kvm_vcpu *synic_to_vcpu(struct kvm_vcpu_hv_synic *synic)
 }
 
 int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host);
-int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
+int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host);
 
 bool kvm_hv_hypercall_enabled(struct kvm *kvm);
 int kvm_hv_hypercall(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index d536d457517b..0cefba28c864 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -547,6 +547,46 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
 			irq->level, irq->trig_mode, dest_map);
 }
 
+int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
+    		    unsigned long ipi_bitmap_high, int min,
+		    unsigned long icr, int op_64_bit)
+{
+	int i;
+	struct kvm_apic_map *map;
+	struct kvm_vcpu *vcpu;
+	struct kvm_lapic_irq irq = {0};
+	int cluster_size = op_64_bit ? 64 : 32;
+	int count = 0;
+
+	irq.vector = icr & APIC_VECTOR_MASK;
+	irq.delivery_mode = icr & APIC_MODE_MASK;
+	irq.level = (icr & APIC_INT_ASSERT) != 0;
+	irq.trig_mode = icr & APIC_INT_LEVELTRIG;
+
+	if (icr & APIC_DEST_MASK)
+		return -KVM_EINVAL;
+	if (icr & APIC_SHORT_MASK)
+		return -KVM_EINVAL;
+
+	rcu_read_lock();
+	map = rcu_dereference(kvm->arch.apic_map);
+
+	/* Bits above cluster_size are masked in the caller.  */
+	for_each_set_bit(i, &ipi_bitmap_low, BITS_PER_LONG) {
+		vcpu = map->phys_map[min + i]->vcpu;
+		count += kvm_apic_set_irq(vcpu, &irq, NULL);
+	}
+
+	min += cluster_size;
+	for_each_set_bit(i, &ipi_bitmap_high, BITS_PER_LONG) {
+		vcpu = map->phys_map[min + i]->vcpu;
+		count += kvm_apic_set_irq(vcpu, &irq, NULL);
+	}
+
+	rcu_read_unlock();
+	return count;
+}
+
 static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
 {
 
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index a44e568363a4..a282321329b5 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -178,7 +178,24 @@ struct kvm_shadow_walk_iterator {
 	unsigned index;
 };
 
-#define for_each_shadow_entry(_vcpu, _addr, _walker)    \
+static const union kvm_mmu_page_role mmu_base_role_mask = {
+	.cr0_wp = 1,
+	.cr4_pae = 1,
+	.nxe = 1,
+	.smep_andnot_wp = 1,
+	.smap_andnot_wp = 1,
+	.smm = 1,
+	.guest_mode = 1,
+	.ad_disabled = 1,
+};
+
+#define for_each_shadow_entry_using_root(_vcpu, _root, _addr, _walker)     \
+	for (shadow_walk_init_using_root(&(_walker), (_vcpu),              \
+					 (_root), (_addr));                \
+	     shadow_walk_okay(&(_walker));			           \
+	     shadow_walk_next(&(_walker)))
+
+#define for_each_shadow_entry(_vcpu, _addr, _walker)            \
 	for (shadow_walk_init(&(_walker), _vcpu, _addr);	\
 	     shadow_walk_okay(&(_walker));			\
 	     shadow_walk_next(&(_walker)))
@@ -221,7 +238,20 @@ static const u64 shadow_acc_track_saved_bits_mask = PT64_EPT_READABLE_MASK |
 						    PT64_EPT_EXECUTABLE_MASK;
 static const u64 shadow_acc_track_saved_bits_shift = PT64_SECOND_AVAIL_BITS_SHIFT;
 
+/*
+ * This mask must be set on all non-zero Non-Present or Reserved SPTEs in order
+ * to guard against L1TF attacks.
+ */
+static u64 __read_mostly shadow_nonpresent_or_rsvd_mask;
+
+/*
+ * The number of high-order 1 bits to use in the mask above.
+ */
+static const u64 shadow_nonpresent_or_rsvd_mask_len = 5;
+
 static void mmu_spte_set(u64 *sptep, u64 spte);
+static union kvm_mmu_page_role
+kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu);
 
 void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value)
 {
@@ -308,9 +338,13 @@ static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn,
 {
 	unsigned int gen = kvm_current_mmio_generation(vcpu);
 	u64 mask = generation_mmio_spte_mask(gen);
+	u64 gpa = gfn << PAGE_SHIFT;
 
 	access &= ACC_WRITE_MASK | ACC_USER_MASK;
-	mask |= shadow_mmio_value | access | gfn << PAGE_SHIFT;
+	mask |= shadow_mmio_value | access;
+	mask |= gpa | shadow_nonpresent_or_rsvd_mask;
+	mask |= (gpa & shadow_nonpresent_or_rsvd_mask)
+		<< shadow_nonpresent_or_rsvd_mask_len;
 
 	trace_mark_mmio_spte(sptep, gfn, access, gen);
 	mmu_spte_set(sptep, mask);
@@ -323,8 +357,14 @@ static bool is_mmio_spte(u64 spte)
 
 static gfn_t get_mmio_spte_gfn(u64 spte)
 {
-	u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask;
-	return (spte & ~mask) >> PAGE_SHIFT;
+	u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask |
+		   shadow_nonpresent_or_rsvd_mask;
+	u64 gpa = spte & ~mask;
+
+	gpa |= (spte >> shadow_nonpresent_or_rsvd_mask_len)
+	       & shadow_nonpresent_or_rsvd_mask;
+
+	return gpa >> PAGE_SHIFT;
 }
 
 static unsigned get_mmio_spte_access(u64 spte)
@@ -381,7 +421,7 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
 
-static void kvm_mmu_clear_all_pte_masks(void)
+static void kvm_mmu_reset_all_pte_masks(void)
 {
 	shadow_user_mask = 0;
 	shadow_accessed_mask = 0;
@@ -391,6 +431,18 @@ static void kvm_mmu_clear_all_pte_masks(void)
 	shadow_mmio_mask = 0;
 	shadow_present_mask = 0;
 	shadow_acc_track_mask = 0;
+
+	/*
+	 * If the CPU has 46 or less physical address bits, then set an
+	 * appropriate mask to guard against L1TF attacks. Otherwise, it is
+	 * assumed that the CPU is not vulnerable to L1TF.
+	 */
+	if (boot_cpu_data.x86_phys_bits <
+	    52 - shadow_nonpresent_or_rsvd_mask_len)
+		shadow_nonpresent_or_rsvd_mask =
+			rsvd_bits(boot_cpu_data.x86_phys_bits -
+				  shadow_nonpresent_or_rsvd_mask_len,
+				  boot_cpu_data.x86_phys_bits - 1);
 }
 
 static int is_cpuid_PSE36(void)
@@ -1986,7 +2038,7 @@ static int nonpaging_sync_page(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
-static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
+static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root)
 {
 }
 
@@ -2117,12 +2169,8 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
 static bool __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 			    struct list_head *invalid_list)
 {
-	if (sp->role.cr4_pae != !!is_pae(vcpu)) {
-		kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list);
-		return false;
-	}
-
-	if (vcpu->arch.mmu.sync_page(vcpu, sp) == 0) {
+	if (sp->role.cr4_pae != !!is_pae(vcpu)
+	    || vcpu->arch.mmu.sync_page(vcpu, sp) == 0) {
 		kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list);
 		return false;
 	}
@@ -2392,11 +2440,12 @@ out:
 	return sp;
 }
 
-static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator,
-			     struct kvm_vcpu *vcpu, u64 addr)
+static void shadow_walk_init_using_root(struct kvm_shadow_walk_iterator *iterator,
+					struct kvm_vcpu *vcpu, hpa_t root,
+					u64 addr)
 {
 	iterator->addr = addr;
-	iterator->shadow_addr = vcpu->arch.mmu.root_hpa;
+	iterator->shadow_addr = root;
 	iterator->level = vcpu->arch.mmu.shadow_root_level;
 
 	if (iterator->level == PT64_ROOT_4LEVEL &&
@@ -2405,6 +2454,12 @@ static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator,
 		--iterator->level;
 
 	if (iterator->level == PT32E_ROOT_LEVEL) {
+		/*
+		 * prev_root is currently only used for 64-bit hosts. So only
+		 * the active root_hpa is valid here.
+		 */
+		BUG_ON(root != vcpu->arch.mmu.root_hpa);
+
 		iterator->shadow_addr
 			= vcpu->arch.mmu.pae_root[(addr >> 30) & 3];
 		iterator->shadow_addr &= PT64_BASE_ADDR_MASK;
@@ -2414,6 +2469,13 @@ static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator,
 	}
 }
 
+static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator,
+			     struct kvm_vcpu *vcpu, u64 addr)
+{
+	shadow_walk_init_using_root(iterator, vcpu, vcpu->arch.mmu.root_hpa,
+				    addr);
+}
+
 static bool shadow_walk_okay(struct kvm_shadow_walk_iterator *iterator)
 {
 	if (iterator->level < PT_PAGE_TABLE_LEVEL)
@@ -2702,6 +2764,45 @@ static bool mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
 		kvm_unsync_page(vcpu, sp);
 	}
 
+	/*
+	 * We need to ensure that the marking of unsync pages is visible
+	 * before the SPTE is updated to allow writes because
+	 * kvm_mmu_sync_roots() checks the unsync flags without holding
+	 * the MMU lock and so can race with this. If the SPTE was updated
+	 * before the page had been marked as unsync-ed, something like the
+	 * following could happen:
+	 *
+	 * CPU 1                    CPU 2
+	 * ---------------------------------------------------------------------
+	 * 1.2 Host updates SPTE
+	 *     to be writable
+	 *                      2.1 Guest writes a GPTE for GVA X.
+	 *                          (GPTE being in the guest page table shadowed
+	 *                           by the SP from CPU 1.)
+	 *                          This reads SPTE during the page table walk.
+	 *                          Since SPTE.W is read as 1, there is no
+	 *                          fault.
+	 *
+	 *                      2.2 Guest issues TLB flush.
+	 *                          That causes a VM Exit.
+	 *
+	 *                      2.3 kvm_mmu_sync_pages() reads sp->unsync.
+	 *                          Since it is false, so it just returns.
+	 *
+	 *                      2.4 Guest accesses GVA X.
+	 *                          Since the mapping in the SP was not updated,
+	 *                          so the old mapping for GVA X incorrectly
+	 *                          gets used.
+	 * 1.1 Host marks SP
+	 *     as unsync
+	 *     (sp->unsync = true)
+	 *
+	 * The write barrier below ensures that 1.1 happens before 1.2 and thus
+	 * the situation in 2.4 does not arise. The implicit barrier in 2.2
+	 * pairs with this write barrier.
+	 */
+	smp_wmb();
+
 	return false;
 }
 
@@ -2724,6 +2825,10 @@ static bool kvm_is_mmio_pfn(kvm_pfn_t pfn)
 	return true;
 }
 
+/* Bits which may be returned by set_spte() */
+#define SET_SPTE_WRITE_PROTECTED_PT	BIT(0)
+#define SET_SPTE_NEED_REMOTE_TLB_FLUSH	BIT(1)
+
 static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		    unsigned pte_access, int level,
 		    gfn_t gfn, kvm_pfn_t pfn, bool speculative,
@@ -2800,7 +2905,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		if (mmu_need_write_protect(vcpu, gfn, can_unsync)) {
 			pgprintk("%s: found shadow page for %llx, marking ro\n",
 				 __func__, gfn);
-			ret = 1;
+			ret |= SET_SPTE_WRITE_PROTECTED_PT;
 			pte_access &= ~ACC_WRITE_MASK;
 			spte &= ~(PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE);
 		}
@@ -2816,7 +2921,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 
 set_pte:
 	if (mmu_spte_update(sptep, spte))
-		kvm_flush_remote_tlbs(vcpu->kvm);
+		ret |= SET_SPTE_NEED_REMOTE_TLB_FLUSH;
 done:
 	return ret;
 }
@@ -2827,7 +2932,9 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,
 {
 	int was_rmapped = 0;
 	int rmap_count;
+	int set_spte_ret;
 	int ret = RET_PF_RETRY;
+	bool flush = false;
 
 	pgprintk("%s: spte %llx write_fault %d gfn %llx\n", __func__,
 		 *sptep, write_fault, gfn);
@@ -2844,22 +2951,25 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,
 
 			child = page_header(pte & PT64_BASE_ADDR_MASK);
 			drop_parent_pte(child, sptep);
-			kvm_flush_remote_tlbs(vcpu->kvm);
+			flush = true;
 		} else if (pfn != spte_to_pfn(*sptep)) {
 			pgprintk("hfn old %llx new %llx\n",
 				 spte_to_pfn(*sptep), pfn);
 			drop_spte(vcpu->kvm, sptep);
-			kvm_flush_remote_tlbs(vcpu->kvm);
+			flush = true;
 		} else
 			was_rmapped = 1;
 	}
 
-	if (set_spte(vcpu, sptep, pte_access, level, gfn, pfn, speculative,
-	      true, host_writable)) {
+	set_spte_ret = set_spte(vcpu, sptep, pte_access, level, gfn, pfn,
+				speculative, true, host_writable);
+	if (set_spte_ret & SET_SPTE_WRITE_PROTECTED_PT) {
 		if (write_fault)
 			ret = RET_PF_EMULATE;
 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 	}
+	if (set_spte_ret & SET_SPTE_NEED_REMOTE_TLB_FLUSH || flush)
+		kvm_flush_remote_tlbs(vcpu->kvm);
 
 	if (unlikely(is_mmio_spte(*sptep)))
 		ret = RET_PF_EMULATE;
@@ -3358,26 +3468,47 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
 	*root_hpa = INVALID_PAGE;
 }
 
-void kvm_mmu_free_roots(struct kvm_vcpu *vcpu)
+/* roots_to_free must be some combination of the KVM_MMU_ROOT_* flags */
+void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, ulong roots_to_free)
 {
 	int i;
 	LIST_HEAD(invalid_list);
 	struct kvm_mmu *mmu = &vcpu->arch.mmu;
+	bool free_active_root = roots_to_free & KVM_MMU_ROOT_CURRENT;
 
-	if (!VALID_PAGE(mmu->root_hpa))
-		return;
+	BUILD_BUG_ON(KVM_MMU_NUM_PREV_ROOTS >= BITS_PER_LONG);
+
+	/* Before acquiring the MMU lock, see if we need to do any real work. */
+	if (!(free_active_root && VALID_PAGE(mmu->root_hpa))) {
+		for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
+			if ((roots_to_free & KVM_MMU_ROOT_PREVIOUS(i)) &&
+			    VALID_PAGE(mmu->prev_roots[i].hpa))
+				break;
+
+		if (i == KVM_MMU_NUM_PREV_ROOTS)
+			return;
+	}
 
 	spin_lock(&vcpu->kvm->mmu_lock);
 
-	if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL &&
-	    (mmu->root_level >= PT64_ROOT_4LEVEL || mmu->direct_map)) {
-		mmu_free_root_page(vcpu->kvm, &mmu->root_hpa, &invalid_list);
-	} else {
-		for (i = 0; i < 4; ++i)
-			if (mmu->pae_root[i] != 0)
-				mmu_free_root_page(vcpu->kvm, &mmu->pae_root[i],
-						   &invalid_list);
-		mmu->root_hpa = INVALID_PAGE;
+	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
+		if (roots_to_free & KVM_MMU_ROOT_PREVIOUS(i))
+			mmu_free_root_page(vcpu->kvm, &mmu->prev_roots[i].hpa,
+					   &invalid_list);
+
+	if (free_active_root) {
+		if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL &&
+		    (mmu->root_level >= PT64_ROOT_4LEVEL || mmu->direct_map)) {
+			mmu_free_root_page(vcpu->kvm, &mmu->root_hpa,
+					   &invalid_list);
+		} else {
+			for (i = 0; i < 4; ++i)
+				if (mmu->pae_root[i] != 0)
+					mmu_free_root_page(vcpu->kvm,
+							   &mmu->pae_root[i],
+							   &invalid_list);
+			mmu->root_hpa = INVALID_PAGE;
+		}
 	}
 
 	kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
@@ -3546,7 +3677,7 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu)
 		return mmu_alloc_shadow_roots(vcpu);
 }
 
-static void mmu_sync_roots(struct kvm_vcpu *vcpu)
+void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
 {
 	int i;
 	struct kvm_mmu_page *sp;
@@ -3558,14 +3689,39 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu)
 		return;
 
 	vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY);
-	kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);
+
 	if (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL) {
 		hpa_t root = vcpu->arch.mmu.root_hpa;
+
 		sp = page_header(root);
+
+		/*
+		 * Even if another CPU was marking the SP as unsync-ed
+		 * simultaneously, any guest page table changes are not
+		 * guaranteed to be visible anyway until this VCPU issues a TLB
+		 * flush strictly after those changes are made. We only need to
+		 * ensure that the other CPU sets these flags before any actual
+		 * changes to the page tables are made. The comments in
+		 * mmu_need_write_protect() describe what could go wrong if this
+		 * requirement isn't satisfied.
+		 */
+		if (!smp_load_acquire(&sp->unsync) &&
+		    !smp_load_acquire(&sp->unsync_children))
+			return;
+
+		spin_lock(&vcpu->kvm->mmu_lock);
+		kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);
+
 		mmu_sync_children(vcpu, sp);
+
 		kvm_mmu_audit(vcpu, AUDIT_POST_SYNC);
+		spin_unlock(&vcpu->kvm->mmu_lock);
 		return;
 	}
+
+	spin_lock(&vcpu->kvm->mmu_lock);
+	kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);
+
 	for (i = 0; i < 4; ++i) {
 		hpa_t root = vcpu->arch.mmu.pae_root[i];
 
@@ -3575,13 +3731,8 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu)
 			mmu_sync_children(vcpu, sp);
 		}
 	}
-	kvm_mmu_audit(vcpu, AUDIT_POST_SYNC);
-}
 
-void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
-{
-	spin_lock(&vcpu->kvm->mmu_lock);
-	mmu_sync_roots(vcpu);
+	kvm_mmu_audit(vcpu, AUDIT_POST_SYNC);
 	spin_unlock(&vcpu->kvm->mmu_lock);
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_sync_roots);
@@ -3948,16 +4099,107 @@ static void nonpaging_init_context(struct kvm_vcpu *vcpu,
 	context->update_pte = nonpaging_update_pte;
 	context->root_level = 0;
 	context->shadow_root_level = PT32E_ROOT_LEVEL;
-	context->root_hpa = INVALID_PAGE;
 	context->direct_map = true;
 	context->nx = false;
 }
 
-void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu)
+/*
+ * Find out if a previously cached root matching the new CR3/role is available.
+ * The current root is also inserted into the cache.
+ * If a matching root was found, it is assigned to kvm_mmu->root_hpa and true is
+ * returned.
+ * Otherwise, the LRU root from the cache is assigned to kvm_mmu->root_hpa and
+ * false is returned. This root should now be freed by the caller.
+ */
+static bool cached_root_available(struct kvm_vcpu *vcpu, gpa_t new_cr3,
+				  union kvm_mmu_page_role new_role)
+{
+	uint i;
+	struct kvm_mmu_root_info root;
+	struct kvm_mmu *mmu = &vcpu->arch.mmu;
+
+	root.cr3 = mmu->get_cr3(vcpu);
+	root.hpa = mmu->root_hpa;
+
+	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
+		swap(root, mmu->prev_roots[i]);
+
+		if (new_cr3 == root.cr3 && VALID_PAGE(root.hpa) &&
+		    page_header(root.hpa) != NULL &&
+		    new_role.word == page_header(root.hpa)->role.word)
+			break;
+	}
+
+	mmu->root_hpa = root.hpa;
+
+	return i < KVM_MMU_NUM_PREV_ROOTS;
+}
+
+static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3,
+			    union kvm_mmu_page_role new_role,
+			    bool skip_tlb_flush)
 {
-	kvm_mmu_free_roots(vcpu);
+	struct kvm_mmu *mmu = &vcpu->arch.mmu;
+
+	/*
+	 * For now, limit the fast switch to 64-bit hosts+VMs in order to avoid
+	 * having to deal with PDPTEs. We may add support for 32-bit hosts/VMs
+	 * later if necessary.
+	 */
+	if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL &&
+	    mmu->root_level >= PT64_ROOT_4LEVEL) {
+		if (mmu_check_root(vcpu, new_cr3 >> PAGE_SHIFT))
+			return false;
+
+		if (cached_root_available(vcpu, new_cr3, new_role)) {
+			/*
+			 * It is possible that the cached previous root page is
+			 * obsolete because of a change in the MMU
+			 * generation number. However, that is accompanied by
+			 * KVM_REQ_MMU_RELOAD, which will free the root that we
+			 * have set here and allocate a new one.
+			 */
+
+			kvm_make_request(KVM_REQ_LOAD_CR3, vcpu);
+			if (!skip_tlb_flush) {
+				kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
+				kvm_x86_ops->tlb_flush(vcpu, true);
+			}
+
+			/*
+			 * The last MMIO access's GVA and GPA are cached in the
+			 * VCPU. When switching to a new CR3, that GVA->GPA
+			 * mapping may no longer be valid. So clear any cached
+			 * MMIO info even when we don't need to sync the shadow
+			 * page tables.
+			 */
+			vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY);
+
+			__clear_sp_write_flooding_count(
+				page_header(mmu->root_hpa));
+
+			return true;
+		}
+	}
+
+	return false;
 }
 
+static void __kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3,
+			      union kvm_mmu_page_role new_role,
+			      bool skip_tlb_flush)
+{
+	if (!fast_cr3_switch(vcpu, new_cr3, new_role, skip_tlb_flush))
+		kvm_mmu_free_roots(vcpu, KVM_MMU_ROOT_CURRENT);
+}
+
+void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, bool skip_tlb_flush)
+{
+	__kvm_mmu_new_cr3(vcpu, new_cr3, kvm_mmu_calc_root_page_role(vcpu),
+			  skip_tlb_flush);
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_new_cr3);
+
 static unsigned long get_cr3(struct kvm_vcpu *vcpu)
 {
 	return kvm_read_cr3(vcpu);
@@ -4432,7 +4674,6 @@ static void paging64_init_context_common(struct kvm_vcpu *vcpu,
 	context->invlpg = paging64_invlpg;
 	context->update_pte = paging64_update_pte;
 	context->shadow_root_level = level;
-	context->root_hpa = INVALID_PAGE;
 	context->direct_map = false;
 }
 
@@ -4462,7 +4703,6 @@ static void paging32_init_context(struct kvm_vcpu *vcpu,
 	context->invlpg = paging32_invlpg;
 	context->update_pte = paging32_update_pte;
 	context->shadow_root_level = PT32E_ROOT_LEVEL;
-	context->root_hpa = INVALID_PAGE;
 	context->direct_map = false;
 }
 
@@ -4472,20 +4712,32 @@ static void paging32E_init_context(struct kvm_vcpu *vcpu,
 	paging64_init_context_common(vcpu, context, PT32E_ROOT_LEVEL);
 }
 
+static union kvm_mmu_page_role
+kvm_calc_tdp_mmu_root_page_role(struct kvm_vcpu *vcpu)
+{
+	union kvm_mmu_page_role role = {0};
+
+	role.guest_mode = is_guest_mode(vcpu);
+	role.smm = is_smm(vcpu);
+	role.ad_disabled = (shadow_accessed_mask == 0);
+	role.level = kvm_x86_ops->get_tdp_level(vcpu);
+	role.direct = true;
+	role.access = ACC_ALL;
+
+	return role;
+}
+
 static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 {
 	struct kvm_mmu *context = &vcpu->arch.mmu;
 
-	context->base_role.word = 0;
-	context->base_role.guest_mode = is_guest_mode(vcpu);
-	context->base_role.smm = is_smm(vcpu);
-	context->base_role.ad_disabled = (shadow_accessed_mask == 0);
+	context->base_role.word = mmu_base_role_mask.word &
+				  kvm_calc_tdp_mmu_root_page_role(vcpu).word;
 	context->page_fault = tdp_page_fault;
 	context->sync_page = nonpaging_sync_page;
 	context->invlpg = nonpaging_invlpg;
 	context->update_pte = nonpaging_update_pte;
 	context->shadow_root_level = kvm_x86_ops->get_tdp_level(vcpu);
-	context->root_hpa = INVALID_PAGE;
 	context->direct_map = true;
 	context->set_cr3 = kvm_x86_ops->set_tdp_cr3;
 	context->get_cr3 = get_cr3;
@@ -4520,13 +4772,36 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 	reset_tdp_shadow_zero_bits_mask(vcpu, context);
 }
 
-void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
+static union kvm_mmu_page_role
+kvm_calc_shadow_mmu_root_page_role(struct kvm_vcpu *vcpu)
 {
+	union kvm_mmu_page_role role = {0};
 	bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
 	bool smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
-	struct kvm_mmu *context = &vcpu->arch.mmu;
 
-	MMU_WARN_ON(VALID_PAGE(context->root_hpa));
+	role.nxe = is_nx(vcpu);
+	role.cr4_pae = !!is_pae(vcpu);
+	role.cr0_wp  = is_write_protection(vcpu);
+	role.smep_andnot_wp = smep && !is_write_protection(vcpu);
+	role.smap_andnot_wp = smap && !is_write_protection(vcpu);
+	role.guest_mode = is_guest_mode(vcpu);
+	role.smm = is_smm(vcpu);
+	role.direct = !is_paging(vcpu);
+	role.access = ACC_ALL;
+
+	if (!is_long_mode(vcpu))
+		role.level = PT32E_ROOT_LEVEL;
+	else if (is_la57_mode(vcpu))
+		role.level = PT64_ROOT_5LEVEL;
+	else
+		role.level = PT64_ROOT_4LEVEL;
+
+	return role;
+}
+
+void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
+{
+	struct kvm_mmu *context = &vcpu->arch.mmu;
 
 	if (!is_paging(vcpu))
 		nonpaging_init_context(vcpu, context);
@@ -4537,26 +4812,34 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
 	else
 		paging32_init_context(vcpu, context);
 
-	context->base_role.nxe = is_nx(vcpu);
-	context->base_role.cr4_pae = !!is_pae(vcpu);
-	context->base_role.cr0_wp  = is_write_protection(vcpu);
-	context->base_role.smep_andnot_wp
-		= smep && !is_write_protection(vcpu);
-	context->base_role.smap_andnot_wp
-		= smap && !is_write_protection(vcpu);
-	context->base_role.guest_mode = is_guest_mode(vcpu);
-	context->base_role.smm = is_smm(vcpu);
+	context->base_role.word = mmu_base_role_mask.word &
+				  kvm_calc_shadow_mmu_root_page_role(vcpu).word;
 	reset_shadow_zero_bits_mask(vcpu, context);
 }
 EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
 
+static union kvm_mmu_page_role
+kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty)
+{
+	union kvm_mmu_page_role role = vcpu->arch.mmu.base_role;
+
+	role.level = PT64_ROOT_4LEVEL;
+	role.direct = false;
+	role.ad_disabled = !accessed_dirty;
+	role.guest_mode = true;
+	role.access = ACC_ALL;
+
+	return role;
+}
+
 void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
-			     bool accessed_dirty)
+			     bool accessed_dirty, gpa_t new_eptp)
 {
 	struct kvm_mmu *context = &vcpu->arch.mmu;
+	union kvm_mmu_page_role root_page_role =
+		kvm_calc_shadow_ept_root_page_role(vcpu, accessed_dirty);
 
-	MMU_WARN_ON(VALID_PAGE(context->root_hpa));
-
+	__kvm_mmu_new_cr3(vcpu, new_eptp, root_page_role, false);
 	context->shadow_root_level = PT64_ROOT_4LEVEL;
 
 	context->nx = true;
@@ -4567,10 +4850,8 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
 	context->invlpg = ept_invlpg;
 	context->update_pte = ept_update_pte;
 	context->root_level = PT64_ROOT_4LEVEL;
-	context->root_hpa = INVALID_PAGE;
 	context->direct_map = false;
-	context->base_role.ad_disabled = !accessed_dirty;
-	context->base_role.guest_mode = 1;
+	context->base_role.word = root_page_role.word & mmu_base_role_mask.word;
 	update_permission_bitmask(vcpu, context, true);
 	update_pkru_bitmask(vcpu, context, true);
 	update_last_nonleaf_level(vcpu, context);
@@ -4633,8 +4914,17 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 	update_last_nonleaf_level(vcpu, g_context);
 }
 
-static void init_kvm_mmu(struct kvm_vcpu *vcpu)
+void kvm_init_mmu(struct kvm_vcpu *vcpu, bool reset_roots)
 {
+	if (reset_roots) {
+		uint i;
+
+		vcpu->arch.mmu.root_hpa = INVALID_PAGE;
+
+		for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
+			vcpu->arch.mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
+	}
+
 	if (mmu_is_nested(vcpu))
 		init_kvm_nested_mmu(vcpu);
 	else if (tdp_enabled)
@@ -4642,11 +4932,21 @@ static void init_kvm_mmu(struct kvm_vcpu *vcpu)
 	else
 		init_kvm_softmmu(vcpu);
 }
+EXPORT_SYMBOL_GPL(kvm_init_mmu);
+
+static union kvm_mmu_page_role
+kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu)
+{
+	if (tdp_enabled)
+		return kvm_calc_tdp_mmu_root_page_role(vcpu);
+	else
+		return kvm_calc_shadow_mmu_root_page_role(vcpu);
+}
 
 void kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
 {
 	kvm_mmu_unload(vcpu);
-	init_kvm_mmu(vcpu);
+	kvm_init_mmu(vcpu, true);
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_reset_context);
 
@@ -4661,8 +4961,8 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
 	kvm_mmu_sync_roots(vcpu);
 	if (r)
 		goto out;
-	/* set_cr3() should ensure TLB has been flushed */
-	vcpu->arch.mmu.set_cr3(vcpu, vcpu->arch.mmu.root_hpa);
+	kvm_mmu_load_cr3(vcpu);
+	kvm_x86_ops->tlb_flush(vcpu, true);
 out:
 	return r;
 }
@@ -4670,7 +4970,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_load);
 
 void kvm_mmu_unload(struct kvm_vcpu *vcpu)
 {
-	kvm_mmu_free_roots(vcpu);
+	kvm_mmu_free_roots(vcpu, KVM_MMU_ROOTS_ALL);
 	WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa));
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_unload);
@@ -4823,16 +5123,6 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	u64 entry, gentry, *spte;
 	int npte;
 	bool remote_flush, local_flush;
-	union kvm_mmu_page_role mask = { };
-
-	mask.cr0_wp = 1;
-	mask.cr4_pae = 1;
-	mask.nxe = 1;
-	mask.smep_andnot_wp = 1;
-	mask.smap_andnot_wp = 1;
-	mask.smm = 1;
-	mask.guest_mode = 1;
-	mask.ad_disabled = 1;
 
 	/*
 	 * If we don't have indirect shadow pages, it means no page is
@@ -4876,7 +5166,7 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 			mmu_page_zap_pte(vcpu->kvm, sp, spte);
 			if (gentry &&
 			      !((sp->role.word ^ vcpu->arch.mmu.base_role.word)
-			      & mask.word) && rmap_can_add(vcpu))
+			      & mmu_base_role_mask.word) && rmap_can_add(vcpu))
 				mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);
 			if (need_remote_flush(entry, *spte))
 				remote_flush = true;
@@ -5001,12 +5291,67 @@ EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
 
 void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
 {
-	vcpu->arch.mmu.invlpg(vcpu, gva);
-	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+	struct kvm_mmu *mmu = &vcpu->arch.mmu;
+	int i;
+
+	/* INVLPG on a * non-canonical address is a NOP according to the SDM.  */
+	if (is_noncanonical_address(gva, vcpu))
+		return;
+
+	mmu->invlpg(vcpu, gva, mmu->root_hpa);
+
+	/*
+	 * INVLPG is required to invalidate any global mappings for the VA,
+	 * irrespective of PCID. Since it would take us roughly similar amount
+	 * of work to determine whether any of the prev_root mappings of the VA
+	 * is marked global, or to just sync it blindly, so we might as well
+	 * just always sync it.
+	 *
+	 * Mappings not reachable via the current cr3 or the prev_roots will be
+	 * synced when switching to that cr3, so nothing needs to be done here
+	 * for them.
+	 */
+	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
+		if (VALID_PAGE(mmu->prev_roots[i].hpa))
+			mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
+
+	kvm_x86_ops->tlb_flush_gva(vcpu, gva);
 	++vcpu->stat.invlpg;
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_invlpg);
 
+void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid)
+{
+	struct kvm_mmu *mmu = &vcpu->arch.mmu;
+	bool tlb_flush = false;
+	uint i;
+
+	if (pcid == kvm_get_active_pcid(vcpu)) {
+		mmu->invlpg(vcpu, gva, mmu->root_hpa);
+		tlb_flush = true;
+	}
+
+	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
+		if (VALID_PAGE(mmu->prev_roots[i].hpa) &&
+		    pcid == kvm_get_pcid(vcpu, mmu->prev_roots[i].cr3)) {
+			mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
+			tlb_flush = true;
+		}
+	}
+
+	if (tlb_flush)
+		kvm_x86_ops->tlb_flush_gva(vcpu, gva);
+
+	++vcpu->stat.invlpg;
+
+	/*
+	 * Mappings not reachable via the current cr3 or the prev_roots will be
+	 * synced when switching to that cr3, so nothing needs to be done here
+	 * for them.
+	 */
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_invpcid_gva);
+
 void kvm_enable_tdp(void)
 {
 	tdp_enabled = true;
@@ -5030,6 +5375,9 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
 	struct page *page;
 	int i;
 
+	if (tdp_enabled)
+		return 0;
+
 	/*
 	 * When emulating 32-bit mode, cr3 is only 32 bits even on x86_64.
 	 * Therefore we need to allocate shadow page tables in the first
@@ -5048,11 +5396,16 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
 
 int kvm_mmu_create(struct kvm_vcpu *vcpu)
 {
+	uint i;
+
 	vcpu->arch.walk_mmu = &vcpu->arch.mmu;
 	vcpu->arch.mmu.root_hpa = INVALID_PAGE;
 	vcpu->arch.mmu.translate_gpa = translate_gpa;
 	vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa;
 
+	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
+		vcpu->arch.mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
+
 	return alloc_mmu_pages(vcpu);
 }
 
@@ -5060,7 +5413,7 @@ void kvm_mmu_setup(struct kvm_vcpu *vcpu)
 {
 	MMU_WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa));
 
-	init_kvm_mmu(vcpu);
+	kvm_init_mmu(vcpu, true);
 }
 
 static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
@@ -5500,7 +5853,7 @@ int kvm_mmu_module_init(void)
 {
 	int ret = -ENOMEM;
 
-	kvm_mmu_clear_all_pte_masks();
+	kvm_mmu_reset_all_pte_masks();
 
 	pte_list_desc_cache = kmem_cache_create("pte_list_desc",
 					    sizeof(struct pte_list_desc),
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 5b408c0ad612..1fab69c0b2f3 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -61,9 +61,10 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value);
 void
 reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
 
+void kvm_init_mmu(struct kvm_vcpu *vcpu, bool reset_roots);
 void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu);
 void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
-			     bool accessed_dirty);
+			     bool accessed_dirty, gpa_t new_eptp);
 bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu);
 int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
 				u64 fault_address, char *insn, int insn_len);
@@ -85,6 +86,27 @@ static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
 	return kvm_mmu_load(vcpu);
 }
 
+static inline unsigned long kvm_get_pcid(struct kvm_vcpu *vcpu, gpa_t cr3)
+{
+	BUILD_BUG_ON((X86_CR3_PCID_MASK & PAGE_MASK) != 0);
+
+	return kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE)
+	       ? cr3 & X86_CR3_PCID_MASK
+	       : 0;
+}
+
+static inline unsigned long kvm_get_active_pcid(struct kvm_vcpu *vcpu)
+{
+	return kvm_get_pcid(vcpu, kvm_read_cr3(vcpu));
+}
+
+static inline void kvm_mmu_load_cr3(struct kvm_vcpu *vcpu)
+{
+	if (VALID_PAGE(vcpu->arch.mmu.root_hpa))
+		vcpu->arch.mmu.set_cr3(vcpu, vcpu->arch.mmu.root_hpa |
+					     kvm_get_active_pcid(vcpu));
+}
+
 /*
  * Currently, we have two sorts of write-protection, a) the first one
  * write-protects guest page to sync the guest modification, b) another one is
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 6288e9d7068e..14ffd973df54 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -181,7 +181,7 @@ no_present:
  * set bit 0 if execute only is supported. Here, we repurpose ACC_USER_MASK
  * to signify readability since it isn't used in the EPT case
  */
-static inline unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, u64 gpte)
+static inline unsigned FNAME(gpte_access)(u64 gpte)
 {
 	unsigned access;
 #if PTTYPE == PTTYPE_EPT
@@ -394,8 +394,8 @@ retry_walk:
 	accessed_dirty = have_ad ? pte_access & PT_GUEST_ACCESSED_MASK : 0;
 
 	/* Convert to ACC_*_MASK flags for struct guest_walker.  */
-	walker->pt_access = FNAME(gpte_access)(vcpu, pt_access ^ walk_nx_mask);
-	walker->pte_access = FNAME(gpte_access)(vcpu, pte_access ^ walk_nx_mask);
+	walker->pt_access = FNAME(gpte_access)(pt_access ^ walk_nx_mask);
+	walker->pte_access = FNAME(gpte_access)(pte_access ^ walk_nx_mask);
 	errcode = permission_fault(vcpu, mmu, walker->pte_access, pte_pkey, access);
 	if (unlikely(errcode))
 		goto error;
@@ -508,7 +508,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 	pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
 
 	gfn = gpte_to_gfn(gpte);
-	pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
+	pte_access = sp->role.access & FNAME(gpte_access)(gpte);
 	FNAME(protect_clean_gpte)(&vcpu->arch.mmu, &pte_access, gpte);
 	pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
 			no_dirty_log && (pte_access & ACC_WRITE_MASK));
@@ -856,7 +856,7 @@ static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp)
 	return gfn_to_gpa(sp->gfn) + offset * sizeof(pt_element_t);
 }
 
-static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
+static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa)
 {
 	struct kvm_shadow_walk_iterator iterator;
 	struct kvm_mmu_page *sp;
@@ -871,13 +871,13 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 	 */
 	mmu_topup_memory_caches(vcpu);
 
-	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) {
+	if (!VALID_PAGE(root_hpa)) {
 		WARN_ON(1);
 		return;
 	}
 
 	spin_lock(&vcpu->kvm->mmu_lock);
-	for_each_shadow_entry(vcpu, gva, iterator) {
+	for_each_shadow_entry_using_root(vcpu, root_hpa, gva, iterator) {
 		level = iterator.level;
 		sptep = iterator.sptep;
 
@@ -968,6 +968,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 	int i, nr_present = 0;
 	bool host_writable;
 	gpa_t first_pte_gpa;
+	int set_spte_ret = 0;
 
 	/* direct kvm_mmu_page can not be unsync. */
 	BUG_ON(sp->role.direct);
@@ -1002,7 +1003,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 
 		gfn = gpte_to_gfn(gpte);
 		pte_access = sp->role.access;
-		pte_access &= FNAME(gpte_access)(vcpu, gpte);
+		pte_access &= FNAME(gpte_access)(gpte);
 		FNAME(protect_clean_gpte)(&vcpu->arch.mmu, &pte_access, gpte);
 
 		if (sync_mmio_spte(vcpu, &sp->spt[i], gfn, pte_access,
@@ -1024,12 +1025,15 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 
 		host_writable = sp->spt[i] & SPTE_HOST_WRITEABLE;
 
-		set_spte(vcpu, &sp->spt[i], pte_access,
-			 PT_PAGE_TABLE_LEVEL, gfn,
-			 spte_to_pfn(sp->spt[i]), true, false,
-			 host_writable);
+		set_spte_ret |= set_spte(vcpu, &sp->spt[i],
+					 pte_access, PT_PAGE_TABLE_LEVEL,
+					 gfn, spte_to_pfn(sp->spt[i]),
+					 true, false, host_writable);
 	}
 
+	if (set_spte_ret & SET_SPTE_NEED_REMOTE_TLB_FLUSH)
+		kvm_flush_remote_tlbs(vcpu->kvm);
+
 	return nr_present;
 }
 
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index f059a73f0fd0..6276140044d0 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2884,7 +2884,6 @@ static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu,
 
 	svm->vmcb->control.nested_cr3 = __sme_set(root);
 	mark_dirty(svm->vmcb, VMCB_NPT);
-	svm_flush_tlb(vcpu, true);
 }
 
 static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
@@ -5435,6 +5434,13 @@ static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
 		svm->asid_generation--;
 }
 
+static void svm_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t gva)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	invlpga(gva, svm->vmcb->control.asid);
+}
+
 static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
 {
 }
@@ -5580,8 +5586,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 
 	clgi();
 
-	local_irq_enable();
-
 	/*
 	 * If this vCPU has touched SPEC_CTRL, restore the guest's value if
 	 * it's non-zero. Since vmentry is serialising on affected CPUs, there
@@ -5590,6 +5594,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 	 */
 	x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl);
 
+	local_irq_enable();
+
 	asm volatile (
 		"push %%" _ASM_BP "; \n\t"
 		"mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t"
@@ -5712,12 +5718,12 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 	if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
 		svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
 
-	x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl);
-
 	reload_tss(vcpu);
 
 	local_irq_disable();
 
+	x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl);
+
 	vcpu->arch.cr2 = svm->vmcb->save.cr2;
 	vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
 	vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
@@ -5766,7 +5772,6 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
 
 	svm->vmcb->save.cr3 = __sme_set(root);
 	mark_dirty(svm->vmcb, VMCB_CR);
-	svm_flush_tlb(vcpu, true);
 }
 
 static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root)
@@ -5779,8 +5784,6 @@ static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root)
 	/* Also sync guest cr3 here in case we live migrate */
 	svm->vmcb->save.cr3 = kvm_read_cr3(vcpu);
 	mark_dirty(svm->vmcb, VMCB_CR);
-
-	svm_flush_tlb(vcpu, true);
 }
 
 static int is_disabled(void)
@@ -7090,6 +7093,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
 	.set_rflags = svm_set_rflags,
 
 	.tlb_flush = svm_flush_tlb,
+	.tlb_flush_gva = svm_flush_tlb_gva,
 
 	.run = svm_vcpu_run,
 	.handle_exit = handle_exit,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 46b428c0990e..8dae47e7267a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -38,6 +38,7 @@
 #include "kvm_cache_regs.h"
 #include "x86.h"
 
+#include <asm/asm.h>
 #include <asm/cpu.h>
 #include <asm/io.h>
 #include <asm/desc.h>
@@ -197,12 +198,14 @@ static enum vmx_l1d_flush_state __read_mostly vmentry_l1d_flush_param = VMENTER_
 
 static const struct {
 	const char *option;
-	enum vmx_l1d_flush_state cmd;
+	bool for_parse;
 } vmentry_l1d_param[] = {
-	{"auto",	VMENTER_L1D_FLUSH_AUTO},
-	{"never",	VMENTER_L1D_FLUSH_NEVER},
-	{"cond",	VMENTER_L1D_FLUSH_COND},
-	{"always",	VMENTER_L1D_FLUSH_ALWAYS},
+	[VMENTER_L1D_FLUSH_AUTO]	 = {"auto", true},
+	[VMENTER_L1D_FLUSH_NEVER]	 = {"never", true},
+	[VMENTER_L1D_FLUSH_COND]	 = {"cond", true},
+	[VMENTER_L1D_FLUSH_ALWAYS]	 = {"always", true},
+	[VMENTER_L1D_FLUSH_EPT_DISABLED] = {"EPT disabled", false},
+	[VMENTER_L1D_FLUSH_NOT_REQUIRED] = {"not required", false},
 };
 
 #define L1D_CACHE_ORDER 4
@@ -218,15 +221,15 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
 		return 0;
 	}
 
-       if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
-	       u64 msr;
+	if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
+		u64 msr;
 
-	       rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr);
-	       if (msr & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) {
-		       l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
-		       return 0;
-	       }
-       }
+		rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr);
+		if (msr & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) {
+			l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
+			return 0;
+		}
+	}
 
 	/* If set to auto use the default l1tf mitigation method */
 	if (l1tf == VMENTER_L1D_FLUSH_AUTO) {
@@ -286,8 +289,9 @@ static int vmentry_l1d_flush_parse(const char *s)
 
 	if (s) {
 		for (i = 0; i < ARRAY_SIZE(vmentry_l1d_param); i++) {
-			if (sysfs_streq(s, vmentry_l1d_param[i].option))
-				return vmentry_l1d_param[i].cmd;
+			if (vmentry_l1d_param[i].for_parse &&
+			    sysfs_streq(s, vmentry_l1d_param[i].option))
+				return i;
 		}
 	}
 	return -EINVAL;
@@ -297,13 +301,13 @@ static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp)
 {
 	int l1tf, ret;
 
-	if (!boot_cpu_has(X86_BUG_L1TF))
-		return 0;
-
 	l1tf = vmentry_l1d_flush_parse(s);
 	if (l1tf < 0)
 		return l1tf;
 
+	if (!boot_cpu_has(X86_BUG_L1TF))
+		return 0;
+
 	/*
 	 * Has vmx_init() run already? If not then this is the pre init
 	 * parameter parsing. In that case just store the value and let
@@ -323,6 +327,9 @@ static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp)
 
 static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp)
 {
+	if (WARN_ON_ONCE(l1tf_vmx_mitigation >= ARRAY_SIZE(vmentry_l1d_param)))
+		return sprintf(s, "???\n");
+
 	return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option);
 }
 
@@ -332,23 +339,54 @@ static const struct kernel_param_ops vmentry_l1d_flush_ops = {
 };
 module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, NULL, 0644);
 
+enum ept_pointers_status {
+	EPT_POINTERS_CHECK = 0,
+	EPT_POINTERS_MATCH = 1,
+	EPT_POINTERS_MISMATCH = 2
+};
+
 struct kvm_vmx {
 	struct kvm kvm;
 
 	unsigned int tss_addr;
 	bool ept_identity_pagetable_done;
 	gpa_t ept_identity_map_addr;
+
+	enum ept_pointers_status ept_pointers_match;
+	spinlock_t ept_pointer_lock;
 };
 
 #define NR_AUTOLOAD_MSRS 8
 
+struct vmcs_hdr {
+	u32 revision_id:31;
+	u32 shadow_vmcs:1;
+};
+
 struct vmcs {
-	u32 revision_id;
+	struct vmcs_hdr hdr;
 	u32 abort;
 	char data[0];
 };
 
 /*
+ * vmcs_host_state tracks registers that are loaded from the VMCS on VMEXIT
+ * and whose values change infrequently, but are not constant.  I.e. this is
+ * used as a write-through cache of the corresponding VMCS fields.
+ */
+struct vmcs_host_state {
+	unsigned long cr3;	/* May not match real cr3 */
+	unsigned long cr4;	/* May not match real cr4 */
+	unsigned long gs_base;
+	unsigned long fs_base;
+
+	u16           fs_sel, gs_sel, ldt_sel;
+#ifdef CONFIG_X86_64
+	u16           ds_sel, es_sel;
+#endif
+};
+
+/*
  * Track a VMCS that may be loaded on a certain CPU. If it is (cpu!=-1), also
  * remember whether it was VMLAUNCHed, and maintain a linked list of all VMCSs
  * loaded on this CPU (so we can clear them if the CPU goes down).
@@ -359,14 +397,13 @@ struct loaded_vmcs {
 	int cpu;
 	bool launched;
 	bool nmi_known_unmasked;
-	unsigned long vmcs_host_cr3;	/* May not match real cr3 */
-	unsigned long vmcs_host_cr4;	/* May not match real cr4 */
 	/* Support for vnmi-less CPUs */
 	int soft_vnmi_blocked;
 	ktime_t entry_time;
 	s64 vnmi_blocked_time;
 	unsigned long *msr_bitmap;
 	struct list_head loaded_vmcss_on_cpu_link;
+	struct vmcs_host_state host_state;
 };
 
 struct shared_msr_entry {
@@ -397,7 +434,7 @@ struct __packed vmcs12 {
 	/* According to the Intel spec, a VMCS region must start with the
 	 * following two fields. Then follow implementation-specific data.
 	 */
-	u32 revision_id;
+	struct vmcs_hdr hdr;
 	u32 abort;
 
 	u32 launch_state; /* set to 0 by VMCLEAR, to 1 by VMLAUNCH */
@@ -565,7 +602,7 @@ struct __packed vmcs12 {
 		"Offset of " #field " in struct vmcs12 has changed.")
 
 static inline void vmx_check_vmcs12_offsets(void) {
-	CHECK_OFFSET(revision_id, 0);
+	CHECK_OFFSET(hdr, 0);
 	CHECK_OFFSET(abort, 4);
 	CHECK_OFFSET(launch_state, 8);
 	CHECK_OFFSET(io_bitmap_a, 40);
@@ -784,6 +821,12 @@ struct nested_vmx {
 	 */
 	struct vmcs12 *cached_vmcs12;
 	/*
+	 * Cache of the guest's shadow VMCS, existing outside of guest
+	 * memory. Loaded from guest memory during VM entry. Flushed
+	 * to guest memory during VM exit.
+	 */
+	struct vmcs12 *cached_shadow_vmcs12;
+	/*
 	 * Indicates if the shadow vmcs must be updated with the
 	 * data hold by vmcs12
 	 */
@@ -933,25 +976,20 @@ struct vcpu_vmx {
 	/*
 	 * loaded_vmcs points to the VMCS currently used in this vcpu. For a
 	 * non-nested (L1) guest, it always points to vmcs01. For a nested
-	 * guest (L2), it points to a different VMCS.
+	 * guest (L2), it points to a different VMCS.  loaded_cpu_state points
+	 * to the VMCS whose state is loaded into the CPU registers that only
+	 * need to be switched when transitioning to/from the kernel; a NULL
+	 * value indicates that host state is loaded.
 	 */
 	struct loaded_vmcs    vmcs01;
 	struct loaded_vmcs   *loaded_vmcs;
+	struct loaded_vmcs   *loaded_cpu_state;
 	bool                  __launched; /* temporary, used in vmx_vcpu_run */
 	struct msr_autoload {
 		struct vmx_msrs guest;
 		struct vmx_msrs host;
 	} msr_autoload;
-	struct {
-		int           loaded;
-		u16           fs_sel, gs_sel, ldt_sel;
-#ifdef CONFIG_X86_64
-		u16           ds_sel, es_sel;
-#endif
-		int           gs_ldt_reload_needed;
-		int           fs_reload_needed;
-		u64           msr_host_bndcfgs;
-	} host_state;
+
 	struct {
 		int vm86_active;
 		ulong save_rflags;
@@ -1001,6 +1039,7 @@ struct vcpu_vmx {
 	 */
 	u64 msr_ia32_feature_control;
 	u64 msr_ia32_feature_control_valid_bits;
+	u64 ept_pointer;
 };
 
 enum segment_cache_field {
@@ -1220,6 +1259,11 @@ static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu)
 	return to_vmx(vcpu)->nested.cached_vmcs12;
 }
 
+static inline struct vmcs12 *get_shadow_vmcs12(struct kvm_vcpu *vcpu)
+{
+	return to_vmx(vcpu)->nested.cached_shadow_vmcs12;
+}
+
 static bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu);
 static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu);
 static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa);
@@ -1490,6 +1534,48 @@ static void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf)
 	 *	GUEST_IA32_RTIT_CTL		= 0x00002814,
 	 */
 }
+
+/* check_ept_pointer() should be under protection of ept_pointer_lock. */
+static void check_ept_pointer_match(struct kvm *kvm)
+{
+	struct kvm_vcpu *vcpu;
+	u64 tmp_eptp = INVALID_PAGE;
+	int i;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		if (!VALID_PAGE(tmp_eptp)) {
+			tmp_eptp = to_vmx(vcpu)->ept_pointer;
+		} else if (tmp_eptp != to_vmx(vcpu)->ept_pointer) {
+			to_kvm_vmx(kvm)->ept_pointers_match
+				= EPT_POINTERS_MISMATCH;
+			return;
+		}
+	}
+
+	to_kvm_vmx(kvm)->ept_pointers_match = EPT_POINTERS_MATCH;
+}
+
+static int vmx_hv_remote_flush_tlb(struct kvm *kvm)
+{
+	int ret;
+
+	spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock);
+
+	if (to_kvm_vmx(kvm)->ept_pointers_match == EPT_POINTERS_CHECK)
+		check_ept_pointer_match(kvm);
+
+	if (to_kvm_vmx(kvm)->ept_pointers_match != EPT_POINTERS_MATCH) {
+		ret = -ENOTSUPP;
+		goto out;
+	}
+
+	ret = hyperv_flush_guest_mapping(
+			to_vmx(kvm_get_vcpu(kvm, 0))->ept_pointer);
+
+out:
+	spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
+	return ret;
+}
 #else /* !IS_ENABLED(CONFIG_HYPERV) */
 static inline void evmcs_write64(unsigned long field, u64 value) {}
 static inline void evmcs_write32(unsigned long field, u32 value) {}
@@ -1604,6 +1690,12 @@ static inline bool cpu_has_vmx_virtual_intr_delivery(void)
 		SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
 }
 
+static inline bool cpu_has_vmx_encls_vmexit(void)
+{
+	return vmcs_config.cpu_based_2nd_exec_ctrl &
+		SECONDARY_EXEC_ENCLS_EXITING;
+}
+
 /*
  * Comment's format: document - errata name - stepping - processor name.
  * Refer from
@@ -1864,6 +1956,12 @@ static inline bool nested_cpu_supports_monitor_trap_flag(struct kvm_vcpu *vcpu)
 			CPU_BASED_MONITOR_TRAP_FLAG;
 }
 
+static inline bool nested_cpu_has_vmx_shadow_vmcs(struct kvm_vcpu *vcpu)
+{
+	return to_vmx(vcpu)->nested.msrs.secondary_ctls_high &
+		SECONDARY_EXEC_SHADOW_VMCS;
+}
+
 static inline bool nested_cpu_has(struct vmcs12 *vmcs12, u32 bit)
 {
 	return vmcs12->cpu_based_vm_exec_control & bit;
@@ -1944,6 +2042,11 @@ static inline bool nested_cpu_has_eptp_switching(struct vmcs12 *vmcs12)
 		 VMX_VMFUNC_EPTP_SWITCHING);
 }
 
+static inline bool nested_cpu_has_shadow_vmcs(struct vmcs12 *vmcs12)
+{
+	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_SHADOW_VMCS);
+}
+
 static inline bool is_nmi(u32 intr_info)
 {
 	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
@@ -1974,11 +2077,12 @@ static inline void __invvpid(int ext, u16 vpid, gva_t gva)
 	u64 rsvd : 48;
 	u64 gva;
     } operand = { vpid, 0, gva };
+    bool error;
 
-    asm volatile (__ex(ASM_VMX_INVVPID)
-		  /* CF==1 or ZF==1 --> rc = -1 */
-		  "; ja 1f ; ud2 ; 1:"
-		  : : "a"(&operand), "c"(ext) : "cc", "memory");
+    asm volatile (__ex(ASM_VMX_INVVPID) CC_SET(na)
+		  : CC_OUT(na) (error) : "a"(&operand), "c"(ext)
+		  : "memory");
+    BUG_ON(error);
 }
 
 static inline void __invept(int ext, u64 eptp, gpa_t gpa)
@@ -1986,11 +2090,12 @@ static inline void __invept(int ext, u64 eptp, gpa_t gpa)
 	struct {
 		u64 eptp, gpa;
 	} operand = {eptp, gpa};
+	bool error;
 
-	asm volatile (__ex(ASM_VMX_INVEPT)
-			/* CF==1 or ZF==1 --> rc = -1 */
-			"; ja 1f ; ud2 ; 1:\n"
-			: : "a" (&operand), "c" (ext) : "cc", "memory");
+	asm volatile (__ex(ASM_VMX_INVEPT) CC_SET(na)
+		      : CC_OUT(na) (error) : "a" (&operand), "c" (ext)
+		      : "memory");
+	BUG_ON(error);
 }
 
 static struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
@@ -2006,12 +2111,12 @@ static struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
 static void vmcs_clear(struct vmcs *vmcs)
 {
 	u64 phys_addr = __pa(vmcs);
-	u8 error;
+	bool error;
 
-	asm volatile (__ex(ASM_VMX_VMCLEAR_RAX) "; setna %0"
-		      : "=qm"(error) : "a"(&phys_addr), "m"(phys_addr)
-		      : "cc", "memory");
-	if (error)
+	asm volatile (__ex(ASM_VMX_VMCLEAR_RAX) CC_SET(na)
+		      : CC_OUT(na) (error) : "a"(&phys_addr), "m"(phys_addr)
+		      : "memory");
+	if (unlikely(error))
 		printk(KERN_ERR "kvm: vmclear fail: %p/%llx\n",
 		       vmcs, phys_addr);
 }
@@ -2028,15 +2133,15 @@ static inline void loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs)
 static void vmcs_load(struct vmcs *vmcs)
 {
 	u64 phys_addr = __pa(vmcs);
-	u8 error;
+	bool error;
 
 	if (static_branch_unlikely(&enable_evmcs))
 		return evmcs_load(phys_addr);
 
-	asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0"
-			: "=qm"(error) : "a"(&phys_addr), "m"(phys_addr)
-			: "cc", "memory");
-	if (error)
+	asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) CC_SET(na)
+		      : CC_OUT(na) (error) : "a"(&phys_addr), "m"(phys_addr)
+		      : "memory");
+	if (unlikely(error))
 		printk(KERN_ERR "kvm: vmptrld %p/%llx failed\n",
 		       vmcs, phys_addr);
 }
@@ -2114,6 +2219,19 @@ static void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs)
 			 __loaded_vmcs_clear, loaded_vmcs, 1);
 }
 
+static inline bool vpid_sync_vcpu_addr(int vpid, gva_t addr)
+{
+	if (vpid == 0)
+		return true;
+
+	if (cpu_has_vmx_invvpid_individual_addr()) {
+		__invvpid(VMX_VPID_EXTENT_INDIVIDUAL_ADDR, vpid, addr);
+		return true;
+	}
+
+	return false;
+}
+
 static inline void vpid_sync_vcpu_single(int vpid)
 {
 	if (vpid == 0)
@@ -2248,10 +2366,10 @@ static noinline void vmwrite_error(unsigned long field, unsigned long value)
 
 static __always_inline void __vmcs_writel(unsigned long field, unsigned long value)
 {
-	u8 error;
+	bool error;
 
-	asm volatile (__ex(ASM_VMX_VMWRITE_RAX_RDX) "; setna %0"
-		       : "=q"(error) : "a"(value), "d"(field) : "cc");
+	asm volatile (__ex(ASM_VMX_VMWRITE_RAX_RDX) CC_SET(na)
+		      : CC_OUT(na) (error) : "a"(value), "d"(field));
 	if (unlikely(error))
 		vmwrite_error(field, value);
 }
@@ -2735,121 +2853,150 @@ static unsigned long segment_base(u16 selector)
 }
 #endif
 
-static void vmx_save_host_state(struct kvm_vcpu *vcpu)
+static void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct vmcs_host_state *host_state;
 #ifdef CONFIG_X86_64
 	int cpu = raw_smp_processor_id();
-	unsigned long fs_base, kernel_gs_base;
 #endif
+	unsigned long fs_base, gs_base;
+	u16 fs_sel, gs_sel;
 	int i;
 
-	if (vmx->host_state.loaded)
+	if (vmx->loaded_cpu_state)
 		return;
 
-	vmx->host_state.loaded = 1;
+	vmx->loaded_cpu_state = vmx->loaded_vmcs;
+	host_state = &vmx->loaded_cpu_state->host_state;
+
 	/*
 	 * Set host fs and gs selectors.  Unfortunately, 22.2.3 does not
 	 * allow segment selectors with cpl > 0 or ti == 1.
 	 */
-	vmx->host_state.ldt_sel = kvm_read_ldt();
-	vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel;
+	host_state->ldt_sel = kvm_read_ldt();
 
 #ifdef CONFIG_X86_64
+	savesegment(ds, host_state->ds_sel);
+	savesegment(es, host_state->es_sel);
+
+	gs_base = cpu_kernelmode_gs_base(cpu);
 	if (likely(is_64bit_mm(current->mm))) {
 		save_fsgs_for_kvm();
-		vmx->host_state.fs_sel = current->thread.fsindex;
-		vmx->host_state.gs_sel = current->thread.gsindex;
+		fs_sel = current->thread.fsindex;
+		gs_sel = current->thread.gsindex;
 		fs_base = current->thread.fsbase;
-		kernel_gs_base = current->thread.gsbase;
+		vmx->msr_host_kernel_gs_base = current->thread.gsbase;
 	} else {
-#endif
-		savesegment(fs, vmx->host_state.fs_sel);
-		savesegment(gs, vmx->host_state.gs_sel);
-#ifdef CONFIG_X86_64
+		savesegment(fs, fs_sel);
+		savesegment(gs, gs_sel);
 		fs_base = read_msr(MSR_FS_BASE);
-		kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE);
-	}
-#endif
-	if (!(vmx->host_state.fs_sel & 7)) {
-		vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel);
-		vmx->host_state.fs_reload_needed = 0;
-	} else {
-		vmcs_write16(HOST_FS_SELECTOR, 0);
-		vmx->host_state.fs_reload_needed = 1;
-	}
-	if (!(vmx->host_state.gs_sel & 7))
-		vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel);
-	else {
-		vmcs_write16(HOST_GS_SELECTOR, 0);
-		vmx->host_state.gs_ldt_reload_needed = 1;
+		vmx->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE);
 	}
 
-#ifdef CONFIG_X86_64
-	savesegment(ds, vmx->host_state.ds_sel);
-	savesegment(es, vmx->host_state.es_sel);
-
-	vmcs_writel(HOST_FS_BASE, fs_base);
-	vmcs_writel(HOST_GS_BASE, cpu_kernelmode_gs_base(cpu));
-
-	vmx->msr_host_kernel_gs_base = kernel_gs_base;
 	if (is_long_mode(&vmx->vcpu))
 		wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
 #else
-	vmcs_writel(HOST_FS_BASE, segment_base(vmx->host_state.fs_sel));
-	vmcs_writel(HOST_GS_BASE, segment_base(vmx->host_state.gs_sel));
+	savesegment(fs, fs_sel);
+	savesegment(gs, gs_sel);
+	fs_base = segment_base(fs_sel);
+	gs_base = segment_base(gs_sel);
 #endif
-	if (boot_cpu_has(X86_FEATURE_MPX))
-		rdmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs);
+
+	if (unlikely(fs_sel != host_state->fs_sel)) {
+		if (!(fs_sel & 7))
+			vmcs_write16(HOST_FS_SELECTOR, fs_sel);
+		else
+			vmcs_write16(HOST_FS_SELECTOR, 0);
+		host_state->fs_sel = fs_sel;
+	}
+	if (unlikely(gs_sel != host_state->gs_sel)) {
+		if (!(gs_sel & 7))
+			vmcs_write16(HOST_GS_SELECTOR, gs_sel);
+		else
+			vmcs_write16(HOST_GS_SELECTOR, 0);
+		host_state->gs_sel = gs_sel;
+	}
+	if (unlikely(fs_base != host_state->fs_base)) {
+		vmcs_writel(HOST_FS_BASE, fs_base);
+		host_state->fs_base = fs_base;
+	}
+	if (unlikely(gs_base != host_state->gs_base)) {
+		vmcs_writel(HOST_GS_BASE, gs_base);
+		host_state->gs_base = gs_base;
+	}
+
 	for (i = 0; i < vmx->save_nmsrs; ++i)
 		kvm_set_shared_msr(vmx->guest_msrs[i].index,
 				   vmx->guest_msrs[i].data,
 				   vmx->guest_msrs[i].mask);
 }
 
-static void __vmx_load_host_state(struct vcpu_vmx *vmx)
+static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx)
 {
-	if (!vmx->host_state.loaded)
+	struct vmcs_host_state *host_state;
+
+	if (!vmx->loaded_cpu_state)
 		return;
 
+	WARN_ON_ONCE(vmx->loaded_cpu_state != vmx->loaded_vmcs);
+	host_state = &vmx->loaded_cpu_state->host_state;
+
 	++vmx->vcpu.stat.host_state_reload;
-	vmx->host_state.loaded = 0;
+	vmx->loaded_cpu_state = NULL;
+
 #ifdef CONFIG_X86_64
 	if (is_long_mode(&vmx->vcpu))
 		rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
 #endif
-	if (vmx->host_state.gs_ldt_reload_needed) {
-		kvm_load_ldt(vmx->host_state.ldt_sel);
+	if (host_state->ldt_sel || (host_state->gs_sel & 7)) {
+		kvm_load_ldt(host_state->ldt_sel);
 #ifdef CONFIG_X86_64
-		load_gs_index(vmx->host_state.gs_sel);
+		load_gs_index(host_state->gs_sel);
 #else
-		loadsegment(gs, vmx->host_state.gs_sel);
+		loadsegment(gs, host_state->gs_sel);
 #endif
 	}
-	if (vmx->host_state.fs_reload_needed)
-		loadsegment(fs, vmx->host_state.fs_sel);
+	if (host_state->fs_sel & 7)
+		loadsegment(fs, host_state->fs_sel);
 #ifdef CONFIG_X86_64
-	if (unlikely(vmx->host_state.ds_sel | vmx->host_state.es_sel)) {
-		loadsegment(ds, vmx->host_state.ds_sel);
-		loadsegment(es, vmx->host_state.es_sel);
+	if (unlikely(host_state->ds_sel | host_state->es_sel)) {
+		loadsegment(ds, host_state->ds_sel);
+		loadsegment(es, host_state->es_sel);
 	}
 #endif
 	invalidate_tss_limit();
 #ifdef CONFIG_X86_64
 	wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
 #endif
-	if (vmx->host_state.msr_host_bndcfgs)
-		wrmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs);
 	load_fixmap_gdt(raw_smp_processor_id());
 }
 
-static void vmx_load_host_state(struct vcpu_vmx *vmx)
+#ifdef CONFIG_X86_64
+static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx)
 {
-	preempt_disable();
-	__vmx_load_host_state(vmx);
-	preempt_enable();
+	if (is_long_mode(&vmx->vcpu)) {
+		preempt_disable();
+		if (vmx->loaded_cpu_state)
+			rdmsrl(MSR_KERNEL_GS_BASE,
+			       vmx->msr_guest_kernel_gs_base);
+		preempt_enable();
+	}
+	return vmx->msr_guest_kernel_gs_base;
 }
 
+static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data)
+{
+	if (is_long_mode(&vmx->vcpu)) {
+		preempt_disable();
+		if (vmx->loaded_cpu_state)
+			wrmsrl(MSR_KERNEL_GS_BASE, data);
+		preempt_enable();
+	}
+	vmx->msr_guest_kernel_gs_base = data;
+}
+#endif
+
 static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
@@ -2991,7 +3138,7 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
 {
 	vmx_vcpu_pi_put(vcpu);
 
-	__vmx_load_host_state(to_vmx(vcpu));
+	vmx_prepare_switch_to_host(to_vmx(vcpu));
 }
 
 static bool emulation_required(struct kvm_vcpu *vcpu)
@@ -3212,7 +3359,7 @@ static bool vmx_rdtscp_supported(void)
 
 static bool vmx_invpcid_supported(void)
 {
-	return cpu_has_vmx_invpcid() && enable_ept;
+	return cpu_has_vmx_invpcid();
 }
 
 /*
@@ -3455,6 +3602,12 @@ static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv)
 		SECONDARY_EXEC_APIC_REGISTER_VIRT |
 		SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
 		SECONDARY_EXEC_WBINVD_EXITING;
+	/*
+	 * We can emulate "VMCS shadowing," even if the hardware
+	 * doesn't support it.
+	 */
+	msrs->secondary_ctls_high |=
+		SECONDARY_EXEC_SHADOW_VMCS;
 
 	if (enable_ept) {
 		/* nested EPT: emulate EPT also to L1 */
@@ -3922,8 +4075,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		msr_info->data = vmcs_readl(GUEST_GS_BASE);
 		break;
 	case MSR_KERNEL_GS_BASE:
-		vmx_load_host_state(vmx);
-		msr_info->data = vmx->msr_guest_kernel_gs_base;
+		msr_info->data = vmx_read_guest_kernel_gs_base(vmx);
 		break;
 #endif
 	case MSR_EFER:
@@ -4023,8 +4175,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		vmcs_writel(GUEST_GS_BASE, data);
 		break;
 	case MSR_KERNEL_GS_BASE:
-		vmx_load_host_state(vmx);
-		vmx->msr_guest_kernel_gs_base = data;
+		vmx_write_guest_kernel_gs_base(vmx, data);
 		break;
 #endif
 	case MSR_IA32_SYSENTER_CS:
@@ -4412,7 +4563,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 			SECONDARY_EXEC_RDRAND_EXITING |
 			SECONDARY_EXEC_ENABLE_PML |
 			SECONDARY_EXEC_TSC_SCALING |
-			SECONDARY_EXEC_ENABLE_VMFUNC;
+			SECONDARY_EXEC_ENABLE_VMFUNC |
+			SECONDARY_EXEC_ENCLS_EXITING;
 		if (adjust_vmx_controls(min2, opt2,
 					MSR_IA32_VMX_PROCBASED_CTLS2,
 					&_cpu_based_2nd_exec_control) < 0)
@@ -4559,7 +4711,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 	return 0;
 }
 
-static struct vmcs *alloc_vmcs_cpu(int cpu)
+static struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu)
 {
 	int node = cpu_to_node(cpu);
 	struct page *pages;
@@ -4573,10 +4725,12 @@ static struct vmcs *alloc_vmcs_cpu(int cpu)
 
 	/* KVM supports Enlightened VMCS v1 only */
 	if (static_branch_unlikely(&enable_evmcs))
-		vmcs->revision_id = KVM_EVMCS_VERSION;
+		vmcs->hdr.revision_id = KVM_EVMCS_VERSION;
 	else
-		vmcs->revision_id = vmcs_config.revision_id;
+		vmcs->hdr.revision_id = vmcs_config.revision_id;
 
+	if (shadow)
+		vmcs->hdr.shadow_vmcs = 1;
 	return vmcs;
 }
 
@@ -4600,14 +4754,14 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
 	WARN_ON(loaded_vmcs->shadow_vmcs != NULL);
 }
 
-static struct vmcs *alloc_vmcs(void)
+static struct vmcs *alloc_vmcs(bool shadow)
 {
-	return alloc_vmcs_cpu(raw_smp_processor_id());
+	return alloc_vmcs_cpu(shadow, raw_smp_processor_id());
 }
 
 static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
 {
-	loaded_vmcs->vmcs = alloc_vmcs();
+	loaded_vmcs->vmcs = alloc_vmcs(false);
 	if (!loaded_vmcs->vmcs)
 		return -ENOMEM;
 
@@ -4629,6 +4783,9 @@ static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
 			evmcs->hv_enlightenments_control.msr_bitmap = 1;
 		}
 	}
+
+	memset(&loaded_vmcs->host_state, 0, sizeof(struct vmcs_host_state));
+
 	return 0;
 
 out_vmcs:
@@ -4738,7 +4895,7 @@ static __init int alloc_kvm_area(void)
 	for_each_possible_cpu(cpu) {
 		struct vmcs *vmcs;
 
-		vmcs = alloc_vmcs_cpu(cpu);
+		vmcs = alloc_vmcs_cpu(false, cpu);
 		if (!vmcs) {
 			free_kvm_area();
 			return -ENOMEM;
@@ -4755,7 +4912,7 @@ static __init int alloc_kvm_area(void)
 		 * physical CPU.
 		 */
 		if (static_branch_unlikely(&enable_evmcs))
-			vmcs->revision_id = vmcs_config.revision_id;
+			vmcs->hdr.revision_id = vmcs_config.revision_id;
 
 		per_cpu(vmxarea, cpu) = vmcs;
 	}
@@ -4912,10 +5069,18 @@ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
 		return;
 
 	/*
-	 * Force kernel_gs_base reloading before EFER changes, as control
-	 * of this msr depends on is_long_mode().
+	 * MSR_KERNEL_GS_BASE is not intercepted when the guest is in
+	 * 64-bit mode as a 64-bit kernel may frequently access the
+	 * MSR.  This means we need to manually save/restore the MSR
+	 * when switching between guest and host state, but only if
+	 * the guest is in 64-bit mode.  Sync our cached value if the
+	 * guest is transitioning to 32-bit mode and the CPU contains
+	 * guest state, i.e. the cache is stale.
 	 */
-	vmx_load_host_state(to_vmx(vcpu));
+#ifdef CONFIG_X86_64
+	if (!(efer & EFER_LMA))
+		(void)vmx_read_guest_kernel_gs_base(vmx);
+#endif
 	vcpu->arch.efer = efer;
 	if (efer & EFER_LMA) {
 		vm_entry_controls_setbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
@@ -4972,6 +5137,20 @@ static void vmx_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
 	__vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid, invalidate_gpa);
 }
 
+static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr)
+{
+	int vpid = to_vmx(vcpu)->vpid;
+
+	if (!vpid_sync_vcpu_addr(vpid, addr))
+		vpid_sync_context(vpid);
+
+	/*
+	 * If VPIDs are not supported or enabled, then the above is a no-op.
+	 * But we don't really need a TLB flush in that case anyway, because
+	 * each VM entry/exit includes an implicit flush when VPID is 0.
+	 */
+}
+
 static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
 {
 	ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits;
@@ -5153,6 +5332,7 @@ static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa)
 
 static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 {
+	struct kvm *kvm = vcpu->kvm;
 	unsigned long guest_cr3;
 	u64 eptp;
 
@@ -5160,15 +5340,23 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 	if (enable_ept) {
 		eptp = construct_eptp(vcpu, cr3);
 		vmcs_write64(EPT_POINTER, eptp);
+
+		if (kvm_x86_ops->tlb_remote_flush) {
+			spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock);
+			to_vmx(vcpu)->ept_pointer = eptp;
+			to_kvm_vmx(kvm)->ept_pointers_match
+				= EPT_POINTERS_CHECK;
+			spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
+		}
+
 		if (enable_unrestricted_guest || is_paging(vcpu) ||
 		    is_guest_mode(vcpu))
 			guest_cr3 = kvm_read_cr3(vcpu);
 		else
-			guest_cr3 = to_kvm_vmx(vcpu->kvm)->ept_identity_map_addr;
+			guest_cr3 = to_kvm_vmx(kvm)->ept_identity_map_addr;
 		ept_load_pdptrs(vcpu);
 	}
 
-	vmx_flush_tlb(vcpu, true);
 	vmcs_writel(GUEST_CR3, guest_cr3);
 }
 
@@ -6104,19 +6292,19 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
 	 */
 	cr3 = __read_cr3();
 	vmcs_writel(HOST_CR3, cr3);		/* 22.2.3  FIXME: shadow tables */
-	vmx->loaded_vmcs->vmcs_host_cr3 = cr3;
+	vmx->loaded_vmcs->host_state.cr3 = cr3;
 
 	/* Save the most likely value for this task's CR4 in the VMCS. */
 	cr4 = cr4_read_shadow();
 	vmcs_writel(HOST_CR4, cr4);			/* 22.2.3, 22.2.5 */
-	vmx->loaded_vmcs->vmcs_host_cr4 = cr4;
+	vmx->loaded_vmcs->host_state.cr4 = cr4;
 
 	vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS);  /* 22.2.4 */
 #ifdef CONFIG_X86_64
 	/*
 	 * Load null selectors, so we can avoid reloading them in
-	 * __vmx_load_host_state(), in case userspace uses the null selectors
-	 * too (the expected case).
+	 * vmx_prepare_switch_to_host(), in case userspace uses
+	 * the null selectors too (the expected case).
 	 */
 	vmcs_write16(HOST_DS_SELECTOR, 0);
 	vmcs_write16(HOST_ES_SELECTOR, 0);
@@ -6241,8 +6429,6 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
 	if (!enable_ept) {
 		exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
 		enable_unrestricted_guest = 0;
-		/* Enable INVPCID for non-ept guests may cause performance regression. */
-		exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
 	}
 	if (!enable_unrestricted_guest)
 		exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
@@ -6371,9 +6557,6 @@ static void ept_set_mmio_spte_mask(void)
  */
 static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
 {
-#ifdef CONFIG_X86_64
-	unsigned long a;
-#endif
 	int i;
 
 	if (enable_shadow_vmcs) {
@@ -6428,15 +6611,8 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	vmcs_write16(HOST_FS_SELECTOR, 0);            /* 22.2.4 */
 	vmcs_write16(HOST_GS_SELECTOR, 0);            /* 22.2.4 */
 	vmx_set_constant_host_state(vmx);
-#ifdef CONFIG_X86_64
-	rdmsrl(MSR_FS_BASE, a);
-	vmcs_writel(HOST_FS_BASE, a); /* 22.2.4 */
-	rdmsrl(MSR_GS_BASE, a);
-	vmcs_writel(HOST_GS_BASE, a); /* 22.2.4 */
-#else
 	vmcs_writel(HOST_FS_BASE, 0); /* 22.2.4 */
 	vmcs_writel(HOST_GS_BASE, 0); /* 22.2.4 */
-#endif
 
 	if (cpu_has_vmx_vmfunc())
 		vmcs_write64(VM_FUNCTION_CONTROL, 0);
@@ -6485,6 +6661,9 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
 		vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
 		vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
 	}
+
+	if (cpu_has_vmx_encls_vmexit())
+		vmcs_write64(ENCLS_EXITING_BITMAP, -1ull);
 }
 
 static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
@@ -7670,6 +7849,7 @@ static void vmx_enable_tdp(void)
 
 static __init int hardware_setup(void)
 {
+	unsigned long host_bndcfgs;
 	int r = -ENOMEM, i;
 
 	rdmsrl_safe(MSR_EFER, &host_efer);
@@ -7694,6 +7874,11 @@ static __init int hardware_setup(void)
 	if (boot_cpu_has(X86_FEATURE_NX))
 		kvm_enable_efer_bits(EFER_NX);
 
+	if (boot_cpu_has(X86_FEATURE_MPX)) {
+		rdmsrl(MSR_IA32_BNDCFGS, host_bndcfgs);
+		WARN_ONCE(host_bndcfgs, "KVM: BNDCFGS in host will be lost");
+	}
+
 	if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() ||
 		!(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global()))
 		enable_vpid = 0;
@@ -7730,6 +7915,12 @@ static __init int hardware_setup(void)
 	if (enable_ept && !cpu_has_vmx_ept_2m_page())
 		kvm_disable_largepages();
 
+#if IS_ENABLED(CONFIG_HYPERV)
+	if (ms_hyperv.nested_features & HV_X64_NESTED_GUEST_MAPPING_FLUSH
+	    && enable_ept)
+		kvm_x86_ops->tlb_remote_flush = vmx_hv_remote_flush_tlb;
+#endif
+
 	if (!cpu_has_vmx_ple()) {
 		ple_gap = 0;
 		ple_window = 0;
@@ -7756,6 +7947,11 @@ static __init int hardware_setup(void)
 	else
 		kvm_disable_tdp();
 
+	if (!nested) {
+		kvm_x86_ops->get_nested_state = NULL;
+		kvm_x86_ops->set_nested_state = NULL;
+	}
+
 	/*
 	 * Only enable PML when hardware supports PML feature, and both EPT
 	 * and EPT A/D bit features are enabled -- PML depends on them to work.
@@ -8032,10 +8228,35 @@ static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer)
 	return 0;
 }
 
+/*
+ * Allocate a shadow VMCS and associate it with the currently loaded
+ * VMCS, unless such a shadow VMCS already exists. The newly allocated
+ * VMCS is also VMCLEARed, so that it is ready for use.
+ */
+static struct vmcs *alloc_shadow_vmcs(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct loaded_vmcs *loaded_vmcs = vmx->loaded_vmcs;
+
+	/*
+	 * We should allocate a shadow vmcs for vmcs01 only when L1
+	 * executes VMXON and free it when L1 executes VMXOFF.
+	 * As it is invalid to execute VMXON twice, we shouldn't reach
+	 * here when vmcs01 already have an allocated shadow vmcs.
+	 */
+	WARN_ON(loaded_vmcs == &vmx->vmcs01 && loaded_vmcs->shadow_vmcs);
+
+	if (!loaded_vmcs->shadow_vmcs) {
+		loaded_vmcs->shadow_vmcs = alloc_vmcs(true);
+		if (loaded_vmcs->shadow_vmcs)
+			vmcs_clear(loaded_vmcs->shadow_vmcs);
+	}
+	return loaded_vmcs->shadow_vmcs;
+}
+
 static int enter_vmx_operation(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	struct vmcs *shadow_vmcs;
 	int r;
 
 	r = alloc_loaded_vmcs(&vmx->nested.vmcs02);
@@ -8046,16 +8267,12 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
 	if (!vmx->nested.cached_vmcs12)
 		goto out_cached_vmcs12;
 
-	if (enable_shadow_vmcs) {
-		shadow_vmcs = alloc_vmcs();
-		if (!shadow_vmcs)
-			goto out_shadow_vmcs;
-		/* mark vmcs as shadow */
-		shadow_vmcs->revision_id |= (1u << 31);
-		/* init shadow vmcs */
-		vmcs_clear(shadow_vmcs);
-		vmx->vmcs01.shadow_vmcs = shadow_vmcs;
-	}
+	vmx->nested.cached_shadow_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL);
+	if (!vmx->nested.cached_shadow_vmcs12)
+		goto out_cached_shadow_vmcs12;
+
+	if (enable_shadow_vmcs && !alloc_shadow_vmcs(vcpu))
+		goto out_shadow_vmcs;
 
 	hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
 		     HRTIMER_MODE_REL_PINNED);
@@ -8067,6 +8284,9 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
 	return 0;
 
 out_shadow_vmcs:
+	kfree(vmx->nested.cached_shadow_vmcs12);
+
+out_cached_shadow_vmcs12:
 	kfree(vmx->nested.cached_vmcs12);
 
 out_cached_vmcs12:
@@ -8109,7 +8329,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
 
 	/* CPL=0 must be checked manually. */
 	if (vmx_get_cpl(vcpu)) {
-		kvm_queue_exception(vcpu, UD_VECTOR);
+		kvm_inject_gp(vcpu, 0);
 		return 1;
 	}
 
@@ -8172,15 +8392,16 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
  */
 static int nested_vmx_check_permission(struct kvm_vcpu *vcpu)
 {
-	if (vmx_get_cpl(vcpu)) {
+	if (!to_vmx(vcpu)->nested.vmxon) {
 		kvm_queue_exception(vcpu, UD_VECTOR);
 		return 0;
 	}
 
-	if (!to_vmx(vcpu)->nested.vmxon) {
-		kvm_queue_exception(vcpu, UD_VECTOR);
+	if (vmx_get_cpl(vcpu)) {
+		kvm_inject_gp(vcpu, 0);
 		return 0;
 	}
+
 	return 1;
 }
 
@@ -8233,6 +8454,7 @@ static void free_nested(struct vcpu_vmx *vmx)
 		vmx->vmcs01.shadow_vmcs = NULL;
 	}
 	kfree(vmx->nested.cached_vmcs12);
+	kfree(vmx->nested.cached_shadow_vmcs12);
 	/* Unpin physical memory we referred to in the vmcs02 */
 	if (vmx->nested.apic_access_page) {
 		kvm_release_page_dirty(vmx->nested.apic_access_page);
@@ -8318,7 +8540,7 @@ static int handle_vmresume(struct kvm_vcpu *vcpu)
  * some of the bits we return here (e.g., on 32-bit guests, only 32 bits of
  * 64-bit fields are to be returned).
  */
-static inline int vmcs12_read_any(struct kvm_vcpu *vcpu,
+static inline int vmcs12_read_any(struct vmcs12 *vmcs12,
 				  unsigned long field, u64 *ret)
 {
 	short offset = vmcs_field_to_offset(field);
@@ -8327,7 +8549,7 @@ static inline int vmcs12_read_any(struct kvm_vcpu *vcpu,
 	if (offset < 0)
 		return offset;
 
-	p = ((char *)(get_vmcs12(vcpu))) + offset;
+	p = (char *)vmcs12 + offset;
 
 	switch (vmcs_field_width(field)) {
 	case VMCS_FIELD_WIDTH_NATURAL_WIDTH:
@@ -8349,10 +8571,10 @@ static inline int vmcs12_read_any(struct kvm_vcpu *vcpu,
 }
 
 
-static inline int vmcs12_write_any(struct kvm_vcpu *vcpu,
+static inline int vmcs12_write_any(struct vmcs12 *vmcs12,
 				   unsigned long field, u64 field_value){
 	short offset = vmcs_field_to_offset(field);
-	char *p = ((char *) get_vmcs12(vcpu)) + offset;
+	char *p = (char *)vmcs12 + offset;
 	if (offset < 0)
 		return offset;
 
@@ -8405,7 +8627,7 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
 		for (i = 0; i < max_fields[q]; i++) {
 			field = fields[q][i];
 			field_value = __vmcs_readl(field);
-			vmcs12_write_any(&vmx->vcpu, field, field_value);
+			vmcs12_write_any(get_vmcs12(&vmx->vcpu), field, field_value);
 		}
 		/*
 		 * Skip the VM-exit information fields if they are read-only.
@@ -8440,7 +8662,7 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
 	for (q = 0; q < ARRAY_SIZE(fields); q++) {
 		for (i = 0; i < max_fields[q]; i++) {
 			field = fields[q][i];
-			vmcs12_read_any(&vmx->vcpu, field, &field_value);
+			vmcs12_read_any(get_vmcs12(&vmx->vcpu), field, &field_value);
 			__vmcs_writel(field, field_value);
 		}
 	}
@@ -8470,6 +8692,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
 	unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 	u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
 	gva_t gva = 0;
+	struct vmcs12 *vmcs12;
 
 	if (!nested_vmx_check_permission(vcpu))
 		return 1;
@@ -8477,10 +8700,24 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
 	if (!nested_vmx_check_vmcs12(vcpu))
 		return kvm_skip_emulated_instruction(vcpu);
 
+	if (!is_guest_mode(vcpu))
+		vmcs12 = get_vmcs12(vcpu);
+	else {
+		/*
+		 * When vmcs->vmcs_link_pointer is -1ull, any VMREAD
+		 * to shadowed-field sets the ALU flags for VMfailInvalid.
+		 */
+		if (get_vmcs12(vcpu)->vmcs_link_pointer == -1ull) {
+			nested_vmx_failInvalid(vcpu);
+			return kvm_skip_emulated_instruction(vcpu);
+		}
+		vmcs12 = get_shadow_vmcs12(vcpu);
+	}
+
 	/* Decode instruction info and find the field to read */
 	field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
 	/* Read the field, zero-extended to a u64 field_value */
-	if (vmcs12_read_any(vcpu, field, &field_value) < 0) {
+	if (vmcs12_read_any(vmcs12, field, &field_value) < 0) {
 		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
 		return kvm_skip_emulated_instruction(vcpu);
 	}
@@ -8522,6 +8759,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
 	 */
 	u64 field_value = 0;
 	struct x86_exception e;
+	struct vmcs12 *vmcs12;
 
 	if (!nested_vmx_check_permission(vcpu))
 		return 1;
@@ -8556,23 +8794,44 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
 		return kvm_skip_emulated_instruction(vcpu);
 	}
 
-	if (vmcs12_write_any(vcpu, field, field_value) < 0) {
+	if (!is_guest_mode(vcpu))
+		vmcs12 = get_vmcs12(vcpu);
+	else {
+		/*
+		 * When vmcs->vmcs_link_pointer is -1ull, any VMWRITE
+		 * to shadowed-field sets the ALU flags for VMfailInvalid.
+		 */
+		if (get_vmcs12(vcpu)->vmcs_link_pointer == -1ull) {
+			nested_vmx_failInvalid(vcpu);
+			return kvm_skip_emulated_instruction(vcpu);
+		}
+		vmcs12 = get_shadow_vmcs12(vcpu);
+
+	}
+
+	if (vmcs12_write_any(vmcs12, field, field_value) < 0) {
 		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
 		return kvm_skip_emulated_instruction(vcpu);
 	}
 
-	switch (field) {
+	/*
+	 * Do not track vmcs12 dirty-state if in guest-mode
+	 * as we actually dirty shadow vmcs12 instead of vmcs12.
+	 */
+	if (!is_guest_mode(vcpu)) {
+		switch (field) {
 #define SHADOW_FIELD_RW(x) case x:
 #include "vmx_shadow_fields.h"
-		/*
-		 * The fields that can be updated by L1 without a vmexit are
-		 * always updated in the vmcs02, the others go down the slow
-		 * path of prepare_vmcs02.
-		 */
-		break;
-	default:
-		vmx->nested.dirty_vmcs12 = true;
-		break;
+			/*
+			 * The fields that can be updated by L1 without a vmexit are
+			 * always updated in the vmcs02, the others go down the slow
+			 * path of prepare_vmcs02.
+			 */
+			break;
+		default:
+			vmx->nested.dirty_vmcs12 = true;
+			break;
+		}
 	}
 
 	nested_vmx_succeed(vcpu);
@@ -8623,7 +8882,9 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
 			return kvm_skip_emulated_instruction(vcpu);
 		}
 		new_vmcs12 = kmap(page);
-		if (new_vmcs12->revision_id != VMCS12_REVISION) {
+		if (new_vmcs12->hdr.revision_id != VMCS12_REVISION ||
+		    (new_vmcs12->hdr.shadow_vmcs &&
+		     !nested_cpu_has_vmx_shadow_vmcs(vcpu))) {
 			kunmap(page);
 			kvm_release_page_clean(page);
 			nested_vmx_failValid(vcpu,
@@ -8821,6 +9082,105 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
 	return kvm_skip_emulated_instruction(vcpu);
 }
 
+static int handle_invpcid(struct kvm_vcpu *vcpu)
+{
+	u32 vmx_instruction_info;
+	unsigned long type;
+	bool pcid_enabled;
+	gva_t gva;
+	struct x86_exception e;
+	unsigned i;
+	unsigned long roots_to_free = 0;
+	struct {
+		u64 pcid;
+		u64 gla;
+	} operand;
+
+	if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) {
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return 1;
+	}
+
+	vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+	type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
+
+	if (type > 3) {
+		kvm_inject_gp(vcpu, 0);
+		return 1;
+	}
+
+	/* According to the Intel instruction reference, the memory operand
+	 * is read even if it isn't needed (e.g., for type==all)
+	 */
+	if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
+				vmx_instruction_info, false, &gva))
+		return 1;
+
+	if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
+		kvm_inject_page_fault(vcpu, &e);
+		return 1;
+	}
+
+	if (operand.pcid >> 12 != 0) {
+		kvm_inject_gp(vcpu, 0);
+		return 1;
+	}
+
+	pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
+
+	switch (type) {
+	case INVPCID_TYPE_INDIV_ADDR:
+		if ((!pcid_enabled && (operand.pcid != 0)) ||
+		    is_noncanonical_address(operand.gla, vcpu)) {
+			kvm_inject_gp(vcpu, 0);
+			return 1;
+		}
+		kvm_mmu_invpcid_gva(vcpu, operand.gla, operand.pcid);
+		return kvm_skip_emulated_instruction(vcpu);
+
+	case INVPCID_TYPE_SINGLE_CTXT:
+		if (!pcid_enabled && (operand.pcid != 0)) {
+			kvm_inject_gp(vcpu, 0);
+			return 1;
+		}
+
+		if (kvm_get_active_pcid(vcpu) == operand.pcid) {
+			kvm_mmu_sync_roots(vcpu);
+			kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+		}
+
+		for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
+			if (kvm_get_pcid(vcpu, vcpu->arch.mmu.prev_roots[i].cr3)
+			    == operand.pcid)
+				roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
+
+		kvm_mmu_free_roots(vcpu, roots_to_free);
+		/*
+		 * If neither the current cr3 nor any of the prev_roots use the
+		 * given PCID, then nothing needs to be done here because a
+		 * resync will happen anyway before switching to any other CR3.
+		 */
+
+		return kvm_skip_emulated_instruction(vcpu);
+
+	case INVPCID_TYPE_ALL_NON_GLOBAL:
+		/*
+		 * Currently, KVM doesn't mark global entries in the shadow
+		 * page tables, so a non-global flush just degenerates to a
+		 * global flush. If needed, we could optimize this later by
+		 * keeping track of global entries in shadow page tables.
+		 */
+
+		/* fall-through */
+	case INVPCID_TYPE_ALL_INCL_GLOBAL:
+		kvm_mmu_unload(vcpu);
+		return kvm_skip_emulated_instruction(vcpu);
+
+	default:
+		BUG(); /* We have already checked above that type <= 3 */
+	}
+}
+
 static int handle_pml_full(struct kvm_vcpu *vcpu)
 {
 	unsigned long exit_qualification;
@@ -8970,6 +9330,17 @@ fail:
 	return 1;
 }
 
+static int handle_encls(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * SGX virtualization is not yet supported.  There is no software
+	 * enable bit for SGX, so we have to trap ENCLS and inject a #UD
+	 * to prevent the guest from executing ENCLS.
+	 */
+	kvm_queue_exception(vcpu, UD_VECTOR);
+	return 1;
+}
+
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
@@ -9024,8 +9395,10 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[EXIT_REASON_XSAVES]                  = handle_xsaves,
 	[EXIT_REASON_XRSTORS]                 = handle_xrstors,
 	[EXIT_REASON_PML_FULL]		      = handle_pml_full,
+	[EXIT_REASON_INVPCID]                 = handle_invpcid,
 	[EXIT_REASON_VMFUNC]                  = handle_vmfunc,
 	[EXIT_REASON_PREEMPTION_TIMER]	      = handle_preemption_timer,
+	[EXIT_REASON_ENCLS]		      = handle_encls,
 };
 
 static const int kvm_vmx_max_exit_handlers =
@@ -9196,6 +9569,30 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
 	return false;
 }
 
+static bool nested_vmx_exit_handled_vmcs_access(struct kvm_vcpu *vcpu,
+	struct vmcs12 *vmcs12, gpa_t bitmap)
+{
+	u32 vmx_instruction_info;
+	unsigned long field;
+	u8 b;
+
+	if (!nested_cpu_has_shadow_vmcs(vmcs12))
+		return true;
+
+	/* Decode instruction info and find the field to access */
+	vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+	field = kvm_register_read(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
+
+	/* Out-of-range fields always cause a VM exit from L2 to L1 */
+	if (field >> 15)
+		return true;
+
+	if (kvm_vcpu_read_guest(vcpu, bitmap + field/8, &b, 1))
+		return true;
+
+	return 1 & (b >> (field & 7));
+}
+
 /*
  * Return 1 if we should exit from L2 to L1 to handle an exit, or 0 if we
  * should handle it ourselves in L0 (and then continue L2). Only call this
@@ -9280,10 +9677,15 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
 		return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDSEED_EXITING);
 	case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP:
 		return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING);
+	case EXIT_REASON_VMREAD:
+		return nested_vmx_exit_handled_vmcs_access(vcpu, vmcs12,
+			vmcs12->vmread_bitmap);
+	case EXIT_REASON_VMWRITE:
+		return nested_vmx_exit_handled_vmcs_access(vcpu, vmcs12,
+			vmcs12->vmwrite_bitmap);
 	case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR:
 	case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD:
-	case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD:
-	case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE:
+	case EXIT_REASON_VMPTRST: case EXIT_REASON_VMRESUME:
 	case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
 	case EXIT_REASON_INVEPT: case EXIT_REASON_INVVPID:
 		/*
@@ -9367,6 +9769,9 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
 	case EXIT_REASON_VMFUNC:
 		/* VM functions are emulated through L2->L0 vmexits. */
 		return false;
+	case EXIT_REASON_ENCLS:
+		/* SGX is never exposed to L1 */
+		return false;
 	default:
 		return true;
 	}
@@ -10244,15 +10649,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
 
 	cr3 = __get_current_cr3_fast();
-	if (unlikely(cr3 != vmx->loaded_vmcs->vmcs_host_cr3)) {
+	if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
 		vmcs_writel(HOST_CR3, cr3);
-		vmx->loaded_vmcs->vmcs_host_cr3 = cr3;
+		vmx->loaded_vmcs->host_state.cr3 = cr3;
 	}
 
 	cr4 = cr4_read_shadow();
-	if (unlikely(cr4 != vmx->loaded_vmcs->vmcs_host_cr4)) {
+	if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
 		vmcs_writel(HOST_CR4, cr4);
-		vmx->loaded_vmcs->vmcs_host_cr4 = cr4;
+		vmx->loaded_vmcs->host_state.cr4 = cr4;
 	}
 
 	/* When single-stepping over STI and MOV SS, we must clear the
@@ -10448,9 +10853,9 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	 * The sysexit path does not restore ds/es, so we must set them to
 	 * a reasonable value ourselves.
 	 *
-	 * We can't defer this to vmx_load_host_state() since that function
-	 * may be executed in interrupt context, which saves and restore segments
-	 * around it, nullifying its effect.
+	 * We can't defer this to vmx_prepare_switch_to_host() since that
+	 * function may be executed in interrupt context, which saves and
+	 * restore segments around it, nullifying its effect.
 	 */
 	loadsegment(ds, __USER_DS);
 	loadsegment(es, __USER_DS);
@@ -10511,8 +10916,8 @@ static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
 		return;
 
 	cpu = get_cpu();
-	vmx->loaded_vmcs = vmcs;
 	vmx_vcpu_put(vcpu);
+	vmx->loaded_vmcs = vmcs;
 	vmx_vcpu_load(vcpu, cpu);
 	put_cpu();
 }
@@ -10652,6 +11057,8 @@ free_vcpu:
 
 static int vmx_vm_init(struct kvm *kvm)
 {
+	spin_lock_init(&to_kvm_vmx(kvm)->ept_pointer_lock);
+
 	if (!ple_gap)
 		kvm->arch.pause_in_guest = true;
 
@@ -10876,11 +11283,11 @@ static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
 	if (!valid_ept_address(vcpu, nested_ept_get_cr3(vcpu)))
 		return 1;
 
-	kvm_mmu_unload(vcpu);
 	kvm_init_shadow_ept_mmu(vcpu,
 			to_vmx(vcpu)->nested.msrs.ept_caps &
 			VMX_EPT_EXECUTE_ONLY_BIT,
-			nested_ept_ad_enabled(vcpu));
+			nested_ept_ad_enabled(vcpu),
+			nested_ept_get_cr3(vcpu));
 	vcpu->arch.mmu.set_cr3           = vmx_set_cr3;
 	vcpu->arch.mmu.get_cr3           = nested_ept_get_cr3;
 	vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault;
@@ -10928,9 +11335,9 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
 static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
 						 struct vmcs12 *vmcs12);
 
-static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
-					struct vmcs12 *vmcs12)
+static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
 {
+	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	struct page *page;
 	u64 hpa;
@@ -11171,6 +11578,38 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
 	return true;
 }
 
+static void nested_cache_shadow_vmcs12(struct kvm_vcpu *vcpu,
+				       struct vmcs12 *vmcs12)
+{
+	struct vmcs12 *shadow;
+	struct page *page;
+
+	if (!nested_cpu_has_shadow_vmcs(vmcs12) ||
+	    vmcs12->vmcs_link_pointer == -1ull)
+		return;
+
+	shadow = get_shadow_vmcs12(vcpu);
+	page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->vmcs_link_pointer);
+
+	memcpy(shadow, kmap(page), VMCS12_SIZE);
+
+	kunmap(page);
+	kvm_release_page_clean(page);
+}
+
+static void nested_flush_cached_shadow_vmcs12(struct kvm_vcpu *vcpu,
+					      struct vmcs12 *vmcs12)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	if (!nested_cpu_has_shadow_vmcs(vmcs12) ||
+	    vmcs12->vmcs_link_pointer == -1ull)
+		return;
+
+	kvm_write_guest(vmx->vcpu.kvm, vmcs12->vmcs_link_pointer,
+			get_shadow_vmcs12(vcpu), VMCS12_SIZE);
+}
+
 static int nested_vmx_check_apic_access_controls(struct kvm_vcpu *vcpu,
 					  struct vmcs12 *vmcs12)
 {
@@ -11228,11 +11667,12 @@ static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu,
 				       unsigned long count_field,
 				       unsigned long addr_field)
 {
+	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
 	int maxphyaddr;
 	u64 count, addr;
 
-	if (vmcs12_read_any(vcpu, count_field, &count) ||
-	    vmcs12_read_any(vcpu, addr_field, &addr)) {
+	if (vmcs12_read_any(vmcs12, count_field, &count) ||
+	    vmcs12_read_any(vmcs12, addr_field, &addr)) {
 		WARN_ON(1);
 		return -EINVAL;
 	}
@@ -11282,6 +11722,19 @@ static int nested_vmx_check_pml_controls(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
+static int nested_vmx_check_shadow_vmcs_controls(struct kvm_vcpu *vcpu,
+						 struct vmcs12 *vmcs12)
+{
+	if (!nested_cpu_has_shadow_vmcs(vmcs12))
+		return 0;
+
+	if (!page_address_valid(vcpu, vmcs12->vmread_bitmap) ||
+	    !page_address_valid(vcpu, vmcs12->vmwrite_bitmap))
+		return -EINVAL;
+
+	return 0;
+}
+
 static int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu,
 				       struct vmx_msr_entry *e)
 {
@@ -11431,12 +11884,16 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne
 				return 1;
 			}
 		}
-
-		vcpu->arch.cr3 = cr3;
-		__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
 	}
 
-	kvm_mmu_reset_context(vcpu);
+	if (!nested_ept)
+		kvm_mmu_new_cr3(vcpu, cr3, false);
+
+	vcpu->arch.cr3 = cr3;
+	__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
+
+	kvm_init_mmu(vcpu, false);
+
 	return 0;
 }
 
@@ -11523,7 +11980,8 @@ static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	 * Set host-state according to L0's settings (vmcs12 is irrelevant here)
 	 * Some constant fields are set here by vmx_set_constant_host_state().
 	 * Other fields are different per CPU, and will be set later when
-	 * vmx_vcpu_load() is called, and when vmx_save_host_state() is called.
+	 * vmx_vcpu_load() is called, and when vmx_prepare_switch_to_guest()
+	 * is called.
 	 */
 	vmx_set_constant_host_state(vmx);
 
@@ -11595,11 +12053,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 	vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base);
 	vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base);
 
-	/*
-	 * Not in vmcs02: GUEST_PML_INDEX, HOST_FS_SELECTOR, HOST_GS_SELECTOR,
-	 * HOST_FS_BASE, HOST_GS_BASE.
-	 */
-
 	if (vmx->nested.nested_run_pending &&
 	    (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
 		kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
@@ -11664,6 +12117,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 			exec_control |= vmcs12_exec_ctrl;
 		}
 
+		/* VMCS shadowing for L2 is emulated for now */
+		exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
+
 		if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
 			vmcs_write16(GUEST_INTR_STATUS,
 				vmcs12->guest_intr_status);
@@ -11676,6 +12132,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 		if (exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)
 			vmcs_write64(APIC_ACCESS_ADDR, -1ull);
 
+		if (exec_control & SECONDARY_EXEC_ENCLS_EXITING)
+			vmcs_write64(ENCLS_EXITING_BITMAP, -1ull);
+
 		vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
 	}
 
@@ -11883,6 +12342,9 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	if (nested_vmx_check_pml_controls(vcpu, vmcs12))
 		return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
 
+	if (nested_vmx_check_shadow_vmcs_controls(vcpu, vmcs12))
+		return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
 	if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
 				vmx->nested.msrs.procbased_ctls_low,
 				vmx->nested.msrs.procbased_ctls_high) ||
@@ -11983,6 +12445,33 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	return 0;
 }
 
+static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu,
+					  struct vmcs12 *vmcs12)
+{
+	int r;
+	struct page *page;
+	struct vmcs12 *shadow;
+
+	if (vmcs12->vmcs_link_pointer == -1ull)
+		return 0;
+
+	if (!page_address_valid(vcpu, vmcs12->vmcs_link_pointer))
+		return -EINVAL;
+
+	page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->vmcs_link_pointer);
+	if (is_error_page(page))
+		return -EINVAL;
+
+	r = 0;
+	shadow = kmap(page);
+	if (shadow->hdr.revision_id != VMCS12_REVISION ||
+	    shadow->hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12))
+		r = -EINVAL;
+	kunmap(page);
+	kvm_release_page_clean(page);
+	return r;
+}
+
 static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 				  u32 *exit_qual)
 {
@@ -11994,8 +12483,7 @@ static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 	    !nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4))
 		return 1;
 
-	if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_SHADOW_VMCS) &&
-	    vmcs12->vmcs_link_pointer != -1ull) {
+	if (nested_vmx_check_vmcs_link_ptr(vcpu, vmcs12)) {
 		*exit_qual = ENTRY_FAIL_VMCS_LINK_PTR;
 		return 1;
 	}
@@ -12042,12 +12530,17 @@ static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 	return 0;
 }
 
-static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu)
+/*
+ * If exit_qual is NULL, this is being called from state restore (either RSM
+ * or KVM_SET_NESTED_STATE).  Otherwise it's called from vmlaunch/vmresume.
+ */
+static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-	u32 exit_qual;
-	int r;
+	bool from_vmentry = !!exit_qual;
+	u32 dummy_exit_qual;
+	int r = 0;
 
 	enter_guest_mode(vcpu);
 
@@ -12061,17 +12554,28 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu)
 		vcpu->arch.tsc_offset += vmcs12->tsc_offset;
 
 	r = EXIT_REASON_INVALID_STATE;
-	if (prepare_vmcs02(vcpu, vmcs12, &exit_qual))
+	if (prepare_vmcs02(vcpu, vmcs12, from_vmentry ? exit_qual : &dummy_exit_qual))
 		goto fail;
 
-	nested_get_vmcs12_pages(vcpu, vmcs12);
+	if (from_vmentry) {
+		nested_get_vmcs12_pages(vcpu);
 
-	r = EXIT_REASON_MSR_LOAD_FAIL;
-	exit_qual = nested_vmx_load_msr(vcpu,
-					vmcs12->vm_entry_msr_load_addr,
-					vmcs12->vm_entry_msr_load_count);
-	if (exit_qual)
-		goto fail;
+		r = EXIT_REASON_MSR_LOAD_FAIL;
+		*exit_qual = nested_vmx_load_msr(vcpu,
+	     					 vmcs12->vm_entry_msr_load_addr,
+					      	 vmcs12->vm_entry_msr_load_count);
+		if (*exit_qual)
+			goto fail;
+	} else {
+		/*
+		 * The MMU is not initialized to point at the right entities yet and
+		 * "get pages" would need to read data from the guest (i.e. we will
+		 * need to perform gpa to hpa translation). Request a call
+		 * to nested_get_vmcs12_pages before the next VM-entry.  The MSRs
+		 * have already been set at vmentry time and should not be reset.
+		 */
+		kvm_make_request(KVM_REQ_GET_VMCS12_PAGES, vcpu);
+	}
 
 	/*
 	 * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
@@ -12086,8 +12590,7 @@ fail:
 		vcpu->arch.tsc_offset -= vmcs12->tsc_offset;
 	leave_guest_mode(vcpu);
 	vmx_switch_vmcs(vcpu, &vmx->vmcs01);
-	nested_vmx_entry_failure(vcpu, vmcs12, r, exit_qual);
-	return 1;
+	return r;
 }
 
 /*
@@ -12110,6 +12613,17 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 
 	vmcs12 = get_vmcs12(vcpu);
 
+	/*
+	 * Can't VMLAUNCH or VMRESUME a shadow VMCS. Despite the fact
+	 * that there *is* a valid VMCS pointer, RFLAGS.CF is set
+	 * rather than RFLAGS.ZF, and no error number is stored to the
+	 * VM-instruction error field.
+	 */
+	if (vmcs12->hdr.shadow_vmcs) {
+		nested_vmx_failInvalid(vcpu);
+		goto out;
+	}
+
 	if (enable_shadow_vmcs)
 		copy_shadow_to_vmcs12(vmx);
 
@@ -12164,16 +12678,29 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 	 */
 
 	vmx->nested.nested_run_pending = 1;
-	ret = enter_vmx_non_root_mode(vcpu);
+	ret = enter_vmx_non_root_mode(vcpu, &exit_qual);
 	if (ret) {
+		nested_vmx_entry_failure(vcpu, vmcs12, ret, exit_qual);
 		vmx->nested.nested_run_pending = 0;
-		return ret;
+		return 1;
 	}
 
 	/* Hide L1D cache contents from the nested guest.  */
 	vmx->vcpu.arch.l1tf_flush_l1d = true;
 
 	/*
+	 * Must happen outside of enter_vmx_non_root_mode() as it will
+	 * also be used as part of restoring nVMX state for
+	 * snapshot restore (migration).
+	 *
+	 * In this flow, it is assumed that vmcs12 cache was
+	 * trasferred as part of captured nVMX state and should
+	 * therefore not be read from guest memory (which may not
+	 * exist on destination host yet).
+	 */
+	nested_cache_shadow_vmcs12(vcpu, vmcs12);
+
+	/*
 	 * If we're entering a halted L2 vcpu and the L2 vcpu won't be woken
 	 * by event injection, halt vcpu.
 	 */
@@ -12682,6 +13209,17 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
 			prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info,
 				       exit_qualification);
 
+		/*
+		 * Must happen outside of sync_vmcs12() as it will
+		 * also be used to capture vmcs12 cache as part of
+		 * capturing nVMX state for snapshot (migration).
+		 *
+		 * Otherwise, this flush will dirty guest memory at a
+		 * point it is already assumed by user-space to be
+		 * immutable.
+		 */
+		nested_flush_cached_shadow_vmcs12(vcpu, vmcs12);
+
 		if (nested_vmx_store_msr(vcpu, vmcs12->vm_exit_msr_store_addr,
 					 vmcs12->vm_exit_msr_store_count))
 			nested_vmx_abort(vcpu, VMX_ABORT_SAVE_GUEST_MSR_FAIL);
@@ -13256,7 +13794,7 @@ static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase)
 
 	if (vmx->nested.smm.guest_mode) {
 		vcpu->arch.hflags &= ~HF_SMM_MASK;
-		ret = enter_vmx_non_root_mode(vcpu);
+		ret = enter_vmx_non_root_mode(vcpu, NULL);
 		vcpu->arch.hflags |= HF_SMM_MASK;
 		if (ret)
 			return ret;
@@ -13271,6 +13809,199 @@ static int enable_smi_window(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
+				struct kvm_nested_state __user *user_kvm_nested_state,
+				u32 user_data_size)
+{
+	struct vcpu_vmx *vmx;
+	struct vmcs12 *vmcs12;
+	struct kvm_nested_state kvm_state = {
+		.flags = 0,
+		.format = 0,
+		.size = sizeof(kvm_state),
+		.vmx.vmxon_pa = -1ull,
+		.vmx.vmcs_pa = -1ull,
+	};
+
+	if (!vcpu)
+		return kvm_state.size + 2 * VMCS12_SIZE;
+
+	vmx = to_vmx(vcpu);
+	vmcs12 = get_vmcs12(vcpu);
+	if (nested_vmx_allowed(vcpu) &&
+	    (vmx->nested.vmxon || vmx->nested.smm.vmxon)) {
+		kvm_state.vmx.vmxon_pa = vmx->nested.vmxon_ptr;
+		kvm_state.vmx.vmcs_pa = vmx->nested.current_vmptr;
+
+		if (vmx->nested.current_vmptr != -1ull) {
+			kvm_state.size += VMCS12_SIZE;
+
+			if (is_guest_mode(vcpu) &&
+			    nested_cpu_has_shadow_vmcs(vmcs12) &&
+			    vmcs12->vmcs_link_pointer != -1ull)
+				kvm_state.size += VMCS12_SIZE;
+		}
+
+		if (vmx->nested.smm.vmxon)
+			kvm_state.vmx.smm.flags |= KVM_STATE_NESTED_SMM_VMXON;
+
+		if (vmx->nested.smm.guest_mode)
+			kvm_state.vmx.smm.flags |= KVM_STATE_NESTED_SMM_GUEST_MODE;
+
+		if (is_guest_mode(vcpu)) {
+			kvm_state.flags |= KVM_STATE_NESTED_GUEST_MODE;
+
+			if (vmx->nested.nested_run_pending)
+				kvm_state.flags |= KVM_STATE_NESTED_RUN_PENDING;
+		}
+	}
+
+	if (user_data_size < kvm_state.size)
+		goto out;
+
+	if (copy_to_user(user_kvm_nested_state, &kvm_state, sizeof(kvm_state)))
+		return -EFAULT;
+
+	if (vmx->nested.current_vmptr == -1ull)
+		goto out;
+
+	/*
+	 * When running L2, the authoritative vmcs12 state is in the
+	 * vmcs02. When running L1, the authoritative vmcs12 state is
+	 * in the shadow vmcs linked to vmcs01, unless
+	 * sync_shadow_vmcs is set, in which case, the authoritative
+	 * vmcs12 state is in the vmcs12 already.
+	 */
+	if (is_guest_mode(vcpu))
+		sync_vmcs12(vcpu, vmcs12);
+	else if (enable_shadow_vmcs && !vmx->nested.sync_shadow_vmcs)
+		copy_shadow_to_vmcs12(vmx);
+
+	if (copy_to_user(user_kvm_nested_state->data, vmcs12, sizeof(*vmcs12)))
+		return -EFAULT;
+
+	if (nested_cpu_has_shadow_vmcs(vmcs12) &&
+	    vmcs12->vmcs_link_pointer != -1ull) {
+		if (copy_to_user(user_kvm_nested_state->data + VMCS12_SIZE,
+				 get_shadow_vmcs12(vcpu), sizeof(*vmcs12)))
+			return -EFAULT;
+	}
+
+out:
+	return kvm_state.size;
+}
+
+static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
+				struct kvm_nested_state __user *user_kvm_nested_state,
+				struct kvm_nested_state *kvm_state)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct vmcs12 *vmcs12;
+	u32 exit_qual;
+	int ret;
+
+	if (kvm_state->format != 0)
+		return -EINVAL;
+
+	if (!nested_vmx_allowed(vcpu))
+		return kvm_state->vmx.vmxon_pa == -1ull ? 0 : -EINVAL;
+
+	if (kvm_state->vmx.vmxon_pa == -1ull) {
+		if (kvm_state->vmx.smm.flags)
+			return -EINVAL;
+
+		if (kvm_state->vmx.vmcs_pa != -1ull)
+			return -EINVAL;
+
+		vmx_leave_nested(vcpu);
+		return 0;
+	}
+
+	if (!page_address_valid(vcpu, kvm_state->vmx.vmxon_pa))
+		return -EINVAL;
+
+	if (kvm_state->size < sizeof(kvm_state) + sizeof(*vmcs12))
+		return -EINVAL;
+
+	if (kvm_state->vmx.vmcs_pa == kvm_state->vmx.vmxon_pa ||
+	    !page_address_valid(vcpu, kvm_state->vmx.vmcs_pa))
+		return -EINVAL;
+
+	if ((kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
+	    (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
+		return -EINVAL;
+
+	if (kvm_state->vmx.smm.flags &
+	    ~(KVM_STATE_NESTED_SMM_GUEST_MODE | KVM_STATE_NESTED_SMM_VMXON))
+		return -EINVAL;
+
+	if ((kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
+	    !(kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON))
+		return -EINVAL;
+
+	vmx_leave_nested(vcpu);
+	if (kvm_state->vmx.vmxon_pa == -1ull)
+		return 0;
+
+	vmx->nested.vmxon_ptr = kvm_state->vmx.vmxon_pa;
+	ret = enter_vmx_operation(vcpu);
+	if (ret)
+		return ret;
+
+	set_current_vmptr(vmx, kvm_state->vmx.vmcs_pa);
+
+	if (kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON) {
+		vmx->nested.smm.vmxon = true;
+		vmx->nested.vmxon = false;
+
+		if (kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE)
+			vmx->nested.smm.guest_mode = true;
+	}
+
+	vmcs12 = get_vmcs12(vcpu);
+	if (copy_from_user(vmcs12, user_kvm_nested_state->data, sizeof(*vmcs12)))
+		return -EFAULT;
+
+	if (vmcs12->hdr.revision_id != VMCS12_REVISION)
+		return -EINVAL;
+
+	if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
+		return 0;
+
+	vmx->nested.nested_run_pending =
+		!!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
+
+	if (nested_cpu_has_shadow_vmcs(vmcs12) &&
+	    vmcs12->vmcs_link_pointer != -1ull) {
+		struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu);
+		if (kvm_state->size < sizeof(kvm_state) + 2 * sizeof(*vmcs12))
+			return -EINVAL;
+
+		if (copy_from_user(shadow_vmcs12,
+				   user_kvm_nested_state->data + VMCS12_SIZE,
+				   sizeof(*vmcs12)))
+			return -EFAULT;
+
+		if (shadow_vmcs12->hdr.revision_id != VMCS12_REVISION ||
+		    !shadow_vmcs12->hdr.shadow_vmcs)
+			return -EINVAL;
+	}
+
+	if (check_vmentry_prereqs(vcpu, vmcs12) ||
+	    check_vmentry_postreqs(vcpu, vmcs12, &exit_qual))
+		return -EINVAL;
+
+	if (kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING)
+		vmx->nested.nested_run_pending = 1;
+
+	vmx->nested.dirty_vmcs12 = true;
+	ret = enter_vmx_non_root_mode(vcpu, NULL);
+	if (ret)
+		return -EINVAL;
+
+	return 0;
+}
+
 static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 	.cpu_has_kvm_support = cpu_has_kvm_support,
 	.disabled_by_bios = vmx_disabled_by_bios,
@@ -13290,7 +14021,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 	.vcpu_free = vmx_free_vcpu,
 	.vcpu_reset = vmx_vcpu_reset,
 
-	.prepare_guest_switch = vmx_save_host_state,
+	.prepare_guest_switch = vmx_prepare_switch_to_guest,
 	.vcpu_load = vmx_vcpu_load,
 	.vcpu_put = vmx_vcpu_put,
 
@@ -13323,6 +14054,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 	.set_rflags = vmx_set_rflags,
 
 	.tlb_flush = vmx_flush_tlb,
+	.tlb_flush_gva = vmx_flush_tlb_gva,
 
 	.run = vmx_vcpu_run,
 	.handle_exit = vmx_handle_exit,
@@ -13405,6 +14137,10 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 
 	.setup_mce = vmx_setup_mce,
 
+	.get_nested_state = vmx_get_nested_state,
+	.set_nested_state = vmx_set_nested_state,
+	.get_vmcs12_pages = nested_get_vmcs12_pages,
+
 	.smi_allowed = vmx_smi_allowed,
 	.pre_enter_smm = vmx_pre_enter_smm,
 	.pre_leave_smm = vmx_pre_leave_smm,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a5caa5e5480c..506bd2b4b8bb 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -848,16 +848,21 @@ EXPORT_SYMBOL_GPL(kvm_set_cr4);
 
 int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 {
+	bool skip_tlb_flush = false;
 #ifdef CONFIG_X86_64
 	bool pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
 
-	if (pcid_enabled)
-		cr3 &= ~CR3_PCID_INVD;
+	if (pcid_enabled) {
+		skip_tlb_flush = cr3 & X86_CR3_PCID_NOFLUSH;
+		cr3 &= ~X86_CR3_PCID_NOFLUSH;
+	}
 #endif
 
 	if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
-		kvm_mmu_sync_roots(vcpu);
-		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+		if (!skip_tlb_flush) {
+			kvm_mmu_sync_roots(vcpu);
+			kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+		}
 		return 0;
 	}
 
@@ -868,9 +873,10 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 		   !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
 		return 1;
 
+	kvm_mmu_new_cr3(vcpu, cr3, skip_tlb_flush);
 	vcpu->arch.cr3 = cr3;
 	__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
-	kvm_mmu_new_cr3(vcpu);
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_set_cr3);
@@ -2185,10 +2191,11 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		vcpu->arch.mcg_status = data;
 		break;
 	case MSR_IA32_MCG_CTL:
-		if (!(mcg_cap & MCG_CTL_P))
+		if (!(mcg_cap & MCG_CTL_P) &&
+		    (data || !msr_info->host_initiated))
 			return 1;
 		if (data != 0 && data != ~(u64)0)
-			return -1;
+			return 1;
 		vcpu->arch.mcg_ctl = data;
 		break;
 	default:
@@ -2576,7 +2583,7 @@ int kvm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 }
 EXPORT_SYMBOL_GPL(kvm_get_msr);
 
-static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
 {
 	u64 data;
 	u64 mcg_cap = vcpu->arch.mcg_cap;
@@ -2591,7 +2598,7 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 		data = vcpu->arch.mcg_cap;
 		break;
 	case MSR_IA32_MCG_CTL:
-		if (!(mcg_cap & MCG_CTL_P))
+		if (!(mcg_cap & MCG_CTL_P) && !host)
 			return 1;
 		data = vcpu->arch.mcg_ctl;
 		break;
@@ -2724,7 +2731,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_IA32_MCG_CTL:
 	case MSR_IA32_MCG_STATUS:
 	case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
-		return get_msr_mce(vcpu, msr_info->index, &msr_info->data);
+		return get_msr_mce(vcpu, msr_info->index, &msr_info->data,
+				   msr_info->host_initiated);
 	case MSR_K7_CLK_CTL:
 		/*
 		 * Provide expected ramp-up count for K7. All other
@@ -2745,7 +2753,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case HV_X64_MSR_TSC_EMULATION_CONTROL:
 	case HV_X64_MSR_TSC_EMULATION_STATUS:
 		return kvm_hv_get_msr_common(vcpu,
-					     msr_info->index, &msr_info->data);
+					     msr_info->index, &msr_info->data,
+					     msr_info->host_initiated);
 		break;
 	case MSR_IA32_BBL_CR_CTL3:
 		/* This legacy MSR exists but isn't fully documented in current
@@ -2969,6 +2978,10 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_X2APIC_API:
 		r = KVM_X2APIC_API_VALID_FLAGS;
 		break;
+	case KVM_CAP_NESTED_STATE:
+		r = kvm_x86_ops->get_nested_state ?
+			kvm_x86_ops->get_nested_state(NULL, 0, 0) : 0;
+		break;
 	default:
 		break;
 	}
@@ -3985,6 +3998,56 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
 		break;
 	}
+	case KVM_GET_NESTED_STATE: {
+		struct kvm_nested_state __user *user_kvm_nested_state = argp;
+		u32 user_data_size;
+
+		r = -EINVAL;
+		if (!kvm_x86_ops->get_nested_state)
+			break;
+
+		BUILD_BUG_ON(sizeof(user_data_size) != sizeof(user_kvm_nested_state->size));
+		if (get_user(user_data_size, &user_kvm_nested_state->size))
+			return -EFAULT;
+
+		r = kvm_x86_ops->get_nested_state(vcpu, user_kvm_nested_state,
+						  user_data_size);
+		if (r < 0)
+			return r;
+
+		if (r > user_data_size) {
+			if (put_user(r, &user_kvm_nested_state->size))
+				return -EFAULT;
+			return -E2BIG;
+		}
+		r = 0;
+		break;
+	}
+	case KVM_SET_NESTED_STATE: {
+		struct kvm_nested_state __user *user_kvm_nested_state = argp;
+		struct kvm_nested_state kvm_state;
+
+		r = -EINVAL;
+		if (!kvm_x86_ops->set_nested_state)
+			break;
+
+		if (copy_from_user(&kvm_state, user_kvm_nested_state, sizeof(kvm_state)))
+			return -EFAULT;
+
+		if (kvm_state.size < sizeof(kvm_state))
+			return -EINVAL;
+
+		if (kvm_state.flags &
+		    ~(KVM_STATE_NESTED_RUN_PENDING | KVM_STATE_NESTED_GUEST_MODE))
+			return -EINVAL;
+
+		/* nested_run_pending implies guest_mode.  */
+		if (kvm_state.flags == KVM_STATE_NESTED_RUN_PENDING)
+			return -EINVAL;
+
+		r = kvm_x86_ops->set_nested_state(vcpu, user_kvm_nested_state, &kvm_state);
+		break;
+	}
 	default:
 		r = -EINVAL;
 	}
@@ -6503,20 +6566,22 @@ static void kvm_set_mmio_spte_mask(void)
 	 * Set the reserved bits and the present bit of an paging-structure
 	 * entry to generate page fault with PFER.RSV = 1.
 	 */
-	 /* Mask the reserved physical address bits. */
-	mask = rsvd_bits(maxphyaddr, 51);
+
+	/*
+	 * Mask the uppermost physical address bit, which would be reserved as
+	 * long as the supported physical address width is less than 52.
+	 */
+	mask = 1ull << 51;
 
 	/* Set the present bit. */
 	mask |= 1ull;
 
-#ifdef CONFIG_X86_64
 	/*
 	 * If reserved bit is not supported, clear the present bit to disable
 	 * mmio page fault.
 	 */
-	if (maxphyaddr == 52)
+	if (IS_ENABLED(CONFIG_X86_64) && maxphyaddr == 52)
 		mask &= ~1ull;
-#endif
 
 	kvm_mmu_set_mmio_spte_mask(mask, mask);
 }
@@ -6769,6 +6834,9 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 	case KVM_HC_CLOCK_PAIRING:
 		ret = kvm_pv_clock_pairing(vcpu, a0, a1);
 		break;
+	case KVM_HC_SEND_IPI:
+		ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
+		break;
 #endif
 	default:
 		ret = -KVM_ENOSYS;
@@ -7235,8 +7303,9 @@ static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu)
 	kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
 }
 
-void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
-		unsigned long start, unsigned long end)
+int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+		unsigned long start, unsigned long end,
+		bool blockable)
 {
 	unsigned long apic_address;
 
@@ -7247,6 +7316,8 @@ void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
 	apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
 	if (start <= apic_address && apic_address < end)
 		kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
+
+	return 0;
 }
 
 void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
@@ -7287,6 +7358,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	bool req_immediate_exit = false;
 
 	if (kvm_request_pending(vcpu)) {
+		if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu))
+			kvm_x86_ops->get_vmcs12_pages(vcpu);
 		if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
 			kvm_mmu_unload(vcpu);
 		if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
@@ -7302,6 +7375,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		}
 		if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
 			kvm_mmu_sync_roots(vcpu);
+		if (kvm_check_request(KVM_REQ_LOAD_CR3, vcpu))
+			kvm_mmu_load_cr3(vcpu);
 		if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
 			kvm_vcpu_flush_tlb(vcpu, true);
 		if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
@@ -8013,6 +8088,10 @@ EXPORT_SYMBOL_GPL(kvm_task_switch);
 
 static int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 {
+	if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
+			(sregs->cr4 & X86_CR4_OSXSAVE))
+		return  -EINVAL;
+
 	if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) {
 		/*
 		 * When EFER.LME and CR0.PG are set, the processor is in
@@ -8043,10 +8122,6 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 	struct desc_ptr dt;
 	int ret = -EINVAL;
 
-	if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
-			(sregs->cr4 & X86_CR4_OSXSAVE))
-		goto out;
-
 	if (kvm_valid_sregs(vcpu, sregs))
 		goto out;
 
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 801491e98890..04d038f3b6fa 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -4,12 +4,15 @@ config ZONE_DMA
 
 config XTENSA
 	def_bool y
+	select ARCH_HAS_SYNC_DMA_FOR_CPU
+	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
 	select ARCH_NO_COHERENT_DMA_MMAP if !MMU
 	select ARCH_WANT_FRAME_POINTERS
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select BUILDTIME_EXTABLE_SORT
 	select CLONE_BACKWARDS
 	select COMMON_CLK
+	select DMA_NONCOHERENT_OPS
 	select GENERIC_ATOMIC64
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_IRQ_SHOW
@@ -72,9 +75,6 @@ config TRACE_IRQFLAGS_SUPPORT
 config MMU
 	def_bool n
 
-config VARIANT_IRQ_SWITCH
-	def_bool n
-
 config HAVE_XTENSA_GPIO32
 	def_bool n
 
@@ -244,6 +244,23 @@ config INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX
 
 	  If in doubt, say Y.
 
+config MEMMAP_CACHEATTR
+	hex "Cache attributes for the memory address space"
+	depends on !MMU
+	default 0x22222222
+	help
+	  These cache attributes are set up for noMMU systems. Each hex digit
+	  specifies cache attributes for the corresponding 512MB memory
+	  region: bits 0..3 -- for addresses 0x00000000..0x1fffffff,
+	  bits 4..7 -- for addresses 0x20000000..0x3fffffff, and so on.
+
+	  Cache attribute values are specific for the MMU type, so e.g.
+	  for region protection MMUs: 2 is cache bypass, 4 is WB cached,
+	  1 is WT cached, f is illegal. For ful MMU: bit 0 makes it executable,
+	  bit 1 makes it writable, bits 2..3 meaning is 0: cache bypass,
+	  1: WB cache, 2: WT cache, 3: special (c and e are illegal, f is
+	  reserved).
+
 config KSEG_PADDR
 	hex "Physical address of the KSEG mapping"
 	depends on INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX && MMU
@@ -413,6 +430,10 @@ config XTENSA_PLATFORM_XTFPGA
 
 endchoice
 
+config PLATFORM_NR_IRQS
+	int
+	default 3 if XTENSA_PLATFORM_XT2000
+	default 0
 
 config XTENSA_CPU_CLOCK
 	int "CPU clock rate [MHz]"
@@ -450,6 +471,15 @@ config BUILTIN_DTB
 	string "DTB to build into the kernel image"
 	depends on OF
 
+config PARSE_BOOTPARAM
+	bool "Parse bootparam block"
+	default y
+	help
+	  Parse parameters passed to the kernel from the bootloader. It may
+	  be disabled if the kernel is known to run without the bootloader.
+
+	  If unsure, say Y.
+
 config BLK_DEV_SIMDISK
 	tristate "Host file-based simulated block device support"
 	default n
@@ -506,25 +536,13 @@ config PLATFORM_WANT_DEFAULT_MEM
 	def_bool n
 
 config DEFAULT_MEM_START
-	hex "Physical address of the default memory area start"
-	depends on PLATFORM_WANT_DEFAULT_MEM
-	default 0x00000000 if MMU
-	default 0x60000000 if !MMU
-	help
-	  This is the base address of the default memory area.
-	  Default memory area has platform-specific meaning, it may be used
-	  for e.g. early cache initialization.
-
-	  If unsure, leave the default value here.
-
-config DEFAULT_MEM_SIZE
-	hex "Maximal size of the default memory area"
-	depends on PLATFORM_WANT_DEFAULT_MEM
-	default 0x04000000
+	hex
+	prompt "PAGE_OFFSET/PHYS_OFFSET" if !MMU && PLATFORM_WANT_DEFAULT_MEM
+	default 0x60000000 if PLATFORM_WANT_DEFAULT_MEM
+	default 0x00000000
 	help
-	  This is the size of the default memory area.
-	  Default memory area has platform-specific meaning, it may be used
-	  for e.g. early cache initialization.
+	  This is the base address used for both PAGE_OFFSET and PHYS_OFFSET
+	  in noMMU configurations.
 
 	  If unsure, leave the default value here.
 
diff --git a/arch/xtensa/boot/boot-elf/bootstrap.S b/arch/xtensa/boot/boot-elf/bootstrap.S
index b6aa85328ac0..29c68426ab56 100644
--- a/arch/xtensa/boot/boot-elf/bootstrap.S
+++ b/arch/xtensa/boot/boot-elf/bootstrap.S
@@ -15,10 +15,6 @@
  */
 
 #include <asm/bootparam.h>
-#include <asm/processor.h>
-#include <asm/pgtable.h>
-#include <asm/page.h>
-#include <asm/cacheasm.h>
 #include <asm/initialize_mmu.h>
 #include <asm/vectors.h>
 #include <linux/linkage.h>
@@ -33,16 +29,18 @@ _ResetVector:
 	.begin  no-absolute-literals
 	.literal_position
 
-	.align 4
-RomInitAddr:
 #if defined(CONFIG_INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX) && \
 	XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY
-	.word	CONFIG_KERNEL_LOAD_ADDRESS
+	.literal RomInitAddr, CONFIG_KERNEL_LOAD_ADDRESS
 #else
-	.word	KERNELOFFSET
+	.literal RomInitAddr, KERNELOFFSET
 #endif
-RomBootParam:
-	.word _bootparam
+#ifndef CONFIG_PARSE_BOOTPARAM
+	.literal RomBootParam, 0
+#else
+	.literal RomBootParam, _bootparam
+
+	.align 4
 _bootparam:
 	.short	BP_TAG_FIRST
 	.short	4
@@ -50,6 +48,7 @@ _bootparam:
 	.short	BP_TAG_LAST
 	.short	0
 	.long	0
+#endif
 
 	.align  4
 _SetupMMU:
diff --git a/arch/xtensa/configs/nommu_kc705_defconfig b/arch/xtensa/configs/nommu_kc705_defconfig
index 624f9b3a3878..f3fc4f970ca8 100644
--- a/arch/xtensa/configs/nommu_kc705_defconfig
+++ b/arch/xtensa/configs/nommu_kc705_defconfig
@@ -33,13 +33,13 @@ CONFIG_XTENSA_VARIANT_CUSTOM_NAME="de212"
 # CONFIG_XTENSA_VARIANT_MMU is not set
 CONFIG_XTENSA_UNALIGNED_USER=y
 CONFIG_PREEMPT=y
+CONFIG_MEMMAP_CACHEATTR=0xfff2442f
 # CONFIG_PCI is not set
 CONFIG_XTENSA_PLATFORM_XTFPGA=y
 CONFIG_CMDLINE_BOOL=y
 CONFIG_CMDLINE="earlycon=uart8250,mmio32native,0x9d050020,115200n8 console=ttyS0,115200n8 ip=dhcp root=/dev/nfs rw debug memmap=256M@0x60000000"
 CONFIG_USE_OF=y
 CONFIG_BUILTIN_DTB="kc705_nommu"
-CONFIG_DEFAULT_MEM_SIZE=0x10000000
 CONFIG_BINFMT_FLAT=y
 CONFIG_NET=y
 CONFIG_PACKET=y
diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index e5e1e61c538c..82c756431b49 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -3,6 +3,7 @@ generic-y += compat.h
 generic-y += device.h
 generic-y += div64.h
 generic-y += dma-contiguous.h
+generic-y += dma-mapping.h
 generic-y += emergency-restart.h
 generic-y += exec.h
 generic-y += extable.h
diff --git a/arch/xtensa/include/asm/cacheasm.h b/arch/xtensa/include/asm/cacheasm.h
index 2041abb10a23..34545ecfdd6b 100644
--- a/arch/xtensa/include/asm/cacheasm.h
+++ b/arch/xtensa/include/asm/cacheasm.h
@@ -31,16 +31,32 @@
  *
  */
 
-	.macro	__loop_cache_all ar at insn size line_width
 
-	movi	\ar, 0
+	.macro	__loop_cache_unroll ar at insn size line_width max_immed
+
+	.if	(1 << (\line_width)) > (\max_immed)
+	.set	_reps, 1
+	.elseif	(2 << (\line_width)) > (\max_immed)
+	.set	_reps, 2
+	.else
+	.set	_reps, 4
+	.endif
+
+	__loopi	\ar, \at, \size, (_reps << (\line_width))
+	.set	_index, 0
+	.rep	_reps
+	\insn	\ar, _index << (\line_width)
+	.set	_index, _index + 1
+	.endr
+	__endla	\ar, \at, _reps << (\line_width)
+
+	.endm
+
 
-	__loopi	\ar, \at, \size, (4 << (\line_width))
-	\insn	\ar, 0 << (\line_width)
-	\insn	\ar, 1 << (\line_width)
-	\insn	\ar, 2 << (\line_width)
-	\insn	\ar, 3 << (\line_width)
-	__endla	\ar, \at, 4 << (\line_width)
+	.macro	__loop_cache_all ar at insn size line_width max_immed
+
+	movi	\ar, 0
+	__loop_cache_unroll \ar, \at, \insn, \size, \line_width, \max_immed
 
 	.endm
 
@@ -57,14 +73,9 @@
 	.endm
 
 
-	.macro	__loop_cache_page ar at insn line_width
+	.macro	__loop_cache_page ar at insn line_width max_immed
 
-	__loopi	\ar, \at, PAGE_SIZE, 4 << (\line_width)
-	\insn	\ar, 0 << (\line_width)
-	\insn	\ar, 1 << (\line_width)
-	\insn	\ar, 2 << (\line_width)
-	\insn	\ar, 3 << (\line_width)
-	__endla	\ar, \at, 4 << (\line_width)
+	__loop_cache_unroll \ar, \at, \insn, PAGE_SIZE, \line_width, \max_immed
 
 	.endm
 
@@ -72,7 +83,8 @@
 	.macro	___unlock_dcache_all ar at
 
 #if XCHAL_DCACHE_LINE_LOCKABLE && XCHAL_DCACHE_SIZE
-	__loop_cache_all \ar \at diu XCHAL_DCACHE_SIZE XCHAL_DCACHE_LINEWIDTH
+	__loop_cache_all \ar \at diu XCHAL_DCACHE_SIZE \
+		XCHAL_DCACHE_LINEWIDTH 240
 #endif
 
 	.endm
@@ -81,7 +93,8 @@
 	.macro	___unlock_icache_all ar at
 
 #if XCHAL_ICACHE_LINE_LOCKABLE && XCHAL_ICACHE_SIZE
-	__loop_cache_all \ar \at iiu XCHAL_ICACHE_SIZE XCHAL_ICACHE_LINEWIDTH
+	__loop_cache_all \ar \at iiu XCHAL_ICACHE_SIZE \
+		XCHAL_ICACHE_LINEWIDTH 240
 #endif
 
 	.endm
@@ -90,7 +103,8 @@
 	.macro	___flush_invalidate_dcache_all ar at
 
 #if XCHAL_DCACHE_SIZE
-	__loop_cache_all \ar \at diwbi XCHAL_DCACHE_SIZE XCHAL_DCACHE_LINEWIDTH
+	__loop_cache_all \ar \at diwbi XCHAL_DCACHE_SIZE \
+		XCHAL_DCACHE_LINEWIDTH 240
 #endif
 
 	.endm
@@ -99,7 +113,8 @@
 	.macro	___flush_dcache_all ar at
 
 #if XCHAL_DCACHE_SIZE
-	__loop_cache_all \ar \at diwb XCHAL_DCACHE_SIZE XCHAL_DCACHE_LINEWIDTH
+	__loop_cache_all \ar \at diwb XCHAL_DCACHE_SIZE \
+		XCHAL_DCACHE_LINEWIDTH 240
 #endif
 
 	.endm
@@ -108,8 +123,8 @@
 	.macro	___invalidate_dcache_all ar at
 
 #if XCHAL_DCACHE_SIZE
-	__loop_cache_all \ar \at dii __stringify(DCACHE_WAY_SIZE) \
-			 XCHAL_DCACHE_LINEWIDTH
+	__loop_cache_all \ar \at dii XCHAL_DCACHE_SIZE \
+			 XCHAL_DCACHE_LINEWIDTH 1020
 #endif
 
 	.endm
@@ -118,8 +133,8 @@
 	.macro	___invalidate_icache_all ar at
 
 #if XCHAL_ICACHE_SIZE
-	__loop_cache_all \ar \at iii __stringify(ICACHE_WAY_SIZE) \
-			 XCHAL_ICACHE_LINEWIDTH
+	__loop_cache_all \ar \at iii XCHAL_ICACHE_SIZE \
+			 XCHAL_ICACHE_LINEWIDTH 1020
 #endif
 
 	.endm
@@ -166,7 +181,7 @@
 	.macro	___flush_invalidate_dcache_page ar as
 
 #if XCHAL_DCACHE_SIZE
-	__loop_cache_page \ar \as dhwbi XCHAL_DCACHE_LINEWIDTH
+	__loop_cache_page \ar \as dhwbi XCHAL_DCACHE_LINEWIDTH 1020
 #endif
 
 	.endm
@@ -175,7 +190,7 @@
 	.macro ___flush_dcache_page ar as
 
 #if XCHAL_DCACHE_SIZE
-	__loop_cache_page \ar \as dhwb XCHAL_DCACHE_LINEWIDTH
+	__loop_cache_page \ar \as dhwb XCHAL_DCACHE_LINEWIDTH 1020
 #endif
 
 	.endm
@@ -184,7 +199,7 @@
 	.macro	___invalidate_dcache_page ar as
 
 #if XCHAL_DCACHE_SIZE
-	__loop_cache_page \ar \as dhi XCHAL_DCACHE_LINEWIDTH
+	__loop_cache_page \ar \as dhi XCHAL_DCACHE_LINEWIDTH 1020
 #endif
 
 	.endm
@@ -193,7 +208,7 @@
 	.macro	___invalidate_icache_page ar as
 
 #if XCHAL_ICACHE_SIZE
-	__loop_cache_page \ar \as ihi XCHAL_ICACHE_LINEWIDTH
+	__loop_cache_page \ar \as ihi XCHAL_ICACHE_LINEWIDTH 1020
 #endif
 
 	.endm
diff --git a/arch/xtensa/include/asm/dma-mapping.h b/arch/xtensa/include/asm/dma-mapping.h
deleted file mode 100644
index 44098800dad7..000000000000
--- a/arch/xtensa/include/asm/dma-mapping.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2003 - 2005 Tensilica Inc.
- * Copyright (C) 2015 Cadence Design Systems Inc.
- */
-
-#ifndef _XTENSA_DMA_MAPPING_H
-#define _XTENSA_DMA_MAPPING_H
-
-#include <asm/cache.h>
-#include <asm/io.h>
-
-#include <linux/mm.h>
-#include <linux/scatterlist.h>
-
-extern const struct dma_map_ops xtensa_dma_map_ops;
-
-static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
-{
-	return &xtensa_dma_map_ops;
-}
-
-#endif	/* _XTENSA_DMA_MAPPING_H */
diff --git a/arch/xtensa/include/asm/initialize_mmu.h b/arch/xtensa/include/asm/initialize_mmu.h
index 42410f253597..10e9852b2fb4 100644
--- a/arch/xtensa/include/asm/initialize_mmu.h
+++ b/arch/xtensa/include/asm/initialize_mmu.h
@@ -177,36 +177,36 @@
 #endif /* defined(CONFIG_MMU) && XCHAL_HAVE_PTP_MMU &&
 	  XCHAL_HAVE_SPANNING_WAY */
 
-#if !defined(CONFIG_MMU) && XCHAL_HAVE_TLBS && \
-		(XCHAL_DCACHE_SIZE || XCHAL_ICACHE_SIZE)
-	/* Enable data and instruction cache in the DEFAULT_MEMORY region
-	 * if the processor has DTLB and ITLB.
-	 */
+	.endm
+
+	.macro	initialize_cacheattr
 
-	movi	a5, PLATFORM_DEFAULT_MEM_START | XCHAL_SPANNING_WAY
+#if !defined(CONFIG_MMU) && XCHAL_HAVE_TLBS
+#if CONFIG_MEMMAP_CACHEATTR == 0x22222222 && XCHAL_HAVE_PTP_MMU
+#error Default MEMMAP_CACHEATTR of 0x22222222 does not work with full MMU.
+#endif
+
+	movi	a5, XCHAL_SPANNING_WAY
 	movi	a6, ~_PAGE_ATTRIB_MASK
-	movi	a7, CA_WRITEBACK
+	movi	a4, CONFIG_MEMMAP_CACHEATTR
 	movi	a8, 0x20000000
-	movi	a9, PLATFORM_DEFAULT_MEM_SIZE
-	j	2f
 1:
-	sub	a9, a9, a8
-2:
-#if XCHAL_DCACHE_SIZE
 	rdtlb1	a3, a5
+	xor	a3, a3, a4
 	and	a3, a3, a6
-	or	a3, a3, a7
+	xor	a3, a3, a4
 	wdtlb	a3, a5
-#endif
-#if XCHAL_ICACHE_SIZE
-	ritlb1	a4, a5
-	and	a4, a4, a6
-	or	a4, a4, a7
-	witlb	a4, a5
-#endif
+	ritlb1	a3, a5
+	xor	a3, a3, a4
+	and	a3, a3, a6
+	xor	a3, a3, a4
+	witlb	a3, a5
+
 	add	a5, a5, a8
-	bltu	a8, a9, 1b
+	srli	a4, a4, 4
+	bgeu	a5, a8, 1b
 
+	isync
 #endif
 
 	.endm
diff --git a/arch/xtensa/include/asm/irq.h b/arch/xtensa/include/asm/irq.h
index 19707db966f1..6c6ed23e0c79 100644
--- a/arch/xtensa/include/asm/irq.h
+++ b/arch/xtensa/include/asm/irq.h
@@ -12,32 +12,17 @@
 #define _XTENSA_IRQ_H
 
 #include <linux/init.h>
-#include <platform/hardware.h>
 #include <variant/core.h>
 
-#ifdef CONFIG_VARIANT_IRQ_SWITCH
-#include <variant/irq.h>
+#ifdef CONFIG_PLATFORM_NR_IRQS
+# define PLATFORM_NR_IRQS CONFIG_PLATFORM_NR_IRQS
 #else
-static inline void variant_irq_enable(unsigned int irq) { }
-static inline void variant_irq_disable(unsigned int irq) { }
-#endif
-
-#ifndef VARIANT_NR_IRQS
-# define VARIANT_NR_IRQS 0
-#endif
-#ifndef PLATFORM_NR_IRQS
 # define PLATFORM_NR_IRQS 0
 #endif
 #define XTENSA_NR_IRQS XCHAL_NUM_INTERRUPTS
-#define NR_IRQS (XTENSA_NR_IRQS + VARIANT_NR_IRQS + PLATFORM_NR_IRQS + 1)
+#define NR_IRQS (XTENSA_NR_IRQS + PLATFORM_NR_IRQS + 1)
 #define XTENSA_PIC_LINUX_IRQ(hwirq) ((hwirq) + 1)
 
-#if VARIANT_NR_IRQS == 0
-static inline void variant_init_irq(void) { }
-#else
-void variant_init_irq(void) __init;
-#endif
-
 static __inline__ int irq_canonicalize(int irq)
 {
 	return (irq);
diff --git a/arch/xtensa/include/asm/kmem_layout.h b/arch/xtensa/include/asm/kmem_layout.h
index 2317c835a4db..9c12babc016c 100644
--- a/arch/xtensa/include/asm/kmem_layout.h
+++ b/arch/xtensa/include/asm/kmem_layout.h
@@ -63,12 +63,6 @@
 #error XCHAL_KSEG_PADDR is not properly aligned to XCHAL_KSEG_ALIGNMENT
 #endif
 
-#else
-
-#define XCHAL_KSEG_CACHED_VADDR	__XTENSA_UL_CONST(0xd0000000)
-#define XCHAL_KSEG_BYPASS_VADDR	__XTENSA_UL_CONST(0xd8000000)
-#define XCHAL_KSEG_SIZE		__XTENSA_UL_CONST(0x08000000)
-
 #endif
 
 #ifndef CONFIG_KASAN
diff --git a/arch/xtensa/include/asm/page.h b/arch/xtensa/include/asm/page.h
index 5d69c11c01b8..09c56cba442e 100644
--- a/arch/xtensa/include/asm/page.h
+++ b/arch/xtensa/include/asm/page.h
@@ -14,7 +14,6 @@
 #include <asm/processor.h>
 #include <asm/types.h>
 #include <asm/cache.h>
-#include <platform/hardware.h>
 #include <asm/kmem_layout.h>
 
 /*
@@ -31,8 +30,8 @@
 #define MAX_LOW_PFN	(PHYS_PFN(XCHAL_KSEG_PADDR) + \
 			 PHYS_PFN(XCHAL_KSEG_SIZE))
 #else
-#define PAGE_OFFSET	PLATFORM_DEFAULT_MEM_START
-#define PHYS_OFFSET	PLATFORM_DEFAULT_MEM_START
+#define PAGE_OFFSET	_AC(CONFIG_DEFAULT_MEM_START, UL)
+#define PHYS_OFFSET	_AC(CONFIG_DEFAULT_MEM_START, UL)
 #define MAX_LOW_PFN	PHYS_PFN(0xfffffffful)
 #endif
 
diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h
index 38802259978f..29cfe421cf41 100644
--- a/arch/xtensa/include/asm/pgtable.h
+++ b/arch/xtensa/include/asm/pgtable.h
@@ -66,6 +66,7 @@
 #define FIRST_USER_ADDRESS	0UL
 #define FIRST_USER_PGD_NR	(FIRST_USER_ADDRESS >> PGDIR_SHIFT)
 
+#ifdef CONFIG_MMU
 /*
  * Virtual memory area. We keep a distance to other memory regions to be
  * on the safe side. We also use this area for cache aliasing.
@@ -80,6 +81,13 @@
 #define TLBTEMP_SIZE		ICACHE_WAY_SIZE
 #endif
 
+#else
+
+#define VMALLOC_START		__XTENSA_UL_CONST(0)
+#define VMALLOC_END		__XTENSA_UL_CONST(0xffffffff)
+
+#endif
+
 /*
  * For the Xtensa architecture, the PTE layout is as follows:
  *
diff --git a/arch/xtensa/include/asm/platform.h b/arch/xtensa/include/asm/platform.h
index f8fbef67bc5f..560483356a06 100644
--- a/arch/xtensa/include/asm/platform.h
+++ b/arch/xtensa/include/asm/platform.h
@@ -75,4 +75,31 @@ extern void platform_calibrate_ccount (void);
  */
 void cpu_reset(void) __attribute__((noreturn));
 
+/*
+ * Memory caching is platform-dependent in noMMU xtensa configurations.
+ * The following set of functions should be implemented in platform code
+ * in order to enable coherent DMA memory operations when CONFIG_MMU is not
+ * enabled. Default implementations do nothing and issue a warning.
+ */
+
+/*
+ * Check whether p points to a cached memory.
+ */
+bool platform_vaddr_cached(const void *p);
+
+/*
+ * Check whether p points to an uncached memory.
+ */
+bool platform_vaddr_uncached(const void *p);
+
+/*
+ * Return pointer to an uncached view of the cached sddress p.
+ */
+void *platform_vaddr_to_uncached(void *p);
+
+/*
+ * Return pointer to a cached view of the uncached sddress p.
+ */
+void *platform_vaddr_to_cached(void *p);
+
 #endif	/* _XTENSA_PLATFORM_H */
diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h
index 5b0027d4ecc0..e4ccb88b7996 100644
--- a/arch/xtensa/include/asm/processor.h
+++ b/arch/xtensa/include/asm/processor.h
@@ -11,7 +11,6 @@
 #define _XTENSA_PROCESSOR_H
 
 #include <variant/core.h>
-#include <platform/hardware.h>
 
 #include <linux/compiler.h>
 #include <asm/ptrace.h>
diff --git a/arch/xtensa/include/asm/vectors.h b/arch/xtensa/include/asm/vectors.h
index 65d3da9db19b..7111280c8842 100644
--- a/arch/xtensa/include/asm/vectors.h
+++ b/arch/xtensa/include/asm/vectors.h
@@ -19,7 +19,6 @@
 #define _XTENSA_VECTORS_H
 
 #include <variant/core.h>
-#include <platform/hardware.h>
 #include <asm/kmem_layout.h>
 
 #if XCHAL_HAVE_PTP_MMU
diff --git a/arch/xtensa/kernel/head.S b/arch/xtensa/kernel/head.S
index 9c4e9433e536..2f76118ecf62 100644
--- a/arch/xtensa/kernel/head.S
+++ b/arch/xtensa/kernel/head.S
@@ -181,6 +181,8 @@ ENTRY(_startup)
 
 	isync
 
+	initialize_cacheattr
+
 #ifdef CONFIG_HAVE_SMP
 	movi	a2, CCON	# MX External Register to Configure Cache
 	movi	a3, 1
diff --git a/arch/xtensa/kernel/irq.c b/arch/xtensa/kernel/irq.c
index 18e4ef34ac45..a48bf2d10ac2 100644
--- a/arch/xtensa/kernel/irq.c
+++ b/arch/xtensa/kernel/irq.c
@@ -158,7 +158,6 @@ void __init init_IRQ(void)
 #ifdef CONFIG_SMP
 	ipi_init();
 #endif
-	variant_init_irq();
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c
index a02dc563d290..1fc138b6bc0a 100644
--- a/arch/xtensa/kernel/pci-dma.c
+++ b/arch/xtensa/kernel/pci-dma.c
@@ -16,26 +16,25 @@
  */
 
 #include <linux/dma-contiguous.h>
+#include <linux/dma-noncoherent.h>
 #include <linux/dma-direct.h>
 #include <linux/gfp.h>
 #include <linux/highmem.h>
 #include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/string.h>
 #include <linux/types.h>
 #include <asm/cacheflush.h>
 #include <asm/io.h>
+#include <asm/platform.h>
 
-static void do_cache_op(dma_addr_t dma_handle, size_t size,
+static void do_cache_op(phys_addr_t paddr, size_t size,
 			void (*fn)(unsigned long, unsigned long))
 {
-	unsigned long off = dma_handle & (PAGE_SIZE - 1);
-	unsigned long pfn = PFN_DOWN(dma_handle);
+	unsigned long off = paddr & (PAGE_SIZE - 1);
+	unsigned long pfn = PFN_DOWN(paddr);
 	struct page *page = pfn_to_page(pfn);
 
 	if (!PageHighMem(page))
-		fn((unsigned long)bus_to_virt(dma_handle), size);
+		fn((unsigned long)phys_to_virt(paddr), size);
 	else
 		while (size > 0) {
 			size_t sz = min_t(size_t, size, PAGE_SIZE - off);
@@ -49,14 +48,13 @@ static void do_cache_op(dma_addr_t dma_handle, size_t size,
 		}
 }
 
-static void xtensa_sync_single_for_cpu(struct device *dev,
-				       dma_addr_t dma_handle, size_t size,
-				       enum dma_data_direction dir)
+void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
+		size_t size, enum dma_data_direction dir)
 {
 	switch (dir) {
 	case DMA_BIDIRECTIONAL:
 	case DMA_FROM_DEVICE:
-		do_cache_op(dma_handle, size, __invalidate_dcache_range);
+		do_cache_op(paddr, size, __invalidate_dcache_range);
 		break;
 
 	case DMA_NONE:
@@ -68,15 +66,14 @@ static void xtensa_sync_single_for_cpu(struct device *dev,
 	}
 }
 
-static void xtensa_sync_single_for_device(struct device *dev,
-					  dma_addr_t dma_handle, size_t size,
-					  enum dma_data_direction dir)
+void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
+		size_t size, enum dma_data_direction dir)
 {
 	switch (dir) {
 	case DMA_BIDIRECTIONAL:
 	case DMA_TO_DEVICE:
 		if (XCHAL_DCACHE_IS_WRITEBACK)
-			do_cache_op(dma_handle, size, __flush_dcache_range);
+			do_cache_op(paddr, size, __flush_dcache_range);
 		break;
 
 	case DMA_NONE:
@@ -88,43 +85,66 @@ static void xtensa_sync_single_for_device(struct device *dev,
 	}
 }
 
-static void xtensa_sync_sg_for_cpu(struct device *dev,
-				   struct scatterlist *sg, int nents,
-				   enum dma_data_direction dir)
+#ifdef CONFIG_MMU
+bool platform_vaddr_cached(const void *p)
 {
-	struct scatterlist *s;
-	int i;
+	unsigned long addr = (unsigned long)p;
 
-	for_each_sg(sg, s, nents, i) {
-		xtensa_sync_single_for_cpu(dev, sg_dma_address(s),
-					   sg_dma_len(s), dir);
-	}
+	return addr >= XCHAL_KSEG_CACHED_VADDR &&
+	       addr - XCHAL_KSEG_CACHED_VADDR < XCHAL_KSEG_SIZE;
 }
 
-static void xtensa_sync_sg_for_device(struct device *dev,
-				      struct scatterlist *sg, int nents,
-				      enum dma_data_direction dir)
+bool platform_vaddr_uncached(const void *p)
 {
-	struct scatterlist *s;
-	int i;
+	unsigned long addr = (unsigned long)p;
 
-	for_each_sg(sg, s, nents, i) {
-		xtensa_sync_single_for_device(dev, sg_dma_address(s),
-					      sg_dma_len(s), dir);
-	}
+	return addr >= XCHAL_KSEG_BYPASS_VADDR &&
+	       addr - XCHAL_KSEG_BYPASS_VADDR < XCHAL_KSEG_SIZE;
+}
+
+void *platform_vaddr_to_uncached(void *p)
+{
+	return p + XCHAL_KSEG_BYPASS_VADDR - XCHAL_KSEG_CACHED_VADDR;
+}
+
+void *platform_vaddr_to_cached(void *p)
+{
+	return p + XCHAL_KSEG_CACHED_VADDR - XCHAL_KSEG_BYPASS_VADDR;
+}
+#else
+bool __attribute__((weak)) platform_vaddr_cached(const void *p)
+{
+	WARN_ONCE(1, "Default %s implementation is used\n", __func__);
+	return true;
+}
+
+bool __attribute__((weak)) platform_vaddr_uncached(const void *p)
+{
+	WARN_ONCE(1, "Default %s implementation is used\n", __func__);
+	return false;
+}
+
+void __attribute__((weak)) *platform_vaddr_to_uncached(void *p)
+{
+	WARN_ONCE(1, "Default %s implementation is used\n", __func__);
+	return p;
+}
+
+void __attribute__((weak)) *platform_vaddr_to_cached(void *p)
+{
+	WARN_ONCE(1, "Default %s implementation is used\n", __func__);
+	return p;
 }
+#endif
 
 /*
  * Note: We assume that the full memory space is always mapped to 'kseg'
  *	 Otherwise we have to use page attributes (not implemented).
  */
 
-static void *xtensa_dma_alloc(struct device *dev, size_t size,
-			      dma_addr_t *handle, gfp_t flag,
-			      unsigned long attrs)
+void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
+		gfp_t flag, unsigned long attrs)
 {
-	unsigned long ret;
-	unsigned long uncached;
 	unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
 	struct page *page = NULL;
 
@@ -147,6 +167,10 @@ static void *xtensa_dma_alloc(struct device *dev, size_t size,
 
 	*handle = phys_to_dma(dev, page_to_phys(page));
 
+	if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) {
+		return page;
+	}
+
 #ifdef CONFIG_MMU
 	if (PageHighMem(page)) {
 		void *p;
@@ -161,27 +185,21 @@ static void *xtensa_dma_alloc(struct device *dev, size_t size,
 		return p;
 	}
 #endif
-	ret = (unsigned long)page_address(page);
-	BUG_ON(ret < XCHAL_KSEG_CACHED_VADDR ||
-	       ret > XCHAL_KSEG_CACHED_VADDR + XCHAL_KSEG_SIZE - 1);
-
-	uncached = ret + XCHAL_KSEG_BYPASS_VADDR - XCHAL_KSEG_CACHED_VADDR;
-	__invalidate_dcache_range(ret, size);
-
-	return (void *)uncached;
+	BUG_ON(!platform_vaddr_cached(page_address(page)));
+	__invalidate_dcache_range((unsigned long)page_address(page), size);
+	return platform_vaddr_to_uncached(page_address(page));
 }
 
-static void xtensa_dma_free(struct device *dev, size_t size, void *vaddr,
-			    dma_addr_t dma_handle, unsigned long attrs)
+void arch_dma_free(struct device *dev, size_t size, void *vaddr,
+		dma_addr_t dma_handle, unsigned long attrs)
 {
 	unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
-	unsigned long addr = (unsigned long)vaddr;
 	struct page *page;
 
-	if (addr >= XCHAL_KSEG_BYPASS_VADDR &&
-	    addr - XCHAL_KSEG_BYPASS_VADDR < XCHAL_KSEG_SIZE) {
-		addr += XCHAL_KSEG_CACHED_VADDR - XCHAL_KSEG_BYPASS_VADDR;
-		page = virt_to_page(addr);
+	if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) {
+		page = vaddr;
+	} else if (platform_vaddr_uncached(vaddr)) {
+		page = virt_to_page(platform_vaddr_to_cached(vaddr));
 	} else {
 #ifdef CONFIG_MMU
 		dma_common_free_remap(vaddr, size, VM_MAP);
@@ -192,72 +210,3 @@ static void xtensa_dma_free(struct device *dev, size_t size, void *vaddr,
 	if (!dma_release_from_contiguous(dev, page, count))
 		__free_pages(page, get_order(size));
 }
-
-static dma_addr_t xtensa_map_page(struct device *dev, struct page *page,
-				  unsigned long offset, size_t size,
-				  enum dma_data_direction dir,
-				  unsigned long attrs)
-{
-	dma_addr_t dma_handle = page_to_phys(page) + offset;
-
-	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-		xtensa_sync_single_for_device(dev, dma_handle, size, dir);
-
-	return dma_handle;
-}
-
-static void xtensa_unmap_page(struct device *dev, dma_addr_t dma_handle,
-			      size_t size, enum dma_data_direction dir,
-			      unsigned long attrs)
-{
-	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-		xtensa_sync_single_for_cpu(dev, dma_handle, size, dir);
-}
-
-static int xtensa_map_sg(struct device *dev, struct scatterlist *sg,
-			 int nents, enum dma_data_direction dir,
-			 unsigned long attrs)
-{
-	struct scatterlist *s;
-	int i;
-
-	for_each_sg(sg, s, nents, i) {
-		s->dma_address = xtensa_map_page(dev, sg_page(s), s->offset,
-						 s->length, dir, attrs);
-	}
-	return nents;
-}
-
-static void xtensa_unmap_sg(struct device *dev,
-			    struct scatterlist *sg, int nents,
-			    enum dma_data_direction dir,
-			    unsigned long attrs)
-{
-	struct scatterlist *s;
-	int i;
-
-	for_each_sg(sg, s, nents, i) {
-		xtensa_unmap_page(dev, sg_dma_address(s),
-				  sg_dma_len(s), dir, attrs);
-	}
-}
-
-int xtensa_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
-	return 0;
-}
-
-const struct dma_map_ops xtensa_dma_map_ops = {
-	.alloc = xtensa_dma_alloc,
-	.free = xtensa_dma_free,
-	.map_page = xtensa_map_page,
-	.unmap_page = xtensa_unmap_page,
-	.map_sg = xtensa_map_sg,
-	.unmap_sg = xtensa_unmap_sg,
-	.sync_single_for_cpu = xtensa_sync_single_for_cpu,
-	.sync_single_for_device = xtensa_sync_single_for_device,
-	.sync_sg_for_cpu = xtensa_sync_sg_for_cpu,
-	.sync_sg_for_device = xtensa_sync_sg_for_device,
-	.mapping_error = xtensa_dma_mapping_error,
-};
-EXPORT_SYMBOL(xtensa_dma_map_ops);
diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c
index 686a27444bba..351283b60df6 100644
--- a/arch/xtensa/kernel/setup.c
+++ b/arch/xtensa/kernel/setup.c
@@ -47,8 +47,6 @@
 #include <asm/smp.h>
 #include <asm/sysmem.h>
 
-#include <platform/hardware.h>
-
 #if defined(CONFIG_VGA_CONSOLE) || defined(CONFIG_DUMMY_CONSOLE)
 struct screen_info screen_info = {
 	.orig_x = 0,
@@ -81,6 +79,7 @@ static char __initdata command_line[COMMAND_LINE_SIZE];
 static char default_command_line[COMMAND_LINE_SIZE] __initdata = CONFIG_CMDLINE;
 #endif
 
+#ifdef CONFIG_PARSE_BOOTPARAM
 /*
  * Boot parameter parsing.
  *
@@ -178,6 +177,13 @@ static int __init parse_bootparam(const bp_tag_t* tag)
 
 	return 0;
 }
+#else
+static int __init parse_bootparam(const bp_tag_t *tag)
+{
+	pr_info("Ignoring boot parameters at %p\n", tag);
+	return 0;
+}
+#endif
 
 #ifdef CONFIG_OF
 
diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S
index 70b731edc7b8..a1c3edb8ad56 100644
--- a/arch/xtensa/kernel/vmlinux.lds.S
+++ b/arch/xtensa/kernel/vmlinux.lds.S
@@ -20,7 +20,7 @@
 
 #include <asm/vectors.h>
 #include <variant/core.h>
-#include <platform/hardware.h>
+
 OUTPUT_ARCH(xtensa)
 ENTRY(_start)
 
diff --git a/arch/xtensa/platforms/iss/include/platform/hardware.h b/arch/xtensa/platforms/iss/include/platform/hardware.h
deleted file mode 100644
index 6930c12adc16..000000000000
--- a/arch/xtensa/platforms/iss/include/platform/hardware.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * include/asm-xtensa/platform-iss/hardware.h
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2001 Tensilica Inc.
- */
-
-/*
- * This file contains the default configuration of ISS.
- */
-
-#ifndef _XTENSA_PLATFORM_ISS_HARDWARE_H
-#define _XTENSA_PLATFORM_ISS_HARDWARE_H
-
-/*
- * Memory configuration.
- */
-
-#define PLATFORM_DEFAULT_MEM_START	0x00000000
-#define PLATFORM_DEFAULT_MEM_SIZE	0x08000000
-
-/*
- * Interrupt configuration.
- */
-
-#endif /* _XTENSA_PLATFORM_ISS_HARDWARE_H */
diff --git a/arch/xtensa/platforms/xt2000/include/platform/hardware.h b/arch/xtensa/platforms/xt2000/include/platform/hardware.h
index 886ef156ded3..8e5e0d6a81ec 100644
--- a/arch/xtensa/platforms/xt2000/include/platform/hardware.h
+++ b/arch/xtensa/platforms/xt2000/include/platform/hardware.h
@@ -17,17 +17,6 @@
 
 #include <variant/core.h>
 
-/* 
- * Memory configuration.
- */
-
-#define PLATFORM_DEFAULT_MEM_START 0x00000000
-#define PLATFORM_DEFAULT_MEM_SIZE 0x08000000
-
-/*
- * Number of platform IRQs
- */
-#define PLATFORM_NR_IRQS 3
 /*
  * On-board components.
  */
diff --git a/arch/xtensa/platforms/xtfpga/include/platform/hardware.h b/arch/xtensa/platforms/xtfpga/include/platform/hardware.h
index 1fda7e20dfcb..30d9cb6cf168 100644
--- a/arch/xtensa/platforms/xtfpga/include/platform/hardware.h
+++ b/arch/xtensa/platforms/xtfpga/include/platform/hardware.h
@@ -17,15 +17,6 @@
 #ifndef __XTENSA_XTAVNET_HARDWARE_H
 #define __XTENSA_XTAVNET_HARDWARE_H
 
-/* Memory configuration. */
-
-#define PLATFORM_DEFAULT_MEM_START __XTENSA_UL(CONFIG_DEFAULT_MEM_START)
-#define PLATFORM_DEFAULT_MEM_SIZE  __XTENSA_UL(CONFIG_DEFAULT_MEM_SIZE)
-
-/* Interrupt configuration. */
-
-#define PLATFORM_NR_IRQS	0
-
 /* Default assignment of LX60 devices to external interrupts. */
 
 #ifdef CONFIG_XTENSA_MX
diff --git a/arch/xtensa/variants/test_kc705_be/include/variant/core.h b/arch/xtensa/variants/test_kc705_be/include/variant/core.h
new file mode 100644
index 000000000000..a4ebdf7197d7
--- /dev/null
+++ b/arch/xtensa/variants/test_kc705_be/include/variant/core.h
@@ -0,0 +1,575 @@
+/* 
+ * xtensa/config/core-isa.h -- HAL definitions that are dependent on Xtensa
+ *				processor CORE configuration
+ *
+ *  See <xtensa/config/core.h>, which includes this file, for more details.
+ */
+
+/* Xtensa processor core configuration information.
+
+   Copyright (c) 1999-2015 Tensilica Inc.
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+   IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+   CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+   SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
+
+#ifndef _XTENSA_CORE_CONFIGURATION_H
+#define _XTENSA_CORE_CONFIGURATION_H
+
+
+/****************************************************************************
+	    Parameters Useful for Any Code, USER or PRIVILEGED
+ ****************************************************************************/
+
+/*
+ *  Note:  Macros of the form XCHAL_HAVE_*** have a value of 1 if the option is
+ *  configured, and a value of 0 otherwise.  These macros are always defined.
+ */
+
+
+/*----------------------------------------------------------------------
+				ISA
+  ----------------------------------------------------------------------*/
+
+#define XCHAL_HAVE_BE			1	/* big-endian byte ordering */
+#define XCHAL_HAVE_WINDOWED		1	/* windowed registers option */
+#define XCHAL_NUM_AREGS			32	/* num of physical addr regs */
+#define XCHAL_NUM_AREGS_LOG2		5	/* log2(XCHAL_NUM_AREGS) */
+#define XCHAL_MAX_INSTRUCTION_SIZE	8	/* max instr bytes (3..8) */
+#define XCHAL_HAVE_DEBUG		1	/* debug option */
+#define XCHAL_HAVE_DENSITY		1	/* 16-bit instructions */
+#define XCHAL_HAVE_LOOPS		1	/* zero-overhead loops */
+#define XCHAL_LOOP_BUFFER_SIZE		0	/* zero-ov. loop instr buffer size */
+#define XCHAL_HAVE_NSA			1	/* NSA/NSAU instructions */
+#define XCHAL_HAVE_MINMAX		1	/* MIN/MAX instructions */
+#define XCHAL_HAVE_SEXT			1	/* SEXT instruction */
+#define XCHAL_HAVE_DEPBITS		0	/* DEPBITS instruction */
+#define XCHAL_HAVE_CLAMPS		1	/* CLAMPS instruction */
+#define XCHAL_HAVE_MUL16		1	/* MUL16S/MUL16U instructions */
+#define XCHAL_HAVE_MUL32		1	/* MULL instruction */
+#define XCHAL_HAVE_MUL32_HIGH		1	/* MULUH/MULSH instructions */
+#define XCHAL_HAVE_DIV32		1	/* QUOS/QUOU/REMS/REMU instructions */
+#define XCHAL_HAVE_L32R			1	/* L32R instruction */
+#define XCHAL_HAVE_ABSOLUTE_LITERALS	0	/* non-PC-rel (extended) L32R */
+#define XCHAL_HAVE_CONST16		0	/* CONST16 instruction */
+#define XCHAL_HAVE_ADDX			1	/* ADDX#/SUBX# instructions */
+#define XCHAL_HAVE_WIDE_BRANCHES	0	/* B*.W18 or B*.W15 instr's */
+#define XCHAL_HAVE_PREDICTED_BRANCHES	0	/* B[EQ/EQZ/NE/NEZ]T instr's */
+#define XCHAL_HAVE_CALL4AND12		1	/* (obsolete option) */
+#define XCHAL_HAVE_ABS			1	/* ABS instruction */
+/*#define XCHAL_HAVE_POPC		0*/	/* POPC instruction */
+/*#define XCHAL_HAVE_CRC		0*/	/* CRC instruction */
+#define XCHAL_HAVE_RELEASE_SYNC		1	/* L32AI/S32RI instructions */
+#define XCHAL_HAVE_S32C1I		1	/* S32C1I instruction */
+#define XCHAL_HAVE_SPECULATION		0	/* speculation */
+#define XCHAL_HAVE_FULL_RESET		1	/* all regs/state reset */
+#define XCHAL_NUM_CONTEXTS		1	/* */
+#define XCHAL_NUM_MISC_REGS		2	/* num of scratch regs (0..4) */
+#define XCHAL_HAVE_TAP_MASTER		0	/* JTAG TAP control instr's */
+#define XCHAL_HAVE_PRID			1	/* processor ID register */
+#define XCHAL_HAVE_EXTERN_REGS		1	/* WER/RER instructions */
+#define XCHAL_HAVE_MX			0	/* MX core (Tensilica internal) */
+#define XCHAL_HAVE_MP_INTERRUPTS	0	/* interrupt distributor port */
+#define XCHAL_HAVE_MP_RUNSTALL		0	/* core RunStall control port */
+#define XCHAL_HAVE_PSO			0	/* Power Shut-Off */
+#define XCHAL_HAVE_PSO_CDM		0	/* core/debug/mem pwr domains */
+#define XCHAL_HAVE_PSO_FULL_RETENTION	0	/* all regs preserved on PSO */
+#define XCHAL_HAVE_THREADPTR		1	/* THREADPTR register */
+#define XCHAL_HAVE_BOOLEANS		1	/* boolean registers */
+#define XCHAL_HAVE_CP			1	/* CPENABLE reg (coprocessor) */
+#define XCHAL_CP_MAXCFG			8	/* max allowed cp id plus one */
+#define XCHAL_HAVE_MAC16		1	/* MAC16 package */
+
+#define XCHAL_HAVE_FUSION		 0	/* Fusion*/
+#define XCHAL_HAVE_FUSION_FP	 0	        /* Fusion FP option */
+#define XCHAL_HAVE_FUSION_LOW_POWER 0	/* Fusion Low Power option */
+#define XCHAL_HAVE_FUSION_AES	 0	        /* Fusion BLE/Wifi AES-128 CCM option */
+#define XCHAL_HAVE_FUSION_CONVENC	 0       /* Fusion Conv Encode option */
+#define XCHAL_HAVE_FUSION_LFSR_CRC	 0	/* Fusion LFSR-CRC option */
+#define XCHAL_HAVE_FUSION_BITOPS	 0	/* Fusion Bit Operations Support option */
+#define XCHAL_HAVE_FUSION_AVS	 0	/* Fusion AVS option */
+#define XCHAL_HAVE_FUSION_16BIT_BASEBAND	 0	/* Fusion 16-bit Baseband option */
+#define XCHAL_HAVE_HIFIPRO		0	/* HiFiPro Audio Engine pkg */
+#define XCHAL_HAVE_HIFI4		0	/* HiFi4 Audio Engine pkg */
+#define XCHAL_HAVE_HIFI4_VFPU		0	/* HiFi4 Audio Engine VFPU option */
+#define XCHAL_HAVE_HIFI3		0	/* HiFi3 Audio Engine pkg */
+#define XCHAL_HAVE_HIFI3_VFPU		0	/* HiFi3 Audio Engine VFPU option */
+#define XCHAL_HAVE_HIFI2		1	/* HiFi2 Audio Engine pkg */
+#define XCHAL_HAVE_HIFI2_MUL32X24	1	/* HiFi2 and 32x24 MACs */
+#define XCHAL_HAVE_HIFI2EP		1	/* HiFi2EP */
+#define XCHAL_HAVE_HIFI_MINI		0	
+
+
+#define XCHAL_HAVE_VECTORFPU2005	0	/* vector or user floating-point pkg */
+#define XCHAL_HAVE_USER_DPFPU         0       /* user DP floating-point pkg */
+#define XCHAL_HAVE_USER_SPFPU         0       /* user DP floating-point pkg */
+#define XCHAL_HAVE_FP                 0      /* single prec floating point */
+#define XCHAL_HAVE_FP_DIV             0  /* FP with DIV instructions */
+#define XCHAL_HAVE_FP_RECIP           0        /* FP with RECIP instructions */
+#define XCHAL_HAVE_FP_SQRT            0 /* FP with SQRT instructions */
+#define XCHAL_HAVE_FP_RSQRT           0        /* FP with RSQRT instructions */
+#define XCHAL_HAVE_DFP                        0     /* double precision FP pkg */
+#define XCHAL_HAVE_DFP_DIV            0 /* DFP with DIV instructions */
+#define XCHAL_HAVE_DFP_RECIP          0       /* DFP with RECIP instructions*/
+#define XCHAL_HAVE_DFP_SQRT           0        /* DFP with SQRT instructions */
+#define XCHAL_HAVE_DFP_RSQRT          0       /* DFP with RSQRT instructions*/
+#define XCHAL_HAVE_DFP_ACCEL		0	/* double precision FP acceleration pkg */
+#define XCHAL_HAVE_DFP_accel		XCHAL_HAVE_DFP_ACCEL				/* for backward compatibility */
+
+#define XCHAL_HAVE_DFPU_SINGLE_ONLY    0                 	/* DFPU Coprocessor, single precision only */
+#define XCHAL_HAVE_DFPU_SINGLE_DOUBLE  0               	/* DFPU Coprocessor, single and double precision */
+#define XCHAL_HAVE_VECTRA1		0	/* Vectra I  pkg */
+#define XCHAL_HAVE_VECTRALX		0	/* Vectra LX pkg */
+#define XCHAL_HAVE_PDX4		        0	/* PDX4 */
+#define XCHAL_HAVE_CONNXD2		0	/* ConnX D2 pkg */
+#define XCHAL_HAVE_CONNXD2_DUALLSFLIX   0	/* ConnX D2 & Dual LoadStore Flix */
+#define XCHAL_HAVE_BBE16		0	/* ConnX BBE16 pkg */
+#define XCHAL_HAVE_BBE16_RSQRT		0	/* BBE16 & vector recip sqrt */
+#define XCHAL_HAVE_BBE16_VECDIV		0	/* BBE16 & vector divide */
+#define XCHAL_HAVE_BBE16_DESPREAD	0	/* BBE16 & despread */
+#define XCHAL_HAVE_BBENEP		0	/* ConnX BBENEP pkgs */
+#define XCHAL_HAVE_BSP3			0	/* ConnX BSP3 pkg */
+#define XCHAL_HAVE_BSP3_TRANSPOSE	0	/* BSP3 & transpose32x32 */
+#define XCHAL_HAVE_SSP16		0	/* ConnX SSP16 pkg */
+#define XCHAL_HAVE_SSP16_VITERBI	0	/* SSP16 & viterbi */
+#define XCHAL_HAVE_TURBO16		0	/* ConnX Turbo16 pkg */
+#define XCHAL_HAVE_BBP16		0	/* ConnX BBP16 pkg */
+#define XCHAL_HAVE_FLIX3		0	/* basic 3-way FLIX option */
+#define XCHAL_HAVE_GRIVPEP              0   /*  GRIVPEP is General Release of IVPEP */
+#define XCHAL_HAVE_GRIVPEP_HISTOGRAM    0   /* Histogram option on GRIVPEP */
+
+
+/*----------------------------------------------------------------------
+				MISC
+  ----------------------------------------------------------------------*/
+
+#define XCHAL_NUM_LOADSTORE_UNITS	1	/* load/store units */
+#define XCHAL_NUM_WRITEBUFFER_ENTRIES	8	/* size of write buffer */
+#define XCHAL_INST_FETCH_WIDTH		8	/* instr-fetch width in bytes */
+#define XCHAL_DATA_WIDTH		8	/* data width in bytes */
+#define XCHAL_DATA_PIPE_DELAY		1	/* d-side pipeline delay
+						   (1 = 5-stage, 2 = 7-stage) */
+#define XCHAL_CLOCK_GATING_GLOBAL	0	/* global clock gating */
+#define XCHAL_CLOCK_GATING_FUNCUNIT	0	/* funct. unit clock gating */
+/*  In T1050, applies to selected core load and store instructions (see ISA): */
+#define XCHAL_UNALIGNED_LOAD_EXCEPTION	1	/* unaligned loads cause exc. */
+#define XCHAL_UNALIGNED_STORE_EXCEPTION	1	/* unaligned stores cause exc.*/
+#define XCHAL_UNALIGNED_LOAD_HW		0	/* unaligned loads work in hw */
+#define XCHAL_UNALIGNED_STORE_HW	0	/* unaligned stores work in hw*/
+
+#define XCHAL_SW_VERSION		1100002	/* sw version of this header */
+
+#define XCHAL_CORE_ID			"test_kc705_be"	/* alphanum core name
+						   (CoreID) set in the Xtensa
+						   Processor Generator */
+
+#define XCHAL_BUILD_UNIQUE_ID		0x00058D8C	/* 22-bit sw build ID */
+
+/*
+ *  These definitions describe the hardware targeted by this software.
+ */
+#define XCHAL_HW_CONFIGID0		0xC1B3FFFF	/* ConfigID hi 32 bits*/
+#define XCHAL_HW_CONFIGID1		0x1C858D8C	/* ConfigID lo 32 bits*/
+#define XCHAL_HW_VERSION_NAME		"LX6.0.2"	/* full version name */
+#define XCHAL_HW_VERSION_MAJOR		2600	/* major ver# of targeted hw */
+#define XCHAL_HW_VERSION_MINOR		2	/* minor ver# of targeted hw */
+#define XCHAL_HW_VERSION		260002	/* major*100+minor */
+#define XCHAL_HW_REL_LX6		1
+#define XCHAL_HW_REL_LX6_0		1
+#define XCHAL_HW_REL_LX6_0_2		1
+#define XCHAL_HW_CONFIGID_RELIABLE	1
+/*  If software targets a *range* of hardware versions, these are the bounds: */
+#define XCHAL_HW_MIN_VERSION_MAJOR	2600	/* major v of earliest tgt hw */
+#define XCHAL_HW_MIN_VERSION_MINOR	2	/* minor v of earliest tgt hw */
+#define XCHAL_HW_MIN_VERSION		260002	/* earliest targeted hw */
+#define XCHAL_HW_MAX_VERSION_MAJOR	2600	/* major v of latest tgt hw */
+#define XCHAL_HW_MAX_VERSION_MINOR	2	/* minor v of latest tgt hw */
+#define XCHAL_HW_MAX_VERSION		260002	/* latest targeted hw */
+
+
+/*----------------------------------------------------------------------
+				CACHE
+  ----------------------------------------------------------------------*/
+
+#define XCHAL_ICACHE_LINESIZE		32	/* I-cache line size in bytes */
+#define XCHAL_DCACHE_LINESIZE		32	/* D-cache line size in bytes */
+#define XCHAL_ICACHE_LINEWIDTH		5	/* log2(I line size in bytes) */
+#define XCHAL_DCACHE_LINEWIDTH		5	/* log2(D line size in bytes) */
+
+#define XCHAL_ICACHE_SIZE		16384	/* I-cache size in bytes or 0 */
+#define XCHAL_DCACHE_SIZE		16384	/* D-cache size in bytes or 0 */
+
+#define XCHAL_DCACHE_IS_WRITEBACK	1	/* writeback feature */
+#define XCHAL_DCACHE_IS_COHERENT	0	/* MP coherence feature */
+
+#define XCHAL_HAVE_PREFETCH		1	/* PREFCTL register */
+#define XCHAL_HAVE_PREFETCH_L1		0	/* prefetch to L1 dcache */
+#define XCHAL_PREFETCH_CASTOUT_LINES	1	/* dcache pref. castout bufsz */
+#define XCHAL_PREFETCH_ENTRIES		8	/* cache prefetch entries */
+#define XCHAL_PREFETCH_BLOCK_ENTRIES	0	/* prefetch block streams */
+#define XCHAL_HAVE_CACHE_BLOCKOPS	0	/* block prefetch for caches */
+#define XCHAL_HAVE_ICACHE_TEST		1	/* Icache test instructions */
+#define XCHAL_HAVE_DCACHE_TEST		1	/* Dcache test instructions */
+#define XCHAL_HAVE_ICACHE_DYN_WAYS	0	/* Icache dynamic way support */
+#define XCHAL_HAVE_DCACHE_DYN_WAYS	0	/* Dcache dynamic way support */
+
+
+
+
+/****************************************************************************
+    Parameters Useful for PRIVILEGED (Supervisory or Non-Virtualized) Code
+ ****************************************************************************/
+
+
+#ifndef XTENSA_HAL_NON_PRIVILEGED_ONLY
+
+/*----------------------------------------------------------------------
+				CACHE
+  ----------------------------------------------------------------------*/
+
+#define XCHAL_HAVE_PIF			1	/* any outbound PIF present */
+
+/*  If present, cache size in bytes == (ways * 2^(linewidth + setwidth)).  */
+
+/*  Number of cache sets in log2(lines per way):  */
+#define XCHAL_ICACHE_SETWIDTH		7
+#define XCHAL_DCACHE_SETWIDTH		7
+
+/*  Cache set associativity (number of ways):  */
+#define XCHAL_ICACHE_WAYS		4
+#define XCHAL_DCACHE_WAYS		4
+
+/*  Cache features:  */
+#define XCHAL_ICACHE_LINE_LOCKABLE	1
+#define XCHAL_DCACHE_LINE_LOCKABLE	1
+#define XCHAL_ICACHE_ECC_PARITY		0
+#define XCHAL_DCACHE_ECC_PARITY		0
+
+/*  Cache access size in bytes (affects operation of SICW instruction):  */
+#define XCHAL_ICACHE_ACCESS_SIZE	8
+#define XCHAL_DCACHE_ACCESS_SIZE	8
+
+#define XCHAL_DCACHE_BANKS		1	/* number of banks */
+
+/*  Number of encoded cache attr bits (see <xtensa/hal.h> for decoded bits):  */
+#define XCHAL_CA_BITS			4
+
+/*  Whether MEMCTL register has anything useful  */
+#define XCHAL_USE_MEMCTL		(((XCHAL_LOOP_BUFFER_SIZE > 0)	||	\
+					   XCHAL_DCACHE_IS_COHERENT	||	\
+					   XCHAL_HAVE_ICACHE_DYN_WAYS	||	\
+					   XCHAL_HAVE_DCACHE_DYN_WAYS)	&&	\
+					   (XCHAL_HW_MIN_VERSION >= XTENSA_HWVERSION_RE_2012_0))
+
+
+/*----------------------------------------------------------------------
+			INTERNAL I/D RAM/ROMs and XLMI
+  ----------------------------------------------------------------------*/
+
+#define XCHAL_NUM_INSTROM		0	/* number of core instr. ROMs */
+#define XCHAL_NUM_INSTRAM		0	/* number of core instr. RAMs */
+#define XCHAL_NUM_DATAROM		0	/* number of core data ROMs */
+#define XCHAL_NUM_DATARAM		0	/* number of core data RAMs */
+#define XCHAL_NUM_URAM			0	/* number of core unified RAMs*/
+#define XCHAL_NUM_XLMI			0	/* number of core XLMI ports */
+
+#define XCHAL_HAVE_IMEM_LOADSTORE	1	/* can load/store to IROM/IRAM*/
+
+
+/*----------------------------------------------------------------------
+			INTERRUPTS and TIMERS
+  ----------------------------------------------------------------------*/
+
+#define XCHAL_HAVE_INTERRUPTS		1	/* interrupt option */
+#define XCHAL_HAVE_HIGHPRI_INTERRUPTS	1	/* med/high-pri. interrupts */
+#define XCHAL_HAVE_NMI			1	/* non-maskable interrupt */
+#define XCHAL_HAVE_CCOUNT		1	/* CCOUNT reg. (timer option) */
+#define XCHAL_NUM_TIMERS		3	/* number of CCOMPAREn regs */
+#define XCHAL_NUM_INTERRUPTS		22	/* number of interrupts */
+#define XCHAL_NUM_INTERRUPTS_LOG2	5	/* ceil(log2(NUM_INTERRUPTS)) */
+#define XCHAL_NUM_EXTINTERRUPTS		16	/* num of external interrupts */
+#define XCHAL_NUM_INTLEVELS		6	/* number of interrupt levels
+						   (not including level zero) */
+#define XCHAL_EXCM_LEVEL		4	/* level masked by PS.EXCM */
+	/* (always 1 in XEA1; levels 2 .. EXCM_LEVEL are "medium priority") */
+
+/*  Masks of interrupts at each interrupt level:  */
+#define XCHAL_INTLEVEL1_MASK		0x001F00BF
+#define XCHAL_INTLEVEL2_MASK		0x00000140
+#define XCHAL_INTLEVEL3_MASK		0x00200E00
+#define XCHAL_INTLEVEL4_MASK		0x00008000
+#define XCHAL_INTLEVEL5_MASK		0x00003000
+#define XCHAL_INTLEVEL6_MASK		0x00000000
+#define XCHAL_INTLEVEL7_MASK		0x00004000
+
+/*  Masks of interrupts at each range 1..n of interrupt levels:  */
+#define XCHAL_INTLEVEL1_ANDBELOW_MASK	0x001F00BF
+#define XCHAL_INTLEVEL2_ANDBELOW_MASK	0x001F01FF
+#define XCHAL_INTLEVEL3_ANDBELOW_MASK	0x003F0FFF
+#define XCHAL_INTLEVEL4_ANDBELOW_MASK	0x003F8FFF
+#define XCHAL_INTLEVEL5_ANDBELOW_MASK	0x003FBFFF
+#define XCHAL_INTLEVEL6_ANDBELOW_MASK	0x003FBFFF
+#define XCHAL_INTLEVEL7_ANDBELOW_MASK	0x003FFFFF
+
+/*  Level of each interrupt:  */
+#define XCHAL_INT0_LEVEL		1
+#define XCHAL_INT1_LEVEL		1
+#define XCHAL_INT2_LEVEL		1
+#define XCHAL_INT3_LEVEL		1
+#define XCHAL_INT4_LEVEL		1
+#define XCHAL_INT5_LEVEL		1
+#define XCHAL_INT6_LEVEL		2
+#define XCHAL_INT7_LEVEL		1
+#define XCHAL_INT8_LEVEL		2
+#define XCHAL_INT9_LEVEL		3
+#define XCHAL_INT10_LEVEL		3
+#define XCHAL_INT11_LEVEL		3
+#define XCHAL_INT12_LEVEL		5
+#define XCHAL_INT13_LEVEL		5
+#define XCHAL_INT14_LEVEL		7
+#define XCHAL_INT15_LEVEL		4
+#define XCHAL_INT16_LEVEL		1
+#define XCHAL_INT17_LEVEL		1
+#define XCHAL_INT18_LEVEL		1
+#define XCHAL_INT19_LEVEL		1
+#define XCHAL_INT20_LEVEL		1
+#define XCHAL_INT21_LEVEL		3
+#define XCHAL_DEBUGLEVEL		6	/* debug interrupt level */
+#define XCHAL_HAVE_DEBUG_EXTERN_INT	1	/* OCD external db interrupt */
+#define XCHAL_NMILEVEL			7	/* NMI "level" (for use with
+						   EXCSAVE/EPS/EPC_n, RFI n) */
+
+/*  Type of each interrupt:  */
+#define XCHAL_INT0_TYPE 	XTHAL_INTTYPE_EXTERN_LEVEL
+#define XCHAL_INT1_TYPE 	XTHAL_INTTYPE_EXTERN_LEVEL
+#define XCHAL_INT2_TYPE 	XTHAL_INTTYPE_EXTERN_LEVEL
+#define XCHAL_INT3_TYPE 	XTHAL_INTTYPE_EXTERN_LEVEL
+#define XCHAL_INT4_TYPE 	XTHAL_INTTYPE_EXTERN_LEVEL
+#define XCHAL_INT5_TYPE 	XTHAL_INTTYPE_EXTERN_LEVEL
+#define XCHAL_INT6_TYPE 	XTHAL_INTTYPE_TIMER
+#define XCHAL_INT7_TYPE 	XTHAL_INTTYPE_SOFTWARE
+#define XCHAL_INT8_TYPE 	XTHAL_INTTYPE_EXTERN_LEVEL
+#define XCHAL_INT9_TYPE 	XTHAL_INTTYPE_EXTERN_LEVEL
+#define XCHAL_INT10_TYPE 	XTHAL_INTTYPE_TIMER
+#define XCHAL_INT11_TYPE 	XTHAL_INTTYPE_SOFTWARE
+#define XCHAL_INT12_TYPE 	XTHAL_INTTYPE_EXTERN_LEVEL
+#define XCHAL_INT13_TYPE 	XTHAL_INTTYPE_TIMER
+#define XCHAL_INT14_TYPE 	XTHAL_INTTYPE_NMI
+#define XCHAL_INT15_TYPE 	XTHAL_INTTYPE_PROFILING
+#define XCHAL_INT16_TYPE 	XTHAL_INTTYPE_EXTERN_EDGE
+#define XCHAL_INT17_TYPE 	XTHAL_INTTYPE_EXTERN_EDGE
+#define XCHAL_INT18_TYPE 	XTHAL_INTTYPE_EXTERN_EDGE
+#define XCHAL_INT19_TYPE 	XTHAL_INTTYPE_EXTERN_EDGE
+#define XCHAL_INT20_TYPE 	XTHAL_INTTYPE_EXTERN_EDGE
+#define XCHAL_INT21_TYPE 	XTHAL_INTTYPE_EXTERN_EDGE
+
+/*  Masks of interrupts for each type of interrupt:  */
+#define XCHAL_INTTYPE_MASK_UNCONFIGURED	0xFFC00000
+#define XCHAL_INTTYPE_MASK_SOFTWARE	0x00000880
+#define XCHAL_INTTYPE_MASK_EXTERN_EDGE	0x003F0000
+#define XCHAL_INTTYPE_MASK_EXTERN_LEVEL	0x0000133F
+#define XCHAL_INTTYPE_MASK_TIMER	0x00002440
+#define XCHAL_INTTYPE_MASK_NMI		0x00004000
+#define XCHAL_INTTYPE_MASK_WRITE_ERROR	0x00000000
+#define XCHAL_INTTYPE_MASK_PROFILING	0x00008000
+
+/*  Interrupt numbers assigned to specific interrupt sources:  */
+#define XCHAL_TIMER0_INTERRUPT		6	/* CCOMPARE0 */
+#define XCHAL_TIMER1_INTERRUPT		10	/* CCOMPARE1 */
+#define XCHAL_TIMER2_INTERRUPT		13	/* CCOMPARE2 */
+#define XCHAL_TIMER3_INTERRUPT		XTHAL_TIMER_UNCONFIGURED
+#define XCHAL_NMI_INTERRUPT		14	/* non-maskable interrupt */
+#define XCHAL_PROFILING_INTERRUPT	15	/* profiling interrupt */
+
+/*  Interrupt numbers for levels at which only one interrupt is configured:  */
+#define XCHAL_INTLEVEL4_NUM		15
+#define XCHAL_INTLEVEL7_NUM		14
+/*  (There are many interrupts each at level(s) 1, 2, 3, 5.)  */
+
+
+/*
+ *  External interrupt mapping.
+ *  These macros describe how Xtensa processor interrupt numbers
+ *  (as numbered internally, eg. in INTERRUPT and INTENABLE registers)
+ *  map to external BInterrupt<n> pins, for those interrupts
+ *  configured as external (level-triggered, edge-triggered, or NMI).
+ *  See the Xtensa processor databook for more details.
+ */
+
+/*  Core interrupt numbers mapped to each EXTERNAL BInterrupt pin number:  */
+#define XCHAL_EXTINT0_NUM		0	/* (intlevel 1) */
+#define XCHAL_EXTINT1_NUM		1	/* (intlevel 1) */
+#define XCHAL_EXTINT2_NUM		2	/* (intlevel 1) */
+#define XCHAL_EXTINT3_NUM		3	/* (intlevel 1) */
+#define XCHAL_EXTINT4_NUM		4	/* (intlevel 1) */
+#define XCHAL_EXTINT5_NUM		5	/* (intlevel 1) */
+#define XCHAL_EXTINT6_NUM		8	/* (intlevel 2) */
+#define XCHAL_EXTINT7_NUM		9	/* (intlevel 3) */
+#define XCHAL_EXTINT8_NUM		12	/* (intlevel 5) */
+#define XCHAL_EXTINT9_NUM		14	/* (intlevel 7) */
+#define XCHAL_EXTINT10_NUM		16	/* (intlevel 1) */
+#define XCHAL_EXTINT11_NUM		17	/* (intlevel 1) */
+#define XCHAL_EXTINT12_NUM		18	/* (intlevel 1) */
+#define XCHAL_EXTINT13_NUM		19	/* (intlevel 1) */
+#define XCHAL_EXTINT14_NUM		20	/* (intlevel 1) */
+#define XCHAL_EXTINT15_NUM		21	/* (intlevel 3) */
+/*  EXTERNAL BInterrupt pin numbers mapped to each core interrupt number:  */
+#define XCHAL_INT0_EXTNUM		0	/* (intlevel 1) */
+#define XCHAL_INT1_EXTNUM		1	/* (intlevel 1) */
+#define XCHAL_INT2_EXTNUM		2	/* (intlevel 1) */
+#define XCHAL_INT3_EXTNUM		3	/* (intlevel 1) */
+#define XCHAL_INT4_EXTNUM		4	/* (intlevel 1) */
+#define XCHAL_INT5_EXTNUM		5	/* (intlevel 1) */
+#define XCHAL_INT8_EXTNUM		6	/* (intlevel 2) */
+#define XCHAL_INT9_EXTNUM		7	/* (intlevel 3) */
+#define XCHAL_INT12_EXTNUM		8	/* (intlevel 5) */
+#define XCHAL_INT14_EXTNUM		9	/* (intlevel 7) */
+#define XCHAL_INT16_EXTNUM		10	/* (intlevel 1) */
+#define XCHAL_INT17_EXTNUM		11	/* (intlevel 1) */
+#define XCHAL_INT18_EXTNUM		12	/* (intlevel 1) */
+#define XCHAL_INT19_EXTNUM		13	/* (intlevel 1) */
+#define XCHAL_INT20_EXTNUM		14	/* (intlevel 1) */
+#define XCHAL_INT21_EXTNUM		15	/* (intlevel 3) */
+
+
+/*----------------------------------------------------------------------
+			EXCEPTIONS and VECTORS
+  ----------------------------------------------------------------------*/
+
+#define XCHAL_XEA_VERSION		2	/* Xtensa Exception Architecture
+						   number: 1 == XEA1 (old)
+							   2 == XEA2 (new)
+							   0 == XEAX (extern) or TX */
+#define XCHAL_HAVE_XEA1			0	/* Exception Architecture 1 */
+#define XCHAL_HAVE_XEA2			1	/* Exception Architecture 2 */
+#define XCHAL_HAVE_XEAX			0	/* External Exception Arch. */
+#define XCHAL_HAVE_EXCEPTIONS		1	/* exception option */
+#define XCHAL_HAVE_HALT			0	/* halt architecture option */
+#define XCHAL_HAVE_BOOTLOADER		0	/* boot loader (for TX) */
+#define XCHAL_HAVE_MEM_ECC_PARITY	0	/* local memory ECC/parity */
+#define XCHAL_HAVE_VECTOR_SELECT	1	/* relocatable vectors */
+#define XCHAL_HAVE_VECBASE		1	/* relocatable vectors */
+#define XCHAL_VECBASE_RESET_VADDR	0x00002000  /* VECBASE reset value */
+#define XCHAL_VECBASE_RESET_PADDR	0x00002000
+#define XCHAL_RESET_VECBASE_OVERLAP	0
+
+#define XCHAL_RESET_VECTOR0_VADDR	0xFE000000
+#define XCHAL_RESET_VECTOR0_PADDR	0xFE000000
+#define XCHAL_RESET_VECTOR1_VADDR	0x00001000
+#define XCHAL_RESET_VECTOR1_PADDR	0x00001000
+#define XCHAL_RESET_VECTOR_VADDR	0xFE000000
+#define XCHAL_RESET_VECTOR_PADDR	0xFE000000
+#define XCHAL_USER_VECOFS		0x00000340
+#define XCHAL_USER_VECTOR_VADDR		0x00002340
+#define XCHAL_USER_VECTOR_PADDR		0x00002340
+#define XCHAL_KERNEL_VECOFS		0x00000300
+#define XCHAL_KERNEL_VECTOR_VADDR	0x00002300
+#define XCHAL_KERNEL_VECTOR_PADDR	0x00002300
+#define XCHAL_DOUBLEEXC_VECOFS		0x000003C0
+#define XCHAL_DOUBLEEXC_VECTOR_VADDR	0x000023C0
+#define XCHAL_DOUBLEEXC_VECTOR_PADDR	0x000023C0
+#define XCHAL_WINDOW_OF4_VECOFS		0x00000000
+#define XCHAL_WINDOW_UF4_VECOFS		0x00000040
+#define XCHAL_WINDOW_OF8_VECOFS		0x00000080
+#define XCHAL_WINDOW_UF8_VECOFS		0x000000C0
+#define XCHAL_WINDOW_OF12_VECOFS	0x00000100
+#define XCHAL_WINDOW_UF12_VECOFS	0x00000140
+#define XCHAL_WINDOW_VECTORS_VADDR	0x00002000
+#define XCHAL_WINDOW_VECTORS_PADDR	0x00002000
+#define XCHAL_INTLEVEL2_VECOFS		0x00000180
+#define XCHAL_INTLEVEL2_VECTOR_VADDR	0x00002180
+#define XCHAL_INTLEVEL2_VECTOR_PADDR	0x00002180
+#define XCHAL_INTLEVEL3_VECOFS		0x000001C0
+#define XCHAL_INTLEVEL3_VECTOR_VADDR	0x000021C0
+#define XCHAL_INTLEVEL3_VECTOR_PADDR	0x000021C0
+#define XCHAL_INTLEVEL4_VECOFS		0x00000200
+#define XCHAL_INTLEVEL4_VECTOR_VADDR	0x00002200
+#define XCHAL_INTLEVEL4_VECTOR_PADDR	0x00002200
+#define XCHAL_INTLEVEL5_VECOFS		0x00000240
+#define XCHAL_INTLEVEL5_VECTOR_VADDR	0x00002240
+#define XCHAL_INTLEVEL5_VECTOR_PADDR	0x00002240
+#define XCHAL_INTLEVEL6_VECOFS		0x00000280
+#define XCHAL_INTLEVEL6_VECTOR_VADDR	0x00002280
+#define XCHAL_INTLEVEL6_VECTOR_PADDR	0x00002280
+#define XCHAL_DEBUG_VECOFS		XCHAL_INTLEVEL6_VECOFS
+#define XCHAL_DEBUG_VECTOR_VADDR	XCHAL_INTLEVEL6_VECTOR_VADDR
+#define XCHAL_DEBUG_VECTOR_PADDR	XCHAL_INTLEVEL6_VECTOR_PADDR
+#define XCHAL_NMI_VECOFS		0x000002C0
+#define XCHAL_NMI_VECTOR_VADDR		0x000022C0
+#define XCHAL_NMI_VECTOR_PADDR		0x000022C0
+#define XCHAL_INTLEVEL7_VECOFS		XCHAL_NMI_VECOFS
+#define XCHAL_INTLEVEL7_VECTOR_VADDR	XCHAL_NMI_VECTOR_VADDR
+#define XCHAL_INTLEVEL7_VECTOR_PADDR	XCHAL_NMI_VECTOR_PADDR
+
+
+/*----------------------------------------------------------------------
+				DEBUG MODULE
+  ----------------------------------------------------------------------*/
+
+/*  Misc  */
+#define XCHAL_HAVE_DEBUG_ERI		1	/* ERI to debug module */
+#define XCHAL_HAVE_DEBUG_APB		0	/* APB to debug module */
+#define XCHAL_HAVE_DEBUG_JTAG		1	/* JTAG to debug module */
+
+/*  On-Chip Debug (OCD)  */
+#define XCHAL_HAVE_OCD			1	/* OnChipDebug option */
+#define XCHAL_NUM_IBREAK		2	/* number of IBREAKn regs */
+#define XCHAL_NUM_DBREAK		2	/* number of DBREAKn regs */
+#define XCHAL_HAVE_OCD_DIR_ARRAY	0	/* faster OCD option (to LX4) */
+#define XCHAL_HAVE_OCD_LS32DDR		1	/* L32DDR/S32DDR (faster OCD) */
+
+/*  TRAX (in core)  */
+#define XCHAL_HAVE_TRAX			1	/* TRAX in debug module */
+#define XCHAL_TRAX_MEM_SIZE		262144	/* TRAX memory size in bytes */
+#define XCHAL_TRAX_MEM_SHAREABLE	1	/* start/end regs; ready sig. */
+#define XCHAL_TRAX_ATB_WIDTH		0	/* ATB width (bits), 0=no ATB */
+#define XCHAL_TRAX_TIME_WIDTH		0	/* timestamp bitwidth, 0=none */
+
+/*  Perf counters  */
+#define XCHAL_NUM_PERF_COUNTERS		8	/* performance counters */
+
+
+/*----------------------------------------------------------------------
+				MMU
+  ----------------------------------------------------------------------*/
+
+/*  See core-matmap.h header file for more details.  */
+
+#define XCHAL_HAVE_TLBS			1	/* inverse of HAVE_CACHEATTR */
+#define XCHAL_HAVE_SPANNING_WAY		1	/* one way maps I+D 4GB vaddr */
+#define XCHAL_SPANNING_WAY		6	/* TLB spanning way number */
+#define XCHAL_HAVE_IDENTITY_MAP		0	/* vaddr == paddr always */
+#define XCHAL_HAVE_CACHEATTR		0	/* CACHEATTR register present */
+#define XCHAL_HAVE_MIMIC_CACHEATTR	0	/* region protection */
+#define XCHAL_HAVE_XLT_CACHEATTR	0	/* region prot. w/translation */
+#define XCHAL_HAVE_PTP_MMU		1	/* full MMU (with page table
+						   [autorefill] and protection)
+						   usable for an MMU-based OS */
+/*  If none of the above last 4 are set, it's a custom TLB configuration.  */
+#define XCHAL_ITLB_ARF_ENTRIES_LOG2	2	/* log2(autorefill way size) */
+#define XCHAL_DTLB_ARF_ENTRIES_LOG2	2	/* log2(autorefill way size) */
+
+#define XCHAL_MMU_ASID_BITS		8	/* number of bits in ASIDs */
+#define XCHAL_MMU_RINGS			4	/* number of rings (1..4) */
+#define XCHAL_MMU_RING_BITS		2	/* num of bits in RING field */
+
+#endif /* !XTENSA_HAL_NON_PRIVILEGED_ONLY */
+
+
+#endif /* _XTENSA_CORE_CONFIGURATION_H */
+
diff --git a/arch/xtensa/variants/test_kc705_be/include/variant/tie-asm.h b/arch/xtensa/variants/test_kc705_be/include/variant/tie-asm.h
new file mode 100644
index 000000000000..b87fe1a5177b
--- /dev/null
+++ b/arch/xtensa/variants/test_kc705_be/include/variant/tie-asm.h
@@ -0,0 +1,308 @@
+/* 
+ * tie-asm.h -- compile-time HAL assembler definitions dependent on CORE & TIE
+ *
+ *  NOTE:  This header file is not meant to be included directly.
+ */
+
+/* This header file contains assembly-language definitions (assembly
+   macros, etc.) for this specific Xtensa processor's TIE extensions
+   and options.  It is customized to this Xtensa processor configuration.
+
+   Copyright (c) 1999-2015 Cadence Design Systems Inc.
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+   IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+   CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+   SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
+
+#ifndef _XTENSA_CORE_TIE_ASM_H
+#define _XTENSA_CORE_TIE_ASM_H
+
+/*  Selection parameter values for save-area save/restore macros:  */
+/*  Option vs. TIE:  */
+#define XTHAL_SAS_TIE	0x0001	/* custom extension or coprocessor */
+#define XTHAL_SAS_OPT	0x0002	/* optional (and not a coprocessor) */
+#define XTHAL_SAS_ANYOT	0x0003	/* both of the above */
+/*  Whether used automatically by compiler:  */
+#define XTHAL_SAS_NOCC	0x0004	/* not used by compiler w/o special opts/code */
+#define XTHAL_SAS_CC	0x0008	/* used by compiler without special opts/code */
+#define XTHAL_SAS_ANYCC	0x000C	/* both of the above */
+/*  ABI handling across function calls:  */
+#define XTHAL_SAS_CALR	0x0010	/* caller-saved */
+#define XTHAL_SAS_CALE	0x0020	/* callee-saved */
+#define XTHAL_SAS_GLOB	0x0040	/* global across function calls (in thread) */
+#define XTHAL_SAS_ANYABI	0x0070	/* all of the above three */
+/*  Misc  */
+#define XTHAL_SAS_ALL	0xFFFF	/* include all default NCP contents */
+#define XTHAL_SAS3(optie,ccuse,abi)	( ((optie) & XTHAL_SAS_ANYOT)  \
+					| ((ccuse) & XTHAL_SAS_ANYCC)  \
+					| ((abi)   & XTHAL_SAS_ANYABI) )
+
+
+    /*
+      *  Macro to store all non-coprocessor (extra) custom TIE and optional state
+      *  (not including zero-overhead loop registers).
+      *  Required parameters:
+      *      ptr         Save area pointer address register (clobbered)
+      *                  (register must contain a 4 byte aligned address).
+      *      at1..at4    Four temporary address registers (first XCHAL_NCP_NUM_ATMPS
+      *                  registers are clobbered, the remaining are unused).
+      *  Optional parameters:
+      *      continue    If macro invoked as part of a larger store sequence, set to 1
+      *                  if this is not the first in the sequence.  Defaults to 0.
+      *      ofs         Offset from start of larger sequence (from value of first ptr
+      *                  in sequence) at which to store.  Defaults to next available space
+      *                  (or 0 if <continue> is 0).
+      *      select      Select what category(ies) of registers to store, as a bitmask
+      *                  (see XTHAL_SAS_xxx constants).  Defaults to all registers.
+      *      alloc       Select what category(ies) of registers to allocate; if any
+      *                  category is selected here that is not in <select>, space for
+      *                  the corresponding registers is skipped without doing any store.
+      */
+    .macro xchal_ncp_store  ptr at1 at2 at3 at4  continue=0 ofs=-1 select=XTHAL_SAS_ALL alloc=0
+	xchal_sa_start	\continue, \ofs
+	// Optional global registers used by default by the compiler:
+	.ifeq (XTHAL_SAS_OPT | XTHAL_SAS_CC | XTHAL_SAS_GLOB) & ~(\select)
+	xchal_sa_align	\ptr, 0, 1020, 4, 4
+	rur.THREADPTR	\at1		// threadptr option
+	s32i	\at1, \ptr, .Lxchal_ofs_+0
+	.set	.Lxchal_ofs_, .Lxchal_ofs_ + 4
+	.elseif ((XTHAL_SAS_OPT | XTHAL_SAS_CC | XTHAL_SAS_GLOB) & ~(\alloc)) == 0
+	xchal_sa_align	\ptr, 0, 1020, 4, 4
+	.set	.Lxchal_ofs_, .Lxchal_ofs_ + 4
+	.endif
+	// Optional caller-saved registers used by default by the compiler:
+	.ifeq (XTHAL_SAS_OPT | XTHAL_SAS_CC | XTHAL_SAS_CALR) & ~(\select)
+	xchal_sa_align	\ptr, 0, 1016, 4, 4
+	rsr.ACCLO	\at1		// MAC16 option
+	s32i	\at1, \ptr, .Lxchal_ofs_+0
+	rsr.ACCHI	\at1		// MAC16 option
+	s32i	\at1, \ptr, .Lxchal_ofs_+4
+	.set	.Lxchal_ofs_, .Lxchal_ofs_ + 8
+	.elseif ((XTHAL_SAS_OPT | XTHAL_SAS_CC | XTHAL_SAS_CALR) & ~(\alloc)) == 0
+	xchal_sa_align	\ptr, 0, 1016, 4, 4
+	.set	.Lxchal_ofs_, .Lxchal_ofs_ + 8
+	.endif
+	// Optional caller-saved registers not used by default by the compiler:
+	.ifeq (XTHAL_SAS_OPT | XTHAL_SAS_NOCC | XTHAL_SAS_CALR) & ~(\select)
+	xchal_sa_align	\ptr, 0, 1000, 4, 4
+	rsr.BR	\at1		// boolean option
+	s32i	\at1, \ptr, .Lxchal_ofs_+0
+	rsr.SCOMPARE1	\at1		// conditional store option
+	s32i	\at1, \ptr, .Lxchal_ofs_+4
+	rsr.M0	\at1		// MAC16 option
+	s32i	\at1, \ptr, .Lxchal_ofs_+8
+	rsr.M1	\at1		// MAC16 option
+	s32i	\at1, \ptr, .Lxchal_ofs_+12
+	rsr.M2	\at1		// MAC16 option
+	s32i	\at1, \ptr, .Lxchal_ofs_+16
+	rsr.M3	\at1		// MAC16 option
+	s32i	\at1, \ptr, .Lxchal_ofs_+20
+	.set	.Lxchal_ofs_, .Lxchal_ofs_ + 24
+	.elseif ((XTHAL_SAS_OPT | XTHAL_SAS_NOCC | XTHAL_SAS_CALR) & ~(\alloc)) == 0
+	xchal_sa_align	\ptr, 0, 1000, 4, 4
+	.set	.Lxchal_ofs_, .Lxchal_ofs_ + 24
+	.endif
+    .endm	// xchal_ncp_store
+
+    /*
+      *  Macro to load all non-coprocessor (extra) custom TIE and optional state
+      *  (not including zero-overhead loop registers).
+      *  Required parameters:
+      *      ptr         Save area pointer address register (clobbered)
+      *                  (register must contain a 4 byte aligned address).
+      *      at1..at4    Four temporary address registers (first XCHAL_NCP_NUM_ATMPS
+      *                  registers are clobbered, the remaining are unused).
+      *  Optional parameters:
+      *      continue    If macro invoked as part of a larger load sequence, set to 1
+      *                  if this is not the first in the sequence.  Defaults to 0.
+      *      ofs         Offset from start of larger sequence (from value of first ptr
+      *                  in sequence) at which to load.  Defaults to next available space
+      *                  (or 0 if <continue> is 0).
+      *      select      Select what category(ies) of registers to load, as a bitmask
+      *                  (see XTHAL_SAS_xxx constants).  Defaults to all registers.
+      *      alloc       Select what category(ies) of registers to allocate; if any
+      *                  category is selected here that is not in <select>, space for
+      *                  the corresponding registers is skipped without doing any load.
+      */
+    .macro xchal_ncp_load  ptr at1 at2 at3 at4  continue=0 ofs=-1 select=XTHAL_SAS_ALL alloc=0
+	xchal_sa_start	\continue, \ofs
+	// Optional global registers used by default by the compiler:
+	.ifeq (XTHAL_SAS_OPT | XTHAL_SAS_CC | XTHAL_SAS_GLOB) & ~(\select)
+	xchal_sa_align	\ptr, 0, 1020, 4, 4
+	l32i	\at1, \ptr, .Lxchal_ofs_+0
+	wur.THREADPTR	\at1		// threadptr option
+	.set	.Lxchal_ofs_, .Lxchal_ofs_ + 4
+	.elseif ((XTHAL_SAS_OPT | XTHAL_SAS_CC | XTHAL_SAS_GLOB) & ~(\alloc)) == 0
+	xchal_sa_align	\ptr, 0, 1020, 4, 4
+	.set	.Lxchal_ofs_, .Lxchal_ofs_ + 4
+	.endif
+	// Optional caller-saved registers used by default by the compiler:
+	.ifeq (XTHAL_SAS_OPT | XTHAL_SAS_CC | XTHAL_SAS_CALR) & ~(\select)
+	xchal_sa_align	\ptr, 0, 1016, 4, 4
+	l32i	\at1, \ptr, .Lxchal_ofs_+0
+	wsr.ACCLO	\at1		// MAC16 option
+	l32i	\at1, \ptr, .Lxchal_ofs_+4
+	wsr.ACCHI	\at1		// MAC16 option
+	.set	.Lxchal_ofs_, .Lxchal_ofs_ + 8
+	.elseif ((XTHAL_SAS_OPT | XTHAL_SAS_CC | XTHAL_SAS_CALR) & ~(\alloc)) == 0
+	xchal_sa_align	\ptr, 0, 1016, 4, 4
+	.set	.Lxchal_ofs_, .Lxchal_ofs_ + 8
+	.endif
+	// Optional caller-saved registers not used by default by the compiler:
+	.ifeq (XTHAL_SAS_OPT | XTHAL_SAS_NOCC | XTHAL_SAS_CALR) & ~(\select)
+	xchal_sa_align	\ptr, 0, 1000, 4, 4
+	l32i	\at1, \ptr, .Lxchal_ofs_+0
+	wsr.BR	\at1		// boolean option
+	l32i	\at1, \ptr, .Lxchal_ofs_+4
+	wsr.SCOMPARE1	\at1		// conditional store option
+	l32i	\at1, \ptr, .Lxchal_ofs_+8
+	wsr.M0	\at1		// MAC16 option
+	l32i	\at1, \ptr, .Lxchal_ofs_+12
+	wsr.M1	\at1		// MAC16 option
+	l32i	\at1, \ptr, .Lxchal_ofs_+16
+	wsr.M2	\at1		// MAC16 option
+	l32i	\at1, \ptr, .Lxchal_ofs_+20
+	wsr.M3	\at1		// MAC16 option
+	.set	.Lxchal_ofs_, .Lxchal_ofs_ + 24
+	.elseif ((XTHAL_SAS_OPT | XTHAL_SAS_NOCC | XTHAL_SAS_CALR) & ~(\alloc)) == 0
+	xchal_sa_align	\ptr, 0, 1000, 4, 4
+	.set	.Lxchal_ofs_, .Lxchal_ofs_ + 24
+	.endif
+    .endm	// xchal_ncp_load
+
+
+#define XCHAL_NCP_NUM_ATMPS	1
+
+    /* 
+     *  Macro to store the state of TIE coprocessor AudioEngineLX.
+     *  Required parameters:
+     *      ptr         Save area pointer address register (clobbered)
+     *                  (register must contain a 8 byte aligned address).
+     *      at1..at4    Four temporary address registers (first XCHAL_CP1_NUM_ATMPS
+     *                  registers are clobbered, the remaining are unused).
+     *  Optional parameters are the same as for xchal_ncp_store.
+     */
+#define xchal_cp_AudioEngineLX_store	xchal_cp1_store
+    .macro	xchal_cp1_store  ptr at1 at2 at3 at4  continue=0 ofs=-1 select=XTHAL_SAS_ALL alloc=0
+	xchal_sa_start \continue, \ofs
+	// Custom caller-saved registers not used by default by the compiler:
+	.ifeq (XTHAL_SAS_TIE | XTHAL_SAS_NOCC | XTHAL_SAS_CALR) & ~(\select)
+	xchal_sa_align	\ptr, 0, 0, 8, 8
+	rur.AE_OVF_SAR	\at1		// ureg 240
+	s32i	\at1, \ptr, .Lxchal_ofs_+0
+	rur.AE_BITHEAD	\at1		// ureg 241
+	s32i	\at1, \ptr, .Lxchal_ofs_+4
+	rur.AE_TS_FTS_BU_BP	\at1		// ureg 242
+	s32i	\at1, \ptr, .Lxchal_ofs_+8
+	rur.AE_SD_NO	\at1		// ureg 243
+	s32i	\at1, \ptr, .Lxchal_ofs_+12
+	rur.AE_CBEGIN0	\at1		// ureg 246
+	s32i	\at1, \ptr, .Lxchal_ofs_+16
+	rur.AE_CEND0	\at1		// ureg 247
+	s32i	\at1, \ptr, .Lxchal_ofs_+20
+	ae_sp24x2s.i	aep0, \ptr, .Lxchal_ofs_+24
+	ae_sp24x2s.i	aep1, \ptr, .Lxchal_ofs_+32
+	ae_sp24x2s.i	aep2, \ptr, .Lxchal_ofs_+40
+	ae_sp24x2s.i	aep3, \ptr, .Lxchal_ofs_+48
+	ae_sp24x2s.i	aep4, \ptr, .Lxchal_ofs_+56
+	addi	\ptr, \ptr, 64
+	ae_sp24x2s.i	aep5, \ptr, .Lxchal_ofs_+0
+	ae_sp24x2s.i	aep6, \ptr, .Lxchal_ofs_+8
+	ae_sp24x2s.i	aep7, \ptr, .Lxchal_ofs_+16
+	ae_sq56s.i	aeq0, \ptr, .Lxchal_ofs_+24
+	ae_sq56s.i	aeq1, \ptr, .Lxchal_ofs_+32
+	ae_sq56s.i	aeq2, \ptr, .Lxchal_ofs_+40
+	ae_sq56s.i	aeq3, \ptr, .Lxchal_ofs_+48
+	.set	.Lxchal_pofs_, .Lxchal_pofs_ + 64
+	.set	.Lxchal_ofs_, .Lxchal_ofs_ + 56
+	.elseif ((XTHAL_SAS_TIE | XTHAL_SAS_NOCC | XTHAL_SAS_CALR) & ~(\alloc)) == 0
+	xchal_sa_align	\ptr, 0, 0, 8, 8
+	.set	.Lxchal_ofs_, .Lxchal_ofs_ + 120
+	.endif
+    .endm	// xchal_cp1_store
+
+    /* 
+     *  Macro to load the state of TIE coprocessor AudioEngineLX.
+     *  Required parameters:
+     *      ptr         Save area pointer address register (clobbered)
+     *                  (register must contain a 8 byte aligned address).
+     *      at1..at4    Four temporary address registers (first XCHAL_CP1_NUM_ATMPS
+     *                  registers are clobbered, the remaining are unused).
+     *  Optional parameters are the same as for xchal_ncp_load.
+     */
+#define xchal_cp_AudioEngineLX_load	xchal_cp1_load
+    .macro	xchal_cp1_load  ptr at1 at2 at3 at4  continue=0 ofs=-1 select=XTHAL_SAS_ALL alloc=0
+	xchal_sa_start \continue, \ofs
+	// Custom caller-saved registers not used by default by the compiler:
+	.ifeq (XTHAL_SAS_TIE | XTHAL_SAS_NOCC | XTHAL_SAS_CALR) & ~(\select)
+	xchal_sa_align	\ptr, 0, 0, 8, 8
+	l32i	\at1, \ptr, .Lxchal_ofs_+0
+	wur.AE_OVF_SAR	\at1		// ureg 240
+	l32i	\at1, \ptr, .Lxchal_ofs_+4
+	wur.AE_BITHEAD	\at1		// ureg 241
+	l32i	\at1, \ptr, .Lxchal_ofs_+8
+	wur.AE_TS_FTS_BU_BP	\at1		// ureg 242
+	l32i	\at1, \ptr, .Lxchal_ofs_+12
+	wur.AE_SD_NO	\at1		// ureg 243
+	l32i	\at1, \ptr, .Lxchal_ofs_+16
+	wur.AE_CBEGIN0	\at1		// ureg 246
+	l32i	\at1, \ptr, .Lxchal_ofs_+20
+	wur.AE_CEND0	\at1		// ureg 247
+	ae_lp24x2.i	aep0, \ptr, .Lxchal_ofs_+24
+	ae_lp24x2.i	aep1, \ptr, .Lxchal_ofs_+32
+	ae_lp24x2.i	aep2, \ptr, .Lxchal_ofs_+40
+	ae_lp24x2.i	aep3, \ptr, .Lxchal_ofs_+48
+	ae_lp24x2.i	aep4, \ptr, .Lxchal_ofs_+56
+	addi	\ptr, \ptr, 64
+	ae_lp24x2.i	aep5, \ptr, .Lxchal_ofs_+0
+	ae_lp24x2.i	aep6, \ptr, .Lxchal_ofs_+8
+	ae_lp24x2.i	aep7, \ptr, .Lxchal_ofs_+16
+	addi	\ptr, \ptr, 24
+	ae_lq56.i	aeq0, \ptr, .Lxchal_ofs_+0
+	ae_lq56.i	aeq1, \ptr, .Lxchal_ofs_+8
+	ae_lq56.i	aeq2, \ptr, .Lxchal_ofs_+16
+	ae_lq56.i	aeq3, \ptr, .Lxchal_ofs_+24
+	.set	.Lxchal_pofs_, .Lxchal_pofs_ + 88
+	.set	.Lxchal_ofs_, .Lxchal_ofs_ + 32
+	.elseif ((XTHAL_SAS_TIE | XTHAL_SAS_NOCC | XTHAL_SAS_CALR) & ~(\alloc)) == 0
+	xchal_sa_align	\ptr, 0, 0, 8, 8
+	.set	.Lxchal_ofs_, .Lxchal_ofs_ + 120
+	.endif
+    .endm	// xchal_cp1_load
+
+#define XCHAL_CP1_NUM_ATMPS	1
+#define XCHAL_SA_NUM_ATMPS	1
+
+	/*  Empty macros for unconfigured coprocessors:  */
+	.macro xchal_cp0_store	p a b c d continue=0 ofs=-1 select=-1 ; .endm
+	.macro xchal_cp0_load	p a b c d continue=0 ofs=-1 select=-1 ; .endm
+	.macro xchal_cp2_store	p a b c d continue=0 ofs=-1 select=-1 ; .endm
+	.macro xchal_cp2_load	p a b c d continue=0 ofs=-1 select=-1 ; .endm
+	.macro xchal_cp3_store	p a b c d continue=0 ofs=-1 select=-1 ; .endm
+	.macro xchal_cp3_load	p a b c d continue=0 ofs=-1 select=-1 ; .endm
+	.macro xchal_cp4_store	p a b c d continue=0 ofs=-1 select=-1 ; .endm
+	.macro xchal_cp4_load	p a b c d continue=0 ofs=-1 select=-1 ; .endm
+	.macro xchal_cp5_store	p a b c d continue=0 ofs=-1 select=-1 ; .endm
+	.macro xchal_cp5_load	p a b c d continue=0 ofs=-1 select=-1 ; .endm
+	.macro xchal_cp6_store	p a b c d continue=0 ofs=-1 select=-1 ; .endm
+	.macro xchal_cp6_load	p a b c d continue=0 ofs=-1 select=-1 ; .endm
+	.macro xchal_cp7_store	p a b c d continue=0 ofs=-1 select=-1 ; .endm
+	.macro xchal_cp7_load	p a b c d continue=0 ofs=-1 select=-1 ; .endm
+
+#endif /*_XTENSA_CORE_TIE_ASM_H*/
+
diff --git a/arch/xtensa/variants/test_kc705_be/include/variant/tie.h b/arch/xtensa/variants/test_kc705_be/include/variant/tie.h
new file mode 100644
index 000000000000..4e1b4baac8e2
--- /dev/null
+++ b/arch/xtensa/variants/test_kc705_be/include/variant/tie.h
@@ -0,0 +1,182 @@
+/* 
+ * tie.h -- compile-time HAL definitions dependent on CORE & TIE configuration
+ *
+ *  NOTE:  This header file is not meant to be included directly.
+ */
+
+/* This header file describes this specific Xtensa processor's TIE extensions
+   that extend basic Xtensa core functionality.  It is customized to this
+   Xtensa processor configuration.
+
+   Copyright (c) 1999-2015 Cadence Design Systems Inc.
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+   IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+   CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+   SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
+
+#ifndef _XTENSA_CORE_TIE_H
+#define _XTENSA_CORE_TIE_H
+
+#define XCHAL_CP_NUM			2	/* number of coprocessors */
+#define XCHAL_CP_MAX			8	/* max CP ID + 1 (0 if none) */
+#define XCHAL_CP_MASK			0x82	/* bitmask of all CPs by ID */
+#define XCHAL_CP_PORT_MASK		0x80	/* bitmask of only port CPs */
+
+/*  Basic parameters of each coprocessor:  */
+#define XCHAL_CP1_NAME			"AudioEngineLX"
+#define XCHAL_CP1_IDENT			AudioEngineLX
+#define XCHAL_CP1_SA_SIZE		120	/* size of state save area */
+#define XCHAL_CP1_SA_ALIGN		8	/* min alignment of save area */
+#define XCHAL_CP_ID_AUDIOENGINELX   	1	/* coprocessor ID (0..7) */
+#define XCHAL_CP7_NAME			"XTIOP"
+#define XCHAL_CP7_IDENT			XTIOP
+#define XCHAL_CP7_SA_SIZE		0	/* size of state save area */
+#define XCHAL_CP7_SA_ALIGN		1	/* min alignment of save area */
+#define XCHAL_CP_ID_XTIOP           	7	/* coprocessor ID (0..7) */
+
+/*  Filler info for unassigned coprocessors, to simplify arrays etc:  */
+#define XCHAL_CP0_SA_SIZE		0
+#define XCHAL_CP0_SA_ALIGN		1
+#define XCHAL_CP2_SA_SIZE		0
+#define XCHAL_CP2_SA_ALIGN		1
+#define XCHAL_CP3_SA_SIZE		0
+#define XCHAL_CP3_SA_ALIGN		1
+#define XCHAL_CP4_SA_SIZE		0
+#define XCHAL_CP4_SA_ALIGN		1
+#define XCHAL_CP5_SA_SIZE		0
+#define XCHAL_CP5_SA_ALIGN		1
+#define XCHAL_CP6_SA_SIZE		0
+#define XCHAL_CP6_SA_ALIGN		1
+
+/*  Save area for non-coprocessor optional and custom (TIE) state:  */
+#define XCHAL_NCP_SA_SIZE		36
+#define XCHAL_NCP_SA_ALIGN		4
+
+/*  Total save area for optional and custom state (NCP + CPn):  */
+#define XCHAL_TOTAL_SA_SIZE		160	/* with 16-byte align padding */
+#define XCHAL_TOTAL_SA_ALIGN		8	/* actual minimum alignment */
+
+/*
+ * Detailed contents of save areas.
+ * NOTE:  caller must define the XCHAL_SA_REG macro (not defined here)
+ * before expanding the XCHAL_xxx_SA_LIST() macros.
+ *
+ * XCHAL_SA_REG(s,ccused,abikind,kind,opt,name,galign,align,asize,
+ *		dbnum,base,regnum,bitsz,gapsz,reset,x...)
+ *
+ *	s = passed from XCHAL_*_LIST(s), eg. to select how to expand
+ *	ccused = set if used by compiler without special options or code
+ *	abikind = 0 (caller-saved), 1 (callee-saved), or 2 (thread-global)
+ *	kind = 0 (special reg), 1 (TIE user reg), or 2 (TIE regfile reg)
+ *	opt = 0 (custom TIE extension or coprocessor), or 1 (optional reg)
+ *	name = lowercase reg name (no quotes)
+ *	galign = group byte alignment (power of 2) (galign >= align)
+ *	align = register byte alignment (power of 2)
+ *	asize = allocated size in bytes (asize*8 == bitsz + gapsz + padsz)
+ *	  (not including any pad bytes required to galign this or next reg)
+ *	dbnum = unique target number f/debug (see <xtensa-libdb-macros.h>)
+ *	base = reg shortname w/o index (or sr=special, ur=TIE user reg)
+ *	regnum = reg index in regfile, or special/TIE-user reg number
+ *	bitsz = number of significant bits (regfile width, or ur/sr mask bits)
+ *	gapsz = intervening bits, if bitsz bits not stored contiguously
+ *	(padsz = pad bits at end [TIE regfile] or at msbits [ur,sr] of asize)
+ *	reset = register reset value (or 0 if undefined at reset)
+ *	x = reserved for future use (0 until then)
+ *
+ *  To filter out certain registers, e.g. to expand only the non-global
+ *  registers used by the compiler, you can do something like this:
+ *
+ *  #define XCHAL_SA_REG(s,ccused,p...)	SELCC##ccused(p)
+ *  #define SELCC0(p...)
+ *  #define SELCC1(abikind,p...)	SELAK##abikind(p)
+ *  #define SELAK0(p...)		REG(p)
+ *  #define SELAK1(p...)		REG(p)
+ *  #define SELAK2(p...)
+ *  #define REG(kind,tie,name,galn,aln,asz,csz,dbnum,base,rnum,bsz,rst,x...) \
+ *		...what you want to expand...
+ */
+
+#define XCHAL_NCP_SA_NUM	9
+#define XCHAL_NCP_SA_LIST(s)	\
+ XCHAL_SA_REG(s,1,2,1,1,      threadptr, 4, 4, 4,0x03E7,  ur,231, 32,0,0,0) \
+ XCHAL_SA_REG(s,1,0,0,1,          acclo, 4, 4, 4,0x0210,  sr,16 , 32,0,0,0) \
+ XCHAL_SA_REG(s,1,0,0,1,          acchi, 4, 4, 4,0x0211,  sr,17 ,  8,0,0,0) \
+ XCHAL_SA_REG(s,0,0,0,1,             br, 4, 4, 4,0x0204,  sr,4  , 16,0,0,0) \
+ XCHAL_SA_REG(s,0,0,0,1,      scompare1, 4, 4, 4,0x020C,  sr,12 , 32,0,0,0) \
+ XCHAL_SA_REG(s,0,0,0,1,             m0, 4, 4, 4,0x0220,  sr,32 , 32,0,0,0) \
+ XCHAL_SA_REG(s,0,0,0,1,             m1, 4, 4, 4,0x0221,  sr,33 , 32,0,0,0) \
+ XCHAL_SA_REG(s,0,0,0,1,             m2, 4, 4, 4,0x0222,  sr,34 , 32,0,0,0) \
+ XCHAL_SA_REG(s,0,0,0,1,             m3, 4, 4, 4,0x0223,  sr,35 , 32,0,0,0)
+
+#define XCHAL_CP0_SA_NUM	0
+#define XCHAL_CP0_SA_LIST(s)	/* empty */
+
+#define XCHAL_CP1_SA_NUM	18
+#define XCHAL_CP1_SA_LIST(s)	\
+ XCHAL_SA_REG(s,0,0,1,0,     ae_ovf_sar, 8, 4, 4,0x03F0,  ur,240,  7,0,0,0) \
+ XCHAL_SA_REG(s,0,0,1,0,     ae_bithead, 4, 4, 4,0x03F1,  ur,241, 32,0,0,0) \
+ XCHAL_SA_REG(s,0,0,1,0,ae_ts_fts_bu_bp, 4, 4, 4,0x03F2,  ur,242, 16,0,0,0) \
+ XCHAL_SA_REG(s,0,0,1,0,       ae_sd_no, 4, 4, 4,0x03F3,  ur,243, 28,0,0,0) \
+ XCHAL_SA_REG(s,0,0,1,0,     ae_cbegin0, 4, 4, 4,0x03F6,  ur,246, 32,0,0,0) \
+ XCHAL_SA_REG(s,0,0,1,0,       ae_cend0, 4, 4, 4,0x03F7,  ur,247, 32,0,0,0) \
+ XCHAL_SA_REG(s,0,0,2,0,           aep0, 8, 8, 8,0x0060, aep,0  , 48,0,0,0) \
+ XCHAL_SA_REG(s,0,0,2,0,           aep1, 8, 8, 8,0x0061, aep,1  , 48,0,0,0) \
+ XCHAL_SA_REG(s,0,0,2,0,           aep2, 8, 8, 8,0x0062, aep,2  , 48,0,0,0) \
+ XCHAL_SA_REG(s,0,0,2,0,           aep3, 8, 8, 8,0x0063, aep,3  , 48,0,0,0) \
+ XCHAL_SA_REG(s,0,0,2,0,           aep4, 8, 8, 8,0x0064, aep,4  , 48,0,0,0) \
+ XCHAL_SA_REG(s,0,0,2,0,           aep5, 8, 8, 8,0x0065, aep,5  , 48,0,0,0) \
+ XCHAL_SA_REG(s,0,0,2,0,           aep6, 8, 8, 8,0x0066, aep,6  , 48,0,0,0) \
+ XCHAL_SA_REG(s,0,0,2,0,           aep7, 8, 8, 8,0x0067, aep,7  , 48,0,0,0) \
+ XCHAL_SA_REG(s,0,0,2,0,           aeq0, 8, 8, 8,0x0068, aeq,0  , 56,0,0,0) \
+ XCHAL_SA_REG(s,0,0,2,0,           aeq1, 8, 8, 8,0x0069, aeq,1  , 56,0,0,0) \
+ XCHAL_SA_REG(s,0,0,2,0,           aeq2, 8, 8, 8,0x006A, aeq,2  , 56,0,0,0) \
+ XCHAL_SA_REG(s,0,0,2,0,           aeq3, 8, 8, 8,0x006B, aeq,3  , 56,0,0,0)
+
+#define XCHAL_CP2_SA_NUM	0
+#define XCHAL_CP2_SA_LIST(s)	/* empty */
+
+#define XCHAL_CP3_SA_NUM	0
+#define XCHAL_CP3_SA_LIST(s)	/* empty */
+
+#define XCHAL_CP4_SA_NUM	0
+#define XCHAL_CP4_SA_LIST(s)	/* empty */
+
+#define XCHAL_CP5_SA_NUM	0
+#define XCHAL_CP5_SA_LIST(s)	/* empty */
+
+#define XCHAL_CP6_SA_NUM	0
+#define XCHAL_CP6_SA_LIST(s)	/* empty */
+
+#define XCHAL_CP7_SA_NUM	0
+#define XCHAL_CP7_SA_LIST(s)	/* empty */
+
+/* Byte length of instruction from its first nibble (op0 field), per FLIX.  */
+#define XCHAL_OP0_FORMAT_LENGTHS	3,3,3,3,3,3,3,3,2,2,2,2,2,2,3,8
+/* Byte length of instruction from its first byte, per FLIX.  */
+#define XCHAL_BYTE0_FORMAT_LENGTHS	\
+	3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,\
+	3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,\
+	3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,\
+	3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,\
+	2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,\
+	2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,\
+	2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,\
+	3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8
+
+#endif /*_XTENSA_CORE_TIE_H*/
+