summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/Kconfig10
-rw-r--r--arch/arm/configs/exynos_defconfig4
-rw-r--r--arch/arm/configs/multi_v7_defconfig4
-rw-r--r--arch/arm/configs/pxa_defconfig4
-rw-r--r--arch/arm/include/asm/kvm_emulate.h12
-rw-r--r--arch/arm/include/asm/kvm_host.h5
-rw-r--r--arch/arm/include/asm/kvm_mmu.h14
-rw-r--r--arch/arm/include/uapi/asm/kvm.h13
-rw-r--r--arch/arm/kernel/asm-offsets.c13
-rw-r--r--arch/arm/kernel/process.c9
-rw-r--r--arch/arm/kvm/coproc.c25
-rw-r--r--arch/arm/kvm/guest.c23
-rw-r--r--arch/arm/probes/uprobes/core.c2
-rw-r--r--arch/arm64/Kconfig1
-rw-r--r--arch/arm64/configs/defconfig4
-rw-r--r--arch/arm64/include/asm/cpucaps.h3
-rw-r--r--arch/arm64/include/asm/kvm_arm.h1
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h17
-rw-r--r--arch/arm64/include/asm/kvm_host.h28
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h35
-rw-r--r--arch/arm64/include/asm/memory.h7
-rw-r--r--arch/arm64/include/asm/pgtable-prot.h24
-rw-r--r--arch/arm64/include/asm/sysreg.h3
-rw-r--r--arch/arm64/include/uapi/asm/kvm.h13
-rw-r--r--arch/arm64/kernel/cpufeature.c20
-rw-r--r--arch/arm64/kvm/guest.c33
-rw-r--r--arch/arm64/kvm/hyp-init.S6
-rw-r--r--arch/arm64/kvm/hyp/sysreg-sr.c5
-rw-r--r--arch/arm64/kvm/inject_fault.c6
-rw-r--r--arch/arm64/kvm/reset.c4
-rw-r--r--arch/arm64/kvm/sys_regs.c54
-rw-r--r--arch/h8300/Kconfig1
-rw-r--r--arch/h8300/Makefile4
-rw-r--r--arch/h8300/boot/dts/h8300h_sim.dts2
-rw-r--r--arch/h8300/include/asm/bitops.h14
-rw-r--r--arch/h8300/include/asm/ptrace.h2
-rw-r--r--arch/h8300/kernel/kgdb.c2
-rw-r--r--arch/h8300/kernel/setup.c46
-rw-r--r--arch/h8300/kernel/sim-console.c7
-rw-r--r--arch/ia64/Kconfig1
-rw-r--r--arch/ia64/hp/common/sba_iommu.c4
-rw-r--r--arch/ia64/include/asm/io.h1
-rw-r--r--arch/ia64/kernel/asm-offsets.c4
-rw-r--r--arch/ia64/kernel/fsys.S12
-rw-r--r--arch/ia64/kernel/setup.c11
-rw-r--r--arch/ia64/mm/contig.c75
-rw-r--r--arch/ia64/mm/discontig.c134
-rw-r--r--arch/m68k/coldfire/clk.c29
-rw-r--r--arch/m68k/include/asm/dma.h4
-rw-r--r--arch/microblaze/Kconfig4
-rw-r--r--arch/microblaze/Makefile4
-rw-r--r--arch/microblaze/include/asm/Kbuild1
-rw-r--r--arch/microblaze/include/asm/dma-mapping.h28
-rw-r--r--arch/microblaze/include/asm/pgtable.h5
-rw-r--r--arch/microblaze/kernel/dma.c144
-rw-r--r--arch/microblaze/kernel/head.S5
-rw-r--r--arch/microblaze/mm/consistent.c54
-rw-r--r--arch/microblaze/pci/pci-common.c13
-rw-r--r--arch/microblaze/pci/xilinx_pci.c1
-rw-r--r--arch/mips/kernel/uprobes.c2
-rw-r--r--arch/parisc/include/asm/elf.h9
-rw-r--r--arch/parisc/include/asm/linkage.h9
-rw-r--r--arch/parisc/include/asm/processor.h6
-rw-r--r--arch/parisc/include/asm/traps.h4
-rw-r--r--arch/parisc/include/asm/unwind.h6
-rw-r--r--arch/parisc/kernel/entry.S79
-rw-r--r--arch/parisc/kernel/processor.c2
-rw-r--r--arch/parisc/kernel/stacktrace.c15
-rw-r--r--arch/parisc/kernel/sys_parisc.c5
-rw-r--r--arch/parisc/kernel/syscall.S35
-rw-r--r--arch/parisc/kernel/traps.c37
-rw-r--r--arch/parisc/kernel/unwind.c43
-rw-r--r--arch/powerpc/Kconfig1
-rw-r--r--arch/powerpc/include/asm/kvm_book3s.h47
-rw-r--r--arch/powerpc/include/asm/kvm_host.h26
-rw-r--r--arch/powerpc/include/asm/reg.h2
-rw-r--r--arch/powerpc/kvm/book3s_64_vio.c5
-rw-r--r--arch/powerpc/kvm/book3s_hv.c42
-rw-r--r--arch/powerpc/kvm/book3s_xive.c19
-rw-r--r--arch/powerpc/kvm/emulate_loadstore.c7
-rw-r--r--arch/powerpc/kvm/powerpc.c30
-rw-r--r--arch/s390/Kconfig2
-rw-r--r--arch/s390/Makefile16
-rw-r--r--arch/s390/include/asm/ftrace.h6
-rw-r--r--arch/s390/include/asm/kvm_host.h11
-rw-r--r--arch/s390/include/uapi/asm/kvm.h5
-rw-r--r--arch/s390/kernel/ftrace.c2
-rw-r--r--arch/s390/kernel/mcount.S2
-rw-r--r--arch/s390/kernel/perf_cpum_sf.c2
-rw-r--r--arch/s390/kvm/kvm-s390.c301
-rw-r--r--arch/s390/kvm/priv.c40
-rw-r--r--arch/s390/kvm/vsie.c11
-rw-r--r--arch/s390/tools/gen_facilities.c3
-rw-r--r--arch/sparc/Kconfig6
-rw-r--r--arch/sparc/Makefile6
-rw-r--r--arch/sparc/include/asm/dma-mapping.h5
-rw-r--r--arch/sparc/kernel/ioport.c193
-rw-r--r--arch/sparc/mm/init_32.c127
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/boot/compressed/kaslr.c5
-rw-r--r--arch/x86/hyperv/Makefile2
-rw-r--r--arch/x86/hyperv/nested.c56
-rw-r--r--arch/x86/include/asm/Kbuild1
-rw-r--r--arch/x86/include/asm/export.h5
-rw-r--r--arch/x86/include/asm/hyperv-tlfs.h8
-rw-r--r--arch/x86/include/asm/kvm_host.h56
-rw-r--r--arch/x86/include/asm/mshyperv.h2
-rw-r--r--arch/x86/include/asm/trace/hyperv.h14
-rw-r--r--arch/x86/include/asm/vmx.h3
-rw-r--r--arch/x86/include/uapi/asm/kvm.h37
-rw-r--r--arch/x86/include/uapi/asm/kvm_para.h1
-rw-r--r--arch/x86/kernel/acpi/cstate.c2
-rw-r--r--arch/x86/kernel/kvm.c112
-rw-r--r--arch/x86/kvm/cpuid.c3
-rw-r--r--arch/x86/kvm/emulate.c2
-rw-r--r--arch/x86/kvm/hyperv.c27
-rw-r--r--arch/x86/kvm/hyperv.h2
-rw-r--r--arch/x86/kvm/lapic.c40
-rw-r--r--arch/x86/kvm/mmu.c531
-rw-r--r--arch/x86/kvm/mmu.h24
-rw-r--r--arch/x86/kvm/paging_tmpl.h28
-rw-r--r--arch/x86/kvm/svm.c20
-rw-r--r--arch/x86/kvm/vmx.c1192
-rw-r--r--arch/x86/kvm/x86.c119
-rw-r--r--arch/xtensa/Kconfig60
-rw-r--r--arch/xtensa/boot/boot-elf/bootstrap.S19
-rw-r--r--arch/xtensa/configs/nommu_kc705_defconfig2
-rw-r--r--arch/xtensa/include/asm/Kbuild1
-rw-r--r--arch/xtensa/include/asm/cacheasm.h69
-rw-r--r--arch/xtensa/include/asm/dma-mapping.h26
-rw-r--r--arch/xtensa/include/asm/initialize_mmu.h42
-rw-r--r--arch/xtensa/include/asm/irq.h21
-rw-r--r--arch/xtensa/include/asm/kmem_layout.h6
-rw-r--r--arch/xtensa/include/asm/page.h5
-rw-r--r--arch/xtensa/include/asm/pgtable.h8
-rw-r--r--arch/xtensa/include/asm/platform.h27
-rw-r--r--arch/xtensa/include/asm/processor.h1
-rw-r--r--arch/xtensa/include/asm/vectors.h1
-rw-r--r--arch/xtensa/kernel/head.S2
-rw-r--r--arch/xtensa/kernel/irq.c1
-rw-r--r--arch/xtensa/kernel/pci-dma.c193
-rw-r--r--arch/xtensa/kernel/setup.c10
-rw-r--r--arch/xtensa/kernel/vmlinux.lds.S2
-rw-r--r--arch/xtensa/platforms/iss/include/platform/hardware.h29
-rw-r--r--arch/xtensa/platforms/xt2000/include/platform/hardware.h11
-rw-r--r--arch/xtensa/platforms/xtfpga/include/platform/hardware.h9
-rw-r--r--arch/xtensa/variants/test_kc705_be/include/variant/core.h575
-rw-r--r--arch/xtensa/variants/test_kc705_be/include/variant/tie-asm.h308
-rw-r--r--arch/xtensa/variants/test_kc705_be/include/variant/tie.h182
149 files changed, 4226 insertions, 1860 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index c6148166a7b4..4426e9687d89 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -841,6 +841,16 @@ config REFCOUNT_FULL
against various use-after-free conditions that can be used in
security flaw exploits.
+config HAVE_ARCH_PREL32_RELOCATIONS
+ bool
+ help
+ May be selected by an architecture if it supports place-relative
+ 32-bit relocations, both in the toolchain and in the module loader,
+ in which case relative references can be used in special sections
+ for PCI fixup, initcalls etc which are only half the size on 64 bit
+ architectures, and don't require runtime relocation on relocatable
+ kernels.
+
source "kernel/gcov/Kconfig"
source "scripts/gcc-plugins/Kconfig"
diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig
index 85b2369d6b20..27ea6dfcf2f2 100644
--- a/arch/arm/configs/exynos_defconfig
+++ b/arch/arm/configs/exynos_defconfig
@@ -155,8 +155,8 @@ CONFIG_THERMAL_EMULATION=y
CONFIG_WATCHDOG=y
CONFIG_S3C2410_WATCHDOG=y
CONFIG_MFD_CROS_EC=y
-CONFIG_MFD_CROS_EC_I2C=y
-CONFIG_MFD_CROS_EC_SPI=y
+CONFIG_CROS_EC_I2C=y
+CONFIG_CROS_EC_SPI=y
CONFIG_MFD_MAX14577=y
CONFIG_MFD_MAX77686=y
CONFIG_MFD_MAX77693=y
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index 8f6be1982545..be732f382418 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -490,8 +490,8 @@ CONFIG_MFD_AC100=y
CONFIG_MFD_AXP20X_I2C=y
CONFIG_MFD_AXP20X_RSB=y
CONFIG_MFD_CROS_EC=m
-CONFIG_MFD_CROS_EC_I2C=m
-CONFIG_MFD_CROS_EC_SPI=m
+CONFIG_CROS_EC_I2C=m
+CONFIG_CROS_EC_SPI=m
CONFIG_MFD_DA9063=m
CONFIG_MFD_MAX14577=y
CONFIG_MFD_MAX77686=y
diff --git a/arch/arm/configs/pxa_defconfig b/arch/arm/configs/pxa_defconfig
index 5655a1cee87d..6bb506edb1f5 100644
--- a/arch/arm/configs/pxa_defconfig
+++ b/arch/arm/configs/pxa_defconfig
@@ -398,8 +398,8 @@ CONFIG_MFD_AS3711=y
CONFIG_MFD_BCM590XX=m
CONFIG_MFD_AXP20X=y
CONFIG_MFD_CROS_EC=m
-CONFIG_MFD_CROS_EC_I2C=m
-CONFIG_MFD_CROS_EC_SPI=m
+CONFIG_CROS_EC_I2C=m
+CONFIG_CROS_EC_SPI=m
CONFIG_MFD_ASIC3=y
CONFIG_PMIC_DA903X=y
CONFIG_HTC_EGPIO=y
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index fe2fb1ddd771..77121b713bef 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -107,9 +107,19 @@ static inline unsigned long *vcpu_hcr(const struct kvm_vcpu *vcpu)
return (unsigned long *)&vcpu->arch.hcr;
}
+static inline void vcpu_clear_wfe_traps(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.hcr &= ~HCR_TWE;
+}
+
+static inline void vcpu_set_wfe_traps(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.hcr |= HCR_TWE;
+}
+
static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu)
{
- return 1;
+ return true;
}
static inline unsigned long *vcpu_pc(struct kvm_vcpu *vcpu)
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 1f1fe4109b02..79906cecb091 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -216,6 +216,11 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
unsigned long kvm_call_hyp(void *hypfn, ...);
void force_vm_exit(const cpumask_t *mask);
+int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu_events *events);
+
+int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu_events *events);
#define KVM_ARCH_WANT_MMU_NOTIFIER
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 8553d68b7c8a..265ea9cf7df7 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -75,17 +75,9 @@ phys_addr_t kvm_get_idmap_vector(void);
int kvm_mmu_init(void);
void kvm_clear_hyp_idmap(void);
-static inline void kvm_set_pmd(pmd_t *pmd, pmd_t new_pmd)
-{
- *pmd = new_pmd;
- dsb(ishst);
-}
-
-static inline void kvm_set_pte(pte_t *pte, pte_t new_pte)
-{
- *pte = new_pte;
- dsb(ishst);
-}
+#define kvm_mk_pmd(ptep) __pmd(__pa(ptep) | PMD_TYPE_TABLE)
+#define kvm_mk_pud(pmdp) __pud(__pa(pmdp) | PMD_TYPE_TABLE)
+#define kvm_mk_pgd(pudp) ({ BUILD_BUG(); 0; })
static inline pte_t kvm_s2pte_mkwrite(pte_t pte)
{
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index 16e006f708ca..4602464ebdfb 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -27,6 +27,7 @@
#define __KVM_HAVE_GUEST_DEBUG
#define __KVM_HAVE_IRQ_LINE
#define __KVM_HAVE_READONLY_MEM
+#define __KVM_HAVE_VCPU_EVENTS
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
@@ -125,6 +126,18 @@ struct kvm_sync_regs {
struct kvm_arch_memory_slot {
};
+/* for KVM_GET/SET_VCPU_EVENTS */
+struct kvm_vcpu_events {
+ struct {
+ __u8 serror_pending;
+ __u8 serror_has_esr;
+ /* Align it to 8 bytes */
+ __u8 pad[6];
+ __u64 serror_esr;
+ } exception;
+ __u32 reserved[12];
+};
+
/* If you need to interpret the index values, here is the key: */
#define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000
#define KVM_REG_ARM_COPROC_SHIFT 16
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index 974d8d7d1bcd..3968d6c22455 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -38,25 +38,14 @@
#error Sorry, your compiler targets APCS-26 but this kernel requires APCS-32
#endif
/*
- * GCC 3.0, 3.1: general bad code generation.
- * GCC 3.2.0: incorrect function argument offset calculation.
- * GCC 3.2.x: miscompiles NEW_AUX_ENT in fs/binfmt_elf.c
- * (http://gcc.gnu.org/PR8896) and incorrect structure
- * initialisation in fs/jffs2/erase.c
* GCC 4.8.0-4.8.2: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58854
* miscompiles find_get_entry(), and can result in EXT3 and EXT4
* filesystem corruption (possibly other FS too).
*/
-#ifdef __GNUC__
-#if (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
-#error Your compiler is too buggy; it is known to miscompile kernels.
-#error Known good compilers: 3.3, 4.x
-#endif
-#if GCC_VERSION >= 40800 && GCC_VERSION < 40803
+#if defined(GCC_VERSION) && GCC_VERSION >= 40800 && GCC_VERSION < 40803
#error Your compiler is too buggy; it is known to miscompile kernels
#error and result in filesystem corruption and oopses.
#endif
-#endif
int main(void)
{
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index d9c299133111..82ab015bf42b 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -330,16 +330,15 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
* atomic helpers. Insert it into the gate_vma so that it is visible
* through ptrace and /proc/<pid>/mem.
*/
-static struct vm_area_struct gate_vma = {
- .vm_start = 0xffff0000,
- .vm_end = 0xffff0000 + PAGE_SIZE,
- .vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC,
-};
+static struct vm_area_struct gate_vma;
static int __init gate_vma_init(void)
{
vma_init(&gate_vma, NULL);
gate_vma.vm_page_prot = PAGE_READONLY_EXEC;
+ gate_vma.vm_start = 0xffff0000;
+ gate_vma.vm_end = 0xffff0000 + PAGE_SIZE;
+ gate_vma.vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC;
return 0;
}
arch_initcall(gate_vma_init);
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index 3a02e76699a6..450c7a4fbc8a 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -246,6 +246,7 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu,
const struct coproc_reg *r)
{
u64 reg;
+ bool g1;
if (!p->is_write)
return read_from_write_only(vcpu, p);
@@ -253,7 +254,25 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu,
reg = (u64)*vcpu_reg(vcpu, p->Rt2) << 32;
reg |= *vcpu_reg(vcpu, p->Rt1) ;
- vgic_v3_dispatch_sgi(vcpu, reg);
+ /*
+ * In a system where GICD_CTLR.DS=1, a ICC_SGI0R access generates
+ * Group0 SGIs only, while ICC_SGI1R can generate either group,
+ * depending on the SGI configuration. ICC_ASGI1R is effectively
+ * equivalent to ICC_SGI0R, as there is no "alternative" secure
+ * group.
+ */
+ switch (p->Op1) {
+ default: /* Keep GCC quiet */
+ case 0: /* ICC_SGI1R */
+ g1 = true;
+ break;
+ case 1: /* ICC_ASGI1R */
+ case 2: /* ICC_SGI0R */
+ g1 = false;
+ break;
+ }
+
+ vgic_v3_dispatch_sgi(vcpu, reg, g1);
return true;
}
@@ -459,6 +478,10 @@ static const struct coproc_reg cp15_regs[] = {
/* ICC_SGI1R */
{ CRm64(12), Op1( 0), is64, access_gic_sgi},
+ /* ICC_ASGI1R */
+ { CRm64(12), Op1( 1), is64, access_gic_sgi},
+ /* ICC_SGI0R */
+ { CRm64(12), Op1( 2), is64, access_gic_sgi},
/* VBAR: swapped by interrupt.S. */
{ CRn(12), CRm( 0), Op1( 0), Op2( 0), is32,
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index a18f33edc471..2b8de885b2bf 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -261,6 +261,29 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
return -EINVAL;
}
+
+int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu_events *events)
+{
+ events->exception.serror_pending = !!(*vcpu_hcr(vcpu) & HCR_VA);
+
+ return 0;
+}
+
+int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu_events *events)
+{
+ bool serror_pending = events->exception.serror_pending;
+ bool has_esr = events->exception.serror_has_esr;
+
+ if (serror_pending && has_esr)
+ return -EINVAL;
+ else if (serror_pending)
+ kvm_inject_vabt(vcpu);
+
+ return 0;
+}
+
int __attribute_const__ kvm_target_cpu(void)
{
switch (read_cpuid_part()) {
diff --git a/arch/arm/probes/uprobes/core.c b/arch/arm/probes/uprobes/core.c
index d1329f1ba4e4..bf992264060e 100644
--- a/arch/arm/probes/uprobes/core.c
+++ b/arch/arm/probes/uprobes/core.c
@@ -32,7 +32,7 @@ bool is_swbp_insn(uprobe_opcode_t *insn)
int set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm,
unsigned long vaddr)
{
- return uprobe_write_opcode(mm, vaddr,
+ return uprobe_write_opcode(auprobe, mm, vaddr,
__opcode_to_mem_arm(auprobe->bpinsn));
}
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index d0a53cc6293a..29e75b47becd 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -108,6 +108,7 @@ config ARM64
select HAVE_ARCH_KGDB
select HAVE_ARCH_MMAP_RND_BITS
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
+ select HAVE_ARCH_PREL32_RELOCATIONS
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_STACKLEAK
select HAVE_ARCH_THREAD_STRUCT_WHITELIST
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 2c07e233012b..514787d45dee 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -373,8 +373,8 @@ CONFIG_UNIPHIER_WATCHDOG=y
CONFIG_BCM2835_WDT=y
CONFIG_MFD_AXP20X_RSB=y
CONFIG_MFD_CROS_EC=y
-CONFIG_MFD_CROS_EC_I2C=y
-CONFIG_MFD_CROS_EC_SPI=y
+CONFIG_CROS_EC_I2C=y
+CONFIG_CROS_EC_SPI=y
CONFIG_MFD_CROS_EC_CHARDEV=m
CONFIG_MFD_EXYNOS_LPASS=m
CONFIG_MFD_HI6421_PMIC=y
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index be3bf3d08916..ae1f70450fb2 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -50,7 +50,8 @@
#define ARM64_HW_DBM 29
#define ARM64_SSBD 30
#define ARM64_MISMATCHED_CACHE_TYPE 31
+#define ARM64_HAS_STAGE2_FWB 32
-#define ARM64_NCAPS 32
+#define ARM64_NCAPS 33
#endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 6dd285e979c9..aa45df752a16 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -23,6 +23,7 @@
#include <asm/types.h>
/* Hyp Configuration Register (HCR) bits */
+#define HCR_FWB (UL(1) << 46)
#define HCR_TEA (UL(1) << 37)
#define HCR_TERR (UL(1) << 36)
#define HCR_TLOR (UL(1) << 35)
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 0c97e45d1dc3..6106a85ae0be 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -63,6 +63,8 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
/* trap error record accesses */
vcpu->arch.hcr_el2 |= HCR_TERR;
}
+ if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
+ vcpu->arch.hcr_el2 |= HCR_FWB;
if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features))
vcpu->arch.hcr_el2 &= ~HCR_RW;
@@ -81,6 +83,21 @@ static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu)
return (unsigned long *)&vcpu->arch.hcr_el2;
}
+static inline void vcpu_clear_wfe_traps(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.hcr_el2 &= ~HCR_TWE;
+}
+
+static inline void vcpu_set_wfe_traps(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.hcr_el2 |= HCR_TWE;
+}
+
+static inline unsigned long vcpu_get_vsesr(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.vsesr_el2;
+}
+
static inline void vcpu_set_vsesr(struct kvm_vcpu *vcpu, u64 vsesr)
{
vcpu->arch.vsesr_el2 = vsesr;
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index fe8777b12f86..f26055f2306e 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -350,6 +350,11 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
+int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu_events *events);
+
+int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu_events *events);
#define KVM_ARCH_WANT_MMU_NOTIFIER
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
@@ -378,16 +383,23 @@ void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run,
int kvm_perf_init(void);
int kvm_perf_teardown(void);
+void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome);
+
struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
-void __kvm_set_tpidr_el2(u64 tpidr_el2);
DECLARE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state);
static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
unsigned long hyp_stack_ptr,
unsigned long vector_ptr)
{
- u64 tpidr_el2;
+ /*
+ * Calculate the raw per-cpu offset without a translation from the
+ * kernel's mapping to the linear mapping, and store it in tpidr_el2
+ * so that we can use adr_l to access per-cpu variables in EL2.
+ */
+ u64 tpidr_el2 = ((u64)this_cpu_ptr(&kvm_host_cpu_state) -
+ (u64)kvm_ksym_ref(kvm_host_cpu_state));
/*
* Call initialization code, and switch to the full blown HYP code.
@@ -396,17 +408,7 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
* cpus_have_const_cap() wrapper.
*/
BUG_ON(!static_branch_likely(&arm64_const_caps_ready));
- __kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr);
-
- /*
- * Calculate the raw per-cpu offset without a translation from the
- * kernel's mapping to the linear mapping, and store it in tpidr_el2
- * so that we can use adr_l to access per-cpu variables in EL2.
- */
- tpidr_el2 = (u64)this_cpu_ptr(&kvm_host_cpu_state)
- - (u64)kvm_ksym_ref(kvm_host_cpu_state);
-
- kvm_call_hyp(__kvm_set_tpidr_el2, tpidr_el2);
+ __kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr, tpidr_el2);
}
static inline bool kvm_arch_check_sve_has_vhe(void)
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index fb9a7127bb75..d6fff7de5539 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -169,8 +169,12 @@ phys_addr_t kvm_get_idmap_vector(void);
int kvm_mmu_init(void);
void kvm_clear_hyp_idmap(void);
-#define kvm_set_pte(ptep, pte) set_pte(ptep, pte)
-#define kvm_set_pmd(pmdp, pmd) set_pmd(pmdp, pmd)
+#define kvm_mk_pmd(ptep) \
+ __pmd(__phys_to_pmd_val(__pa(ptep)) | PMD_TYPE_TABLE)
+#define kvm_mk_pud(pmdp) \
+ __pud(__phys_to_pud_val(__pa(pmdp)) | PMD_TYPE_TABLE)
+#define kvm_mk_pgd(pudp) \
+ __pgd(__phys_to_pgd_val(__pa(pudp)) | PUD_TYPE_TABLE)
static inline pte_t kvm_s2pte_mkwrite(pte_t pte)
{
@@ -267,6 +271,15 @@ static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
{
void *va = page_address(pfn_to_page(pfn));
+ /*
+ * With FWB, we ensure that the guest always accesses memory using
+ * cacheable attributes, and we don't have to clean to PoC when
+ * faulting in pages. Furthermore, FWB implies IDC, so cleaning to
+ * PoU is not required either in this case.
+ */
+ if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
+ return;
+
kvm_flush_dcache_to_poc(va, size);
}
@@ -287,20 +300,26 @@ static inline void __invalidate_icache_guest_page(kvm_pfn_t pfn,
static inline void __kvm_flush_dcache_pte(pte_t pte)
{
- struct page *page = pte_page(pte);
- kvm_flush_dcache_to_poc(page_address(page), PAGE_SIZE);
+ if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) {
+ struct page *page = pte_page(pte);
+ kvm_flush_dcache_to_poc(page_address(page), PAGE_SIZE);
+ }
}
static inline void __kvm_flush_dcache_pmd(pmd_t pmd)
{
- struct page *page = pmd_page(pmd);
- kvm_flush_dcache_to_poc(page_address(page), PMD_SIZE);
+ if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) {
+ struct page *page = pmd_page(pmd);
+ kvm_flush_dcache_to_poc(page_address(page), PMD_SIZE);
+ }
}
static inline void __kvm_flush_dcache_pud(pud_t pud)
{
- struct page *page = pud_page(pud);
- kvm_flush_dcache_to_poc(page_address(page), PUD_SIZE);
+ if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) {
+ struct page *page = pud_page(pud);
+ kvm_flush_dcache_to_poc(page_address(page), PUD_SIZE);
+ }
}
#define kvm_virt_to_phys(x) __pa_symbol(x)
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 49d99214f43c..b96442960aea 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -155,6 +155,13 @@
#define MT_S2_NORMAL 0xf
#define MT_S2_DEVICE_nGnRE 0x1
+/*
+ * Memory types for Stage-2 translation when ID_AA64MMFR2_EL1.FWB is 0001
+ * Stage-2 enforces Normal-WB and Device-nGnRE
+ */
+#define MT_S2_FWB_NORMAL 6
+#define MT_S2_FWB_DEVICE_nGnRE 1
+
#ifdef CONFIG_ARM64_4K_PAGES
#define IOREMAP_MAX_ORDER (PUD_SHIFT)
#else
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 108ecad7acc5..78b942c1bea4 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -67,8 +67,28 @@
#define PAGE_HYP_RO __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY | PTE_HYP_XN)
#define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP)
-#define PAGE_S2 __pgprot(_PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY | PTE_S2_XN)
-#define PAGE_S2_DEVICE __pgprot(_PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_S2_XN)
+#define PAGE_S2_MEMATTR(attr) \
+ ({ \
+ u64 __val; \
+ if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) \
+ __val = PTE_S2_MEMATTR(MT_S2_FWB_ ## attr); \
+ else \
+ __val = PTE_S2_MEMATTR(MT_S2_ ## attr); \
+ __val; \
+ })
+
+#define PAGE_S2_XN \
+ ({ \
+ u64 __val; \
+ if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC)) \
+ __val = 0; \
+ else \
+ __val = PTE_S2_XN; \
+ __val; \
+ })
+
+#define PAGE_S2 __pgprot(_PROT_DEFAULT | PAGE_S2_MEMATTR(NORMAL) | PTE_S2_RDONLY | PAGE_S2_XN)
+#define PAGE_S2_DEVICE __pgprot(_PROT_DEFAULT | PAGE_S2_MEMATTR(DEVICE_nGnRE) | PTE_S2_RDONLY | PAGE_S2_XN)
#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index e205ec8489e9..c1470931b897 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -314,6 +314,8 @@
#define SYS_ICC_DIR_EL1 sys_reg(3, 0, 12, 11, 1)
#define SYS_ICC_RPR_EL1 sys_reg(3, 0, 12, 11, 3)
#define SYS_ICC_SGI1R_EL1 sys_reg(3, 0, 12, 11, 5)
+#define SYS_ICC_ASGI1R_EL1 sys_reg(3, 0, 12, 11, 6)
+#define SYS_ICC_SGI0R_EL1 sys_reg(3, 0, 12, 11, 7)
#define SYS_ICC_IAR1_EL1 sys_reg(3, 0, 12, 12, 0)
#define SYS_ICC_EOIR1_EL1 sys_reg(3, 0, 12, 12, 1)
#define SYS_ICC_HPPIR1_EL1 sys_reg(3, 0, 12, 12, 2)
@@ -579,6 +581,7 @@
#define ID_AA64MMFR1_VMIDBITS_16 2
/* id_aa64mmfr2 */
+#define ID_AA64MMFR2_FWB_SHIFT 40
#define ID_AA64MMFR2_AT_SHIFT 32
#define ID_AA64MMFR2_LVA_SHIFT 16
#define ID_AA64MMFR2_IESB_SHIFT 12
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 4e76630dd655..97c3478ee6e7 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -39,6 +39,7 @@
#define __KVM_HAVE_GUEST_DEBUG
#define __KVM_HAVE_IRQ_LINE
#define __KVM_HAVE_READONLY_MEM
+#define __KVM_HAVE_VCPU_EVENTS
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
@@ -154,6 +155,18 @@ struct kvm_sync_regs {
struct kvm_arch_memory_slot {
};
+/* for KVM_GET/SET_VCPU_EVENTS */
+struct kvm_vcpu_events {
+ struct {
+ __u8 serror_pending;
+ __u8 serror_has_esr;
+ /* Align it to 8 bytes */
+ __u8 pad[6];
+ __u64 serror_esr;
+ } exception;
+ __u32 reserved[12];
+};
+
/* If you need to interpret the index values, here is the key: */
#define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000
#define KVM_REG_ARM_COPROC_SHIFT 16
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 611e8921c3d4..e238b7932096 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -192,6 +192,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
};
static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_FWB_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_AT_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LVA_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_IESB_SHIFT, 4, 0),
@@ -1026,6 +1027,14 @@ static void cpu_copy_el2regs(const struct arm64_cpu_capabilities *__unused)
}
#endif
+static void cpu_has_fwb(const struct arm64_cpu_capabilities *__unused)
+{
+ u64 val = read_sysreg_s(SYS_CLIDR_EL1);
+
+ /* Check that CLIDR_EL1.LOU{U,IS} are both 0 */
+ WARN_ON(val & (7 << 27 | 7 << 21));
+}
+
static const struct arm64_cpu_capabilities arm64_features[] = {
{
.desc = "GIC system register CPU interface",
@@ -1182,6 +1191,17 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.matches = has_cache_dic,
},
+ {
+ .desc = "Stage-2 Force Write-Back",
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .capability = ARM64_HAS_STAGE2_FWB,
+ .sys_reg = SYS_ID_AA64MMFR2_EL1,
+ .sign = FTR_UNSIGNED,
+ .field_pos = ID_AA64MMFR2_FWB_SHIFT,
+ .min_field_value = 1,
+ .matches = has_cpuid_feature,
+ .cpu_enable = cpu_has_fwb,
+ },
#ifdef CONFIG_ARM64_HW_AFDBM
{
/*
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index cdd4d9d6d575..07256b08226c 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -289,6 +289,39 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
return -EINVAL;
}
+int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu_events *events)
+{
+ events->exception.serror_pending = !!(vcpu->arch.hcr_el2 & HCR_VSE);
+ events->exception.serror_has_esr = cpus_have_const_cap(ARM64_HAS_RAS_EXTN);
+
+ if (events->exception.serror_pending && events->exception.serror_has_esr)
+ events->exception.serror_esr = vcpu_get_vsesr(vcpu);
+
+ return 0;
+}
+
+int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu_events *events)
+{
+ bool serror_pending = events->exception.serror_pending;
+ bool has_esr = events->exception.serror_has_esr;
+
+ if (serror_pending && has_esr) {
+ if (!cpus_have_const_cap(ARM64_HAS_RAS_EXTN))
+ return -EINVAL;
+
+ if (!((events->exception.serror_esr) & ~ESR_ELx_ISS_MASK))
+ kvm_set_sei_esr(vcpu, events->exception.serror_esr);
+ else
+ return -EINVAL;
+ } else if (serror_pending) {
+ kvm_inject_vabt(vcpu);
+ }
+
+ return 0;
+}
+
int __attribute_const__ kvm_target_cpu(void)
{
unsigned long implementor = read_cpuid_implementor();
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index 6fd91b31a131..ea9225160786 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -57,6 +57,7 @@ __invalid:
* x0: HYP pgd
* x1: HYP stack
* x2: HYP vectors
+ * x3: per-CPU offset
*/
__do_hyp_init:
/* Check for a stub HVC call */
@@ -119,9 +120,8 @@ CPU_BE( orr x4, x4, #SCTLR_ELx_EE)
mov sp, x1
msr vbar_el2, x2
- /* copy tpidr_el1 into tpidr_el2 for use by HYP */
- mrs x1, tpidr_el1
- msr tpidr_el2, x1
+ /* Set tpidr_el2 for use by HYP */
+ msr tpidr_el2, x3
/* Hello, World! */
eret
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
index 35bc16832efe..9ce223944983 100644
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -288,8 +288,3 @@ void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu)
vcpu->arch.sysregs_loaded_on_cpu = false;
}
-
-void __hyp_text __kvm_set_tpidr_el2(u64 tpidr_el2)
-{
- asm("msr tpidr_el2, %0": : "r" (tpidr_el2));
-}
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index d8e71659ba7e..a55e91dfcf8f 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -164,9 +164,9 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu)
inject_undef64(vcpu);
}
-static void pend_guest_serror(struct kvm_vcpu *vcpu, u64 esr)
+void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 esr)
{
- vcpu_set_vsesr(vcpu, esr);
+ vcpu_set_vsesr(vcpu, esr & ESR_ELx_ISS_MASK);
*vcpu_hcr(vcpu) |= HCR_VSE;
}
@@ -184,5 +184,5 @@ static void pend_guest_serror(struct kvm_vcpu *vcpu, u64 esr)
*/
void kvm_inject_vabt(struct kvm_vcpu *vcpu)
{
- pend_guest_serror(vcpu, ESR_ELx_ISV);
+ kvm_set_sei_esr(vcpu, ESR_ELx_ISV);
}
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 4e4aedaf7ab7..e37c78bbe1ca 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -77,8 +77,12 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_ARM_PMU_V3:
r = kvm_arm_support_pmu_v3();
break;
+ case KVM_CAP_ARM_INJECT_SERROR_ESR:
+ r = cpus_have_const_cap(ARM64_HAS_RAS_EXTN);
+ break;
case KVM_CAP_SET_GUEST_DEBUG:
case KVM_CAP_VCPU_ATTRIBUTES:
+ case KVM_CAP_VCPU_EVENTS:
r = 1;
break;
default:
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index a4363735d3f8..22fbbdbece3c 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -194,7 +194,16 @@ static bool access_dcsw(struct kvm_vcpu *vcpu,
if (!p->is_write)
return read_from_write_only(vcpu, p, r);
- kvm_set_way_flush(vcpu);
+ /*
+ * Only track S/W ops if we don't have FWB. It still indicates
+ * that the guest is a bit broken (S/W operations should only
+ * be done by firmware, knowing that there is only a single
+ * CPU left in the system, and certainly not from non-secure
+ * software).
+ */
+ if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
+ kvm_set_way_flush(vcpu);
+
return true;
}
@@ -243,10 +252,43 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
+ bool g1;
+
if (!p->is_write)
return read_from_write_only(vcpu, p, r);
- vgic_v3_dispatch_sgi(vcpu, p->regval);
+ /*
+ * In a system where GICD_CTLR.DS=1, a ICC_SGI0R_EL1 access generates
+ * Group0 SGIs only, while ICC_SGI1R_EL1 can generate either group,
+ * depending on the SGI configuration. ICC_ASGI1R_EL1 is effectively
+ * equivalent to ICC_SGI0R_EL1, as there is no "alternative" secure
+ * group.
+ */
+ if (p->is_aarch32) {
+ switch (p->Op1) {
+ default: /* Keep GCC quiet */
+ case 0: /* ICC_SGI1R */
+ g1 = true;
+ break;
+ case 1: /* ICC_ASGI1R */
+ case 2: /* ICC_SGI0R */
+ g1 = false;
+ break;
+ }
+ } else {
+ switch (p->Op2) {
+ default: /* Keep GCC quiet */
+ case 5: /* ICC_SGI1R_EL1 */
+ g1 = true;
+ break;
+ case 6: /* ICC_ASGI1R_EL1 */
+ case 7: /* ICC_SGI0R_EL1 */
+ g1 = false;
+ break;
+ }
+ }
+
+ vgic_v3_dispatch_sgi(vcpu, p->regval, g1);
return true;
}
@@ -1303,6 +1345,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_ICC_DIR_EL1), read_from_write_only },
{ SYS_DESC(SYS_ICC_RPR_EL1), write_to_read_only },
{ SYS_DESC(SYS_ICC_SGI1R_EL1), access_gic_sgi },
+ { SYS_DESC(SYS_ICC_ASGI1R_EL1), access_gic_sgi },
+ { SYS_DESC(SYS_ICC_SGI0R_EL1), access_gic_sgi },
{ SYS_DESC(SYS_ICC_IAR1_EL1), write_to_read_only },
{ SYS_DESC(SYS_ICC_EOIR1_EL1), read_from_write_only },
{ SYS_DESC(SYS_ICC_HPPIR1_EL1), write_to_read_only },
@@ -1613,8 +1657,6 @@ static const struct sys_reg_desc cp14_64_regs[] = {
* register).
*/
static const struct sys_reg_desc cp15_regs[] = {
- { Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi },
-
{ Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, c1_SCTLR },
{ Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
{ Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 },
@@ -1737,8 +1779,10 @@ static const struct sys_reg_desc cp15_regs[] = {
static const struct sys_reg_desc cp15_64_regs[] = {
{ Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
{ Op1( 0), CRn( 0), CRm( 9), Op2( 0), access_pmu_evcntr },
- { Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi },
+ { Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI1R */
{ Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 },
+ { Op1( 1), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_ASGI1R */
+ { Op1( 2), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI0R */
{ Op1( 2), CRn( 0), CRm(14), Op2( 0), access_cntp_cval },
};
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index 5e89d40be8cd..0b334b671e90 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -16,6 +16,7 @@ config H8300
select OF_IRQ
select OF_EARLY_FLATTREE
select HAVE_MEMBLOCK
+ select NO_BOOTMEM
select TIMER_OF
select H8300_TMR8
select HAVE_KERNEL_GZIP
diff --git a/arch/h8300/Makefile b/arch/h8300/Makefile
index e1c02ca230cb..cc12b162c222 100644
--- a/arch/h8300/Makefile
+++ b/arch/h8300/Makefile
@@ -8,6 +8,8 @@
# (C) Copyright 2002-2015 Yoshinori Sato <ysato@users.sourceforge.jp>
#
+KBUILD_DEFCONFIG := edosk2674_defconfig
+
cflags-$(CONFIG_CPU_H8300H) := -mh
aflags-$(CONFIG_CPU_H8300H) := -mh -Wa,--mach=h8300h
ldflags-$(CONFIG_CPU_H8300H) := -mh8300helf_linux
@@ -22,6 +24,8 @@ KBUILD_CFLAGS += -DUTS_SYSNAME=\"uClinux\"
KBUILD_AFLAGS += $(aflags-y)
LDFLAGS += $(ldflags-y)
+CHECKFLAGS += -msize-long
+
ifeq ($(CROSS_COMPILE),)
CROSS_COMPILE := h8300-unknown-linux-
endif
diff --git a/arch/h8300/boot/dts/h8300h_sim.dts b/arch/h8300/boot/dts/h8300h_sim.dts
index f1c31cecdec8..595398b9d018 100644
--- a/arch/h8300/boot/dts/h8300h_sim.dts
+++ b/arch/h8300/boot/dts/h8300h_sim.dts
@@ -73,7 +73,7 @@
timer16: timer@ffff68 {
compatible = "renesas,16bit-timer";
reg = <0xffff68 8>, <0xffff60 8>;
- interrupts = <24 0>;
+ interrupts = <26 0>;
renesas,channel = <0>;
clocks = <&fclk>;
clock-names = "fck";
diff --git a/arch/h8300/include/asm/bitops.h b/arch/h8300/include/asm/bitops.h
index ea0cb0cf6a8b..647a83bd40b7 100644
--- a/arch/h8300/include/asm/bitops.h
+++ b/arch/h8300/include/asm/bitops.h
@@ -29,11 +29,11 @@ static inline unsigned long ffz(unsigned long word)
result = -1;
__asm__("1:\n\t"
- "shlr.l %2\n\t"
+ "shlr.l %1\n\t"
"adds #1,%0\n\t"
"bcs 1b"
- : "=r"(result)
- : "0"(result), "r"(word));
+ : "=r"(result),"=r"(word)
+ : "0"(result), "1"(word));
return result;
}
@@ -66,7 +66,7 @@ H8300_GEN_BITOP(change_bit, "bnot")
#undef H8300_GEN_BITOP
-static inline int test_bit(int nr, const unsigned long *addr)
+static inline int test_bit(int nr, const volatile unsigned long *addr)
{
int ret = 0;
unsigned char *b_addr;
@@ -162,11 +162,11 @@ static inline unsigned long __ffs(unsigned long word)
result = -1;
__asm__("1:\n\t"
- "shlr.l %2\n\t"
+ "shlr.l %1\n\t"
"adds #1,%0\n\t"
"bcc 1b"
- : "=r" (result)
- : "0"(result), "r"(word));
+ : "=r" (result),"=r"(word)
+ : "0"(result), "1"(word));
return result;
}
diff --git a/arch/h8300/include/asm/ptrace.h b/arch/h8300/include/asm/ptrace.h
index 313cafa85380..66d383848ff1 100644
--- a/arch/h8300/include/asm/ptrace.h
+++ b/arch/h8300/include/asm/ptrace.h
@@ -4,6 +4,8 @@
#include <uapi/asm/ptrace.h>
+struct task_struct;
+
#ifndef __ASSEMBLY__
#ifndef PS_S
#define PS_S (0x10)
diff --git a/arch/h8300/kernel/kgdb.c b/arch/h8300/kernel/kgdb.c
index 602e478afbd5..1a1d30cb0609 100644
--- a/arch/h8300/kernel/kgdb.c
+++ b/arch/h8300/kernel/kgdb.c
@@ -129,7 +129,7 @@ void kgdb_arch_exit(void)
/* Nothing to do */
}
-const struct kgdb_arch arch_kgdb_ops = {
+struct kgdb_arch arch_kgdb_ops = {
/* Breakpoint instruction: trapa #2 */
.gdb_bpt_instr = { 0x57, 0x20 },
};
diff --git a/arch/h8300/kernel/setup.c b/arch/h8300/kernel/setup.c
index a4d0470c10a9..34e2df5c0d6d 100644
--- a/arch/h8300/kernel/setup.c
+++ b/arch/h8300/kernel/setup.c
@@ -23,7 +23,6 @@
#include <linux/init.h>
#include <linux/of.h>
#include <linux/of_fdt.h>
-#include <linux/of_platform.h>
#include <linux/of_address.h>
#include <linux/clk-provider.h>
#include <linux/memblock.h>
@@ -71,10 +70,6 @@ void __init h8300_fdt_init(void *fdt, char *bootargs)
static void __init bootmem_init(void)
{
- int bootmap_size;
- unsigned long ram_start_pfn;
- unsigned long free_ram_start_pfn;
- unsigned long ram_end_pfn;
struct memblock_region *region;
memory_end = memory_start = 0;
@@ -88,33 +83,17 @@ static void __init bootmem_init(void)
if (!memory_end)
panic("No memory!");
- ram_start_pfn = PFN_UP(memory_start);
- /* free_ram_start_pfn is first page after kernel */
- free_ram_start_pfn = PFN_UP(__pa(_end));
- ram_end_pfn = PFN_DOWN(memblock_end_of_DRAM());
+ /* setup bootmem globals (we use no_bootmem, but mm still depends on this) */
+ min_low_pfn = PFN_UP(memory_start);
+ max_low_pfn = PFN_DOWN(memblock_end_of_DRAM());
+ max_pfn = max_low_pfn;
- max_pfn = ram_end_pfn;
+ memblock_reserve(__pa(_stext), _end - _stext);
- /*
- * give all the memory to the bootmap allocator, tell it to put the
- * boot mem_map at the start of memory
- */
- bootmap_size = init_bootmem_node(NODE_DATA(0),
- free_ram_start_pfn,
- 0,
- ram_end_pfn);
- /*
- * free the usable memory, we have to make sure we do not free
- * the bootmem bitmap so we then reserve it after freeing it :-)
- */
- free_bootmem(PFN_PHYS(free_ram_start_pfn),
- (ram_end_pfn - free_ram_start_pfn) << PAGE_SHIFT);
- reserve_bootmem(PFN_PHYS(free_ram_start_pfn), bootmap_size,
- BOOTMEM_DEFAULT);
+ early_init_fdt_reserve_self();
+ early_init_fdt_scan_reserved_mem();
- for_each_memblock(reserved, region) {
- reserve_bootmem(region->base, region->size, BOOTMEM_DEFAULT);
- }
+ memblock_dump_all();
}
void __init setup_arch(char **cmdline_p)
@@ -188,15 +167,6 @@ const struct seq_operations cpuinfo_op = {
.show = show_cpuinfo,
};
-static int __init device_probe(void)
-{
- of_platform_populate(NULL, NULL, NULL, NULL);
-
- return 0;
-}
-
-device_initcall(device_probe);
-
#if defined(CONFIG_CPU_H8300H)
#define get_wait(base, addr) ({ \
int baddr; \
diff --git a/arch/h8300/kernel/sim-console.c b/arch/h8300/kernel/sim-console.c
index 46138f55a9ea..03aa35b1a08c 100644
--- a/arch/h8300/kernel/sim-console.c
+++ b/arch/h8300/kernel/sim-console.c
@@ -13,12 +13,13 @@
static void sim_write(struct console *con, const char *s, unsigned n)
{
- register const int fd __asm__("er0") = 1; /* stdout */
register const char *_ptr __asm__("er1") = s;
register const unsigned _len __asm__("er2") = n;
- __asm__(".byte 0x5e,0x00,0x00,0xc7\n\t" /* jsr @0xc7 (sys_write) */
- : : "g"(fd), "g"(_ptr), "g"(_len));
+ __asm__("sub.l er0,er0\n\t" /* er0 = 1 (stdout) */
+ "inc.l #1,er0\n\t"
+ ".byte 0x5e,0x00,0x00,0xc7\n\t" /* jsr @0xc7 (sys_write) */
+ : : "g"(_ptr), "g"(_len):"er0");
}
static int __init sim_setup(struct earlycon_device *device, const char *opt)
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 2bf4ef792f2c..8b4a0c1748c0 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -28,6 +28,7 @@ config IA64
select HAVE_ARCH_TRACEHOOK
select HAVE_MEMBLOCK
select HAVE_MEMBLOCK_NODE_MAP
+ select NO_BOOTMEM
select HAVE_VIRT_CPU_ACCOUNTING
select ARCH_HAS_DMA_MARK_CLEAN
select ARCH_HAS_SG_CHAIN
diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
index ee5b652d320a..671ce1e3f6f2 100644
--- a/arch/ia64/hp/common/sba_iommu.c
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -1805,7 +1805,7 @@ static struct ioc_iommu ioc_iommu_info[] __initdata = {
{ SX2000_IOC_ID, "sx2000", NULL },
};
-static void ioc_init(unsigned long hpa, struct ioc *ioc)
+static void __init ioc_init(unsigned long hpa, struct ioc *ioc)
{
struct ioc_iommu *info;
@@ -2002,7 +2002,7 @@ sba_map_ioc_to_node(struct ioc *ioc, acpi_handle handle)
#endif
}
-static void acpi_sba_ioc_add(struct ioc *ioc)
+static void __init acpi_sba_ioc_add(struct ioc *ioc)
{
acpi_handle handle = ioc->handle;
acpi_status status;
diff --git a/arch/ia64/include/asm/io.h b/arch/ia64/include/asm/io.h
index 6f952171abf9..1e6fef69bb01 100644
--- a/arch/ia64/include/asm/io.h
+++ b/arch/ia64/include/asm/io.h
@@ -454,6 +454,7 @@ extern void memset_io(volatile void __iomem *s, int c, long n);
#define xlate_dev_kmem_ptr xlate_dev_kmem_ptr
#define xlate_dev_mem_ptr xlate_dev_mem_ptr
#include <asm-generic/io.h>
+#undef PCI_IOBASE
# endif /* __KERNEL__ */
diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c
index f4db2168d1b8..00e8e2a1eb19 100644
--- a/arch/ia64/kernel/asm-offsets.c
+++ b/arch/ia64/kernel/asm-offsets.c
@@ -50,8 +50,7 @@ void foo(void)
DEFINE(IA64_TASK_BLOCKED_OFFSET,offsetof (struct task_struct, blocked));
DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid));
- DEFINE(IA64_TASK_GROUP_LEADER_OFFSET, offsetof (struct task_struct, group_leader));
- DEFINE(IA64_TASK_TGIDLINK_OFFSET, offsetof (struct task_struct, pids[PIDTYPE_PID].pid));
+ DEFINE(IA64_TASK_THREAD_PID_OFFSET,offsetof (struct task_struct, thread_pid));
DEFINE(IA64_PID_LEVEL_OFFSET, offsetof (struct pid, level));
DEFINE(IA64_PID_UPID_OFFSET, offsetof (struct pid, numbers[0]));
DEFINE(IA64_TASK_PENDING_OFFSET,offsetof (struct task_struct, pending));
@@ -68,6 +67,7 @@ void foo(void)
DEFINE(IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,offsetof (struct signal_struct,
group_stop_count));
DEFINE(IA64_SIGNAL_SHARED_PENDING_OFFSET,offsetof (struct signal_struct, shared_pending));
+ DEFINE(IA64_SIGNAL_PIDS_TGID_OFFSET, offsetof (struct signal_struct, pids[PIDTYPE_TGID]));
BLANK();
diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S
index fe742ffafc7a..d80c99a5f55d 100644
--- a/arch/ia64/kernel/fsys.S
+++ b/arch/ia64/kernel/fsys.S
@@ -62,16 +62,16 @@ ENTRY(fsys_getpid)
.prologue
.altrp b6
.body
- add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16
+ add r17=IA64_TASK_SIGNAL_OFFSET,r16
;;
- ld8 r17=[r17] // r17 = current->group_leader
+ ld8 r17=[r17] // r17 = current->signal
add r9=TI_FLAGS+IA64_TASK_SIZE,r16
;;
ld4 r9=[r9]
- add r17=IA64_TASK_TGIDLINK_OFFSET,r17
+ add r17=IA64_SIGNAL_PIDS_TGID_OFFSET,r17
;;
and r9=TIF_ALLWORK_MASK,r9
- ld8 r17=[r17] // r17 = current->group_leader->pids[PIDTYPE_PID].pid
+ ld8 r17=[r17] // r17 = current->signal->pids[PIDTYPE_TGID]
;;
add r8=IA64_PID_LEVEL_OFFSET,r17
;;
@@ -96,11 +96,11 @@ ENTRY(fsys_set_tid_address)
.altrp b6
.body
add r9=TI_FLAGS+IA64_TASK_SIZE,r16
- add r17=IA64_TASK_TGIDLINK_OFFSET,r16
+ add r17=IA64_TASK_THREAD_PID_OFFSET,r16
;;
ld4 r9=[r9]
tnat.z p6,p7=r32 // check argument register for being NaT
- ld8 r17=[r17] // r17 = current->pids[PIDTYPE_PID].pid
+ ld8 r17=[r17] // r17 = current->thread_pid
;;
and r9=TIF_ALLWORK_MASK,r9
add r8=IA64_PID_LEVEL_OFFSET,r17
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index ad43cbf70628..0e6c2d9fb498 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -32,6 +32,7 @@
#include <linux/delay.h>
#include <linux/cpu.h>
#include <linux/kernel.h>
+#include <linux/memblock.h>
#include <linux/reboot.h>
#include <linux/sched/mm.h>
#include <linux/sched/clock.h>
@@ -383,8 +384,16 @@ reserve_memory (void)
sort_regions(rsvd_region, num_rsvd_regions);
num_rsvd_regions = merge_regions(rsvd_region, num_rsvd_regions);
-}
+ /* reserve all regions except the end of memory marker with memblock */
+ for (n = 0; n < num_rsvd_regions - 1; n++) {
+ struct rsvd_region *region = &rsvd_region[n];
+ phys_addr_t addr = __pa(region->start);
+ phys_addr_t size = region->end - region->start;
+
+ memblock_reserve(addr, size);
+ }
+}
/**
* find_initrd - get initrd parameters from the boot parameter structure
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index 7d64b30913d1..e2e40bbd391c 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -34,53 +34,6 @@ static unsigned long max_gap;
/* physical address where the bootmem map is located */
unsigned long bootmap_start;
-/**
- * find_bootmap_location - callback to find a memory area for the bootmap
- * @start: start of region
- * @end: end of region
- * @arg: unused callback data
- *
- * Find a place to put the bootmap and return its starting address in
- * bootmap_start. This address must be page-aligned.
- */
-static int __init
-find_bootmap_location (u64 start, u64 end, void *arg)
-{
- u64 needed = *(unsigned long *)arg;
- u64 range_start, range_end, free_start;
- int i;
-
-#if IGNORE_PFN0
- if (start == PAGE_OFFSET) {
- start += PAGE_SIZE;
- if (start >= end)
- return 0;
- }
-#endif
-
- free_start = PAGE_OFFSET;
-
- for (i = 0; i < num_rsvd_regions; i++) {
- range_start = max(start, free_start);
- range_end = min(end, rsvd_region[i].start & PAGE_MASK);
-
- free_start = PAGE_ALIGN(rsvd_region[i].end);
-
- if (range_end <= range_start)
- continue; /* skip over empty range */
-
- if (range_end - range_start >= needed) {
- bootmap_start = __pa(range_start);
- return -1; /* done */
- }
-
- /* nothing more available in this segment */
- if (range_end == end)
- return 0;
- }
- return 0;
-}
-
#ifdef CONFIG_SMP
static void *cpu_data;
/**
@@ -196,8 +149,6 @@ setup_per_cpu_areas(void)
void __init
find_memory (void)
{
- unsigned long bootmap_size;
-
reserve_memory();
/* first find highest page frame number */
@@ -205,21 +156,12 @@ find_memory (void)
max_low_pfn = 0;
efi_memmap_walk(find_max_min_low_pfn, NULL);
max_pfn = max_low_pfn;
- /* how many bytes to cover all the pages */
- bootmap_size = bootmem_bootmap_pages(max_pfn) << PAGE_SHIFT;
-
- /* look for a location to hold the bootmap */
- bootmap_start = ~0UL;
- efi_memmap_walk(find_bootmap_location, &bootmap_size);
- if (bootmap_start == ~0UL)
- panic("Cannot find %ld bytes for bootmap\n", bootmap_size);
- bootmap_size = init_bootmem_node(NODE_DATA(0),
- (bootmap_start >> PAGE_SHIFT), 0, max_pfn);
-
- /* Free all available memory, then mark bootmem-map as being in use. */
- efi_memmap_walk(filter_rsvd_memory, free_bootmem);
- reserve_bootmem(bootmap_start, bootmap_size, BOOTMEM_DEFAULT);
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+ efi_memmap_walk(filter_memory, register_active_ranges);
+#else
+ memblock_add_node(0, PFN_PHYS(max_low_pfn), 0);
+#endif
find_initrd();
@@ -244,11 +186,9 @@ paging_init (void)
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
#ifdef CONFIG_VIRTUAL_MEM_MAP
- efi_memmap_walk(filter_memory, register_active_ranges);
efi_memmap_walk(find_largest_hole, (u64 *)&max_gap);
if (max_gap < LARGE_GAP) {
vmem_map = (struct page *) 0;
- free_area_init_nodes(max_zone_pfns);
} else {
unsigned long map_size;
@@ -266,13 +206,10 @@ paging_init (void)
*/
NODE_DATA(0)->node_mem_map = vmem_map +
find_min_pfn_with_active_regions();
- free_area_init_nodes(max_zone_pfns);
printk("Virtual mem_map starts at 0x%p\n", mem_map);
}
-#else /* !CONFIG_VIRTUAL_MEM_MAP */
- memblock_add_node(0, PFN_PHYS(max_low_pfn), 0);
- free_area_init_nodes(max_zone_pfns);
#endif /* !CONFIG_VIRTUAL_MEM_MAP */
+ free_area_init_nodes(max_zone_pfns);
zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
}
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index 7d9bd20319ff..1928d5719e41 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -20,6 +20,7 @@
#include <linux/nmi.h>
#include <linux/swap.h>
#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/acpi.h>
#include <linux/efi.h>
#include <linux/nodemask.h>
@@ -38,9 +39,6 @@ struct early_node_data {
struct ia64_node_data *node_data;
unsigned long pernode_addr;
unsigned long pernode_size;
-#ifdef CONFIG_ZONE_DMA32
- unsigned long num_dma_physpages;
-#endif
unsigned long min_pfn;
unsigned long max_pfn;
};
@@ -60,33 +58,31 @@ pg_data_t *pgdat_list[MAX_NUMNODES];
(((node)*PERCPU_PAGE_SIZE) & (MAX_NODE_ALIGN_OFFSET - 1)))
/**
- * build_node_maps - callback to setup bootmem structs for each node
+ * build_node_maps - callback to setup mem_data structs for each node
* @start: physical start of range
* @len: length of range
* @node: node where this range resides
*
- * We allocate a struct bootmem_data for each piece of memory that we wish to
+ * Detect extents of each piece of memory that we wish to
* treat as a virtually contiguous block (i.e. each node). Each such block
* must start on an %IA64_GRANULE_SIZE boundary, so we round the address down
* if necessary. Any non-existent pages will simply be part of the virtual
- * memmap. We also update min_low_pfn and max_low_pfn here as we receive
- * memory ranges from the caller.
+ * memmap.
*/
static int __init build_node_maps(unsigned long start, unsigned long len,
int node)
{
unsigned long spfn, epfn, end = start + len;
- struct bootmem_data *bdp = &bootmem_node_data[node];
epfn = GRANULEROUNDUP(end) >> PAGE_SHIFT;
spfn = GRANULEROUNDDOWN(start) >> PAGE_SHIFT;
- if (!bdp->node_low_pfn) {
- bdp->node_min_pfn = spfn;
- bdp->node_low_pfn = epfn;
+ if (!mem_data[node].min_pfn) {
+ mem_data[node].min_pfn = spfn;
+ mem_data[node].max_pfn = epfn;
} else {
- bdp->node_min_pfn = min(spfn, bdp->node_min_pfn);
- bdp->node_low_pfn = max(epfn, bdp->node_low_pfn);
+ mem_data[node].min_pfn = min(spfn, mem_data[node].min_pfn);
+ mem_data[node].max_pfn = max(epfn, mem_data[node].max_pfn);
}
return 0;
@@ -269,7 +265,6 @@ static void __init fill_pernode(int node, unsigned long pernode,
{
void *cpu_data;
int cpus = early_nr_cpus_node(node);
- struct bootmem_data *bdp = &bootmem_node_data[node];
mem_data[node].pernode_addr = pernode;
mem_data[node].pernode_size = pernodesize;
@@ -284,8 +279,6 @@ static void __init fill_pernode(int node, unsigned long pernode,
mem_data[node].node_data = __va(pernode);
pernode += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));
-
- pgdat_list[node]->bdata = bdp;
pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));
cpu_data = per_cpu_node_setup(cpu_data, node);
@@ -325,20 +318,16 @@ static int __init find_pernode_space(unsigned long start, unsigned long len,
int node)
{
unsigned long spfn, epfn;
- unsigned long pernodesize = 0, pernode, pages, mapsize;
- struct bootmem_data *bdp = &bootmem_node_data[node];
+ unsigned long pernodesize = 0, pernode;
spfn = start >> PAGE_SHIFT;
epfn = (start + len) >> PAGE_SHIFT;
- pages = bdp->node_low_pfn - bdp->node_min_pfn;
- mapsize = bootmem_bootmap_pages(pages) << PAGE_SHIFT;
-
/*
* Make sure this memory falls within this node's usable memory
* since we may have thrown some away in build_maps().
*/
- if (spfn < bdp->node_min_pfn || epfn > bdp->node_low_pfn)
+ if (spfn < mem_data[node].min_pfn || epfn > mem_data[node].max_pfn)
return 0;
/* Don't setup this node's local space twice... */
@@ -353,32 +342,13 @@ static int __init find_pernode_space(unsigned long start, unsigned long len,
pernode = NODEDATA_ALIGN(start, node);
/* Is this range big enough for what we want to store here? */
- if (start + len > (pernode + pernodesize + mapsize))
+ if (start + len > (pernode + pernodesize))
fill_pernode(node, pernode, pernodesize);
return 0;
}
/**
- * free_node_bootmem - free bootmem allocator memory for use
- * @start: physical start of range
- * @len: length of range
- * @node: node where this range resides
- *
- * Simply calls the bootmem allocator to free the specified ranged from
- * the given pg_data_t's bdata struct. After this function has been called
- * for all the entries in the EFI memory map, the bootmem allocator will
- * be ready to service allocation requests.
- */
-static int __init free_node_bootmem(unsigned long start, unsigned long len,
- int node)
-{
- free_bootmem_node(pgdat_list[node], start, len);
-
- return 0;
-}
-
-/**
* reserve_pernode_space - reserve memory for per-node space
*
* Reserve the space used by the bootmem maps & per-node space in the boot
@@ -387,28 +357,17 @@ static int __init free_node_bootmem(unsigned long start, unsigned long len,
*/
static void __init reserve_pernode_space(void)
{
- unsigned long base, size, pages;
- struct bootmem_data *bdp;
+ unsigned long base, size;
int node;
for_each_online_node(node) {
- pg_data_t *pdp = pgdat_list[node];
-
if (node_isset(node, memory_less_mask))
continue;
- bdp = pdp->bdata;
-
- /* First the bootmem_map itself */
- pages = bdp->node_low_pfn - bdp->node_min_pfn;
- size = bootmem_bootmap_pages(pages) << PAGE_SHIFT;
- base = __pa(bdp->node_bootmem_map);
- reserve_bootmem_node(pdp, base, size, BOOTMEM_DEFAULT);
-
/* Now the per-node space */
size = mem_data[node].pernode_size;
base = __pa(mem_data[node].pernode_addr);
- reserve_bootmem_node(pdp, base, size, BOOTMEM_DEFAULT);
+ memblock_reserve(base, size);
}
}
@@ -528,6 +487,7 @@ void __init find_memory(void)
int node;
reserve_memory();
+ efi_memmap_walk(filter_memory, register_active_ranges);
if (num_online_nodes() == 0) {
printk(KERN_ERR "node info missing!\n");
@@ -544,38 +504,8 @@ void __init find_memory(void)
efi_memmap_walk(find_max_min_low_pfn, NULL);
for_each_online_node(node)
- if (bootmem_node_data[node].node_low_pfn) {
+ if (mem_data[node].min_pfn)
node_clear(node, memory_less_mask);
- mem_data[node].min_pfn = ~0UL;
- }
-
- efi_memmap_walk(filter_memory, register_active_ranges);
-
- /*
- * Initialize the boot memory maps in reverse order since that's
- * what the bootmem allocator expects
- */
- for (node = MAX_NUMNODES - 1; node >= 0; node--) {
- unsigned long pernode, pernodesize, map;
- struct bootmem_data *bdp;
-
- if (!node_online(node))
- continue;
- else if (node_isset(node, memory_less_mask))
- continue;
-
- bdp = &bootmem_node_data[node];
- pernode = mem_data[node].pernode_addr;
- pernodesize = mem_data[node].pernode_size;
- map = pernode + pernodesize;
-
- init_bootmem_node(pgdat_list[node],
- map>>PAGE_SHIFT,
- bdp->node_min_pfn,
- bdp->node_low_pfn);
- }
-
- efi_memmap_walk(filter_rsvd_memory, free_node_bootmem);
reserve_pernode_space();
memory_less_nodes();
@@ -655,36 +585,6 @@ void call_pernode_memory(unsigned long start, unsigned long len, void *arg)
}
/**
- * count_node_pages - callback to build per-node memory info structures
- * @start: physical start of range
- * @len: length of range
- * @node: node where this range resides
- *
- * Each node has it's own number of physical pages, DMAable pages, start, and
- * end page frame number. This routine will be called by call_pernode_memory()
- * for each piece of usable memory and will setup these values for each node.
- * Very similar to build_maps().
- */
-static __init int count_node_pages(unsigned long start, unsigned long len, int node)
-{
- unsigned long end = start + len;
-
-#ifdef CONFIG_ZONE_DMA32
- if (start <= __pa(MAX_DMA_ADDRESS))
- mem_data[node].num_dma_physpages +=
- (min(end, __pa(MAX_DMA_ADDRESS)) - start) >>PAGE_SHIFT;
-#endif
- start = GRANULEROUNDDOWN(start);
- end = GRANULEROUNDUP(end);
- mem_data[node].max_pfn = max(mem_data[node].max_pfn,
- end >> PAGE_SHIFT);
- mem_data[node].min_pfn = min(mem_data[node].min_pfn,
- start >> PAGE_SHIFT);
-
- return 0;
-}
-
-/**
* paging_init - setup page tables
*
* paging_init() sets up the page tables for each node of the system and frees
@@ -700,8 +600,6 @@ void __init paging_init(void)
max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT;
- efi_memmap_walk(filter_rsvd_memory, count_node_pages);
-
sparse_memory_present_with_active_regions(MAX_NUMNODES);
sparse_init();
diff --git a/arch/m68k/coldfire/clk.c b/arch/m68k/coldfire/clk.c
index 849cd208e2ed..7bc666e482eb 100644
--- a/arch/m68k/coldfire/clk.c
+++ b/arch/m68k/coldfire/clk.c
@@ -129,4 +129,33 @@ unsigned long clk_get_rate(struct clk *clk)
}
EXPORT_SYMBOL(clk_get_rate);
+/* dummy functions, should not be called */
+long clk_round_rate(struct clk *clk, unsigned long rate)
+{
+ WARN_ON(clk);
+ return 0;
+}
+EXPORT_SYMBOL(clk_round_rate);
+
+int clk_set_rate(struct clk *clk, unsigned long rate)
+{
+ WARN_ON(clk);
+ return 0;
+}
+EXPORT_SYMBOL(clk_set_rate);
+
+int clk_set_parent(struct clk *clk, struct clk *parent)
+{
+ WARN_ON(clk);
+ return 0;
+}
+EXPORT_SYMBOL(clk_set_parent);
+
+struct clk *clk_get_parent(struct clk *clk)
+{
+ WARN_ON(clk);
+ return NULL;
+}
+EXPORT_SYMBOL(clk_get_parent);
+
/***************************************************************************/
diff --git a/arch/m68k/include/asm/dma.h b/arch/m68k/include/asm/dma.h
index b0978a23bad1..ae2021964e32 100644
--- a/arch/m68k/include/asm/dma.h
+++ b/arch/m68k/include/asm/dma.h
@@ -390,7 +390,7 @@ static __inline__ void set_dma_mode(unsigned int dmanr, char mode)
#ifdef DEBUG_DMA
printk("%s(%d): dmanr=%d DMR[%x]=%x DIR[%x]=%x\n", __FILE__, __LINE__,
- dmanr, (int) &dmalp[MCFDMA_DMR], dmabp[MCFDMA_DMR],
+ dmanr, (int) &dmalp[MCFDMA_DMR], dmalp[MCFDMA_DMR],
(int) &dmawp[MCFDMA_DIR], dmawp[MCFDMA_DIR]);
#endif
}
@@ -421,7 +421,7 @@ static __inline__ void set_dma_addr(unsigned int dmanr, unsigned int a)
#ifdef DEBUG_DMA
printk("%s(%d): dmanr=%d DMR[%x]=%x SAR[%x]=%08x DAR[%x]=%08x\n",
- __FILE__, __LINE__, dmanr, (int) &dmawp[MCFDMA_DMR], dmawp[MCFDMA_DMR],
+ __FILE__, __LINE__, dmanr, (int) &dmalp[MCFDMA_DMR], dmalp[MCFDMA_DMR],
(int) &dmalp[MCFDMA_DSAR], dmalp[MCFDMA_DSAR],
(int) &dmalp[MCFDMA_DDAR], dmalp[MCFDMA_DDAR]);
#endif
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index 6163a39ddeb6..ace5c5bf1836 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -2,6 +2,8 @@ config MICROBLAZE
def_bool y
select ARCH_NO_SWAP
select ARCH_HAS_GCOV_PROFILE_ALL
+ select ARCH_HAS_SYNC_DMA_FOR_CPU
+ select ARCH_HAS_SYNC_DMA_FOR_DEVICE
select ARCH_MIGHT_HAVE_PC_PARPORT
select ARCH_NO_COHERENT_DMA_MMAP if !MMU
select ARCH_WANT_IPC_PARSE_VERSION
@@ -9,6 +11,8 @@ config MICROBLAZE
select TIMER_OF
select CLONE_BACKWARDS3
select COMMON_CLK
+ select DMA_NONCOHERENT_OPS
+ select DMA_NONCOHERENT_MMAP
select GENERIC_ATOMIC64
select GENERIC_CLOCKEVENTS
select GENERIC_CPU_DEVICES
diff --git a/arch/microblaze/Makefile b/arch/microblaze/Makefile
index d269dd4b8279..73330360a8e6 100644
--- a/arch/microblaze/Makefile
+++ b/arch/microblaze/Makefile
@@ -40,11 +40,11 @@ CPUFLAGS-$(CONFIG_XILINX_MICROBLAZE0_USE_PCMP_INSTR) += -mxl-pattern-compare
ifdef CONFIG_CPU_BIG_ENDIAN
KBUILD_CFLAGS += -mbig-endian
KBUILD_AFLAGS += -mbig-endian
-LD += -EB
+LDFLAGS += -EB
else
KBUILD_CFLAGS += -mlittle-endian
KBUILD_AFLAGS += -mlittle-endian
-LD += -EL
+LDFLAGS += -EL
endif
CPUFLAGS-1 += $(call cc-option,-mcpu=v$(CPU_VER))
diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index fe6a6c6e5003..569ba9e670c1 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -5,6 +5,7 @@ generic-y += bugs.h
generic-y += compat.h
generic-y += device.h
generic-y += div64.h
+generic-y += dma-mapping.h
generic-y += emergency-restart.h
generic-y += exec.h
generic-y += extable.h
diff --git a/arch/microblaze/include/asm/dma-mapping.h b/arch/microblaze/include/asm/dma-mapping.h
deleted file mode 100644
index add50c1373bf..000000000000
--- a/arch/microblaze/include/asm/dma-mapping.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Implements the generic device dma API for microblaze and the pci
- *
- * Copyright (C) 2009-2010 Michal Simek <monstr@monstr.eu>
- * Copyright (C) 2009-2010 PetaLogix
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License. See the file COPYING in the main directory of this
- * archive for more details.
- *
- * This file is base on powerpc and x86 dma-mapping.h versions
- * Copyright (C) 2004 IBM
- */
-
-#ifndef _ASM_MICROBLAZE_DMA_MAPPING_H
-#define _ASM_MICROBLAZE_DMA_MAPPING_H
-
-/*
- * Available generic sets of operations
- */
-extern const struct dma_map_ops dma_nommu_ops;
-
-static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
-{
- return &dma_nommu_ops;
-}
-
-#endif /* _ASM_MICROBLAZE_DMA_MAPPING_H */
diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h
index db8b1fa83452..7b650ab14fa0 100644
--- a/arch/microblaze/include/asm/pgtable.h
+++ b/arch/microblaze/include/asm/pgtable.h
@@ -553,11 +553,6 @@ void __init *early_get_page(void);
extern unsigned long ioremap_bot, ioremap_base;
-void *consistent_alloc(gfp_t gfp, size_t size, dma_addr_t *dma_handle);
-void consistent_free(size_t size, void *vaddr);
-void consistent_sync(void *vaddr, size_t size, int direction);
-void consistent_sync_page(struct page *page, unsigned long offset,
- size_t size, int direction);
unsigned long consistent_virt_to_pfn(void *vaddr);
void setup_memory(void);
diff --git a/arch/microblaze/kernel/dma.c b/arch/microblaze/kernel/dma.c
index 3145e7dc8ab1..71032cf64669 100644
--- a/arch/microblaze/kernel/dma.c
+++ b/arch/microblaze/kernel/dma.c
@@ -8,29 +8,15 @@
*/
#include <linux/device.h>
-#include <linux/dma-mapping.h>
+#include <linux/dma-noncoherent.h>
#include <linux/gfp.h>
#include <linux/dma-debug.h>
#include <linux/export.h>
#include <linux/bug.h>
#include <asm/cacheflush.h>
-static void *dma_nommu_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag,
- unsigned long attrs)
-{
- return consistent_alloc(flag, size, dma_handle);
-}
-
-static void dma_nommu_free_coherent(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_handle,
- unsigned long attrs)
-{
- consistent_free(size, vaddr);
-}
-
-static inline void __dma_sync(unsigned long paddr,
- size_t size, enum dma_data_direction direction)
+static void __dma_sync(struct device *dev, phys_addr_t paddr, size_t size,
+ enum dma_data_direction direction)
{
switch (direction) {
case DMA_TO_DEVICE:
@@ -45,113 +31,21 @@ static inline void __dma_sync(unsigned long paddr,
}
}
-static int dma_nommu_map_sg(struct device *dev, struct scatterlist *sgl,
- int nents, enum dma_data_direction direction,
- unsigned long attrs)
+void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
+ size_t size, enum dma_data_direction dir)
{
- struct scatterlist *sg;
- int i;
-
- /* FIXME this part of code is untested */
- for_each_sg(sgl, sg, nents, i) {
- sg->dma_address = sg_phys(sg);
-
- if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
- continue;
-
- __dma_sync(sg_phys(sg), sg->length, direction);
- }
-
- return nents;
-}
-
-static inline dma_addr_t dma_nommu_map_page(struct device *dev,
- struct page *page,
- unsigned long offset,
- size_t size,
- enum dma_data_direction direction,
- unsigned long attrs)
-{
- if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
- __dma_sync(page_to_phys(page) + offset, size, direction);
- return page_to_phys(page) + offset;
+ __dma_sync(dev, paddr, size, dir);
}
-static inline void dma_nommu_unmap_page(struct device *dev,
- dma_addr_t dma_address,
- size_t size,
- enum dma_data_direction direction,
- unsigned long attrs)
+void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
+ size_t size, enum dma_data_direction dir)
{
-/* There is not necessary to do cache cleanup
- *
- * phys_to_virt is here because in __dma_sync_page is __virt_to_phys and
- * dma_address is physical address
- */
- if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
- __dma_sync(dma_address, size, direction);
+ __dma_sync(dev, paddr, size, dir);
}
-static inline void
-dma_nommu_sync_single_for_cpu(struct device *dev,
- dma_addr_t dma_handle, size_t size,
- enum dma_data_direction direction)
-{
- /*
- * It's pointless to flush the cache as the memory segment
- * is given to the CPU
- */
-
- if (direction == DMA_FROM_DEVICE)
- __dma_sync(dma_handle, size, direction);
-}
-
-static inline void
-dma_nommu_sync_single_for_device(struct device *dev,
- dma_addr_t dma_handle, size_t size,
- enum dma_data_direction direction)
-{
- /*
- * It's pointless to invalidate the cache if the device isn't
- * supposed to write to the relevant region
- */
-
- if (direction == DMA_TO_DEVICE)
- __dma_sync(dma_handle, size, direction);
-}
-
-static inline void
-dma_nommu_sync_sg_for_cpu(struct device *dev,
- struct scatterlist *sgl, int nents,
- enum dma_data_direction direction)
-{
- struct scatterlist *sg;
- int i;
-
- /* FIXME this part of code is untested */
- if (direction == DMA_FROM_DEVICE)
- for_each_sg(sgl, sg, nents, i)
- __dma_sync(sg->dma_address, sg->length, direction);
-}
-
-static inline void
-dma_nommu_sync_sg_for_device(struct device *dev,
- struct scatterlist *sgl, int nents,
- enum dma_data_direction direction)
-{
- struct scatterlist *sg;
- int i;
-
- /* FIXME this part of code is untested */
- if (direction == DMA_TO_DEVICE)
- for_each_sg(sgl, sg, nents, i)
- __dma_sync(sg->dma_address, sg->length, direction);
-}
-
-static
-int dma_nommu_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
- void *cpu_addr, dma_addr_t handle, size_t size,
- unsigned long attrs)
+int arch_dma_mmap(struct device *dev, struct vm_area_struct *vma,
+ void *cpu_addr, dma_addr_t handle, size_t size,
+ unsigned long attrs)
{
#ifdef CONFIG_MMU
unsigned long user_count = vma_pages(vma);
@@ -170,17 +64,3 @@ int dma_nommu_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
return -ENXIO;
#endif
}
-
-const struct dma_map_ops dma_nommu_ops = {
- .alloc = dma_nommu_alloc_coherent,
- .free = dma_nommu_free_coherent,
- .mmap = dma_nommu_mmap_coherent,
- .map_sg = dma_nommu_map_sg,
- .map_page = dma_nommu_map_page,
- .unmap_page = dma_nommu_unmap_page,
- .sync_single_for_cpu = dma_nommu_sync_single_for_cpu,
- .sync_single_for_device = dma_nommu_sync_single_for_device,
- .sync_sg_for_cpu = dma_nommu_sync_sg_for_cpu,
- .sync_sg_for_device = dma_nommu_sync_sg_for_device,
-};
-EXPORT_SYMBOL(dma_nommu_ops);
diff --git a/arch/microblaze/kernel/head.S b/arch/microblaze/kernel/head.S
index 4655ff342c64..f264fdcf152a 100644
--- a/arch/microblaze/kernel/head.S
+++ b/arch/microblaze/kernel/head.S
@@ -341,11 +341,6 @@ start_here:
/* Initialize r31 with current task address */
addik r31, r0, init_task
- /*
- * Call platform dependent initialize function.
- * Please see $(ARCH)/mach-$(SUBARCH)/setup.c for
- * the function.
- */
addik r11, r0, machine_early_init
brald r15, r11
nop
diff --git a/arch/microblaze/mm/consistent.c b/arch/microblaze/mm/consistent.c
index b06c3a7faf20..c9a278ac795a 100644
--- a/arch/microblaze/mm/consistent.c
+++ b/arch/microblaze/mm/consistent.c
@@ -33,6 +33,7 @@
#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/gfp.h>
+#include <linux/dma-noncoherent.h>
#include <asm/pgalloc.h>
#include <linux/io.h>
@@ -59,7 +60,8 @@
* uncached region. This will no doubt cause big problems if memory allocated
* here is not also freed properly. -- JW
*/
-void *consistent_alloc(gfp_t gfp, size_t size, dma_addr_t *dma_handle)
+void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
+ gfp_t gfp, unsigned long attrs)
{
unsigned long order, vaddr;
void *ret;
@@ -154,7 +156,6 @@ void *consistent_alloc(gfp_t gfp, size_t size, dma_addr_t *dma_handle)
return ret;
}
-EXPORT_SYMBOL(consistent_alloc);
#ifdef CONFIG_MMU
static pte_t *consistent_virt_to_pte(void *vaddr)
@@ -178,7 +179,8 @@ unsigned long consistent_virt_to_pfn(void *vaddr)
/*
* free page(s) as defined by the above mapping.
*/
-void consistent_free(size_t size, void *vaddr)
+void arch_dma_free(struct device *dev, size_t size, void *vaddr,
+ dma_addr_t dma_addr, unsigned long attrs)
{
struct page *page;
@@ -218,49 +220,3 @@ void consistent_free(size_t size, void *vaddr)
flush_tlb_all();
#endif
}
-EXPORT_SYMBOL(consistent_free);
-
-/*
- * make an area consistent.
- */
-void consistent_sync(void *vaddr, size_t size, int direction)
-{
- unsigned long start;
- unsigned long end;
-
- start = (unsigned long)vaddr;
-
- /* Convert start address back down to unshadowed memory region */
-#ifdef CONFIG_XILINX_UNCACHED_SHADOW
- start &= ~UNCACHED_SHADOW_MASK;
-#endif
- end = start + size;
-
- switch (direction) {
- case PCI_DMA_NONE:
- BUG();
- case PCI_DMA_FROMDEVICE: /* invalidate only */
- invalidate_dcache_range(start, end);
- break;
- case PCI_DMA_TODEVICE: /* writeback only */
- flush_dcache_range(start, end);
- break;
- case PCI_DMA_BIDIRECTIONAL: /* writeback and invalidate */
- flush_dcache_range(start, end);
- break;
- }
-}
-EXPORT_SYMBOL(consistent_sync);
-
-/*
- * consistent_sync_page makes memory consistent. identical
- * to consistent_sync, but takes a struct page instead of a
- * virtual address
- */
-void consistent_sync_page(struct page *page, unsigned long offset,
- size_t size, int direction)
-{
- unsigned long start = (unsigned long)page_address(page) + offset;
- consistent_sync((void *)start, size, direction);
-}
-EXPORT_SYMBOL(consistent_sync_page);
diff --git a/arch/microblaze/pci/pci-common.c b/arch/microblaze/pci/pci-common.c
index f34346d56095..2ffd171af8b6 100644
--- a/arch/microblaze/pci/pci-common.c
+++ b/arch/microblaze/pci/pci-common.c
@@ -597,19 +597,6 @@ static void pcibios_fixup_resources(struct pci_dev *dev)
}
DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pcibios_fixup_resources);
-/*
- * We need to avoid collisions with `mirrored' VGA ports
- * and other strange ISA hardware, so we always want the
- * addresses to be allocated in the 0x000-0x0ff region
- * modulo 0x400.
- *
- * Why? Because some silly external IO cards only decode
- * the low 10 bits of the IO address. The 0x00-0xff region
- * is reserved for motherboard devices that decode all 16
- * bits, so it's ok to allocate at, say, 0x2800-0x28ff,
- * but we want to try to avoid allocating at 0x2900-0x2bff
- * which might have be mirrored at 0x0100-0x03ff..
- */
int pcibios_add_device(struct pci_dev *dev)
{
dev->irq = of_irq_parse_and_map_pci(dev, 0, 0);
diff --git a/arch/microblaze/pci/xilinx_pci.c b/arch/microblaze/pci/xilinx_pci.c
index 14c7da5fd039..b800909ddccf 100644
--- a/arch/microblaze/pci/xilinx_pci.c
+++ b/arch/microblaze/pci/xilinx_pci.c
@@ -157,6 +157,7 @@ void __init xilinx_pci_init(void)
/* Set the max bus number to 255, and bus/subbus no's to 0 */
pci_reg = of_iomap(pci_node, 0);
+ WARN_ON(!pci_reg);
out_be32(pci_reg + XPLB_PCI_BUS, 0x000000ff);
iounmap(pci_reg);
diff --git a/arch/mips/kernel/uprobes.c b/arch/mips/kernel/uprobes.c
index f7a0645ccb82..4aaff3b3175c 100644
--- a/arch/mips/kernel/uprobes.c
+++ b/arch/mips/kernel/uprobes.c
@@ -224,7 +224,7 @@ unsigned long arch_uretprobe_hijack_return_addr(
int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm,
unsigned long vaddr)
{
- return uprobe_write_opcode(mm, vaddr, UPROBE_SWBP_INSN);
+ return uprobe_write_opcode(auprobe, mm, vaddr, UPROBE_SWBP_INSN);
}
void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
diff --git a/arch/parisc/include/asm/elf.h b/arch/parisc/include/asm/elf.h
index f019d3ec0c1c..d00973aab7f1 100644
--- a/arch/parisc/include/asm/elf.h
+++ b/arch/parisc/include/asm/elf.h
@@ -235,6 +235,7 @@ typedef unsigned long elf_greg_t;
#define SET_PERSONALITY(ex) \
({ \
set_personality((current->personality & ~PER_MASK) | PER_LINUX); \
+ clear_thread_flag(TIF_32BIT); \
current->thread.map_base = DEFAULT_MAP_BASE; \
current->thread.task_size = DEFAULT_TASK_SIZE; \
})
@@ -243,9 +244,11 @@ typedef unsigned long elf_greg_t;
#define COMPAT_SET_PERSONALITY(ex) \
({ \
- set_thread_flag(TIF_32BIT); \
- current->thread.map_base = DEFAULT_MAP_BASE32; \
- current->thread.task_size = DEFAULT_TASK_SIZE32; \
+ if ((ex).e_ident[EI_CLASS] == ELFCLASS32) { \
+ set_thread_flag(TIF_32BIT); \
+ current->thread.map_base = DEFAULT_MAP_BASE32; \
+ current->thread.task_size = DEFAULT_TASK_SIZE32; \
+ } else clear_thread_flag(TIF_32BIT); \
})
/*
diff --git a/arch/parisc/include/asm/linkage.h b/arch/parisc/include/asm/linkage.h
index 49f6f3d772cc..cd6fe4febead 100644
--- a/arch/parisc/include/asm/linkage.h
+++ b/arch/parisc/include/asm/linkage.h
@@ -22,15 +22,6 @@
name: ASM_NL\
.export name
-#ifdef CONFIG_64BIT
-#define ENDPROC(name) \
- END(name)
-#else
-#define ENDPROC(name) \
- .type name, @function !\
- END(name)
-#endif
-
#define ENTRY_CFI(name, ...) \
ENTRY(name) ASM_NL\
.proc ASM_NL\
diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h
index 2dbe5580a1a4..2bd5e695bdad 100644
--- a/arch/parisc/include/asm/processor.h
+++ b/arch/parisc/include/asm/processor.h
@@ -256,11 +256,7 @@ on downward growing arches, it looks like this:
* it in here from the current->personality
*/
-#ifdef CONFIG_64BIT
-#define USER_WIDE_MODE (!test_thread_flag(TIF_32BIT))
-#else
-#define USER_WIDE_MODE 0
-#endif
+#define USER_WIDE_MODE (!is_32bit_task())
#define start_thread(regs, new_pc, new_sp) do { \
elf_addr_t *sp = (elf_addr_t *)new_sp; \
diff --git a/arch/parisc/include/asm/traps.h b/arch/parisc/include/asm/traps.h
index e00013248907..8ecc1f0c0483 100644
--- a/arch/parisc/include/asm/traps.h
+++ b/arch/parisc/include/asm/traps.h
@@ -2,7 +2,9 @@
#ifndef __ASM_TRAPS_H
#define __ASM_TRAPS_H
-#ifdef __KERNEL__
+#define PARISC_ITLB_TRAP 6 /* defined by architecture. Do not change. */
+
+#if !defined(__ASSEMBLY__)
struct pt_regs;
/* traps.c */
diff --git a/arch/parisc/include/asm/unwind.h b/arch/parisc/include/asm/unwind.h
index f133b7efbebb..9547e5261a8b 100644
--- a/arch/parisc/include/asm/unwind.h
+++ b/arch/parisc/include/asm/unwind.h
@@ -73,8 +73,10 @@ unwind_table_remove(struct unwind_table *table);
void unwind_frame_init(struct unwind_frame_info *info, struct task_struct *t,
struct pt_regs *regs);
-void unwind_frame_init_from_blocked_task(struct unwind_frame_info *info, struct task_struct *t);
-void unwind_frame_init_running(struct unwind_frame_info *info, struct pt_regs *regs);
+void unwind_frame_init_from_blocked_task(struct unwind_frame_info *info,
+ struct task_struct *t);
+void unwind_frame_init_task(struct unwind_frame_info *info,
+ struct task_struct *task, struct pt_regs *regs);
int unwind_once(struct unwind_frame_info *info);
int unwind_to_user(struct unwind_frame_info *info);
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index c7508f5717fb..242c5ab65611 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -36,6 +36,7 @@
#include <asm/signal.h>
#include <asm/unistd.h>
#include <asm/ldcw.h>
+#include <asm/traps.h>
#include <asm/thread_info.h>
#include <linux/linkage.h>
@@ -692,7 +693,7 @@ ENTRY(fault_vector_20)
def 3
extint 4
def 5
- itlb_20 6
+ itlb_20 PARISC_ITLB_TRAP
def 7
def 8
def 9
@@ -735,7 +736,7 @@ ENTRY(fault_vector_11)
def 3
extint 4
def 5
- itlb_11 6
+ itlb_11 PARISC_ITLB_TRAP
def 7
def 8
def 9
@@ -776,7 +777,7 @@ END(fault_vector_11)
* copy_thread moved args into task save area.
*/
-ENTRY_CFI(ret_from_kernel_thread)
+ENTRY(ret_from_kernel_thread)
/* Call schedule_tail first though */
BL schedule_tail, %r2
nop
@@ -791,7 +792,7 @@ ENTRY_CFI(ret_from_kernel_thread)
copy %r31, %r2
b finish_child_return
nop
-ENDPROC_CFI(ret_from_kernel_thread)
+END(ret_from_kernel_thread)
/*
@@ -815,9 +816,8 @@ ENTRY_CFI(_switch_to)
LDREG TASK_THREAD_INFO(%r25), %r25
bv %r0(%r2)
mtctl %r25,%cr30
-ENDPROC_CFI(_switch_to)
-ENTRY_CFI(_switch_to_ret)
+ENTRY(_switch_to_ret)
mtctl %r0, %cr0 /* Needed for single stepping */
callee_rest
callee_rest_float
@@ -825,7 +825,7 @@ ENTRY_CFI(_switch_to_ret)
LDREG -RP_OFFSET(%r30), %r2
bv %r0(%r2)
copy %r26, %r28
-ENDPROC_CFI(_switch_to_ret)
+ENDPROC_CFI(_switch_to)
/*
* Common rfi return path for interruptions, kernel execve, and
@@ -886,14 +886,12 @@ ENTRY_CFI(syscall_exit_rfi)
STREG %r19,PT_SR5(%r16)
STREG %r19,PT_SR6(%r16)
STREG %r19,PT_SR7(%r16)
-ENDPROC_CFI(syscall_exit_rfi)
-ENTRY_CFI(intr_return)
+ENTRY(intr_return)
/* check for reschedule */
mfctl %cr30,%r1
LDREG TI_FLAGS(%r1),%r19 /* sched.h: TIF_NEED_RESCHED */
bb,<,n %r19,31-TIF_NEED_RESCHED,intr_do_resched /* forward */
-ENDPROC_CFI(intr_return)
.import do_notify_resume,code
intr_check_sig:
@@ -1049,6 +1047,7 @@ intr_extint:
b do_cpu_irq_mask
ldo R%intr_return(%r2), %r2 /* return to intr_return, not here */
+ENDPROC_CFI(syscall_exit_rfi)
/* Generic interruptions (illegal insn, unaligned, page fault, etc) */
@@ -1068,21 +1067,12 @@ ENTRY_CFI(intr_save) /* for os_hpmc */
save_specials %r29
/* If this trap is a itlb miss, skip saving/adjusting isr/ior */
-
- /*
- * FIXME: 1) Use a #define for the hardwired "6" below (and in
- * traps.c.
- * 2) Once we start executing code above 4 Gb, we need
- * to adjust iasq/iaoq here in the same way we
- * adjust isr/ior below.
- */
-
- cmpib,COND(=),n 6,%r26,skip_save_ior
+ cmpib,COND(=),n PARISC_ITLB_TRAP,%r26,skip_save_ior
- mfctl %cr20, %r16 /* isr */
+ mfctl %isr, %r16
nop /* serialize mfctl on PA 2.0 to avoid 4 cycle penalty */
- mfctl %cr21, %r17 /* ior */
+ mfctl %ior, %r17
#ifdef CONFIG_64BIT
@@ -1094,22 +1084,34 @@ ENTRY_CFI(intr_save) /* for os_hpmc */
extrd,u,*<> %r8,PSW_W_BIT,1,%r0
depdi 0,1,2,%r17
- /*
- * FIXME: This code has hardwired assumptions about the split
- * between space bits and offset bits. This will change
- * when we allow alternate page sizes.
- */
-
- /* adjust isr/ior. */
- extrd,u %r16,63,SPACEID_SHIFT,%r1 /* get high bits from isr for ior */
- depd %r1,31,SPACEID_SHIFT,%r17 /* deposit them into ior */
- depdi 0,63,SPACEID_SHIFT,%r16 /* clear them from isr */
+ /* adjust isr/ior: get high bits from isr and deposit in ior */
+ space_adjust %r16,%r17,%r1
#endif
STREG %r16, PT_ISR(%r29)
STREG %r17, PT_IOR(%r29)
+#if 0 && defined(CONFIG_64BIT)
+ /* Revisit when we have 64-bit code above 4Gb */
+ b,n intr_save2
skip_save_ior:
+ /* We have a itlb miss, and when executing code above 4 Gb on ILP64, we
+ * need to adjust iasq/iaoq here in the same way we adjusted isr/ior
+ * above.
+ */
+ extrd,u,* %r8,PSW_W_BIT,1,%r1
+ cmpib,COND(=),n 1,%r1,intr_save2
+ LDREG PT_IASQ0(%r29), %r16
+ LDREG PT_IAOQ0(%r29), %r17
+ /* adjust iasq/iaoq */
+ space_adjust %r16,%r17,%r1
+ STREG %r16, PT_IASQ0(%r29)
+ STREG %r17, PT_IAOQ0(%r29)
+#else
+skip_save_ior:
+#endif
+
+intr_save2:
virt_map
save_general %r29
@@ -1747,7 +1749,7 @@ fork_like fork
fork_like vfork
/* Set the return value for the child */
-ENTRY_CFI(child_return)
+ENTRY(child_return)
BL schedule_tail, %r2
nop
finish_child_return:
@@ -1759,7 +1761,7 @@ finish_child_return:
reg_restore %r1
b syscall_exit
copy %r0,%r28
-ENDPROC_CFI(child_return)
+END(child_return)
ENTRY_CFI(sys_rt_sigreturn_wrapper)
LDREG TI_TASK-THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r26
@@ -1791,7 +1793,7 @@ ENTRY_CFI(sys_rt_sigreturn_wrapper)
LDREG PT_GR28(%r1),%r28 /* reload original r28 for syscall_exit */
ENDPROC_CFI(sys_rt_sigreturn_wrapper)
-ENTRY_CFI(syscall_exit)
+ENTRY(syscall_exit)
/* NOTE: Not all syscalls exit this way. rt_sigreturn will exit
* via syscall_exit_rfi if the signal was received while the process
* was running.
@@ -1990,15 +1992,13 @@ syscall_do_resched:
#else
nop
#endif
-ENDPROC_CFI(syscall_exit)
+END(syscall_exit)
#ifdef CONFIG_FUNCTION_TRACER
.import ftrace_function_trampoline,code
.align L1_CACHE_BYTES
- .globl mcount
- .type mcount, @function
ENTRY_CFI(mcount, caller)
_mcount:
.export _mcount,data
@@ -2027,8 +2027,6 @@ ENDPROC_CFI(mcount)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
.align 8
- .globl return_to_handler
- .type return_to_handler, @function
ENTRY_CFI(return_to_handler, caller,frame=FRAME_SIZE)
.export parisc_return_to_handler,data
parisc_return_to_handler:
@@ -2078,6 +2076,7 @@ ENDPROC_CFI(return_to_handler)
/* void call_on_stack(unsigned long param1, void *func,
unsigned long new_stack) */
ENTRY_CFI(call_on_stack, FRAME=2*FRAME_SIZE,CALLS,SAVE_RP,SAVE_SP)
+ENTRY(_call_on_stack)
copy %sp, %r1
/* Regarding the HPPA calling conventions for function pointers,
diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c
index 45cc65902fce..82bd0d0927ce 100644
--- a/arch/parisc/kernel/processor.c
+++ b/arch/parisc/kernel/processor.c
@@ -288,6 +288,8 @@ void __init collect_boot_cpu_data(void)
printk(KERN_INFO "model %s\n",
boot_cpu_data.pdc.sys_model_name);
+ dump_stack_set_arch_desc("%s", boot_cpu_data.pdc.sys_model_name);
+
boot_cpu_data.hversion = boot_cpu_data.pdc.model.hversion;
boot_cpu_data.sversion = boot_cpu_data.pdc.model.sversion;
diff --git a/arch/parisc/kernel/stacktrace.c b/arch/parisc/kernel/stacktrace.c
index 2fe914c5f533..ec5835e83a7a 100644
--- a/arch/parisc/kernel/stacktrace.c
+++ b/arch/parisc/kernel/stacktrace.c
@@ -16,20 +16,7 @@ static void dump_trace(struct task_struct *task, struct stack_trace *trace)
{
struct unwind_frame_info info;
- /* initialize unwind info */
- if (task == current) {
- unsigned long sp;
- struct pt_regs r;
-HERE:
- asm volatile ("copy %%r30, %0" : "=r"(sp));
- memset(&r, 0, sizeof(struct pt_regs));
- r.iaoq[0] = (unsigned long)&&HERE;
- r.gr[2] = (unsigned long)__builtin_return_address(0);
- r.gr[30] = sp;
- unwind_frame_init(&info, task, &r);
- } else {
- unwind_frame_init_from_blocked_task(&info, task);
- }
+ unwind_frame_init_task(&info, task, NULL);
/* unwind stack and save entries in stack_trace struct */
trace->nr_entries = 0;
diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c
index 43b308cfdf53..376ea0d1b275 100644
--- a/arch/parisc/kernel/sys_parisc.c
+++ b/arch/parisc/kernel/sys_parisc.c
@@ -156,11 +156,6 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
int do_color_align, last_mmap;
struct vm_unmapped_area_info info;
-#ifdef CONFIG_64BIT
- /* This should only ever run for 32-bit processes. */
- BUG_ON(!test_thread_flag(TIF_32BIT));
-#endif
-
/* requested length too big for entire address space */
if (len > TASK_SIZE)
return -ENOMEM;
diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S
index 5f7e57fcaeef..f453997a7b8f 100644
--- a/arch/parisc/kernel/syscall.S
+++ b/arch/parisc/kernel/syscall.S
@@ -108,12 +108,8 @@ linux_gateway_entry:
mtsp %r0,%sr6 /* get kernel space into sr6 */
#ifdef CONFIG_64BIT
- /* for now we can *always* set the W bit on entry to the syscall
- * since we don't support wide userland processes. We could
- * also save the current SM other than in r0 and restore it on
- * exit from the syscall, and also use that value to know
- * whether to do narrow or wide syscalls. -PB
- */
+ /* Store W bit on entry to the syscall in case it's a wide userland
+ * process. */
ssm PSW_SM_W, %r1
extrd,u %r1,PSW_W_BIT,1,%r1
/* sp must be aligned on 4, so deposit the W bit setting into
@@ -227,8 +223,7 @@ linux_gateway_entry:
or,= %r2,%r2,%r2
ldo R%sys_call_table64(%r1), %r19
#else
- ldil L%sys_call_table, %r1
- ldo R%sys_call_table(%r1), %r19
+ load32 sys_call_table, %r19
#endif
comiclr,>> __NR_Linux_syscalls, %r20, %r0
b,n .Lsyscall_nosys
@@ -331,8 +326,6 @@ tracesys_next:
* task->thread.regs.gr[20] above.
*/
copy %ret0,%r20
- ldil L%sys_call_table,%r1
- ldo R%sys_call_table(%r1), %r19
ldo -THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r1 /* get task ptr */
LDREG TI_TASK(%r1), %r1
@@ -354,6 +347,23 @@ tracesys_next:
comiclr,>> __NR_Linux_syscalls, %r20, %r0
b,n .Ltracesys_nosys
+ /* Note! We cannot use the syscall table that is mapped
+ nearby since the gateway page is mapped execute-only. */
+
+#ifdef CONFIG_64BIT
+ LDREG TASK_PT_GR30(%r1), %r19 /* get users sp back */
+ extrd,u %r19,63,1,%r2 /* W hidden in bottom bit */
+
+ ldil L%sys_call_table, %r1
+ or,= %r2,%r2,%r2
+ addil L%(sys_call_table64-sys_call_table), %r1
+ ldo R%sys_call_table(%r1), %r19
+ or,= %r2,%r2,%r2
+ ldo R%sys_call_table64(%r1), %r19
+#else
+ load32 sys_call_table, %r19
+#endif
+
LDREGX %r20(%r19), %r19
/* If this is a sys_rt_sigreturn call, and the signal was received
@@ -464,16 +474,13 @@ tracesys_sigexit:
lws_start:
#ifdef CONFIG_64BIT
- /* FIXME: If we are a 64-bit kernel just
- * turn this on unconditionally.
- */
ssm PSW_SM_W, %r1
extrd,u %r1,PSW_W_BIT,1,%r1
/* sp must be aligned on 4, so deposit the W bit setting into
* the bottom of sp temporarily */
or,ev %r1,%r30,%r30
- /* Clip LWS number to a 32-bit value always */
+ /* Clip LWS number to a 32-bit value for 32-bit processes */
depdi 0, 31, 32, %r20
#endif
diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c
index 318815212518..68f10f87073d 100644
--- a/arch/parisc/kernel/traps.c
+++ b/arch/parisc/kernel/traps.c
@@ -45,7 +45,7 @@
#include "../math-emu/math-emu.h" /* for handle_fpe() */
-static void parisc_show_stack(struct task_struct *task, unsigned long *sp,
+static void parisc_show_stack(struct task_struct *task,
struct pt_regs *regs);
static int printbinary(char *buf, unsigned long x, int nbits)
@@ -152,7 +152,7 @@ void show_regs(struct pt_regs *regs)
printk("%s IAOQ[1]: %pS\n", level, (void *) regs->iaoq[1]);
printk("%s RP(r2): %pS\n", level, (void *) regs->gr[2]);
- parisc_show_stack(current, NULL, regs);
+ parisc_show_stack(current, regs);
}
}
@@ -185,44 +185,19 @@ static void do_show_stack(struct unwind_frame_info *info)
printk(KERN_CRIT "\n");
}
-static void parisc_show_stack(struct task_struct *task, unsigned long *sp,
+static void parisc_show_stack(struct task_struct *task,
struct pt_regs *regs)
{
struct unwind_frame_info info;
- struct task_struct *t;
- t = task ? task : current;
- if (regs) {
- unwind_frame_init(&info, t, regs);
- goto show_stack;
- }
-
- if (t == current) {
- unsigned long sp;
-
-HERE:
- asm volatile ("copy %%r30, %0" : "=r"(sp));
- {
- struct pt_regs r;
-
- memset(&r, 0, sizeof(struct pt_regs));
- r.iaoq[0] = (unsigned long)&&HERE;
- r.gr[2] = (unsigned long)__builtin_return_address(0);
- r.gr[30] = sp;
-
- unwind_frame_init(&info, current, &r);
- }
- } else {
- unwind_frame_init_from_blocked_task(&info, t);
- }
+ unwind_frame_init_task(&info, task, regs);
-show_stack:
do_show_stack(&info);
}
void show_stack(struct task_struct *t, unsigned long *sp)
{
- return parisc_show_stack(t, sp, NULL);
+ parisc_show_stack(t, NULL);
}
int is_valid_bugaddr(unsigned long iaoq)
@@ -557,7 +532,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
cpu_lpmc(5, regs);
return;
- case 6:
+ case PARISC_ITLB_TRAP:
/* Instruction TLB miss fault/Instruction page fault */
fault_address = regs->iaoq[0];
fault_space = regs->iasq[0];
diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c
index 5cdf13069dd9..f329b466e68f 100644
--- a/arch/parisc/kernel/unwind.c
+++ b/arch/parisc/kernel/unwind.c
@@ -209,6 +209,8 @@ static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int
* We have to use void * instead of a function pointer, because
* function pointers aren't a pointer to the function on 64-bit.
* Make them const so the compiler knows they live in .text
+ * Note: We could use dereference_kernel_function_descriptor()
+ * instead but we want to keep it simple here.
*/
extern void * const handle_interruption;
extern void * const ret_from_kernel_thread;
@@ -216,7 +218,7 @@ static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int
extern void * const intr_return;
extern void * const _switch_to_ret;
#ifdef CONFIG_IRQSTACKS
- extern void * const call_on_stack;
+ extern void * const _call_on_stack;
#endif /* CONFIG_IRQSTACKS */
if (pc == (unsigned long) &handle_interruption) {
@@ -251,7 +253,7 @@ static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int
}
#ifdef CONFIG_IRQSTACKS
- if (pc == (unsigned long) &call_on_stack) {
+ if (pc == (unsigned long) &_call_on_stack) {
info->prev_sp = *(unsigned long *)(info->sp - FRAME_SIZE - REG_SZ);
info->prev_ip = *(unsigned long *)(info->sp - FRAME_SIZE - RP_OFFSET);
return 1;
@@ -403,9 +405,31 @@ void unwind_frame_init_from_blocked_task(struct unwind_frame_info *info, struct
kfree(r2);
}
-void unwind_frame_init_running(struct unwind_frame_info *info, struct pt_regs *regs)
+#define get_parisc_stackpointer() ({ \
+ unsigned long sp; \
+ __asm__("copy %%r30, %0" : "=r"(sp)); \
+ (sp); \
+})
+
+void unwind_frame_init_task(struct unwind_frame_info *info,
+ struct task_struct *task, struct pt_regs *regs)
{
- unwind_frame_init(info, current, regs);
+ task = task ? task : current;
+
+ if (task == current) {
+ struct pt_regs r;
+
+ if (!regs) {
+ memset(&r, 0, sizeof(r));
+ r.iaoq[0] = _THIS_IP_;
+ r.gr[2] = _RET_IP_;
+ r.gr[30] = get_parisc_stackpointer();
+ regs = &r;
+ }
+ unwind_frame_init(info, task, &r);
+ } else {
+ unwind_frame_init_from_blocked_task(info, task);
+ }
}
int unwind_once(struct unwind_frame_info *next_frame)
@@ -442,19 +466,12 @@ int unwind_to_user(struct unwind_frame_info *info)
unsigned long return_address(unsigned int level)
{
struct unwind_frame_info info;
- struct pt_regs r;
- unsigned long sp;
/* initialize unwind info */
- asm volatile ("copy %%r30, %0" : "=r"(sp));
- memset(&r, 0, sizeof(struct pt_regs));
- r.iaoq[0] = _THIS_IP_;
- r.gr[2] = _RET_IP_;
- r.gr[30] = sp;
- unwind_frame_init(&info, current, &r);
+ unwind_frame_init_task(&info, current, NULL);
/* unwind stack */
- ++level;
+ level += 2;
do {
if (unwind_once(&info) < 0 || info.ip == 0)
return 0;
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index a80669209155..db0b6eebbfa5 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -177,6 +177,7 @@ config PPC
select HAVE_ARCH_KGDB
select HAVE_ARCH_MMAP_RND_BITS
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
+ select HAVE_ARCH_PREL32_RELOCATIONS
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
select HAVE_CBPF_JIT if !PPC64
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 1f345a0b6ba2..83a9aa3cf689 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -390,4 +390,51 @@ extern int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu);
#define SPLIT_HACK_MASK 0xff000000
#define SPLIT_HACK_OFFS 0xfb000000
+/*
+ * This packs a VCPU ID from the [0..KVM_MAX_VCPU_ID) space down to the
+ * [0..KVM_MAX_VCPUS) space, using knowledge of the guest's core stride
+ * (but not its actual threading mode, which is not available) to avoid
+ * collisions.
+ *
+ * The implementation leaves VCPU IDs from the range [0..KVM_MAX_VCPUS) (block
+ * 0) unchanged: if the guest is filling each VCORE completely then it will be
+ * using consecutive IDs and it will fill the space without any packing.
+ *
+ * For higher VCPU IDs, the packed ID is based on the VCPU ID modulo
+ * KVM_MAX_VCPUS (effectively masking off the top bits) and then an offset is
+ * added to avoid collisions.
+ *
+ * VCPU IDs in the range [KVM_MAX_VCPUS..(KVM_MAX_VCPUS*2)) (block 1) are only
+ * possible if the guest is leaving at least 1/2 of each VCORE empty, so IDs
+ * can be safely packed into the second half of each VCORE by adding an offset
+ * of (stride / 2).
+ *
+ * Similarly, if VCPU IDs in the range [(KVM_MAX_VCPUS*2)..(KVM_MAX_VCPUS*4))
+ * (blocks 2 and 3) are seen, the guest must be leaving at least 3/4 of each
+ * VCORE empty so packed IDs can be offset by (stride / 4) and (stride * 3 / 4).
+ *
+ * Finally, VCPU IDs from blocks 5..7 will only be seen if the guest is using a
+ * stride of 8 and 1 thread per core so the remaining offsets of 1, 5, 3 and 7
+ * must be free to use.
+ *
+ * (The offsets for each block are stored in block_offsets[], indexed by the
+ * block number if the stride is 8. For cases where the guest's stride is less
+ * than 8, we can re-use the block_offsets array by multiplying the block
+ * number by (MAX_SMT_THREADS / stride) to reach the correct entry.)
+ */
+static inline u32 kvmppc_pack_vcpu_id(struct kvm *kvm, u32 id)
+{
+ const int block_offsets[MAX_SMT_THREADS] = {0, 4, 2, 6, 1, 5, 3, 7};
+ int stride = kvm->arch.emul_smt_mode;
+ int block = (id / KVM_MAX_VCPUS) * (MAX_SMT_THREADS / stride);
+ u32 packed_id;
+
+ if (WARN_ONCE(block >= MAX_SMT_THREADS, "VCPU ID too large to pack"))
+ return 0;
+ packed_id = (id % KVM_MAX_VCPUS) + block_offsets[block];
+ if (WARN_ONCE(packed_id >= KVM_MAX_VCPUS, "VCPU ID packing failed"))
+ return 0;
+ return packed_id;
+}
+
#endif /* __ASM_KVM_BOOK3S_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index fa4efa7e88f7..906bcbdfd2a1 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -42,7 +42,14 @@
#define KVM_USER_MEM_SLOTS 512
#include <asm/cputhreads.h>
-#define KVM_MAX_VCPU_ID (threads_per_subcore * KVM_MAX_VCORES)
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+#include <asm/kvm_book3s_asm.h> /* for MAX_SMT_THREADS */
+#define KVM_MAX_VCPU_ID (MAX_SMT_THREADS * KVM_MAX_VCORES)
+
+#else
+#define KVM_MAX_VCPU_ID KVM_MAX_VCPUS
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
#define __KVM_HAVE_ARCH_INTC_INITIALIZED
@@ -672,7 +679,7 @@ struct kvm_vcpu_arch {
gva_t vaddr_accessed;
pgd_t *pgdir;
- u8 io_gpr; /* GPR used as IO source/target */
+ u16 io_gpr; /* GPR used as IO source/target */
u8 mmio_host_swabbed;
u8 mmio_sign_extend;
/* conversion between single and double precision */
@@ -688,7 +695,6 @@ struct kvm_vcpu_arch {
*/
u8 mmio_vsx_copy_nums;
u8 mmio_vsx_offset;
- u8 mmio_vsx_tx_sx_enabled;
u8 mmio_vmx_copy_nums;
u8 mmio_vmx_offset;
u8 mmio_copy_type;
@@ -801,14 +807,14 @@ struct kvm_vcpu_arch {
#define KVMPPC_VCPU_BUSY_IN_HOST 2
/* Values for vcpu->arch.io_gpr */
-#define KVM_MMIO_REG_MASK 0x001f
-#define KVM_MMIO_REG_EXT_MASK 0xffe0
+#define KVM_MMIO_REG_MASK 0x003f
+#define KVM_MMIO_REG_EXT_MASK 0xffc0
#define KVM_MMIO_REG_GPR 0x0000
-#define KVM_MMIO_REG_FPR 0x0020
-#define KVM_MMIO_REG_QPR 0x0040
-#define KVM_MMIO_REG_FQPR 0x0060
-#define KVM_MMIO_REG_VSX 0x0080
-#define KVM_MMIO_REG_VMX 0x00c0
+#define KVM_MMIO_REG_FPR 0x0040
+#define KVM_MMIO_REG_QPR 0x0080
+#define KVM_MMIO_REG_FQPR 0x00c0
+#define KVM_MMIO_REG_VSX 0x0100
+#define KVM_MMIO_REG_VMX 0x0180
#define __KVM_HAVE_ARCH_WQP
#define __KVM_HAVE_CREATE_DEVICE
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 486b7c83b8c5..e5b314ed054e 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -163,7 +163,7 @@
#define PSSCR_ESL 0x00200000 /* Enable State Loss */
#define PSSCR_SD 0x00400000 /* Status Disable */
#define PSSCR_PLS 0xf000000000000000 /* Power-saving Level Status */
-#define PSSCR_GUEST_VIS 0xf0000000000003ff /* Guest-visible PSSCR fields */
+#define PSSCR_GUEST_VIS 0xf0000000000003ffUL /* Guest-visible PSSCR fields */
#define PSSCR_FAKE_SUSPEND 0x00000400 /* Fake-suspend bit (P9 DD2.2) */
#define PSSCR_FAKE_SUSPEND_LG 10 /* Fake-suspend bit position */
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index e8afdd4f4814..9a3f2646ecc7 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -179,7 +179,7 @@ extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
if ((tbltmp->it_page_shift <= stt->page_shift) &&
(tbltmp->it_offset << tbltmp->it_page_shift ==
stt->offset << stt->page_shift) &&
- (tbltmp->it_size << tbltmp->it_page_shift ==
+ (tbltmp->it_size << tbltmp->it_page_shift >=
stt->size << stt->page_shift)) {
/*
* Reference the table to avoid races with
@@ -295,7 +295,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
{
struct kvmppc_spapr_tce_table *stt = NULL;
struct kvmppc_spapr_tce_table *siter;
- unsigned long npages, size;
+ unsigned long npages, size = args->size;
int ret = -ENOMEM;
int i;
@@ -303,7 +303,6 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
(args->offset + args->size > (ULLONG_MAX >> args->page_shift)))
return -EINVAL;
- size = _ALIGN_UP(args->size, PAGE_SIZE >> 3);
npages = kvmppc_tce_pages(size);
ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true);
if (ret)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 96f1cc6c97b7..574fc1dcb2bf 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -127,14 +127,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
* and SPURR count and should be set according to the number of
* online threads in the vcore being run.
*/
-#define RWMR_RPA_P8_1THREAD 0x164520C62609AECA
-#define RWMR_RPA_P8_2THREAD 0x7FFF2908450D8DA9
-#define RWMR_RPA_P8_3THREAD 0x164520C62609AECA
-#define RWMR_RPA_P8_4THREAD 0x199A421245058DA9
-#define RWMR_RPA_P8_5THREAD 0x164520C62609AECA
-#define RWMR_RPA_P8_6THREAD 0x164520C62609AECA
-#define RWMR_RPA_P8_7THREAD 0x164520C62609AECA
-#define RWMR_RPA_P8_8THREAD 0x164520C62609AECA
+#define RWMR_RPA_P8_1THREAD 0x164520C62609AECAUL
+#define RWMR_RPA_P8_2THREAD 0x7FFF2908450D8DA9UL
+#define RWMR_RPA_P8_3THREAD 0x164520C62609AECAUL
+#define RWMR_RPA_P8_4THREAD 0x199A421245058DA9UL
+#define RWMR_RPA_P8_5THREAD 0x164520C62609AECAUL
+#define RWMR_RPA_P8_6THREAD 0x164520C62609AECAUL
+#define RWMR_RPA_P8_7THREAD 0x164520C62609AECAUL
+#define RWMR_RPA_P8_8THREAD 0x164520C62609AECAUL
static unsigned long p8_rwmr_values[MAX_SMT_THREADS + 1] = {
RWMR_RPA_P8_1THREAD,
@@ -1807,7 +1807,7 @@ static int threads_per_vcore(struct kvm *kvm)
return threads_per_subcore;
}
-static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
+static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int id)
{
struct kvmppc_vcore *vcore;
@@ -1821,7 +1821,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
init_swait_queue_head(&vcore->wq);
vcore->preempt_tb = TB_NIL;
vcore->lpcr = kvm->arch.lpcr;
- vcore->first_vcpuid = core * kvm->arch.smt_mode;
+ vcore->first_vcpuid = id;
vcore->kvm = kvm;
INIT_LIST_HEAD(&vcore->preempt_list);
@@ -2037,12 +2037,26 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
mutex_lock(&kvm->lock);
vcore = NULL;
err = -EINVAL;
- core = id / kvm->arch.smt_mode;
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ if (id >= (KVM_MAX_VCPUS * kvm->arch.emul_smt_mode)) {
+ pr_devel("KVM: VCPU ID too high\n");
+ core = KVM_MAX_VCORES;
+ } else {
+ BUG_ON(kvm->arch.smt_mode != 1);
+ core = kvmppc_pack_vcpu_id(kvm, id);
+ }
+ } else {
+ core = id / kvm->arch.smt_mode;
+ }
if (core < KVM_MAX_VCORES) {
vcore = kvm->arch.vcores[core];
- if (!vcore) {
+ if (vcore && cpu_has_feature(CPU_FTR_ARCH_300)) {
+ pr_devel("KVM: collision on id %u", id);
+ vcore = NULL;
+ } else if (!vcore) {
err = -ENOMEM;
- vcore = kvmppc_vcore_create(kvm, core);
+ vcore = kvmppc_vcore_create(kvm,
+ id & ~(kvm->arch.smt_mode - 1));
kvm->arch.vcores[core] = vcore;
kvm->arch.online_vcores++;
}
@@ -4550,6 +4564,8 @@ static int kvmppc_book3s_init_hv(void)
pr_err("KVM-HV: Cannot determine method for accessing XICS\n");
return -ENODEV;
}
+ /* presence of intc confirmed - node can be dropped again */
+ of_node_put(np);
}
#endif
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index f9818d7d3381..126f02b3ffb8 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -317,6 +317,11 @@ static int xive_select_target(struct kvm *kvm, u32 *server, u8 prio)
return -EBUSY;
}
+static u32 xive_vp(struct kvmppc_xive *xive, u32 server)
+{
+ return xive->vp_base + kvmppc_pack_vcpu_id(xive->kvm, server);
+}
+
static u8 xive_lock_and_mask(struct kvmppc_xive *xive,
struct kvmppc_xive_src_block *sb,
struct kvmppc_xive_irq_state *state)
@@ -362,7 +367,7 @@ static u8 xive_lock_and_mask(struct kvmppc_xive *xive,
*/
if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) {
xive_native_configure_irq(hw_num,
- xive->vp_base + state->act_server,
+ xive_vp(xive, state->act_server),
MASKED, state->number);
/* set old_p so we can track if an H_EOI was done */
state->old_p = true;
@@ -418,7 +423,7 @@ static void xive_finish_unmask(struct kvmppc_xive *xive,
*/
if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) {
xive_native_configure_irq(hw_num,
- xive->vp_base + state->act_server,
+ xive_vp(xive, state->act_server),
state->act_priority, state->number);
/* If an EOI is needed, do it here */
if (!state->old_p)
@@ -495,7 +500,7 @@ static int xive_target_interrupt(struct kvm *kvm,
kvmppc_xive_select_irq(state, &hw_num, NULL);
return xive_native_configure_irq(hw_num,
- xive->vp_base + server,
+ xive_vp(xive, server),
prio, state->number);
}
@@ -883,7 +888,7 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
* which is fine for a never started interrupt.
*/
xive_native_configure_irq(hw_irq,
- xive->vp_base + state->act_server,
+ xive_vp(xive, state->act_server),
state->act_priority, state->number);
/*
@@ -959,7 +964,7 @@ int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
/* Reconfigure the IPI */
xive_native_configure_irq(state->ipi_number,
- xive->vp_base + state->act_server,
+ xive_vp(xive, state->act_server),
state->act_priority, state->number);
/*
@@ -1084,7 +1089,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
pr_devel("Duplicate !\n");
return -EEXIST;
}
- if (cpu >= KVM_MAX_VCPUS) {
+ if (cpu >= (KVM_MAX_VCPUS * vcpu->kvm->arch.emul_smt_mode)) {
pr_devel("Out of bounds !\n");
return -EINVAL;
}
@@ -1098,7 +1103,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
xc->xive = xive;
xc->vcpu = vcpu;
xc->server_num = cpu;
- xc->vp_id = xive->vp_base + cpu;
+ xc->vp_id = xive_vp(xive, cpu);
xc->mfrr = 0xff;
xc->valid = true;
diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c
index afde788be141..75dce1ef3bc8 100644
--- a/arch/powerpc/kvm/emulate_loadstore.c
+++ b/arch/powerpc/kvm/emulate_loadstore.c
@@ -106,7 +106,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
* if mmio_vsx_tx_sx_enabled == 1, copy data between
* VSR[32..63] and memory
*/
- vcpu->arch.mmio_vsx_tx_sx_enabled = get_tx_or_sx(inst);
vcpu->arch.mmio_vsx_copy_nums = 0;
vcpu->arch.mmio_vsx_offset = 0;
vcpu->arch.mmio_copy_type = KVMPPC_VSX_COPY_NONE;
@@ -242,8 +241,8 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
}
emulated = kvmppc_handle_vsx_load(run, vcpu,
- KVM_MMIO_REG_VSX | (op.reg & 0x1f),
- io_size_each, 1, op.type & SIGNEXT);
+ KVM_MMIO_REG_VSX|op.reg, io_size_each,
+ 1, op.type & SIGNEXT);
break;
}
#endif
@@ -363,7 +362,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
}
emulated = kvmppc_handle_vsx_store(run, vcpu,
- op.reg & 0x1f, io_size_each, 1);
+ op.reg, io_size_each, 1);
break;
}
#endif
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 3ccc386b380d..eba5756d5b41 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -879,10 +879,10 @@ static inline void kvmppc_set_vsr_dword(struct kvm_vcpu *vcpu,
if (offset == -1)
return;
- if (vcpu->arch.mmio_vsx_tx_sx_enabled) {
- val.vval = VCPU_VSX_VR(vcpu, index);
+ if (index >= 32) {
+ val.vval = VCPU_VSX_VR(vcpu, index - 32);
val.vsxval[offset] = gpr;
- VCPU_VSX_VR(vcpu, index) = val.vval;
+ VCPU_VSX_VR(vcpu, index - 32) = val.vval;
} else {
VCPU_VSX_FPR(vcpu, index, offset) = gpr;
}
@@ -894,11 +894,11 @@ static inline void kvmppc_set_vsr_dword_dump(struct kvm_vcpu *vcpu,
union kvmppc_one_reg val;
int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK;
- if (vcpu->arch.mmio_vsx_tx_sx_enabled) {
- val.vval = VCPU_VSX_VR(vcpu, index);
+ if (index >= 32) {
+ val.vval = VCPU_VSX_VR(vcpu, index - 32);
val.vsxval[0] = gpr;
val.vsxval[1] = gpr;
- VCPU_VSX_VR(vcpu, index) = val.vval;
+ VCPU_VSX_VR(vcpu, index - 32) = val.vval;
} else {
VCPU_VSX_FPR(vcpu, index, 0) = gpr;
VCPU_VSX_FPR(vcpu, index, 1) = gpr;
@@ -911,12 +911,12 @@ static inline void kvmppc_set_vsr_word_dump(struct kvm_vcpu *vcpu,
union kvmppc_one_reg val;
int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK;
- if (vcpu->arch.mmio_vsx_tx_sx_enabled) {
+ if (index >= 32) {
val.vsx32val[0] = gpr;
val.vsx32val[1] = gpr;
val.vsx32val[2] = gpr;
val.vsx32val[3] = gpr;
- VCPU_VSX_VR(vcpu, index) = val.vval;
+ VCPU_VSX_VR(vcpu, index - 32) = val.vval;
} else {
val.vsx32val[0] = gpr;
val.vsx32val[1] = gpr;
@@ -936,10 +936,10 @@ static inline void kvmppc_set_vsr_word(struct kvm_vcpu *vcpu,
if (offset == -1)
return;
- if (vcpu->arch.mmio_vsx_tx_sx_enabled) {
- val.vval = VCPU_VSX_VR(vcpu, index);
+ if (index >= 32) {
+ val.vval = VCPU_VSX_VR(vcpu, index - 32);
val.vsx32val[offset] = gpr32;
- VCPU_VSX_VR(vcpu, index) = val.vval;
+ VCPU_VSX_VR(vcpu, index - 32) = val.vval;
} else {
dword_offset = offset / 2;
word_offset = offset % 2;
@@ -1360,10 +1360,10 @@ static inline int kvmppc_get_vsr_data(struct kvm_vcpu *vcpu, int rs, u64 *val)
break;
}
- if (!vcpu->arch.mmio_vsx_tx_sx_enabled) {
+ if (rs < 32) {
*val = VCPU_VSX_FPR(vcpu, rs, vsx_offset);
} else {
- reg.vval = VCPU_VSX_VR(vcpu, rs);
+ reg.vval = VCPU_VSX_VR(vcpu, rs - 32);
*val = reg.vsxval[vsx_offset];
}
break;
@@ -1377,13 +1377,13 @@ static inline int kvmppc_get_vsr_data(struct kvm_vcpu *vcpu, int rs, u64 *val)
break;
}
- if (!vcpu->arch.mmio_vsx_tx_sx_enabled) {
+ if (rs < 32) {
dword_offset = vsx_offset / 2;
word_offset = vsx_offset % 2;
reg.vsxval[0] = VCPU_VSX_FPR(vcpu, rs, dword_offset);
*val = reg.vsx32val[word_offset];
} else {
- reg.vval = VCPU_VSX_VR(vcpu, rs);
+ reg.vval = VCPU_VSX_VR(vcpu, rs - 32);
*val = reg.vsx32val[vsx_offset];
}
break;
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index a6afa60074cb..054b29c9a533 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -135,6 +135,7 @@ config S390
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_REGS
select HAVE_EFFICIENT_UNALIGNED_ACCESS
+ select HAVE_FENTRY
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_TRACER
@@ -157,6 +158,7 @@ config S390
select HAVE_MEMBLOCK_NODE_MAP
select HAVE_MEMBLOCK_PHYS_MAP
select HAVE_MOD_ARCH_SPECIFIC
+ select HAVE_NOP_MCOUNT
select HAVE_OPROFILE
select HAVE_PERF_EVENTS
select HAVE_REGS_AND_STACK_ACCESS_API
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index eee6703093c3..ba6d122526fb 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -96,13 +96,15 @@ ifdef CONFIG_EXPOLINE
endif
ifdef CONFIG_FUNCTION_TRACER
-# make use of hotpatch feature if the compiler supports it
-cc_hotpatch := -mhotpatch=0,3
-ifeq ($(call cc-option-yn,$(cc_hotpatch)),y)
-CC_FLAGS_FTRACE := $(cc_hotpatch)
-KBUILD_AFLAGS += -DCC_USING_HOTPATCH
-KBUILD_CFLAGS += -DCC_USING_HOTPATCH
-endif
+ ifeq ($(call cc-option-yn,-mfentry -mnop-mcount),n)
+ # make use of hotpatch feature if the compiler supports it
+ cc_hotpatch := -mhotpatch=0,3
+ ifeq ($(call cc-option-yn,$(cc_hotpatch)),y)
+ CC_FLAGS_FTRACE := $(cc_hotpatch)
+ KBUILD_AFLAGS += -DCC_USING_HOTPATCH
+ KBUILD_CFLAGS += -DCC_USING_HOTPATCH
+ endif
+ endif
endif
# Test CFI features of binutils
diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
index cfccc0edd00d..8ea270fdc7fb 100644
--- a/arch/s390/include/asm/ftrace.h
+++ b/arch/s390/include/asm/ftrace.h
@@ -4,7 +4,7 @@
#define ARCH_SUPPORTS_FTRACE_OPS 1
-#ifdef CC_USING_HOTPATCH
+#if defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT)
#define MCOUNT_INSN_SIZE 6
#else
#define MCOUNT_INSN_SIZE 24
@@ -42,7 +42,7 @@ struct ftrace_insn {
static inline void ftrace_generate_nop_insn(struct ftrace_insn *insn)
{
#ifdef CONFIG_FUNCTION_TRACER
-#ifdef CC_USING_HOTPATCH
+#if defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT)
/* brcl 0,0 */
insn->opc = 0xc004;
insn->disp = 0;
@@ -57,7 +57,7 @@ static inline void ftrace_generate_nop_insn(struct ftrace_insn *insn)
static inline int is_ftrace_nop(struct ftrace_insn *insn)
{
#ifdef CONFIG_FUNCTION_TRACER
-#ifdef CC_USING_HOTPATCH
+#if defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT)
if (insn->disp == 0)
return 1;
#else
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index a2188e309bd6..29c940bf8506 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -269,6 +269,7 @@ struct kvm_s390_sie_block {
__u8 reserved1c0[8]; /* 0x01c0 */
#define ECD_HOSTREGMGMT 0x20000000
#define ECD_MEF 0x08000000
+#define ECD_ETOKENF 0x02000000
__u32 ecd; /* 0x01c8 */
__u8 reserved1cc[18]; /* 0x01cc */
__u64 pp; /* 0x01de */
@@ -655,6 +656,7 @@ struct kvm_vcpu_arch {
seqcount_t cputm_seqcount;
__u64 cputm_start;
bool gs_enabled;
+ bool skey_enabled;
};
struct kvm_vm_stat {
@@ -793,12 +795,6 @@ struct kvm_s390_vsie {
struct page *pages[KVM_MAX_VCPUS];
};
-struct kvm_s390_migration_state {
- unsigned long bitmap_size; /* in bits (number of guest pages) */
- atomic64_t dirty_pages; /* number of dirty pages */
- unsigned long *pgste_bitmap;
-};
-
struct kvm_arch{
void *sca;
int use_esca;
@@ -828,7 +824,8 @@ struct kvm_arch{
struct kvm_s390_vsie vsie;
u8 epdx;
u64 epoch;
- struct kvm_s390_migration_state *migration_state;
+ int migration_mode;
+ atomic64_t cmma_dirty_pages;
/* subset of available cpu features enabled by user space */
DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
struct kvm_s390_gisa *gisa;
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 4cdaa55fabfe..9a50f02b9894 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -4,7 +4,7 @@
/*
* KVM s390 specific structures and definitions
*
- * Copyright IBM Corp. 2008
+ * Copyright IBM Corp. 2008, 2018
*
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
@@ -225,6 +225,7 @@ struct kvm_guest_debug_arch {
#define KVM_SYNC_FPRS (1UL << 8)
#define KVM_SYNC_GSCB (1UL << 9)
#define KVM_SYNC_BPBC (1UL << 10)
+#define KVM_SYNC_ETOKEN (1UL << 11)
/* length and alignment of the sdnx as a power of two */
#define SDNXC 8
#define SDNXL (1UL << SDNXC)
@@ -258,6 +259,8 @@ struct kvm_sync_regs {
struct {
__u64 reserved1[2];
__u64 gscb[4];
+ __u64 etoken;
+ __u64 etoken_extension;
};
};
};
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index dc76d813e420..84be7f02d0c2 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -61,7 +61,7 @@ unsigned long ftrace_plt;
static inline void ftrace_generate_orig_insn(struct ftrace_insn *insn)
{
-#ifdef CC_USING_HOTPATCH
+#if defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT)
/* brcl 0,0 */
insn->opc = 0xc004;
insn->disp = 0;
diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
index 27110f3294ed..e93fbf02490c 100644
--- a/arch/s390/kernel/mcount.S
+++ b/arch/s390/kernel/mcount.S
@@ -35,7 +35,7 @@ ENTRY(ftrace_caller)
.globl ftrace_regs_caller
.set ftrace_regs_caller,ftrace_caller
lgr %r1,%r15
-#ifndef CC_USING_HOTPATCH
+#if !(defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT))
aghi %r0,MCOUNT_RETURN_FIXUP
#endif
aghi %r15,-STACK_FRAME_SIZE
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index cb198d4a6dca..5c53e977be62 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -665,7 +665,7 @@ static void cpumsf_output_event_pid(struct perf_event *event,
goto out;
/* Update the process ID (see also kernel/events/core.c) */
- data->tid_entry.pid = cpumsf_pid_type(event, pid, __PIDTYPE_TGID);
+ data->tid_entry.pid = cpumsf_pid_type(event, pid, PIDTYPE_TGID);
data->tid_entry.tid = cpumsf_pid_type(event, pid, PIDTYPE_PID);
perf_output_sample(&handle, &header, data, event);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index f9d90337e64a..91ad4a9425c0 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -906,54 +906,37 @@ static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
*/
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
- struct kvm_s390_migration_state *mgs;
struct kvm_memory_slot *ms;
- /* should be the only one */
struct kvm_memslots *slots;
- unsigned long ram_pages;
+ unsigned long ram_pages = 0;
int slotnr;
/* migration mode already enabled */
- if (kvm->arch.migration_state)
+ if (kvm->arch.migration_mode)
return 0;
-
slots = kvm_memslots(kvm);
if (!slots || !slots->used_slots)
return -EINVAL;
- mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
- if (!mgs)
- return -ENOMEM;
- kvm->arch.migration_state = mgs;
-
- if (kvm->arch.use_cmma) {
+ if (!kvm->arch.use_cmma) {
+ kvm->arch.migration_mode = 1;
+ return 0;
+ }
+ /* mark all the pages in active slots as dirty */
+ for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
+ ms = slots->memslots + slotnr;
/*
- * Get the first slot. They are reverse sorted by base_gfn, so
- * the first slot is also the one at the end of the address
- * space. We have verified above that at least one slot is
- * present.
+ * The second half of the bitmap is only used on x86,
+ * and would be wasted otherwise, so we put it to good
+ * use here to keep track of the state of the storage
+ * attributes.
*/
- ms = slots->memslots;
- /* round up so we only use full longs */
- ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
- /* allocate enough bytes to store all the bits */
- mgs->pgste_bitmap = vmalloc(ram_pages / 8);
- if (!mgs->pgste_bitmap) {
- kfree(mgs);
- kvm->arch.migration_state = NULL;
- return -ENOMEM;
- }
-
- mgs->bitmap_size = ram_pages;
- atomic64_set(&mgs->dirty_pages, ram_pages);
- /* mark all the pages in active slots as dirty */
- for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
- ms = slots->memslots + slotnr;
- bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
- }
-
- kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
+ memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
+ ram_pages += ms->npages;
}
+ atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
+ kvm->arch.migration_mode = 1;
+ kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
return 0;
}
@@ -963,21 +946,12 @@ static int kvm_s390_vm_start_migration(struct kvm *kvm)
*/
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
- struct kvm_s390_migration_state *mgs;
-
/* migration mode already disabled */
- if (!kvm->arch.migration_state)
+ if (!kvm->arch.migration_mode)
return 0;
- mgs = kvm->arch.migration_state;
- kvm->arch.migration_state = NULL;
-
- if (kvm->arch.use_cmma) {
+ kvm->arch.migration_mode = 0;
+ if (kvm->arch.use_cmma)
kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
- /* We have to wait for the essa emulation to finish */
- synchronize_srcu(&kvm->srcu);
- vfree(mgs->pgste_bitmap);
- }
- kfree(mgs);
return 0;
}
@@ -1005,7 +979,7 @@ static int kvm_s390_vm_set_migration(struct kvm *kvm,
static int kvm_s390_vm_get_migration(struct kvm *kvm,
struct kvm_device_attr *attr)
{
- u64 mig = (kvm->arch.migration_state != NULL);
+ u64 mig = kvm->arch.migration_mode;
if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
return -ENXIO;
@@ -1653,6 +1627,134 @@ out:
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
/*
+ * Similar to gfn_to_memslot, but returns the index of a memslot also when the
+ * address falls in a hole. In that case the index of one of the memslots
+ * bordering the hole is returned.
+ */
+static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
+{
+ int start = 0, end = slots->used_slots;
+ int slot = atomic_read(&slots->lru_slot);
+ struct kvm_memory_slot *memslots = slots->memslots;
+
+ if (gfn >= memslots[slot].base_gfn &&
+ gfn < memslots[slot].base_gfn + memslots[slot].npages)
+ return slot;
+
+ while (start < end) {
+ slot = start + (end - start) / 2;
+
+ if (gfn >= memslots[slot].base_gfn)
+ end = slot;
+ else
+ start = slot + 1;
+ }
+
+ if (gfn >= memslots[start].base_gfn &&
+ gfn < memslots[start].base_gfn + memslots[start].npages) {
+ atomic_set(&slots->lru_slot, start);
+ }
+
+ return start;
+}
+
+static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
+ u8 *res, unsigned long bufsize)
+{
+ unsigned long pgstev, hva, cur_gfn = args->start_gfn;
+
+ args->count = 0;
+ while (args->count < bufsize) {
+ hva = gfn_to_hva(kvm, cur_gfn);
+ /*
+ * We return an error if the first value was invalid, but we
+ * return successfully if at least one value was copied.
+ */
+ if (kvm_is_error_hva(hva))
+ return args->count ? 0 : -EFAULT;
+ if (get_pgste(kvm->mm, hva, &pgstev) < 0)
+ pgstev = 0;
+ res[args->count++] = (pgstev >> 24) & 0x43;
+ cur_gfn++;
+ }
+
+ return 0;
+}
+
+static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
+ unsigned long cur_gfn)
+{
+ int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
+ struct kvm_memory_slot *ms = slots->memslots + slotidx;
+ unsigned long ofs = cur_gfn - ms->base_gfn;
+
+ if (ms->base_gfn + ms->npages <= cur_gfn) {
+ slotidx--;
+ /* If we are above the highest slot, wrap around */
+ if (slotidx < 0)
+ slotidx = slots->used_slots - 1;
+
+ ms = slots->memslots + slotidx;
+ ofs = 0;
+ }
+ ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
+ while ((slotidx > 0) && (ofs >= ms->npages)) {
+ slotidx--;
+ ms = slots->memslots + slotidx;
+ ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
+ }
+ return ms->base_gfn + ofs;
+}
+
+static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
+ u8 *res, unsigned long bufsize)
+{
+ unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
+ struct kvm_memslots *slots = kvm_memslots(kvm);
+ struct kvm_memory_slot *ms;
+
+ cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
+ ms = gfn_to_memslot(kvm, cur_gfn);
+ args->count = 0;
+ args->start_gfn = cur_gfn;
+ if (!ms)
+ return 0;
+ next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
+ mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
+
+ while (args->count < bufsize) {
+ hva = gfn_to_hva(kvm, cur_gfn);
+ if (kvm_is_error_hva(hva))
+ return 0;
+ /* Decrement only if we actually flipped the bit to 0 */
+ if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
+ atomic64_dec(&kvm->arch.cmma_dirty_pages);
+ if (get_pgste(kvm->mm, hva, &pgstev) < 0)
+ pgstev = 0;
+ /* Save the value */
+ res[args->count++] = (pgstev >> 24) & 0x43;
+ /* If the next bit is too far away, stop. */
+ if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
+ return 0;
+ /* If we reached the previous "next", find the next one */
+ if (cur_gfn == next_gfn)
+ next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
+ /* Reached the end of memory or of the buffer, stop */
+ if ((next_gfn >= mem_end) ||
+ (next_gfn - args->start_gfn >= bufsize))
+ return 0;
+ cur_gfn++;
+ /* Reached the end of the current memslot, take the next one. */
+ if (cur_gfn - ms->base_gfn >= ms->npages) {
+ ms = gfn_to_memslot(kvm, cur_gfn);
+ if (!ms)
+ return 0;
+ }
+ }
+ return 0;
+}
+
+/*
* This function searches for the next page with dirty CMMA attributes, and
* saves the attributes in the buffer up to either the end of the buffer or
* until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
@@ -1663,22 +1765,18 @@ out:
static int kvm_s390_get_cmma_bits(struct kvm *kvm,
struct kvm_s390_cmma_log *args)
{
- struct kvm_s390_migration_state *s = kvm->arch.migration_state;
- unsigned long bufsize, hva, pgstev, i, next, cur;
- int srcu_idx, peek, r = 0, rr;
- u8 *res;
-
- cur = args->start_gfn;
- i = next = pgstev = 0;
+ unsigned long bufsize;
+ int srcu_idx, peek, ret;
+ u8 *values;
- if (unlikely(!kvm->arch.use_cmma))
+ if (!kvm->arch.use_cmma)
return -ENXIO;
/* Invalid/unsupported flags were specified */
if (args->flags & ~KVM_S390_CMMA_PEEK)
return -EINVAL;
/* Migration mode query, and we are not doing a migration */
peek = !!(args->flags & KVM_S390_CMMA_PEEK);
- if (!peek && !s)
+ if (!peek && !kvm->arch.migration_mode)
return -EINVAL;
/* CMMA is disabled or was not used, or the buffer has length zero */
bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
@@ -1686,74 +1784,35 @@ static int kvm_s390_get_cmma_bits(struct kvm *kvm,
memset(args, 0, sizeof(*args));
return 0;
}
-
- if (!peek) {
- /* We are not peeking, and there are no dirty pages */
- if (!atomic64_read(&s->dirty_pages)) {
- memset(args, 0, sizeof(*args));
- return 0;
- }
- cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
- args->start_gfn);
- if (cur >= s->bitmap_size) /* nothing found, loop back */
- cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
- if (cur >= s->bitmap_size) { /* again! (very unlikely) */
- memset(args, 0, sizeof(*args));
- return 0;
- }
- next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
+ /* We are not peeking, and there are no dirty pages */
+ if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
+ memset(args, 0, sizeof(*args));
+ return 0;
}
- res = vmalloc(bufsize);
- if (!res)
+ values = vmalloc(bufsize);
+ if (!values)
return -ENOMEM;
- args->start_gfn = cur;
-
down_read(&kvm->mm->mmap_sem);
srcu_idx = srcu_read_lock(&kvm->srcu);
- while (i < bufsize) {
- hva = gfn_to_hva(kvm, cur);
- if (kvm_is_error_hva(hva)) {
- r = -EFAULT;
- break;
- }
- /* decrement only if we actually flipped the bit to 0 */
- if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
- atomic64_dec(&s->dirty_pages);
- r = get_pgste(kvm->mm, hva, &pgstev);
- if (r < 0)
- pgstev = 0;
- /* save the value */
- res[i++] = (pgstev >> 24) & 0x43;
- /*
- * if the next bit is too far away, stop.
- * if we reached the previous "next", find the next one
- */
- if (!peek) {
- if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
- break;
- if (cur == next)
- next = find_next_bit(s->pgste_bitmap,
- s->bitmap_size, cur + 1);
- /* reached the end of the bitmap or of the buffer, stop */
- if ((next >= s->bitmap_size) ||
- (next >= args->start_gfn + bufsize))
- break;
- }
- cur++;
- }
+ if (peek)
+ ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
+ else
+ ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
srcu_read_unlock(&kvm->srcu, srcu_idx);
up_read(&kvm->mm->mmap_sem);
- args->count = i;
- args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
- rr = copy_to_user((void __user *)args->values, res, args->count);
- if (rr)
- r = -EFAULT;
+ if (kvm->arch.migration_mode)
+ args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
+ else
+ args->remaining = 0;
- vfree(res);
- return r;
+ if (copy_to_user((void __user *)args->values, values, args->count))
+ ret = -EFAULT;
+
+ vfree(values);
+ return ret;
}
/*
@@ -2192,10 +2251,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm_s390_destroy_adapters(kvm);
kvm_s390_clear_float_irqs(kvm);
kvm_s390_vsie_destroy(kvm);
- if (kvm->arch.migration_state) {
- vfree(kvm->arch.migration_state->pgste_bitmap);
- kfree(kvm->arch.migration_state);
- }
KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
@@ -2353,6 +2408,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
if (test_kvm_facility(vcpu->kvm, 133))
vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
+ if (test_kvm_facility(vcpu->kvm, 156))
+ vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
/* fprs can be synchronized via vrs, even if the guest has no vx. With
* MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
*/
@@ -2602,7 +2659,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
}
if (test_kvm_facility(vcpu->kvm, 139))
vcpu->arch.sie_block->ecd |= ECD_MEF;
-
+ if (test_kvm_facility(vcpu->kvm, 156))
+ vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
if (vcpu->arch.sie_block->gd) {
vcpu->arch.sie_block->eca |= ECA_AIV;
VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
@@ -3520,6 +3578,7 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
}
preempt_enable();
}
+ /* SIE will load etoken directly from SDNX and therefore kvm_run */
kvm_run->kvm_dirty_regs = 0;
}
@@ -3559,7 +3618,7 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
__ctl_clear_bit(2, 4);
vcpu->arch.host_gscb = NULL;
}
-
+ /* SIE will save etoken directly into SDNX and therefore kvm_run */
}
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index cfc5a62329f6..d68f10441a16 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -205,13 +205,10 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
int kvm_s390_skey_check_enable(struct kvm_vcpu *vcpu)
{
int rc;
- struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block;
trace_kvm_s390_skey_related_inst(vcpu);
/* Already enabled? */
- if (vcpu->kvm->arch.use_skf &&
- !(sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)) &&
- !kvm_s390_test_cpuflags(vcpu, CPUSTAT_KSS))
+ if (vcpu->arch.skey_enabled)
return 0;
rc = s390_enable_skey();
@@ -222,9 +219,10 @@ int kvm_s390_skey_check_enable(struct kvm_vcpu *vcpu)
if (kvm_s390_test_cpuflags(vcpu, CPUSTAT_KSS))
kvm_s390_clear_cpuflags(vcpu, CPUSTAT_KSS);
if (!vcpu->kvm->arch.use_skf)
- sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
+ vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
else
- sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
+ vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
+ vcpu->arch.skey_enabled = true;
return 0;
}
@@ -987,7 +985,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
- if (clear_user((void __user *)vmaddr, PAGE_SIZE))
+ if (kvm_clear_guest(vcpu->kvm, start, PAGE_SIZE))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
}
@@ -1024,9 +1022,11 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
return 0;
}
-static inline int do_essa(struct kvm_vcpu *vcpu, const int orc)
+/*
+ * Must be called with relevant read locks held (kvm->mm->mmap_sem, kvm->srcu)
+ */
+static inline int __do_essa(struct kvm_vcpu *vcpu, const int orc)
{
- struct kvm_s390_migration_state *ms = vcpu->kvm->arch.migration_state;
int r1, r2, nappended, entries;
unsigned long gfn, hva, res, pgstev, ptev;
unsigned long *cbrlo;
@@ -1076,10 +1076,12 @@ static inline int do_essa(struct kvm_vcpu *vcpu, const int orc)
cbrlo[entries] = gfn << PAGE_SHIFT;
}
- if (orc && gfn < ms->bitmap_size) {
- /* increment only if we are really flipping the bit to 1 */
- if (!test_and_set_bit(gfn, ms->pgste_bitmap))
- atomic64_inc(&ms->dirty_pages);
+ if (orc) {
+ struct kvm_memory_slot *ms = gfn_to_memslot(vcpu->kvm, gfn);
+
+ /* Increment only if we are really flipping the bit */
+ if (ms && !test_and_set_bit(gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
+ atomic64_inc(&vcpu->kvm->arch.cmma_dirty_pages);
}
return nappended;
@@ -1108,7 +1110,7 @@ static int handle_essa(struct kvm_vcpu *vcpu)
: ESSA_SET_STABLE_IF_RESIDENT))
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- if (likely(!vcpu->kvm->arch.migration_state)) {
+ if (!vcpu->kvm->arch.migration_mode) {
/*
* CMMA is enabled in the KVM settings, but is disabled in
* the SIE block and in the mm_context, and we are not doing
@@ -1136,10 +1138,16 @@ static int handle_essa(struct kvm_vcpu *vcpu)
/* Retry the ESSA instruction */
kvm_s390_retry_instr(vcpu);
} else {
- /* Account for the possible extra cbrl entry */
- i = do_essa(vcpu, orc);
+ int srcu_idx;
+
+ down_read(&vcpu->kvm->mm->mmap_sem);
+ srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ i = __do_essa(vcpu, orc);
+ srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
+ up_read(&vcpu->kvm->mm->mmap_sem);
if (i < 0)
return i;
+ /* Account for the possible extra cbrl entry */
entries += i;
}
vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 84c89cb9636f..63844b95c22c 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -2,7 +2,7 @@
/*
* kvm nested virtualization support for s390x
*
- * Copyright IBM Corp. 2016
+ * Copyright IBM Corp. 2016, 2018
*
* Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com>
*/
@@ -378,6 +378,10 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
if (test_kvm_facility(vcpu->kvm, 139))
scb_s->ecd |= scb_o->ecd & ECD_MEF;
+ /* etoken */
+ if (test_kvm_facility(vcpu->kvm, 156))
+ scb_s->ecd |= scb_o->ecd & ECD_ETOKENF;
+
prepare_ibc(vcpu, vsie_page);
rc = shadow_crycb(vcpu, vsie_page);
out:
@@ -627,7 +631,8 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
vsie_page->riccbd_gpa = gpa;
scb_s->riccbd = hpa;
}
- if ((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) {
+ if (((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) ||
+ (scb_s->ecd & ECD_ETOKENF)) {
unsigned long sdnxc;
gpa = READ_ONCE(scb_o->sdnxo) & ~0xfUL;
@@ -818,6 +823,8 @@ static int handle_stfle(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
* - < 0 if an error occurred
*/
static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+ __releases(vcpu->kvm->srcu)
+ __acquires(vcpu->kvm->srcu)
{
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c
index 90a8c9e84ca6..0c85aedcf9b3 100644
--- a/arch/s390/tools/gen_facilities.c
+++ b/arch/s390/tools/gen_facilities.c
@@ -4,7 +4,7 @@
* numbering scheme from the Princples of Operations: most significant bit
* has bit number 0.
*
- * Copyright IBM Corp. 2015
+ * Copyright IBM Corp. 2015, 2018
*
*/
@@ -106,6 +106,7 @@ static struct facility_def facility_defs[] = {
.name = "FACILITIES_KVM_CPUMODEL",
.bits = (int[]){
+ 156, /* etoken facility */
-1 /* END */
}
},
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 2d58c26bff9a..e6f2a38d2e61 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -45,9 +45,13 @@ config SPARC
select LOCKDEP_SMALL if LOCKDEP
select NEED_DMA_MAP_STATE
select NEED_SG_DMA_LENGTH
+ select HAVE_MEMBLOCK
+ select NO_BOOTMEM
config SPARC32
def_bool !64BIT
+ select ARCH_HAS_SYNC_DMA_FOR_CPU
+ select DMA_NONCOHERENT_OPS
select GENERIC_ATOMIC64
select CLZ_TAB
select HAVE_UID16
@@ -60,7 +64,6 @@ config SPARC64
select HAVE_KRETPROBES
select HAVE_KPROBES
select HAVE_RCU_TABLE_FREE if SMP
- select HAVE_MEMBLOCK
select HAVE_MEMBLOCK_NODE_MAP
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
select HAVE_DYNAMIC_FTRACE
@@ -79,7 +82,6 @@ config SPARC64
select IRQ_PREFLOW_FASTEOI
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select HAVE_C_RECORDMCOUNT
- select NO_BOOTMEM
select HAVE_ARCH_AUDITSYSCALL
select ARCH_SUPPORTS_ATOMIC_RMW
select HAVE_NMI
diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile
index 966a13d2b127..e32ef20de567 100644
--- a/arch/sparc/Makefile
+++ b/arch/sparc/Makefile
@@ -9,10 +9,10 @@
# Copyright (C) 1998 Jakub Jelinek (jj@ultra.linux.cz)
# We are not yet configured - so test on arch
-ifeq ($(ARCH),sparc)
- KBUILD_DEFCONFIG := sparc32_defconfig
-else
+ifeq ($(ARCH),sparc64)
KBUILD_DEFCONFIG := sparc64_defconfig
+else
+ KBUILD_DEFCONFIG := sparc32_defconfig
endif
ifeq ($(CONFIG_SPARC32),y)
diff --git a/arch/sparc/include/asm/dma-mapping.h b/arch/sparc/include/asm/dma-mapping.h
index 12ae33daf52f..e17566376934 100644
--- a/arch/sparc/include/asm/dma-mapping.h
+++ b/arch/sparc/include/asm/dma-mapping.h
@@ -7,7 +7,6 @@
#include <linux/dma-debug.h>
extern const struct dma_map_ops *dma_ops;
-extern const struct dma_map_ops pci32_dma_ops;
extern struct bus_type pci_bus_type;
@@ -15,11 +14,11 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
{
#ifdef CONFIG_SPARC_LEON
if (sparc_cpu_model == sparc_leon)
- return &pci32_dma_ops;
+ return &dma_noncoherent_ops;
#endif
#if defined(CONFIG_SPARC32) && defined(CONFIG_PCI)
if (bus == &pci_bus_type)
- return &pci32_dma_ops;
+ return &dma_noncoherent_ops;
#endif
return dma_ops;
}
diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c
index cca9134cfa7d..6799c93c9f27 100644
--- a/arch/sparc/kernel/ioport.c
+++ b/arch/sparc/kernel/ioport.c
@@ -38,6 +38,7 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/scatterlist.h>
+#include <linux/dma-noncoherent.h>
#include <linux/of_device.h>
#include <asm/io.h>
@@ -434,42 +435,41 @@ arch_initcall(sparc_register_ioport);
/* Allocate and map kernel buffer using consistent mode DMA for a device.
* hwdev should be valid struct pci_dev pointer for PCI devices.
*/
-static void *pci32_alloc_coherent(struct device *dev, size_t len,
- dma_addr_t *pba, gfp_t gfp,
- unsigned long attrs)
+void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
+ gfp_t gfp, unsigned long attrs)
{
- unsigned long len_total = PAGE_ALIGN(len);
+ unsigned long len_total = PAGE_ALIGN(size);
void *va;
struct resource *res;
int order;
- if (len == 0) {
+ if (size == 0) {
return NULL;
}
- if (len > 256*1024) { /* __get_free_pages() limit */
+ if (size > 256*1024) { /* __get_free_pages() limit */
return NULL;
}
order = get_order(len_total);
va = (void *) __get_free_pages(gfp, order);
if (va == NULL) {
- printk("pci_alloc_consistent: no %ld pages\n", len_total>>PAGE_SHIFT);
+ printk("%s: no %ld pages\n", __func__, len_total>>PAGE_SHIFT);
goto err_nopages;
}
if ((res = kzalloc(sizeof(struct resource), GFP_KERNEL)) == NULL) {
- printk("pci_alloc_consistent: no core\n");
+ printk("%s: no core\n", __func__);
goto err_nomem;
}
if (allocate_resource(&_sparc_dvma, res, len_total,
_sparc_dvma.start, _sparc_dvma.end, PAGE_SIZE, NULL, NULL) != 0) {
- printk("pci_alloc_consistent: cannot occupy 0x%lx", len_total);
+ printk("%s: cannot occupy 0x%lx", __func__, len_total);
goto err_nova;
}
srmmu_mapiorange(0, virt_to_phys(va), res->start, len_total);
- *pba = virt_to_phys(va); /* equals virt_to_bus (R.I.P.) for us. */
+ *dma_handle = virt_to_phys(va);
return (void *) res->start;
err_nova:
@@ -481,184 +481,53 @@ err_nopages:
}
/* Free and unmap a consistent DMA buffer.
- * cpu_addr is what was returned from pci_alloc_consistent,
- * size must be the same as what as passed into pci_alloc_consistent,
- * and likewise dma_addr must be the same as what *dma_addrp was set to.
+ * cpu_addr is what was returned arch_dma_alloc, size must be the same as what
+ * was passed into arch_dma_alloc, and likewise dma_addr must be the same as
+ * what *dma_ndler was set to.
*
* References to the memory and mappings associated with cpu_addr/dma_addr
* past this call are illegal.
*/
-static void pci32_free_coherent(struct device *dev, size_t n, void *p,
- dma_addr_t ba, unsigned long attrs)
+void arch_dma_free(struct device *dev, size_t size, void *cpu_addr,
+ dma_addr_t dma_addr, unsigned long attrs)
{
struct resource *res;
if ((res = lookup_resource(&_sparc_dvma,
- (unsigned long)p)) == NULL) {
- printk("pci_free_consistent: cannot free %p\n", p);
+ (unsigned long)cpu_addr)) == NULL) {
+ printk("%s: cannot free %p\n", __func__, cpu_addr);
return;
}
- if (((unsigned long)p & (PAGE_SIZE-1)) != 0) {
- printk("pci_free_consistent: unaligned va %p\n", p);
+ if (((unsigned long)cpu_addr & (PAGE_SIZE-1)) != 0) {
+ printk("%s: unaligned va %p\n", __func__, cpu_addr);
return;
}
- n = PAGE_ALIGN(n);
- if (resource_size(res) != n) {
- printk("pci_free_consistent: region 0x%lx asked 0x%lx\n",
- (long)resource_size(res), (long)n);
+ size = PAGE_ALIGN(size);
+ if (resource_size(res) != size) {
+ printk("%s: region 0x%lx asked 0x%zx\n", __func__,
+ (long)resource_size(res), size);
return;
}
- dma_make_coherent(ba, n);
- srmmu_unmapiorange((unsigned long)p, n);
+ dma_make_coherent(dma_addr, size);
+ srmmu_unmapiorange((unsigned long)cpu_addr, size);
release_resource(res);
kfree(res);
- free_pages((unsigned long)phys_to_virt(ba), get_order(n));
-}
-
-/*
- * Same as pci_map_single, but with pages.
- */
-static dma_addr_t pci32_map_page(struct device *dev, struct page *page,
- unsigned long offset, size_t size,
- enum dma_data_direction dir,
- unsigned long attrs)
-{
- /* IIep is write-through, not flushing. */
- return page_to_phys(page) + offset;
-}
-
-static void pci32_unmap_page(struct device *dev, dma_addr_t ba, size_t size,
- enum dma_data_direction dir, unsigned long attrs)
-{
- if (dir != PCI_DMA_TODEVICE && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
- dma_make_coherent(ba, PAGE_ALIGN(size));
-}
-
-/* Map a set of buffers described by scatterlist in streaming
- * mode for DMA. This is the scatter-gather version of the
- * above pci_map_single interface. Here the scatter gather list
- * elements are each tagged with the appropriate dma address
- * and length. They are obtained via sg_dma_{address,length}(SG).
- *
- * NOTE: An implementation may be able to use a smaller number of
- * DMA address/length pairs than there are SG table elements.
- * (for example via virtual mapping capabilities)
- * The routine returns the number of addr/length pairs actually
- * used, at most nents.
- *
- * Device ownership issues as mentioned above for pci_map_single are
- * the same here.
- */
-static int pci32_map_sg(struct device *device, struct scatterlist *sgl,
- int nents, enum dma_data_direction dir,
- unsigned long attrs)
-{
- struct scatterlist *sg;
- int n;
-
- /* IIep is write-through, not flushing. */
- for_each_sg(sgl, sg, nents, n) {
- sg->dma_address = sg_phys(sg);
- sg->dma_length = sg->length;
- }
- return nents;
-}
-
-/* Unmap a set of streaming mode DMA translations.
- * Again, cpu read rules concerning calls here are the same as for
- * pci_unmap_single() above.
- */
-static void pci32_unmap_sg(struct device *dev, struct scatterlist *sgl,
- int nents, enum dma_data_direction dir,
- unsigned long attrs)
-{
- struct scatterlist *sg;
- int n;
-
- if (dir != PCI_DMA_TODEVICE && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
- for_each_sg(sgl, sg, nents, n) {
- dma_make_coherent(sg_phys(sg), PAGE_ALIGN(sg->length));
- }
- }
-}
-
-/* Make physical memory consistent for a single
- * streaming mode DMA translation before or after a transfer.
- *
- * If you perform a pci_map_single() but wish to interrogate the
- * buffer using the cpu, yet do not wish to teardown the PCI dma
- * mapping, you must call this function before doing so. At the
- * next point you give the PCI dma address back to the card, you
- * must first perform a pci_dma_sync_for_device, and then the
- * device again owns the buffer.
- */
-static void pci32_sync_single_for_cpu(struct device *dev, dma_addr_t ba,
- size_t size, enum dma_data_direction dir)
-{
- if (dir != PCI_DMA_TODEVICE) {
- dma_make_coherent(ba, PAGE_ALIGN(size));
- }
-}
-
-static void pci32_sync_single_for_device(struct device *dev, dma_addr_t ba,
- size_t size, enum dma_data_direction dir)
-{
- if (dir != PCI_DMA_TODEVICE) {
- dma_make_coherent(ba, PAGE_ALIGN(size));
- }
+ free_pages((unsigned long)phys_to_virt(dma_addr), get_order(size));
}
-/* Make physical memory consistent for a set of streaming
- * mode DMA translations after a transfer.
- *
- * The same as pci_dma_sync_single_* but for a scatter-gather list,
- * same rules and usage.
- */
-static void pci32_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl,
- int nents, enum dma_data_direction dir)
-{
- struct scatterlist *sg;
- int n;
-
- if (dir != PCI_DMA_TODEVICE) {
- for_each_sg(sgl, sg, nents, n) {
- dma_make_coherent(sg_phys(sg), PAGE_ALIGN(sg->length));
- }
- }
-}
+/* IIep is write-through, not flushing on cpu to device transfer. */
-static void pci32_sync_sg_for_device(struct device *device, struct scatterlist *sgl,
- int nents, enum dma_data_direction dir)
+void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
+ size_t size, enum dma_data_direction dir)
{
- struct scatterlist *sg;
- int n;
-
- if (dir != PCI_DMA_TODEVICE) {
- for_each_sg(sgl, sg, nents, n) {
- dma_make_coherent(sg_phys(sg), PAGE_ALIGN(sg->length));
- }
- }
+ if (dir != PCI_DMA_TODEVICE)
+ dma_make_coherent(paddr, PAGE_ALIGN(size));
}
-/* note: leon re-uses pci32_dma_ops */
-const struct dma_map_ops pci32_dma_ops = {
- .alloc = pci32_alloc_coherent,
- .free = pci32_free_coherent,
- .map_page = pci32_map_page,
- .unmap_page = pci32_unmap_page,
- .map_sg = pci32_map_sg,
- .unmap_sg = pci32_unmap_sg,
- .sync_single_for_cpu = pci32_sync_single_for_cpu,
- .sync_single_for_device = pci32_sync_single_for_device,
- .sync_sg_for_cpu = pci32_sync_sg_for_cpu,
- .sync_sg_for_device = pci32_sync_sg_for_device,
-};
-EXPORT_SYMBOL(pci32_dma_ops);
-
const struct dma_map_ops *dma_ops = &sbus_dma_ops;
EXPORT_SYMBOL(dma_ops);
diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c
index 95fe4f081ba3..92634d4e440c 100644
--- a/arch/sparc/mm/init_32.c
+++ b/arch/sparc/mm/init_32.c
@@ -23,6 +23,7 @@
#include <linux/init.h>
#include <linux/highmem.h>
#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/pagemap.h>
#include <linux/poison.h>
#include <linux/gfp.h>
@@ -101,13 +102,46 @@ static unsigned long calc_max_low_pfn(void)
return tmp;
}
+static void __init find_ramdisk(unsigned long end_of_phys_memory)
+{
+#ifdef CONFIG_BLK_DEV_INITRD
+ unsigned long size;
+
+ /* Now have to check initial ramdisk, so that it won't pass
+ * the end of memory
+ */
+ if (sparc_ramdisk_image) {
+ if (sparc_ramdisk_image >= (unsigned long)&_end - 2 * PAGE_SIZE)
+ sparc_ramdisk_image -= KERNBASE;
+ initrd_start = sparc_ramdisk_image + phys_base;
+ initrd_end = initrd_start + sparc_ramdisk_size;
+ if (initrd_end > end_of_phys_memory) {
+ printk(KERN_CRIT "initrd extends beyond end of memory "
+ "(0x%016lx > 0x%016lx)\ndisabling initrd\n",
+ initrd_end, end_of_phys_memory);
+ initrd_start = 0;
+ } else {
+ /* Reserve the initrd image area. */
+ size = initrd_end - initrd_start;
+ memblock_reserve(initrd_start, size);
+
+ initrd_start = (initrd_start - phys_base) + PAGE_OFFSET;
+ initrd_end = (initrd_end - phys_base) + PAGE_OFFSET;
+ }
+ }
+#endif
+}
+
unsigned long __init bootmem_init(unsigned long *pages_avail)
{
- unsigned long bootmap_size, start_pfn;
- unsigned long end_of_phys_memory = 0UL;
- unsigned long bootmap_pfn, bytes_avail, size;
+ unsigned long start_pfn, bytes_avail, size;
+ unsigned long end_of_phys_memory = 0;
+ unsigned long high_pages = 0;
int i;
+ memblock_set_bottom_up(true);
+ memblock_allow_resize();
+
bytes_avail = 0UL;
for (i = 0; sp_banks[i].num_bytes != 0; i++) {
end_of_phys_memory = sp_banks[i].base_addr +
@@ -124,24 +158,25 @@ unsigned long __init bootmem_init(unsigned long *pages_avail)
if (sp_banks[i].num_bytes == 0) {
sp_banks[i].base_addr = 0xdeadbeef;
} else {
+ memblock_add(sp_banks[i].base_addr,
+ sp_banks[i].num_bytes);
sp_banks[i+1].num_bytes = 0;
sp_banks[i+1].base_addr = 0xdeadbeef;
}
break;
}
}
+ memblock_add(sp_banks[i].base_addr, sp_banks[i].num_bytes);
}
/* Start with page aligned address of last symbol in kernel
- * image.
+ * image.
*/
start_pfn = (unsigned long)__pa(PAGE_ALIGN((unsigned long) &_end));
/* Now shift down to get the real physical page frame number. */
start_pfn >>= PAGE_SHIFT;
- bootmap_pfn = start_pfn;
-
max_pfn = end_of_phys_memory >> PAGE_SHIFT;
max_low_pfn = max_pfn;
@@ -150,85 +185,19 @@ unsigned long __init bootmem_init(unsigned long *pages_avail)
if (max_low_pfn > pfn_base + (SRMMU_MAXMEM >> PAGE_SHIFT)) {
highstart_pfn = pfn_base + (SRMMU_MAXMEM >> PAGE_SHIFT);
max_low_pfn = calc_max_low_pfn();
+ high_pages = calc_highpages();
printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
- calc_highpages() >> (20 - PAGE_SHIFT));
+ high_pages >> (20 - PAGE_SHIFT));
}
-#ifdef CONFIG_BLK_DEV_INITRD
- /* Now have to check initial ramdisk, so that bootmap does not overwrite it */
- if (sparc_ramdisk_image) {
- if (sparc_ramdisk_image >= (unsigned long)&_end - 2 * PAGE_SIZE)
- sparc_ramdisk_image -= KERNBASE;
- initrd_start = sparc_ramdisk_image + phys_base;
- initrd_end = initrd_start + sparc_ramdisk_size;
- if (initrd_end > end_of_phys_memory) {
- printk(KERN_CRIT "initrd extends beyond end of memory "
- "(0x%016lx > 0x%016lx)\ndisabling initrd\n",
- initrd_end, end_of_phys_memory);
- initrd_start = 0;
- }
- if (initrd_start) {
- if (initrd_start >= (start_pfn << PAGE_SHIFT) &&
- initrd_start < (start_pfn << PAGE_SHIFT) + 2 * PAGE_SIZE)
- bootmap_pfn = PAGE_ALIGN (initrd_end) >> PAGE_SHIFT;
- }
- }
-#endif
- /* Initialize the boot-time allocator. */
- bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap_pfn, pfn_base,
- max_low_pfn);
-
- /* Now register the available physical memory with the
- * allocator.
- */
- *pages_avail = 0;
- for (i = 0; sp_banks[i].num_bytes != 0; i++) {
- unsigned long curr_pfn, last_pfn;
+ find_ramdisk(end_of_phys_memory);
- curr_pfn = sp_banks[i].base_addr >> PAGE_SHIFT;
- if (curr_pfn >= max_low_pfn)
- break;
-
- last_pfn = (sp_banks[i].base_addr + sp_banks[i].num_bytes) >> PAGE_SHIFT;
- if (last_pfn > max_low_pfn)
- last_pfn = max_low_pfn;
-
- /*
- * .. finally, did all the rounding and playing
- * around just make the area go away?
- */
- if (last_pfn <= curr_pfn)
- continue;
-
- size = (last_pfn - curr_pfn) << PAGE_SHIFT;
- *pages_avail += last_pfn - curr_pfn;
-
- free_bootmem(sp_banks[i].base_addr, size);
- }
-
-#ifdef CONFIG_BLK_DEV_INITRD
- if (initrd_start) {
- /* Reserve the initrd image area. */
- size = initrd_end - initrd_start;
- reserve_bootmem(initrd_start, size, BOOTMEM_DEFAULT);
- *pages_avail -= PAGE_ALIGN(size) >> PAGE_SHIFT;
-
- initrd_start = (initrd_start - phys_base) + PAGE_OFFSET;
- initrd_end = (initrd_end - phys_base) + PAGE_OFFSET;
- }
-#endif
/* Reserve the kernel text/data/bss. */
size = (start_pfn << PAGE_SHIFT) - phys_base;
- reserve_bootmem(phys_base, size, BOOTMEM_DEFAULT);
- *pages_avail -= PAGE_ALIGN(size) >> PAGE_SHIFT;
+ memblock_reserve(phys_base, size);
- /* Reserve the bootmem map. We do not account for it
- * in pages_avail because we will release that memory
- * in free_all_bootmem.
- */
- size = bootmap_size;
- reserve_bootmem((bootmap_pfn << PAGE_SHIFT), size, BOOTMEM_DEFAULT);
- *pages_avail -= PAGE_ALIGN(size) >> PAGE_SHIFT;
+ size = memblock_phys_mem_size() - memblock_reserved_size();
+ *pages_avail = (size >> PAGE_SHIFT) - high_pages;
return max_pfn;
}
@@ -322,7 +291,7 @@ void __init mem_init(void)
map_high_region(start_pfn, end_pfn);
}
-
+
mem_init_print_info(NULL);
}
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b0312f8947ce..512003f16889 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -124,6 +124,7 @@ config X86
select HAVE_ARCH_MMAP_RND_BITS if MMU
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT
select HAVE_ARCH_COMPAT_MMAP_BASES if MMU && COMPAT
+ select HAVE_ARCH_PREL32_RELOCATIONS
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_THREAD_STRUCT_WHITELIST
select HAVE_ARCH_TRACEHOOK
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index 302517929932..d1e19f358b6e 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -23,11 +23,8 @@
* _ctype[] in lib/ctype.c is needed by isspace() of linux/ctype.h.
* While both lib/ctype.c and lib/cmdline.c will bring EXPORT_SYMBOL
* which is meaningless and will cause compiling error in some cases.
- * So do not include linux/export.h and define EXPORT_SYMBOL(sym)
- * as empty.
*/
-#define _LINUX_EXPORT_H
-#define EXPORT_SYMBOL(sym)
+#define __DISABLE_EXPORTS
#include "misc.h"
#include "error.h"
diff --git a/arch/x86/hyperv/Makefile b/arch/x86/hyperv/Makefile
index b173d404e3df..b21ee65c4101 100644
--- a/arch/x86/hyperv/Makefile
+++ b/arch/x86/hyperv/Makefile
@@ -1,2 +1,2 @@
-obj-y := hv_init.o mmu.o
+obj-y := hv_init.o mmu.o nested.o
obj-$(CONFIG_X86_64) += hv_apic.o
diff --git a/arch/x86/hyperv/nested.c b/arch/x86/hyperv/nested.c
new file mode 100644
index 000000000000..b8e60cc50461
--- /dev/null
+++ b/arch/x86/hyperv/nested.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Hyper-V nested virtualization code.
+ *
+ * Copyright (C) 2018, Microsoft, Inc.
+ *
+ * Author : Lan Tianyu <Tianyu.Lan@microsoft.com>
+ */
+
+
+#include <linux/types.h>
+#include <asm/hyperv-tlfs.h>
+#include <asm/mshyperv.h>
+#include <asm/tlbflush.h>
+
+#include <asm/trace/hyperv.h>
+
+int hyperv_flush_guest_mapping(u64 as)
+{
+ struct hv_guest_mapping_flush **flush_pcpu;
+ struct hv_guest_mapping_flush *flush;
+ u64 status;
+ unsigned long flags;
+ int ret = -ENOTSUPP;
+
+ if (!hv_hypercall_pg)
+ goto fault;
+
+ local_irq_save(flags);
+
+ flush_pcpu = (struct hv_guest_mapping_flush **)
+ this_cpu_ptr(hyperv_pcpu_input_arg);
+
+ flush = *flush_pcpu;
+
+ if (unlikely(!flush)) {
+ local_irq_restore(flags);
+ goto fault;
+ }
+
+ flush->address_space = as;
+ flush->flags = 0;
+
+ status = hv_do_hypercall(HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE,
+ flush, NULL);
+ local_irq_restore(flags);
+
+ if (!(status & HV_HYPERCALL_RESULT_MASK))
+ ret = 0;
+
+fault:
+ trace_hyperv_nested_flush_guest_mapping(as, ret);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(hyperv_flush_guest_mapping);
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index de690c2d2e33..a0ab9ab61c75 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -8,5 +8,6 @@ generated-y += xen-hypercalls.h
generic-y += dma-contiguous.h
generic-y += early_ioremap.h
+generic-y += export.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
diff --git a/arch/x86/include/asm/export.h b/arch/x86/include/asm/export.h
deleted file mode 100644
index 2a51d66689c5..000000000000
--- a/arch/x86/include/asm/export.h
+++ /dev/null
@@ -1,5 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifdef CONFIG_64BIT
-#define KSYM_ALIGN 16
-#endif
-#include <asm-generic/export.h>
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 6ced78af48da..e977b6b3a538 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -310,6 +310,7 @@ struct ms_hyperv_tsc_page {
#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106
/* Nested features (CPUID 0x4000000A) EAX */
+#define HV_X64_NESTED_GUEST_MAPPING_FLUSH BIT(18)
#define HV_X64_NESTED_MSR_BITMAP BIT(19)
struct hv_reenlightenment_control {
@@ -351,6 +352,7 @@ struct hv_tsc_emulation_status {
#define HVCALL_SEND_IPI_EX 0x0015
#define HVCALL_POST_MESSAGE 0x005c
#define HVCALL_SIGNAL_EVENT 0x005d
+#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001
#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12
@@ -742,6 +744,12 @@ struct ipi_arg_ex {
struct hv_vpset vp_set;
};
+/* HvFlushGuestPhysicalAddressSpace hypercalls */
+struct hv_guest_mapping_flush {
+ u64 address_space;
+ u64 flags;
+};
+
/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
struct hv_tlb_flush {
u64 address_space;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index acebb808c4b5..00ddb0c9e612 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -55,6 +55,7 @@
#define KVM_REQ_TRIPLE_FAULT KVM_ARCH_REQ(2)
#define KVM_REQ_MMU_SYNC KVM_ARCH_REQ(3)
#define KVM_REQ_CLOCK_UPDATE KVM_ARCH_REQ(4)
+#define KVM_REQ_LOAD_CR3 KVM_ARCH_REQ(5)
#define KVM_REQ_EVENT KVM_ARCH_REQ(6)
#define KVM_REQ_APF_HALT KVM_ARCH_REQ(7)
#define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(8)
@@ -76,13 +77,13 @@
#define KVM_REQ_HV_EXIT KVM_ARCH_REQ(21)
#define KVM_REQ_HV_STIMER KVM_ARCH_REQ(22)
#define KVM_REQ_LOAD_EOI_EXITMAP KVM_ARCH_REQ(23)
+#define KVM_REQ_GET_VMCS12_PAGES KVM_ARCH_REQ(24)
#define CR0_RESERVED_BITS \
(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
| X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
| X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
-#define CR3_PCID_INVD BIT_64(63)
#define CR4_RESERVED_BITS \
(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
| X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
@@ -326,6 +327,16 @@ struct rsvd_bits_validate {
u64 bad_mt_xwr;
};
+struct kvm_mmu_root_info {
+ gpa_t cr3;
+ hpa_t hpa;
+};
+
+#define KVM_MMU_ROOT_INFO_INVALID \
+ ((struct kvm_mmu_root_info) { .cr3 = INVALID_PAGE, .hpa = INVALID_PAGE })
+
+#define KVM_MMU_NUM_PREV_ROOTS 3
+
/*
* x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit,
* and 2-level 32-bit). The kvm_mmu structure abstracts the details of the
@@ -345,7 +356,7 @@ struct kvm_mmu {
struct x86_exception *exception);
int (*sync_page)(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp);
- void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva);
+ void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa);
void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
u64 *spte, const void *pte);
hpa_t root_hpa;
@@ -354,6 +365,7 @@ struct kvm_mmu {
u8 shadow_root_level;
u8 ept_ad;
bool direct_map;
+ struct kvm_mmu_root_info prev_roots[KVM_MMU_NUM_PREV_ROOTS];
/*
* Bitmap; bit set = permission fault
@@ -978,6 +990,15 @@ struct kvm_x86_ops {
void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
void (*tlb_flush)(struct kvm_vcpu *vcpu, bool invalidate_gpa);
+ int (*tlb_remote_flush)(struct kvm *kvm);
+
+ /*
+ * Flush any TLB entries associated with the given GVA.
+ * Does not need to flush GPA->HPA mappings.
+ * Can potentially get non-canonical addresses through INVLPGs, which
+ * the implementation may choose to ignore if appropriate.
+ */
+ void (*tlb_flush_gva)(struct kvm_vcpu *vcpu, gva_t addr);
void (*run)(struct kvm_vcpu *vcpu);
int (*handle_exit)(struct kvm_vcpu *vcpu);
@@ -1090,6 +1111,14 @@ struct kvm_x86_ops {
void (*setup_mce)(struct kvm_vcpu *vcpu);
+ int (*get_nested_state)(struct kvm_vcpu *vcpu,
+ struct kvm_nested_state __user *user_kvm_nested_state,
+ unsigned user_data_size);
+ int (*set_nested_state)(struct kvm_vcpu *vcpu,
+ struct kvm_nested_state __user *user_kvm_nested_state,
+ struct kvm_nested_state *kvm_state);
+ void (*get_vmcs12_pages)(struct kvm_vcpu *vcpu);
+
int (*smi_allowed)(struct kvm_vcpu *vcpu);
int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
int (*pre_leave_smm)(struct kvm_vcpu *vcpu, u64 smbase);
@@ -1122,6 +1151,16 @@ static inline void kvm_arch_free_vm(struct kvm *kvm)
return kvm_x86_ops->vm_free(kvm);
}
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
+static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
+{
+ if (kvm_x86_ops->tlb_remote_flush &&
+ !kvm_x86_ops->tlb_remote_flush(kvm))
+ return 0;
+ else
+ return -ENOTSUPP;
+}
+
int kvm_mmu_module_init(void);
void kvm_mmu_module_exit(void);
@@ -1273,6 +1312,10 @@ static inline int __kvm_irq_line_state(unsigned long *irq_state,
return !!(*irq_state);
}
+#define KVM_MMU_ROOT_CURRENT BIT(0)
+#define KVM_MMU_ROOT_PREVIOUS(i) BIT(1+i)
+#define KVM_MMU_ROOTS_ALL (~0UL)
+
int kvm_pic_set_irq(struct kvm_pic *pic, int irq, int irq_source_id, int level);
void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id);
@@ -1284,7 +1327,7 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
int kvm_mmu_load(struct kvm_vcpu *vcpu);
void kvm_mmu_unload(struct kvm_vcpu *vcpu);
void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
-void kvm_mmu_free_roots(struct kvm_vcpu *vcpu);
+void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, ulong roots_to_free);
gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
struct x86_exception *exception);
gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
@@ -1303,7 +1346,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u64 error_code,
void *insn, int insn_len);
void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
-void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu);
+void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid);
+void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, bool skip_tlb_flush);
void kvm_enable_tdp(void);
void kvm_disable_tdp(void);
@@ -1418,6 +1462,10 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu);
+int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
+ unsigned long ipi_bitmap_high, int min,
+ unsigned long icr, int op_64_bit);
+
u64 kvm_get_arch_capabilities(void);
void kvm_define_shared_msr(unsigned index, u32 msr);
int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index e1771a1987dd..f37704497d8f 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -347,6 +347,7 @@ void hyperv_reenlightenment_intr(struct pt_regs *regs);
void set_hv_tscchange_cb(void (*cb)(void));
void clear_hv_tscchange_cb(void);
void hyperv_stop_tsc_emulation(void);
+int hyperv_flush_guest_mapping(u64 as);
#ifdef CONFIG_X86_64
void hv_apic_init(void);
@@ -366,6 +367,7 @@ static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu)
{
return NULL;
}
+static inline int hyperv_flush_guest_mapping(u64 as) { return -1; }
#endif /* CONFIG_HYPERV */
#ifdef CONFIG_HYPERV_TSCPAGE
diff --git a/arch/x86/include/asm/trace/hyperv.h b/arch/x86/include/asm/trace/hyperv.h
index 9c0d4b588e3f..2e6245a023ef 100644
--- a/arch/x86/include/asm/trace/hyperv.h
+++ b/arch/x86/include/asm/trace/hyperv.h
@@ -28,6 +28,20 @@ TRACE_EVENT(hyperv_mmu_flush_tlb_others,
__entry->addr, __entry->end)
);
+TRACE_EVENT(hyperv_nested_flush_guest_mapping,
+ TP_PROTO(u64 as, int ret),
+ TP_ARGS(as, ret),
+
+ TP_STRUCT__entry(
+ __field(u64, as)
+ __field(int, ret)
+ ),
+ TP_fast_assign(__entry->as = as;
+ __entry->ret = ret;
+ ),
+ TP_printk("address space %llx ret %d", __entry->as, __entry->ret)
+ );
+
TRACE_EVENT(hyperv_send_ipi_mask,
TP_PROTO(const struct cpumask *cpus,
int vector),
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 95f9107449bf..9527ba5d62da 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -74,6 +74,7 @@
#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
#define SECONDARY_EXEC_ENABLE_VMFUNC 0x00002000
#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000
+#define SECONDARY_EXEC_ENCLS_EXITING 0x00008000
#define SECONDARY_EXEC_RDSEED_EXITING 0x00010000
#define SECONDARY_EXEC_ENABLE_PML 0x00020000
#define SECONDARY_EXEC_XSAVES 0x00100000
@@ -213,6 +214,8 @@ enum vmcs_field {
VMWRITE_BITMAP_HIGH = 0x00002029,
XSS_EXIT_BITMAP = 0x0000202C,
XSS_EXIT_BITMAP_HIGH = 0x0000202D,
+ ENCLS_EXITING_BITMAP = 0x0000202E,
+ ENCLS_EXITING_BITMAP_HIGH = 0x0000202F,
TSC_MULTIPLIER = 0x00002032,
TSC_MULTIPLIER_HIGH = 0x00002033,
GUEST_PHYSICAL_ADDRESS = 0x00002400,
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index c535c2fdea13..86299efa804a 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -378,4 +378,41 @@ struct kvm_sync_regs {
#define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0)
#define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1)
+#define KVM_STATE_NESTED_GUEST_MODE 0x00000001
+#define KVM_STATE_NESTED_RUN_PENDING 0x00000002
+
+#define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001
+#define KVM_STATE_NESTED_SMM_VMXON 0x00000002
+
+struct kvm_vmx_nested_state {
+ __u64 vmxon_pa;
+ __u64 vmcs_pa;
+
+ struct {
+ __u16 flags;
+ } smm;
+};
+
+/* for KVM_CAP_NESTED_STATE */
+struct kvm_nested_state {
+ /* KVM_STATE_* flags */
+ __u16 flags;
+
+ /* 0 for VMX, 1 for SVM. */
+ __u16 format;
+
+ /* 128 for SVM, 128 + VMCS size for VMX. */
+ __u32 size;
+
+ union {
+ /* VMXON, VMCS */
+ struct kvm_vmx_nested_state vmx;
+
+ /* Pad the header to 128 bytes. */
+ __u8 pad[120];
+ };
+
+ __u8 data[0];
+};
+
#endif /* _ASM_X86_KVM_H */
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 0ede697c3961..19980ec1a316 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -28,6 +28,7 @@
#define KVM_FEATURE_PV_UNHALT 7
#define KVM_FEATURE_PV_TLB_FLUSH 9
#define KVM_FEATURE_ASYNC_PF_VMEXIT 10
+#define KVM_FEATURE_PV_SEND_IPI 11
#define KVM_HINTS_REALTIME 0
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index dde437f5d14f..158ad1483c43 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -108,7 +108,7 @@ static long acpi_processor_ffh_cstate_probe_cpu(void *_cx)
cx->type);
}
snprintf(cx->desc,
- ACPI_CX_DESC_LEN, "ACPI FFH INTEL MWAIT 0x%x",
+ ACPI_CX_DESC_LEN, "ACPI FFH MWAIT 0x%x",
cx->address);
out:
return retval;
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 09aaabb2bbf1..0f471bd93417 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -444,6 +444,98 @@ static void __init sev_map_percpu_data(void)
}
#ifdef CONFIG_SMP
+#define KVM_IPI_CLUSTER_SIZE (2 * BITS_PER_LONG)
+
+static void __send_ipi_mask(const struct cpumask *mask, int vector)
+{
+ unsigned long flags;
+ int cpu, apic_id, icr;
+ int min = 0, max = 0;
+#ifdef CONFIG_X86_64
+ __uint128_t ipi_bitmap = 0;
+#else
+ u64 ipi_bitmap = 0;
+#endif
+
+ if (cpumask_empty(mask))
+ return;
+
+ local_irq_save(flags);
+
+ switch (vector) {
+ default:
+ icr = APIC_DM_FIXED | vector;
+ break;
+ case NMI_VECTOR:
+ icr = APIC_DM_NMI;
+ break;
+ }
+
+ for_each_cpu(cpu, mask) {
+ apic_id = per_cpu(x86_cpu_to_apicid, cpu);
+ if (!ipi_bitmap) {
+ min = max = apic_id;
+ } else if (apic_id < min && max - apic_id < KVM_IPI_CLUSTER_SIZE) {
+ ipi_bitmap <<= min - apic_id;
+ min = apic_id;
+ } else if (apic_id < min + KVM_IPI_CLUSTER_SIZE) {
+ max = apic_id < max ? max : apic_id;
+ } else {
+ kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
+ (unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
+ min = max = apic_id;
+ ipi_bitmap = 0;
+ }
+ __set_bit(apic_id - min, (unsigned long *)&ipi_bitmap);
+ }
+
+ if (ipi_bitmap) {
+ kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
+ (unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
+ }
+
+ local_irq_restore(flags);
+}
+
+static void kvm_send_ipi_mask(const struct cpumask *mask, int vector)
+{
+ __send_ipi_mask(mask, vector);
+}
+
+static void kvm_send_ipi_mask_allbutself(const struct cpumask *mask, int vector)
+{
+ unsigned int this_cpu = smp_processor_id();
+ struct cpumask new_mask;
+ const struct cpumask *local_mask;
+
+ cpumask_copy(&new_mask, mask);
+ cpumask_clear_cpu(this_cpu, &new_mask);
+ local_mask = &new_mask;
+ __send_ipi_mask(local_mask, vector);
+}
+
+static void kvm_send_ipi_allbutself(int vector)
+{
+ kvm_send_ipi_mask_allbutself(cpu_online_mask, vector);
+}
+
+static void kvm_send_ipi_all(int vector)
+{
+ __send_ipi_mask(cpu_online_mask, vector);
+}
+
+/*
+ * Set the IPI entry points
+ */
+static void kvm_setup_pv_ipi(void)
+{
+ apic->send_IPI_mask = kvm_send_ipi_mask;
+ apic->send_IPI_mask_allbutself = kvm_send_ipi_mask_allbutself;
+ apic->send_IPI_allbutself = kvm_send_ipi_allbutself;
+ apic->send_IPI_all = kvm_send_ipi_all;
+ pr_info("KVM setup pv IPIs\n");
+}
+
static void __init kvm_smp_prepare_cpus(unsigned int max_cpus)
{
native_smp_prepare_cpus(max_cpus);
@@ -611,13 +703,27 @@ static uint32_t __init kvm_detect(void)
return kvm_cpuid_base();
}
+static void __init kvm_apic_init(void)
+{
+#if defined(CONFIG_SMP)
+ if (kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI))
+ kvm_setup_pv_ipi();
+#endif
+}
+
+static void __init kvm_init_platform(void)
+{
+ kvmclock_init();
+ x86_platform.apic_post_init = kvm_apic_init;
+}
+
const __initconst struct hypervisor_x86 x86_hyper_kvm = {
.name = "KVM",
.detect = kvm_detect,
.type = X86_HYPER_KVM,
- .init.init_platform = kvmclock_init,
.init.guest_late_init = kvm_guest_init,
.init.x2apic_available = kvm_para_available,
+ .init.init_platform = kvm_init_platform,
};
static __init int activate_jump_labels(void)
@@ -736,6 +842,10 @@ void __init kvm_spinlock_init(void)
if (kvm_para_has_hint(KVM_HINTS_REALTIME))
return;
+ /* Don't use the pvqspinlock code if there is only 1 vCPU. */
+ if (num_possible_cpus() == 1)
+ return;
+
__pv_init_lock_hash();
pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock);
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 7e042e3d47fd..7bcfa61375c0 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -621,7 +621,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
(1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) |
(1 << KVM_FEATURE_PV_UNHALT) |
(1 << KVM_FEATURE_PV_TLB_FLUSH) |
- (1 << KVM_FEATURE_ASYNC_PF_VMEXIT);
+ (1 << KVM_FEATURE_ASYNC_PF_VMEXIT) |
+ (1 << KVM_FEATURE_PV_SEND_IPI);
if (sched_info_on())
entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 4c4f4263420c..106482da6388 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -4191,7 +4191,7 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt)
maxphyaddr = 36;
rsvd = rsvd_bits(maxphyaddr, 63);
if (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_PCIDE)
- rsvd &= ~CR3_PCID_INVD;
+ rsvd &= ~X86_CR3_PCID_NOFLUSH;
}
if (new_val & rsvd)
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index af8caf965baa..01d209ab5481 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -235,7 +235,7 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
int ret;
- if (!synic->active)
+ if (!synic->active && !host)
return 1;
trace_kvm_hv_synic_set_msr(vcpu->vcpu_id, msr, data, host);
@@ -295,11 +295,12 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
return ret;
}
-static int synic_get_msr(struct kvm_vcpu_hv_synic *synic, u32 msr, u64 *pdata)
+static int synic_get_msr(struct kvm_vcpu_hv_synic *synic, u32 msr, u64 *pdata,
+ bool host)
{
int ret;
- if (!synic->active)
+ if (!synic->active && !host)
return 1;
ret = 0;
@@ -1014,6 +1015,11 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
case HV_X64_MSR_TSC_EMULATION_STATUS:
hv->hv_tsc_emulation_status = data;
break;
+ case HV_X64_MSR_TIME_REF_COUNT:
+ /* read-only, but still ignore it if host-initiated */
+ if (!host)
+ return 1;
+ break;
default:
vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n",
msr, data);
@@ -1101,6 +1107,12 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
return stimer_set_count(vcpu_to_stimer(vcpu, timer_index),
data, host);
}
+ case HV_X64_MSR_TSC_FREQUENCY:
+ case HV_X64_MSR_APIC_FREQUENCY:
+ /* read-only, but still ignore it if host-initiated */
+ if (!host)
+ return 1;
+ break;
default:
vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n",
msr, data);
@@ -1156,7 +1168,8 @@ static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
return 0;
}
-static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata,
+ bool host)
{
u64 data = 0;
struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
@@ -1183,7 +1196,7 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
case HV_X64_MSR_SIMP:
case HV_X64_MSR_EOM:
case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
- return synic_get_msr(vcpu_to_synic(vcpu), msr, pdata);
+ return synic_get_msr(vcpu_to_synic(vcpu), msr, pdata, host);
case HV_X64_MSR_STIMER0_CONFIG:
case HV_X64_MSR_STIMER1_CONFIG:
case HV_X64_MSR_STIMER2_CONFIG:
@@ -1229,7 +1242,7 @@ int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
return kvm_hv_set_msr(vcpu, msr, data, host);
}
-int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
{
if (kvm_hv_msr_partition_wide(msr)) {
int r;
@@ -1239,7 +1252,7 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock);
return r;
} else
- return kvm_hv_get_msr(vcpu, msr, pdata);
+ return kvm_hv_get_msr(vcpu, msr, pdata, host);
}
static __always_inline int get_sparse_bank_no(u64 valid_bank_mask, int bank_no)
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
index 837465d69c6d..d6aa969e20f1 100644
--- a/arch/x86/kvm/hyperv.h
+++ b/arch/x86/kvm/hyperv.h
@@ -48,7 +48,7 @@ static inline struct kvm_vcpu *synic_to_vcpu(struct kvm_vcpu_hv_synic *synic)
}
int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host);
-int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
+int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host);
bool kvm_hv_hypercall_enabled(struct kvm *kvm);
int kvm_hv_hypercall(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index d536d457517b..0cefba28c864 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -547,6 +547,46 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
irq->level, irq->trig_mode, dest_map);
}
+int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
+ unsigned long ipi_bitmap_high, int min,
+ unsigned long icr, int op_64_bit)
+{
+ int i;
+ struct kvm_apic_map *map;
+ struct kvm_vcpu *vcpu;
+ struct kvm_lapic_irq irq = {0};
+ int cluster_size = op_64_bit ? 64 : 32;
+ int count = 0;
+
+ irq.vector = icr & APIC_VECTOR_MASK;
+ irq.delivery_mode = icr & APIC_MODE_MASK;
+ irq.level = (icr & APIC_INT_ASSERT) != 0;
+ irq.trig_mode = icr & APIC_INT_LEVELTRIG;
+
+ if (icr & APIC_DEST_MASK)
+ return -KVM_EINVAL;
+ if (icr & APIC_SHORT_MASK)
+ return -KVM_EINVAL;
+
+ rcu_read_lock();
+ map = rcu_dereference(kvm->arch.apic_map);
+
+ /* Bits above cluster_size are masked in the caller. */
+ for_each_set_bit(i, &ipi_bitmap_low, BITS_PER_LONG) {
+ vcpu = map->phys_map[min + i]->vcpu;
+ count += kvm_apic_set_irq(vcpu, &irq, NULL);
+ }
+
+ min += cluster_size;
+ for_each_set_bit(i, &ipi_bitmap_high, BITS_PER_LONG) {
+ vcpu = map->phys_map[min + i]->vcpu;
+ count += kvm_apic_set_irq(vcpu, &irq, NULL);
+ }
+
+ rcu_read_unlock();
+ return count;
+}
+
static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
{
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index a44e568363a4..a282321329b5 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -178,7 +178,24 @@ struct kvm_shadow_walk_iterator {
unsigned index;
};
-#define for_each_shadow_entry(_vcpu, _addr, _walker) \
+static const union kvm_mmu_page_role mmu_base_role_mask = {
+ .cr0_wp = 1,
+ .cr4_pae = 1,
+ .nxe = 1,
+ .smep_andnot_wp = 1,
+ .smap_andnot_wp = 1,
+ .smm = 1,
+ .guest_mode = 1,
+ .ad_disabled = 1,
+};
+
+#define for_each_shadow_entry_using_root(_vcpu, _root, _addr, _walker) \
+ for (shadow_walk_init_using_root(&(_walker), (_vcpu), \
+ (_root), (_addr)); \
+ shadow_walk_okay(&(_walker)); \
+ shadow_walk_next(&(_walker)))
+
+#define for_each_shadow_entry(_vcpu, _addr, _walker) \
for (shadow_walk_init(&(_walker), _vcpu, _addr); \
shadow_walk_okay(&(_walker)); \
shadow_walk_next(&(_walker)))
@@ -221,7 +238,20 @@ static const u64 shadow_acc_track_saved_bits_mask = PT64_EPT_READABLE_MASK |
PT64_EPT_EXECUTABLE_MASK;
static const u64 shadow_acc_track_saved_bits_shift = PT64_SECOND_AVAIL_BITS_SHIFT;
+/*
+ * This mask must be set on all non-zero Non-Present or Reserved SPTEs in order
+ * to guard against L1TF attacks.
+ */
+static u64 __read_mostly shadow_nonpresent_or_rsvd_mask;
+
+/*
+ * The number of high-order 1 bits to use in the mask above.
+ */
+static const u64 shadow_nonpresent_or_rsvd_mask_len = 5;
+
static void mmu_spte_set(u64 *sptep, u64 spte);
+static union kvm_mmu_page_role
+kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu);
void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value)
{
@@ -308,9 +338,13 @@ static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn,
{
unsigned int gen = kvm_current_mmio_generation(vcpu);
u64 mask = generation_mmio_spte_mask(gen);
+ u64 gpa = gfn << PAGE_SHIFT;
access &= ACC_WRITE_MASK | ACC_USER_MASK;
- mask |= shadow_mmio_value | access | gfn << PAGE_SHIFT;
+ mask |= shadow_mmio_value | access;
+ mask |= gpa | shadow_nonpresent_or_rsvd_mask;
+ mask |= (gpa & shadow_nonpresent_or_rsvd_mask)
+ << shadow_nonpresent_or_rsvd_mask_len;
trace_mark_mmio_spte(sptep, gfn, access, gen);
mmu_spte_set(sptep, mask);
@@ -323,8 +357,14 @@ static bool is_mmio_spte(u64 spte)
static gfn_t get_mmio_spte_gfn(u64 spte)
{
- u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask;
- return (spte & ~mask) >> PAGE_SHIFT;
+ u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask |
+ shadow_nonpresent_or_rsvd_mask;
+ u64 gpa = spte & ~mask;
+
+ gpa |= (spte >> shadow_nonpresent_or_rsvd_mask_len)
+ & shadow_nonpresent_or_rsvd_mask;
+
+ return gpa >> PAGE_SHIFT;
}
static unsigned get_mmio_spte_access(u64 spte)
@@ -381,7 +421,7 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
}
EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
-static void kvm_mmu_clear_all_pte_masks(void)
+static void kvm_mmu_reset_all_pte_masks(void)
{
shadow_user_mask = 0;
shadow_accessed_mask = 0;
@@ -391,6 +431,18 @@ static void kvm_mmu_clear_all_pte_masks(void)
shadow_mmio_mask = 0;
shadow_present_mask = 0;
shadow_acc_track_mask = 0;
+
+ /*
+ * If the CPU has 46 or less physical address bits, then set an
+ * appropriate mask to guard against L1TF attacks. Otherwise, it is
+ * assumed that the CPU is not vulnerable to L1TF.
+ */
+ if (boot_cpu_data.x86_phys_bits <
+ 52 - shadow_nonpresent_or_rsvd_mask_len)
+ shadow_nonpresent_or_rsvd_mask =
+ rsvd_bits(boot_cpu_data.x86_phys_bits -
+ shadow_nonpresent_or_rsvd_mask_len,
+ boot_cpu_data.x86_phys_bits - 1);
}
static int is_cpuid_PSE36(void)
@@ -1986,7 +2038,7 @@ static int nonpaging_sync_page(struct kvm_vcpu *vcpu,
return 0;
}
-static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
+static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root)
{
}
@@ -2117,12 +2169,8 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
static bool __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
struct list_head *invalid_list)
{
- if (sp->role.cr4_pae != !!is_pae(vcpu)) {
- kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list);
- return false;
- }
-
- if (vcpu->arch.mmu.sync_page(vcpu, sp) == 0) {
+ if (sp->role.cr4_pae != !!is_pae(vcpu)
+ || vcpu->arch.mmu.sync_page(vcpu, sp) == 0) {
kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list);
return false;
}
@@ -2392,11 +2440,12 @@ out:
return sp;
}
-static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator,
- struct kvm_vcpu *vcpu, u64 addr)
+static void shadow_walk_init_using_root(struct kvm_shadow_walk_iterator *iterator,
+ struct kvm_vcpu *vcpu, hpa_t root,
+ u64 addr)
{
iterator->addr = addr;
- iterator->shadow_addr = vcpu->arch.mmu.root_hpa;
+ iterator->shadow_addr = root;
iterator->level = vcpu->arch.mmu.shadow_root_level;
if (iterator->level == PT64_ROOT_4LEVEL &&
@@ -2405,6 +2454,12 @@ static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator,
--iterator->level;
if (iterator->level == PT32E_ROOT_LEVEL) {
+ /*
+ * prev_root is currently only used for 64-bit hosts. So only
+ * the active root_hpa is valid here.
+ */
+ BUG_ON(root != vcpu->arch.mmu.root_hpa);
+
iterator->shadow_addr
= vcpu->arch.mmu.pae_root[(addr >> 30) & 3];
iterator->shadow_addr &= PT64_BASE_ADDR_MASK;
@@ -2414,6 +2469,13 @@ static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator,
}
}
+static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator,
+ struct kvm_vcpu *vcpu, u64 addr)
+{
+ shadow_walk_init_using_root(iterator, vcpu, vcpu->arch.mmu.root_hpa,
+ addr);
+}
+
static bool shadow_walk_okay(struct kvm_shadow_walk_iterator *iterator)
{
if (iterator->level < PT_PAGE_TABLE_LEVEL)
@@ -2702,6 +2764,45 @@ static bool mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
kvm_unsync_page(vcpu, sp);
}
+ /*
+ * We need to ensure that the marking of unsync pages is visible
+ * before the SPTE is updated to allow writes because
+ * kvm_mmu_sync_roots() checks the unsync flags without holding
+ * the MMU lock and so can race with this. If the SPTE was updated
+ * before the page had been marked as unsync-ed, something like the
+ * following could happen:
+ *
+ * CPU 1 CPU 2
+ * ---------------------------------------------------------------------
+ * 1.2 Host updates SPTE
+ * to be writable
+ * 2.1 Guest writes a GPTE for GVA X.
+ * (GPTE being in the guest page table shadowed
+ * by the SP from CPU 1.)
+ * This reads SPTE during the page table walk.
+ * Since SPTE.W is read as 1, there is no
+ * fault.
+ *
+ * 2.2 Guest issues TLB flush.
+ * That causes a VM Exit.
+ *
+ * 2.3 kvm_mmu_sync_pages() reads sp->unsync.
+ * Since it is false, so it just returns.
+ *
+ * 2.4 Guest accesses GVA X.
+ * Since the mapping in the SP was not updated,
+ * so the old mapping for GVA X incorrectly
+ * gets used.
+ * 1.1 Host marks SP
+ * as unsync
+ * (sp->unsync = true)
+ *
+ * The write barrier below ensures that 1.1 happens before 1.2 and thus
+ * the situation in 2.4 does not arise. The implicit barrier in 2.2
+ * pairs with this write barrier.
+ */
+ smp_wmb();
+
return false;
}
@@ -2724,6 +2825,10 @@ static bool kvm_is_mmio_pfn(kvm_pfn_t pfn)
return true;
}
+/* Bits which may be returned by set_spte() */
+#define SET_SPTE_WRITE_PROTECTED_PT BIT(0)
+#define SET_SPTE_NEED_REMOTE_TLB_FLUSH BIT(1)
+
static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
unsigned pte_access, int level,
gfn_t gfn, kvm_pfn_t pfn, bool speculative,
@@ -2800,7 +2905,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
if (mmu_need_write_protect(vcpu, gfn, can_unsync)) {
pgprintk("%s: found shadow page for %llx, marking ro\n",
__func__, gfn);
- ret = 1;
+ ret |= SET_SPTE_WRITE_PROTECTED_PT;
pte_access &= ~ACC_WRITE_MASK;
spte &= ~(PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE);
}
@@ -2816,7 +2921,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
set_pte:
if (mmu_spte_update(sptep, spte))
- kvm_flush_remote_tlbs(vcpu->kvm);
+ ret |= SET_SPTE_NEED_REMOTE_TLB_FLUSH;
done:
return ret;
}
@@ -2827,7 +2932,9 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,
{
int was_rmapped = 0;
int rmap_count;
+ int set_spte_ret;
int ret = RET_PF_RETRY;
+ bool flush = false;
pgprintk("%s: spte %llx write_fault %d gfn %llx\n", __func__,
*sptep, write_fault, gfn);
@@ -2844,22 +2951,25 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,
child = page_header(pte & PT64_BASE_ADDR_MASK);
drop_parent_pte(child, sptep);
- kvm_flush_remote_tlbs(vcpu->kvm);
+ flush = true;
} else if (pfn != spte_to_pfn(*sptep)) {
pgprintk("hfn old %llx new %llx\n",
spte_to_pfn(*sptep), pfn);
drop_spte(vcpu->kvm, sptep);
- kvm_flush_remote_tlbs(vcpu->kvm);
+ flush = true;
} else
was_rmapped = 1;
}
- if (set_spte(vcpu, sptep, pte_access, level, gfn, pfn, speculative,
- true, host_writable)) {
+ set_spte_ret = set_spte(vcpu, sptep, pte_access, level, gfn, pfn,
+ speculative, true, host_writable);
+ if (set_spte_ret & SET_SPTE_WRITE_PROTECTED_PT) {
if (write_fault)
ret = RET_PF_EMULATE;
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
}
+ if (set_spte_ret & SET_SPTE_NEED_REMOTE_TLB_FLUSH || flush)
+ kvm_flush_remote_tlbs(vcpu->kvm);
if (unlikely(is_mmio_spte(*sptep)))
ret = RET_PF_EMULATE;
@@ -3358,26 +3468,47 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
*root_hpa = INVALID_PAGE;
}
-void kvm_mmu_free_roots(struct kvm_vcpu *vcpu)
+/* roots_to_free must be some combination of the KVM_MMU_ROOT_* flags */
+void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, ulong roots_to_free)
{
int i;
LIST_HEAD(invalid_list);
struct kvm_mmu *mmu = &vcpu->arch.mmu;
+ bool free_active_root = roots_to_free & KVM_MMU_ROOT_CURRENT;
- if (!VALID_PAGE(mmu->root_hpa))
- return;
+ BUILD_BUG_ON(KVM_MMU_NUM_PREV_ROOTS >= BITS_PER_LONG);
+
+ /* Before acquiring the MMU lock, see if we need to do any real work. */
+ if (!(free_active_root && VALID_PAGE(mmu->root_hpa))) {
+ for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
+ if ((roots_to_free & KVM_MMU_ROOT_PREVIOUS(i)) &&
+ VALID_PAGE(mmu->prev_roots[i].hpa))
+ break;
+
+ if (i == KVM_MMU_NUM_PREV_ROOTS)
+ return;
+ }
spin_lock(&vcpu->kvm->mmu_lock);
- if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL &&
- (mmu->root_level >= PT64_ROOT_4LEVEL || mmu->direct_map)) {
- mmu_free_root_page(vcpu->kvm, &mmu->root_hpa, &invalid_list);
- } else {
- for (i = 0; i < 4; ++i)
- if (mmu->pae_root[i] != 0)
- mmu_free_root_page(vcpu->kvm, &mmu->pae_root[i],
- &invalid_list);
- mmu->root_hpa = INVALID_PAGE;
+ for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
+ if (roots_to_free & KVM_MMU_ROOT_PREVIOUS(i))
+ mmu_free_root_page(vcpu->kvm, &mmu->prev_roots[i].hpa,
+ &invalid_list);
+
+ if (free_active_root) {
+ if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL &&
+ (mmu->root_level >= PT64_ROOT_4LEVEL || mmu->direct_map)) {
+ mmu_free_root_page(vcpu->kvm, &mmu->root_hpa,
+ &invalid_list);
+ } else {
+ for (i = 0; i < 4; ++i)
+ if (mmu->pae_root[i] != 0)
+ mmu_free_root_page(vcpu->kvm,
+ &mmu->pae_root[i],
+ &invalid_list);
+ mmu->root_hpa = INVALID_PAGE;
+ }
}
kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
@@ -3546,7 +3677,7 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu)
return mmu_alloc_shadow_roots(vcpu);
}
-static void mmu_sync_roots(struct kvm_vcpu *vcpu)
+void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
{
int i;
struct kvm_mmu_page *sp;
@@ -3558,14 +3689,39 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu)
return;
vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY);
- kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);
+
if (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL) {
hpa_t root = vcpu->arch.mmu.root_hpa;
+
sp = page_header(root);
+
+ /*
+ * Even if another CPU was marking the SP as unsync-ed
+ * simultaneously, any guest page table changes are not
+ * guaranteed to be visible anyway until this VCPU issues a TLB
+ * flush strictly after those changes are made. We only need to
+ * ensure that the other CPU sets these flags before any actual
+ * changes to the page tables are made. The comments in
+ * mmu_need_write_protect() describe what could go wrong if this
+ * requirement isn't satisfied.
+ */
+ if (!smp_load_acquire(&sp->unsync) &&
+ !smp_load_acquire(&sp->unsync_children))
+ return;
+
+ spin_lock(&vcpu->kvm->mmu_lock);
+ kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);
+
mmu_sync_children(vcpu, sp);
+
kvm_mmu_audit(vcpu, AUDIT_POST_SYNC);
+ spin_unlock(&vcpu->kvm->mmu_lock);
return;
}
+
+ spin_lock(&vcpu->kvm->mmu_lock);
+ kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);
+
for (i = 0; i < 4; ++i) {
hpa_t root = vcpu->arch.mmu.pae_root[i];
@@ -3575,13 +3731,8 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu)
mmu_sync_children(vcpu, sp);
}
}
- kvm_mmu_audit(vcpu, AUDIT_POST_SYNC);
-}
-void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
-{
- spin_lock(&vcpu->kvm->mmu_lock);
- mmu_sync_roots(vcpu);
+ kvm_mmu_audit(vcpu, AUDIT_POST_SYNC);
spin_unlock(&vcpu->kvm->mmu_lock);
}
EXPORT_SYMBOL_GPL(kvm_mmu_sync_roots);
@@ -3948,16 +4099,107 @@ static void nonpaging_init_context(struct kvm_vcpu *vcpu,
context->update_pte = nonpaging_update_pte;
context->root_level = 0;
context->shadow_root_level = PT32E_ROOT_LEVEL;
- context->root_hpa = INVALID_PAGE;
context->direct_map = true;
context->nx = false;
}
-void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu)
+/*
+ * Find out if a previously cached root matching the new CR3/role is available.
+ * The current root is also inserted into the cache.
+ * If a matching root was found, it is assigned to kvm_mmu->root_hpa and true is
+ * returned.
+ * Otherwise, the LRU root from the cache is assigned to kvm_mmu->root_hpa and
+ * false is returned. This root should now be freed by the caller.
+ */
+static bool cached_root_available(struct kvm_vcpu *vcpu, gpa_t new_cr3,
+ union kvm_mmu_page_role new_role)
+{
+ uint i;
+ struct kvm_mmu_root_info root;
+ struct kvm_mmu *mmu = &vcpu->arch.mmu;
+
+ root.cr3 = mmu->get_cr3(vcpu);
+ root.hpa = mmu->root_hpa;
+
+ for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
+ swap(root, mmu->prev_roots[i]);
+
+ if (new_cr3 == root.cr3 && VALID_PAGE(root.hpa) &&
+ page_header(root.hpa) != NULL &&
+ new_role.word == page_header(root.hpa)->role.word)
+ break;
+ }
+
+ mmu->root_hpa = root.hpa;
+
+ return i < KVM_MMU_NUM_PREV_ROOTS;
+}
+
+static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3,
+ union kvm_mmu_page_role new_role,
+ bool skip_tlb_flush)
{
- kvm_mmu_free_roots(vcpu);
+ struct kvm_mmu *mmu = &vcpu->arch.mmu;
+
+ /*
+ * For now, limit the fast switch to 64-bit hosts+VMs in order to avoid
+ * having to deal with PDPTEs. We may add support for 32-bit hosts/VMs
+ * later if necessary.
+ */
+ if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL &&
+ mmu->root_level >= PT64_ROOT_4LEVEL) {
+ if (mmu_check_root(vcpu, new_cr3 >> PAGE_SHIFT))
+ return false;
+
+ if (cached_root_available(vcpu, new_cr3, new_role)) {
+ /*
+ * It is possible that the cached previous root page is
+ * obsolete because of a change in the MMU
+ * generation number. However, that is accompanied by
+ * KVM_REQ_MMU_RELOAD, which will free the root that we
+ * have set here and allocate a new one.
+ */
+
+ kvm_make_request(KVM_REQ_LOAD_CR3, vcpu);
+ if (!skip_tlb_flush) {
+ kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
+ kvm_x86_ops->tlb_flush(vcpu, true);
+ }
+
+ /*
+ * The last MMIO access's GVA and GPA are cached in the
+ * VCPU. When switching to a new CR3, that GVA->GPA
+ * mapping may no longer be valid. So clear any cached
+ * MMIO info even when we don't need to sync the shadow
+ * page tables.
+ */
+ vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY);
+
+ __clear_sp_write_flooding_count(
+ page_header(mmu->root_hpa));
+
+ return true;
+ }
+ }
+
+ return false;
}
+static void __kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3,
+ union kvm_mmu_page_role new_role,
+ bool skip_tlb_flush)
+{
+ if (!fast_cr3_switch(vcpu, new_cr3, new_role, skip_tlb_flush))
+ kvm_mmu_free_roots(vcpu, KVM_MMU_ROOT_CURRENT);
+}
+
+void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, bool skip_tlb_flush)
+{
+ __kvm_mmu_new_cr3(vcpu, new_cr3, kvm_mmu_calc_root_page_role(vcpu),
+ skip_tlb_flush);
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_new_cr3);
+
static unsigned long get_cr3(struct kvm_vcpu *vcpu)
{
return kvm_read_cr3(vcpu);
@@ -4432,7 +4674,6 @@ static void paging64_init_context_common(struct kvm_vcpu *vcpu,
context->invlpg = paging64_invlpg;
context->update_pte = paging64_update_pte;
context->shadow_root_level = level;
- context->root_hpa = INVALID_PAGE;
context->direct_map = false;
}
@@ -4462,7 +4703,6 @@ static void paging32_init_context(struct kvm_vcpu *vcpu,
context->invlpg = paging32_invlpg;
context->update_pte = paging32_update_pte;
context->shadow_root_level = PT32E_ROOT_LEVEL;
- context->root_hpa = INVALID_PAGE;
context->direct_map = false;
}
@@ -4472,20 +4712,32 @@ static void paging32E_init_context(struct kvm_vcpu *vcpu,
paging64_init_context_common(vcpu, context, PT32E_ROOT_LEVEL);
}
+static union kvm_mmu_page_role
+kvm_calc_tdp_mmu_root_page_role(struct kvm_vcpu *vcpu)
+{
+ union kvm_mmu_page_role role = {0};
+
+ role.guest_mode = is_guest_mode(vcpu);
+ role.smm = is_smm(vcpu);
+ role.ad_disabled = (shadow_accessed_mask == 0);
+ role.level = kvm_x86_ops->get_tdp_level(vcpu);
+ role.direct = true;
+ role.access = ACC_ALL;
+
+ return role;
+}
+
static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
{
struct kvm_mmu *context = &vcpu->arch.mmu;
- context->base_role.word = 0;
- context->base_role.guest_mode = is_guest_mode(vcpu);
- context->base_role.smm = is_smm(vcpu);
- context->base_role.ad_disabled = (shadow_accessed_mask == 0);
+ context->base_role.word = mmu_base_role_mask.word &
+ kvm_calc_tdp_mmu_root_page_role(vcpu).word;
context->page_fault = tdp_page_fault;
context->sync_page = nonpaging_sync_page;
context->invlpg = nonpaging_invlpg;
context->update_pte = nonpaging_update_pte;
context->shadow_root_level = kvm_x86_ops->get_tdp_level(vcpu);
- context->root_hpa = INVALID_PAGE;
context->direct_map = true;
context->set_cr3 = kvm_x86_ops->set_tdp_cr3;
context->get_cr3 = get_cr3;
@@ -4520,13 +4772,36 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
reset_tdp_shadow_zero_bits_mask(vcpu, context);
}
-void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
+static union kvm_mmu_page_role
+kvm_calc_shadow_mmu_root_page_role(struct kvm_vcpu *vcpu)
{
+ union kvm_mmu_page_role role = {0};
bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
bool smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
- struct kvm_mmu *context = &vcpu->arch.mmu;
- MMU_WARN_ON(VALID_PAGE(context->root_hpa));
+ role.nxe = is_nx(vcpu);
+ role.cr4_pae = !!is_pae(vcpu);
+ role.cr0_wp = is_write_protection(vcpu);
+ role.smep_andnot_wp = smep && !is_write_protection(vcpu);
+ role.smap_andnot_wp = smap && !is_write_protection(vcpu);
+ role.guest_mode = is_guest_mode(vcpu);
+ role.smm = is_smm(vcpu);
+ role.direct = !is_paging(vcpu);
+ role.access = ACC_ALL;
+
+ if (!is_long_mode(vcpu))
+ role.level = PT32E_ROOT_LEVEL;
+ else if (is_la57_mode(vcpu))
+ role.level = PT64_ROOT_5LEVEL;
+ else
+ role.level = PT64_ROOT_4LEVEL;
+
+ return role;
+}
+
+void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
+{
+ struct kvm_mmu *context = &vcpu->arch.mmu;
if (!is_paging(vcpu))
nonpaging_init_context(vcpu, context);
@@ -4537,26 +4812,34 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
else
paging32_init_context(vcpu, context);
- context->base_role.nxe = is_nx(vcpu);
- context->base_role.cr4_pae = !!is_pae(vcpu);
- context->base_role.cr0_wp = is_write_protection(vcpu);
- context->base_role.smep_andnot_wp
- = smep && !is_write_protection(vcpu);
- context->base_role.smap_andnot_wp
- = smap && !is_write_protection(vcpu);
- context->base_role.guest_mode = is_guest_mode(vcpu);
- context->base_role.smm = is_smm(vcpu);
+ context->base_role.word = mmu_base_role_mask.word &
+ kvm_calc_shadow_mmu_root_page_role(vcpu).word;
reset_shadow_zero_bits_mask(vcpu, context);
}
EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
+static union kvm_mmu_page_role
+kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty)
+{
+ union kvm_mmu_page_role role = vcpu->arch.mmu.base_role;
+
+ role.level = PT64_ROOT_4LEVEL;
+ role.direct = false;
+ role.ad_disabled = !accessed_dirty;
+ role.guest_mode = true;
+ role.access = ACC_ALL;
+
+ return role;
+}
+
void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
- bool accessed_dirty)
+ bool accessed_dirty, gpa_t new_eptp)
{
struct kvm_mmu *context = &vcpu->arch.mmu;
+ union kvm_mmu_page_role root_page_role =
+ kvm_calc_shadow_ept_root_page_role(vcpu, accessed_dirty);
- MMU_WARN_ON(VALID_PAGE(context->root_hpa));
-
+ __kvm_mmu_new_cr3(vcpu, new_eptp, root_page_role, false);
context->shadow_root_level = PT64_ROOT_4LEVEL;
context->nx = true;
@@ -4567,10 +4850,8 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
context->invlpg = ept_invlpg;
context->update_pte = ept_update_pte;
context->root_level = PT64_ROOT_4LEVEL;
- context->root_hpa = INVALID_PAGE;
context->direct_map = false;
- context->base_role.ad_disabled = !accessed_dirty;
- context->base_role.guest_mode = 1;
+ context->base_role.word = root_page_role.word & mmu_base_role_mask.word;
update_permission_bitmask(vcpu, context, true);
update_pkru_bitmask(vcpu, context, true);
update_last_nonleaf_level(vcpu, context);
@@ -4633,8 +4914,17 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
update_last_nonleaf_level(vcpu, g_context);
}
-static void init_kvm_mmu(struct kvm_vcpu *vcpu)
+void kvm_init_mmu(struct kvm_vcpu *vcpu, bool reset_roots)
{
+ if (reset_roots) {
+ uint i;
+
+ vcpu->arch.mmu.root_hpa = INVALID_PAGE;
+
+ for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
+ vcpu->arch.mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
+ }
+
if (mmu_is_nested(vcpu))
init_kvm_nested_mmu(vcpu);
else if (tdp_enabled)
@@ -4642,11 +4932,21 @@ static void init_kvm_mmu(struct kvm_vcpu *vcpu)
else
init_kvm_softmmu(vcpu);
}
+EXPORT_SYMBOL_GPL(kvm_init_mmu);
+
+static union kvm_mmu_page_role
+kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu)
+{
+ if (tdp_enabled)
+ return kvm_calc_tdp_mmu_root_page_role(vcpu);
+ else
+ return kvm_calc_shadow_mmu_root_page_role(vcpu);
+}
void kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
{
kvm_mmu_unload(vcpu);
- init_kvm_mmu(vcpu);
+ kvm_init_mmu(vcpu, true);
}
EXPORT_SYMBOL_GPL(kvm_mmu_reset_context);
@@ -4661,8 +4961,8 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
kvm_mmu_sync_roots(vcpu);
if (r)
goto out;
- /* set_cr3() should ensure TLB has been flushed */
- vcpu->arch.mmu.set_cr3(vcpu, vcpu->arch.mmu.root_hpa);
+ kvm_mmu_load_cr3(vcpu);
+ kvm_x86_ops->tlb_flush(vcpu, true);
out:
return r;
}
@@ -4670,7 +4970,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_load);
void kvm_mmu_unload(struct kvm_vcpu *vcpu)
{
- kvm_mmu_free_roots(vcpu);
+ kvm_mmu_free_roots(vcpu, KVM_MMU_ROOTS_ALL);
WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa));
}
EXPORT_SYMBOL_GPL(kvm_mmu_unload);
@@ -4823,16 +5123,6 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
u64 entry, gentry, *spte;
int npte;
bool remote_flush, local_flush;
- union kvm_mmu_page_role mask = { };
-
- mask.cr0_wp = 1;
- mask.cr4_pae = 1;
- mask.nxe = 1;
- mask.smep_andnot_wp = 1;
- mask.smap_andnot_wp = 1;
- mask.smm = 1;
- mask.guest_mode = 1;
- mask.ad_disabled = 1;
/*
* If we don't have indirect shadow pages, it means no page is
@@ -4876,7 +5166,7 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
mmu_page_zap_pte(vcpu->kvm, sp, spte);
if (gentry &&
!((sp->role.word ^ vcpu->arch.mmu.base_role.word)
- & mask.word) && rmap_can_add(vcpu))
+ & mmu_base_role_mask.word) && rmap_can_add(vcpu))
mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);
if (need_remote_flush(entry, *spte))
remote_flush = true;
@@ -5001,12 +5291,67 @@ EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
{
- vcpu->arch.mmu.invlpg(vcpu, gva);
- kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+ struct kvm_mmu *mmu = &vcpu->arch.mmu;
+ int i;
+
+ /* INVLPG on a * non-canonical address is a NOP according to the SDM. */
+ if (is_noncanonical_address(gva, vcpu))
+ return;
+
+ mmu->invlpg(vcpu, gva, mmu->root_hpa);
+
+ /*
+ * INVLPG is required to invalidate any global mappings for the VA,
+ * irrespective of PCID. Since it would take us roughly similar amount
+ * of work to determine whether any of the prev_root mappings of the VA
+ * is marked global, or to just sync it blindly, so we might as well
+ * just always sync it.
+ *
+ * Mappings not reachable via the current cr3 or the prev_roots will be
+ * synced when switching to that cr3, so nothing needs to be done here
+ * for them.
+ */
+ for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
+ if (VALID_PAGE(mmu->prev_roots[i].hpa))
+ mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
+
+ kvm_x86_ops->tlb_flush_gva(vcpu, gva);
++vcpu->stat.invlpg;
}
EXPORT_SYMBOL_GPL(kvm_mmu_invlpg);
+void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid)
+{
+ struct kvm_mmu *mmu = &vcpu->arch.mmu;
+ bool tlb_flush = false;
+ uint i;
+
+ if (pcid == kvm_get_active_pcid(vcpu)) {
+ mmu->invlpg(vcpu, gva, mmu->root_hpa);
+ tlb_flush = true;
+ }
+
+ for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
+ if (VALID_PAGE(mmu->prev_roots[i].hpa) &&
+ pcid == kvm_get_pcid(vcpu, mmu->prev_roots[i].cr3)) {
+ mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa);
+ tlb_flush = true;
+ }
+ }
+
+ if (tlb_flush)
+ kvm_x86_ops->tlb_flush_gva(vcpu, gva);
+
+ ++vcpu->stat.invlpg;
+
+ /*
+ * Mappings not reachable via the current cr3 or the prev_roots will be
+ * synced when switching to that cr3, so nothing needs to be done here
+ * for them.
+ */
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_invpcid_gva);
+
void kvm_enable_tdp(void)
{
tdp_enabled = true;
@@ -5030,6 +5375,9 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
struct page *page;
int i;
+ if (tdp_enabled)
+ return 0;
+
/*
* When emulating 32-bit mode, cr3 is only 32 bits even on x86_64.
* Therefore we need to allocate shadow page tables in the first
@@ -5048,11 +5396,16 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
int kvm_mmu_create(struct kvm_vcpu *vcpu)
{
+ uint i;
+
vcpu->arch.walk_mmu = &vcpu->arch.mmu;
vcpu->arch.mmu.root_hpa = INVALID_PAGE;
vcpu->arch.mmu.translate_gpa = translate_gpa;
vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa;
+ for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
+ vcpu->arch.mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
+
return alloc_mmu_pages(vcpu);
}
@@ -5060,7 +5413,7 @@ void kvm_mmu_setup(struct kvm_vcpu *vcpu)
{
MMU_WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa));
- init_kvm_mmu(vcpu);
+ kvm_init_mmu(vcpu, true);
}
static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
@@ -5500,7 +5853,7 @@ int kvm_mmu_module_init(void)
{
int ret = -ENOMEM;
- kvm_mmu_clear_all_pte_masks();
+ kvm_mmu_reset_all_pte_masks();
pte_list_desc_cache = kmem_cache_create("pte_list_desc",
sizeof(struct pte_list_desc),
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 5b408c0ad612..1fab69c0b2f3 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -61,9 +61,10 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value);
void
reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
+void kvm_init_mmu(struct kvm_vcpu *vcpu, bool reset_roots);
void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu);
void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
- bool accessed_dirty);
+ bool accessed_dirty, gpa_t new_eptp);
bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu);
int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
u64 fault_address, char *insn, int insn_len);
@@ -85,6 +86,27 @@ static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
return kvm_mmu_load(vcpu);
}
+static inline unsigned long kvm_get_pcid(struct kvm_vcpu *vcpu, gpa_t cr3)
+{
+ BUILD_BUG_ON((X86_CR3_PCID_MASK & PAGE_MASK) != 0);
+
+ return kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE)
+ ? cr3 & X86_CR3_PCID_MASK
+ : 0;
+}
+
+static inline unsigned long kvm_get_active_pcid(struct kvm_vcpu *vcpu)
+{
+ return kvm_get_pcid(vcpu, kvm_read_cr3(vcpu));
+}
+
+static inline void kvm_mmu_load_cr3(struct kvm_vcpu *vcpu)
+{
+ if (VALID_PAGE(vcpu->arch.mmu.root_hpa))
+ vcpu->arch.mmu.set_cr3(vcpu, vcpu->arch.mmu.root_hpa |
+ kvm_get_active_pcid(vcpu));
+}
+
/*
* Currently, we have two sorts of write-protection, a) the first one
* write-protects guest page to sync the guest modification, b) another one is
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 6288e9d7068e..14ffd973df54 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -181,7 +181,7 @@ no_present:
* set bit 0 if execute only is supported. Here, we repurpose ACC_USER_MASK
* to signify readability since it isn't used in the EPT case
*/
-static inline unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, u64 gpte)
+static inline unsigned FNAME(gpte_access)(u64 gpte)
{
unsigned access;
#if PTTYPE == PTTYPE_EPT
@@ -394,8 +394,8 @@ retry_walk:
accessed_dirty = have_ad ? pte_access & PT_GUEST_ACCESSED_MASK : 0;
/* Convert to ACC_*_MASK flags for struct guest_walker. */
- walker->pt_access = FNAME(gpte_access)(vcpu, pt_access ^ walk_nx_mask);
- walker->pte_access = FNAME(gpte_access)(vcpu, pte_access ^ walk_nx_mask);
+ walker->pt_access = FNAME(gpte_access)(pt_access ^ walk_nx_mask);
+ walker->pte_access = FNAME(gpte_access)(pte_access ^ walk_nx_mask);
errcode = permission_fault(vcpu, mmu, walker->pte_access, pte_pkey, access);
if (unlikely(errcode))
goto error;
@@ -508,7 +508,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
gfn = gpte_to_gfn(gpte);
- pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
+ pte_access = sp->role.access & FNAME(gpte_access)(gpte);
FNAME(protect_clean_gpte)(&vcpu->arch.mmu, &pte_access, gpte);
pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
no_dirty_log && (pte_access & ACC_WRITE_MASK));
@@ -856,7 +856,7 @@ static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp)
return gfn_to_gpa(sp->gfn) + offset * sizeof(pt_element_t);
}
-static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
+static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa)
{
struct kvm_shadow_walk_iterator iterator;
struct kvm_mmu_page *sp;
@@ -871,13 +871,13 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
*/
mmu_topup_memory_caches(vcpu);
- if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) {
+ if (!VALID_PAGE(root_hpa)) {
WARN_ON(1);
return;
}
spin_lock(&vcpu->kvm->mmu_lock);
- for_each_shadow_entry(vcpu, gva, iterator) {
+ for_each_shadow_entry_using_root(vcpu, root_hpa, gva, iterator) {
level = iterator.level;
sptep = iterator.sptep;
@@ -968,6 +968,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
int i, nr_present = 0;
bool host_writable;
gpa_t first_pte_gpa;
+ int set_spte_ret = 0;
/* direct kvm_mmu_page can not be unsync. */
BUG_ON(sp->role.direct);
@@ -1002,7 +1003,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
gfn = gpte_to_gfn(gpte);
pte_access = sp->role.access;
- pte_access &= FNAME(gpte_access)(vcpu, gpte);
+ pte_access &= FNAME(gpte_access)(gpte);
FNAME(protect_clean_gpte)(&vcpu->arch.mmu, &pte_access, gpte);
if (sync_mmio_spte(vcpu, &sp->spt[i], gfn, pte_access,
@@ -1024,12 +1025,15 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
host_writable = sp->spt[i] & SPTE_HOST_WRITEABLE;
- set_spte(vcpu, &sp->spt[i], pte_access,
- PT_PAGE_TABLE_LEVEL, gfn,
- spte_to_pfn(sp->spt[i]), true, false,
- host_writable);
+ set_spte_ret |= set_spte(vcpu, &sp->spt[i],
+ pte_access, PT_PAGE_TABLE_LEVEL,
+ gfn, spte_to_pfn(sp->spt[i]),
+ true, false, host_writable);
}
+ if (set_spte_ret & SET_SPTE_NEED_REMOTE_TLB_FLUSH)
+ kvm_flush_remote_tlbs(vcpu->kvm);
+
return nr_present;
}
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index f059a73f0fd0..6276140044d0 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2884,7 +2884,6 @@ static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu,
svm->vmcb->control.nested_cr3 = __sme_set(root);
mark_dirty(svm->vmcb, VMCB_NPT);
- svm_flush_tlb(vcpu, true);
}
static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
@@ -5435,6 +5434,13 @@ static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
svm->asid_generation--;
}
+static void svm_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t gva)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ invlpga(gva, svm->vmcb->control.asid);
+}
+
static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
{
}
@@ -5580,8 +5586,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
clgi();
- local_irq_enable();
-
/*
* If this vCPU has touched SPEC_CTRL, restore the guest's value if
* it's non-zero. Since vmentry is serialising on affected CPUs, there
@@ -5590,6 +5594,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
*/
x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl);
+ local_irq_enable();
+
asm volatile (
"push %%" _ASM_BP "; \n\t"
"mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t"
@@ -5712,12 +5718,12 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
- x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl);
-
reload_tss(vcpu);
local_irq_disable();
+ x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl);
+
vcpu->arch.cr2 = svm->vmcb->save.cr2;
vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
@@ -5766,7 +5772,6 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
svm->vmcb->save.cr3 = __sme_set(root);
mark_dirty(svm->vmcb, VMCB_CR);
- svm_flush_tlb(vcpu, true);
}
static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root)
@@ -5779,8 +5784,6 @@ static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root)
/* Also sync guest cr3 here in case we live migrate */
svm->vmcb->save.cr3 = kvm_read_cr3(vcpu);
mark_dirty(svm->vmcb, VMCB_CR);
-
- svm_flush_tlb(vcpu, true);
}
static int is_disabled(void)
@@ -7090,6 +7093,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.set_rflags = svm_set_rflags,
.tlb_flush = svm_flush_tlb,
+ .tlb_flush_gva = svm_flush_tlb_gva,
.run = svm_vcpu_run,
.handle_exit = handle_exit,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 46b428c0990e..8dae47e7267a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -38,6 +38,7 @@
#include "kvm_cache_regs.h"
#include "x86.h"
+#include <asm/asm.h>
#include <asm/cpu.h>
#include <asm/io.h>
#include <asm/desc.h>
@@ -197,12 +198,14 @@ static enum vmx_l1d_flush_state __read_mostly vmentry_l1d_flush_param = VMENTER_
static const struct {
const char *option;
- enum vmx_l1d_flush_state cmd;
+ bool for_parse;
} vmentry_l1d_param[] = {
- {"auto", VMENTER_L1D_FLUSH_AUTO},
- {"never", VMENTER_L1D_FLUSH_NEVER},
- {"cond", VMENTER_L1D_FLUSH_COND},
- {"always", VMENTER_L1D_FLUSH_ALWAYS},
+ [VMENTER_L1D_FLUSH_AUTO] = {"auto", true},
+ [VMENTER_L1D_FLUSH_NEVER] = {"never", true},
+ [VMENTER_L1D_FLUSH_COND] = {"cond", true},
+ [VMENTER_L1D_FLUSH_ALWAYS] = {"always", true},
+ [VMENTER_L1D_FLUSH_EPT_DISABLED] = {"EPT disabled", false},
+ [VMENTER_L1D_FLUSH_NOT_REQUIRED] = {"not required", false},
};
#define L1D_CACHE_ORDER 4
@@ -218,15 +221,15 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
return 0;
}
- if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
- u64 msr;
+ if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
+ u64 msr;
- rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr);
- if (msr & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) {
- l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
- return 0;
- }
- }
+ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr);
+ if (msr & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) {
+ l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
+ return 0;
+ }
+ }
/* If set to auto use the default l1tf mitigation method */
if (l1tf == VMENTER_L1D_FLUSH_AUTO) {
@@ -286,8 +289,9 @@ static int vmentry_l1d_flush_parse(const char *s)
if (s) {
for (i = 0; i < ARRAY_SIZE(vmentry_l1d_param); i++) {
- if (sysfs_streq(s, vmentry_l1d_param[i].option))
- return vmentry_l1d_param[i].cmd;
+ if (vmentry_l1d_param[i].for_parse &&
+ sysfs_streq(s, vmentry_l1d_param[i].option))
+ return i;
}
}
return -EINVAL;
@@ -297,13 +301,13 @@ static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp)
{
int l1tf, ret;
- if (!boot_cpu_has(X86_BUG_L1TF))
- return 0;
-
l1tf = vmentry_l1d_flush_parse(s);
if (l1tf < 0)
return l1tf;
+ if (!boot_cpu_has(X86_BUG_L1TF))
+ return 0;
+
/*
* Has vmx_init() run already? If not then this is the pre init
* parameter parsing. In that case just store the value and let
@@ -323,6 +327,9 @@ static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp)
static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp)
{
+ if (WARN_ON_ONCE(l1tf_vmx_mitigation >= ARRAY_SIZE(vmentry_l1d_param)))
+ return sprintf(s, "???\n");
+
return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option);
}
@@ -332,23 +339,54 @@ static const struct kernel_param_ops vmentry_l1d_flush_ops = {
};
module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, NULL, 0644);
+enum ept_pointers_status {
+ EPT_POINTERS_CHECK = 0,
+ EPT_POINTERS_MATCH = 1,
+ EPT_POINTERS_MISMATCH = 2
+};
+
struct kvm_vmx {
struct kvm kvm;
unsigned int tss_addr;
bool ept_identity_pagetable_done;
gpa_t ept_identity_map_addr;
+
+ enum ept_pointers_status ept_pointers_match;
+ spinlock_t ept_pointer_lock;
};
#define NR_AUTOLOAD_MSRS 8
+struct vmcs_hdr {
+ u32 revision_id:31;
+ u32 shadow_vmcs:1;
+};
+
struct vmcs {
- u32 revision_id;
+ struct vmcs_hdr hdr;
u32 abort;
char data[0];
};
/*
+ * vmcs_host_state tracks registers that are loaded from the VMCS on VMEXIT
+ * and whose values change infrequently, but are not constant. I.e. this is
+ * used as a write-through cache of the corresponding VMCS fields.
+ */
+struct vmcs_host_state {
+ unsigned long cr3; /* May not match real cr3 */
+ unsigned long cr4; /* May not match real cr4 */
+ unsigned long gs_base;
+ unsigned long fs_base;
+
+ u16 fs_sel, gs_sel, ldt_sel;
+#ifdef CONFIG_X86_64
+ u16 ds_sel, es_sel;
+#endif
+};
+
+/*
* Track a VMCS that may be loaded on a certain CPU. If it is (cpu!=-1), also
* remember whether it was VMLAUNCHed, and maintain a linked list of all VMCSs
* loaded on this CPU (so we can clear them if the CPU goes down).
@@ -359,14 +397,13 @@ struct loaded_vmcs {
int cpu;
bool launched;
bool nmi_known_unmasked;
- unsigned long vmcs_host_cr3; /* May not match real cr3 */
- unsigned long vmcs_host_cr4; /* May not match real cr4 */
/* Support for vnmi-less CPUs */
int soft_vnmi_blocked;
ktime_t entry_time;
s64 vnmi_blocked_time;
unsigned long *msr_bitmap;
struct list_head loaded_vmcss_on_cpu_link;
+ struct vmcs_host_state host_state;
};
struct shared_msr_entry {
@@ -397,7 +434,7 @@ struct __packed vmcs12 {
/* According to the Intel spec, a VMCS region must start with the
* following two fields. Then follow implementation-specific data.
*/
- u32 revision_id;
+ struct vmcs_hdr hdr;
u32 abort;
u32 launch_state; /* set to 0 by VMCLEAR, to 1 by VMLAUNCH */
@@ -565,7 +602,7 @@ struct __packed vmcs12 {
"Offset of " #field " in struct vmcs12 has changed.")
static inline void vmx_check_vmcs12_offsets(void) {
- CHECK_OFFSET(revision_id, 0);
+ CHECK_OFFSET(hdr, 0);
CHECK_OFFSET(abort, 4);
CHECK_OFFSET(launch_state, 8);
CHECK_OFFSET(io_bitmap_a, 40);
@@ -784,6 +821,12 @@ struct nested_vmx {
*/
struct vmcs12 *cached_vmcs12;
/*
+ * Cache of the guest's shadow VMCS, existing outside of guest
+ * memory. Loaded from guest memory during VM entry. Flushed
+ * to guest memory during VM exit.
+ */
+ struct vmcs12 *cached_shadow_vmcs12;
+ /*
* Indicates if the shadow vmcs must be updated with the
* data hold by vmcs12
*/
@@ -933,25 +976,20 @@ struct vcpu_vmx {
/*
* loaded_vmcs points to the VMCS currently used in this vcpu. For a
* non-nested (L1) guest, it always points to vmcs01. For a nested
- * guest (L2), it points to a different VMCS.
+ * guest (L2), it points to a different VMCS. loaded_cpu_state points
+ * to the VMCS whose state is loaded into the CPU registers that only
+ * need to be switched when transitioning to/from the kernel; a NULL
+ * value indicates that host state is loaded.
*/
struct loaded_vmcs vmcs01;
struct loaded_vmcs *loaded_vmcs;
+ struct loaded_vmcs *loaded_cpu_state;
bool __launched; /* temporary, used in vmx_vcpu_run */
struct msr_autoload {
struct vmx_msrs guest;
struct vmx_msrs host;
} msr_autoload;
- struct {
- int loaded;
- u16 fs_sel, gs_sel, ldt_sel;
-#ifdef CONFIG_X86_64
- u16 ds_sel, es_sel;
-#endif
- int gs_ldt_reload_needed;
- int fs_reload_needed;
- u64 msr_host_bndcfgs;
- } host_state;
+
struct {
int vm86_active;
ulong save_rflags;
@@ -1001,6 +1039,7 @@ struct vcpu_vmx {
*/
u64 msr_ia32_feature_control;
u64 msr_ia32_feature_control_valid_bits;
+ u64 ept_pointer;
};
enum segment_cache_field {
@@ -1220,6 +1259,11 @@ static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu)
return to_vmx(vcpu)->nested.cached_vmcs12;
}
+static inline struct vmcs12 *get_shadow_vmcs12(struct kvm_vcpu *vcpu)
+{
+ return to_vmx(vcpu)->nested.cached_shadow_vmcs12;
+}
+
static bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu);
static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu);
static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa);
@@ -1490,6 +1534,48 @@ static void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf)
* GUEST_IA32_RTIT_CTL = 0x00002814,
*/
}
+
+/* check_ept_pointer() should be under protection of ept_pointer_lock. */
+static void check_ept_pointer_match(struct kvm *kvm)
+{
+ struct kvm_vcpu *vcpu;
+ u64 tmp_eptp = INVALID_PAGE;
+ int i;
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (!VALID_PAGE(tmp_eptp)) {
+ tmp_eptp = to_vmx(vcpu)->ept_pointer;
+ } else if (tmp_eptp != to_vmx(vcpu)->ept_pointer) {
+ to_kvm_vmx(kvm)->ept_pointers_match
+ = EPT_POINTERS_MISMATCH;
+ return;
+ }
+ }
+
+ to_kvm_vmx(kvm)->ept_pointers_match = EPT_POINTERS_MATCH;
+}
+
+static int vmx_hv_remote_flush_tlb(struct kvm *kvm)
+{
+ int ret;
+
+ spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock);
+
+ if (to_kvm_vmx(kvm)->ept_pointers_match == EPT_POINTERS_CHECK)
+ check_ept_pointer_match(kvm);
+
+ if (to_kvm_vmx(kvm)->ept_pointers_match != EPT_POINTERS_MATCH) {
+ ret = -ENOTSUPP;
+ goto out;
+ }
+
+ ret = hyperv_flush_guest_mapping(
+ to_vmx(kvm_get_vcpu(kvm, 0))->ept_pointer);
+
+out:
+ spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
+ return ret;
+}
#else /* !IS_ENABLED(CONFIG_HYPERV) */
static inline void evmcs_write64(unsigned long field, u64 value) {}
static inline void evmcs_write32(unsigned long field, u32 value) {}
@@ -1604,6 +1690,12 @@ static inline bool cpu_has_vmx_virtual_intr_delivery(void)
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
}
+static inline bool cpu_has_vmx_encls_vmexit(void)
+{
+ return vmcs_config.cpu_based_2nd_exec_ctrl &
+ SECONDARY_EXEC_ENCLS_EXITING;
+}
+
/*
* Comment's format: document - errata name - stepping - processor name.
* Refer from
@@ -1864,6 +1956,12 @@ static inline bool nested_cpu_supports_monitor_trap_flag(struct kvm_vcpu *vcpu)
CPU_BASED_MONITOR_TRAP_FLAG;
}
+static inline bool nested_cpu_has_vmx_shadow_vmcs(struct kvm_vcpu *vcpu)
+{
+ return to_vmx(vcpu)->nested.msrs.secondary_ctls_high &
+ SECONDARY_EXEC_SHADOW_VMCS;
+}
+
static inline bool nested_cpu_has(struct vmcs12 *vmcs12, u32 bit)
{
return vmcs12->cpu_based_vm_exec_control & bit;
@@ -1944,6 +2042,11 @@ static inline bool nested_cpu_has_eptp_switching(struct vmcs12 *vmcs12)
VMX_VMFUNC_EPTP_SWITCHING);
}
+static inline bool nested_cpu_has_shadow_vmcs(struct vmcs12 *vmcs12)
+{
+ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_SHADOW_VMCS);
+}
+
static inline bool is_nmi(u32 intr_info)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
@@ -1974,11 +2077,12 @@ static inline void __invvpid(int ext, u16 vpid, gva_t gva)
u64 rsvd : 48;
u64 gva;
} operand = { vpid, 0, gva };
+ bool error;
- asm volatile (__ex(ASM_VMX_INVVPID)
- /* CF==1 or ZF==1 --> rc = -1 */
- "; ja 1f ; ud2 ; 1:"
- : : "a"(&operand), "c"(ext) : "cc", "memory");
+ asm volatile (__ex(ASM_VMX_INVVPID) CC_SET(na)
+ : CC_OUT(na) (error) : "a"(&operand), "c"(ext)
+ : "memory");
+ BUG_ON(error);
}
static inline void __invept(int ext, u64 eptp, gpa_t gpa)
@@ -1986,11 +2090,12 @@ static inline void __invept(int ext, u64 eptp, gpa_t gpa)
struct {
u64 eptp, gpa;
} operand = {eptp, gpa};
+ bool error;
- asm volatile (__ex(ASM_VMX_INVEPT)
- /* CF==1 or ZF==1 --> rc = -1 */
- "; ja 1f ; ud2 ; 1:\n"
- : : "a" (&operand), "c" (ext) : "cc", "memory");
+ asm volatile (__ex(ASM_VMX_INVEPT) CC_SET(na)
+ : CC_OUT(na) (error) : "a" (&operand), "c" (ext)
+ : "memory");
+ BUG_ON(error);
}
static struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
@@ -2006,12 +2111,12 @@ static struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
static void vmcs_clear(struct vmcs *vmcs)
{
u64 phys_addr = __pa(vmcs);
- u8 error;
+ bool error;
- asm volatile (__ex(ASM_VMX_VMCLEAR_RAX) "; setna %0"
- : "=qm"(error) : "a"(&phys_addr), "m"(phys_addr)
- : "cc", "memory");
- if (error)
+ asm volatile (__ex(ASM_VMX_VMCLEAR_RAX) CC_SET(na)
+ : CC_OUT(na) (error) : "a"(&phys_addr), "m"(phys_addr)
+ : "memory");
+ if (unlikely(error))
printk(KERN_ERR "kvm: vmclear fail: %p/%llx\n",
vmcs, phys_addr);
}
@@ -2028,15 +2133,15 @@ static inline void loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs)
static void vmcs_load(struct vmcs *vmcs)
{
u64 phys_addr = __pa(vmcs);
- u8 error;
+ bool error;
if (static_branch_unlikely(&enable_evmcs))
return evmcs_load(phys_addr);
- asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0"
- : "=qm"(error) : "a"(&phys_addr), "m"(phys_addr)
- : "cc", "memory");
- if (error)
+ asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) CC_SET(na)
+ : CC_OUT(na) (error) : "a"(&phys_addr), "m"(phys_addr)
+ : "memory");
+ if (unlikely(error))
printk(KERN_ERR "kvm: vmptrld %p/%llx failed\n",
vmcs, phys_addr);
}
@@ -2114,6 +2219,19 @@ static void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs)
__loaded_vmcs_clear, loaded_vmcs, 1);
}
+static inline bool vpid_sync_vcpu_addr(int vpid, gva_t addr)
+{
+ if (vpid == 0)
+ return true;
+
+ if (cpu_has_vmx_invvpid_individual_addr()) {
+ __invvpid(VMX_VPID_EXTENT_INDIVIDUAL_ADDR, vpid, addr);
+ return true;
+ }
+
+ return false;
+}
+
static inline void vpid_sync_vcpu_single(int vpid)
{
if (vpid == 0)
@@ -2248,10 +2366,10 @@ static noinline void vmwrite_error(unsigned long field, unsigned long value)
static __always_inline void __vmcs_writel(unsigned long field, unsigned long value)
{
- u8 error;
+ bool error;
- asm volatile (__ex(ASM_VMX_VMWRITE_RAX_RDX) "; setna %0"
- : "=q"(error) : "a"(value), "d"(field) : "cc");
+ asm volatile (__ex(ASM_VMX_VMWRITE_RAX_RDX) CC_SET(na)
+ : CC_OUT(na) (error) : "a"(value), "d"(field));
if (unlikely(error))
vmwrite_error(field, value);
}
@@ -2735,121 +2853,150 @@ static unsigned long segment_base(u16 selector)
}
#endif
-static void vmx_save_host_state(struct kvm_vcpu *vcpu)
+static void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct vmcs_host_state *host_state;
#ifdef CONFIG_X86_64
int cpu = raw_smp_processor_id();
- unsigned long fs_base, kernel_gs_base;
#endif
+ unsigned long fs_base, gs_base;
+ u16 fs_sel, gs_sel;
int i;
- if (vmx->host_state.loaded)
+ if (vmx->loaded_cpu_state)
return;
- vmx->host_state.loaded = 1;
+ vmx->loaded_cpu_state = vmx->loaded_vmcs;
+ host_state = &vmx->loaded_cpu_state->host_state;
+
/*
* Set host fs and gs selectors. Unfortunately, 22.2.3 does not
* allow segment selectors with cpl > 0 or ti == 1.
*/
- vmx->host_state.ldt_sel = kvm_read_ldt();
- vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel;
+ host_state->ldt_sel = kvm_read_ldt();
#ifdef CONFIG_X86_64
+ savesegment(ds, host_state->ds_sel);
+ savesegment(es, host_state->es_sel);
+
+ gs_base = cpu_kernelmode_gs_base(cpu);
if (likely(is_64bit_mm(current->mm))) {
save_fsgs_for_kvm();
- vmx->host_state.fs_sel = current->thread.fsindex;
- vmx->host_state.gs_sel = current->thread.gsindex;
+ fs_sel = current->thread.fsindex;
+ gs_sel = current->thread.gsindex;
fs_base = current->thread.fsbase;
- kernel_gs_base = current->thread.gsbase;
+ vmx->msr_host_kernel_gs_base = current->thread.gsbase;
} else {
-#endif
- savesegment(fs, vmx->host_state.fs_sel);
- savesegment(gs, vmx->host_state.gs_sel);
-#ifdef CONFIG_X86_64
+ savesegment(fs, fs_sel);
+ savesegment(gs, gs_sel);
fs_base = read_msr(MSR_FS_BASE);
- kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE);
- }
-#endif
- if (!(vmx->host_state.fs_sel & 7)) {
- vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel);
- vmx->host_state.fs_reload_needed = 0;
- } else {
- vmcs_write16(HOST_FS_SELECTOR, 0);
- vmx->host_state.fs_reload_needed = 1;
- }
- if (!(vmx->host_state.gs_sel & 7))
- vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel);
- else {
- vmcs_write16(HOST_GS_SELECTOR, 0);
- vmx->host_state.gs_ldt_reload_needed = 1;
+ vmx->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE);
}
-#ifdef CONFIG_X86_64
- savesegment(ds, vmx->host_state.ds_sel);
- savesegment(es, vmx->host_state.es_sel);
-
- vmcs_writel(HOST_FS_BASE, fs_base);
- vmcs_writel(HOST_GS_BASE, cpu_kernelmode_gs_base(cpu));
-
- vmx->msr_host_kernel_gs_base = kernel_gs_base;
if (is_long_mode(&vmx->vcpu))
wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
#else
- vmcs_writel(HOST_FS_BASE, segment_base(vmx->host_state.fs_sel));
- vmcs_writel(HOST_GS_BASE, segment_base(vmx->host_state.gs_sel));
+ savesegment(fs, fs_sel);
+ savesegment(gs, gs_sel);
+ fs_base = segment_base(fs_sel);
+ gs_base = segment_base(gs_sel);
#endif
- if (boot_cpu_has(X86_FEATURE_MPX))
- rdmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs);
+
+ if (unlikely(fs_sel != host_state->fs_sel)) {
+ if (!(fs_sel & 7))
+ vmcs_write16(HOST_FS_SELECTOR, fs_sel);
+ else
+ vmcs_write16(HOST_FS_SELECTOR, 0);
+ host_state->fs_sel = fs_sel;
+ }
+ if (unlikely(gs_sel != host_state->gs_sel)) {
+ if (!(gs_sel & 7))
+ vmcs_write16(HOST_GS_SELECTOR, gs_sel);
+ else
+ vmcs_write16(HOST_GS_SELECTOR, 0);
+ host_state->gs_sel = gs_sel;
+ }
+ if (unlikely(fs_base != host_state->fs_base)) {
+ vmcs_writel(HOST_FS_BASE, fs_base);
+ host_state->fs_base = fs_base;
+ }
+ if (unlikely(gs_base != host_state->gs_base)) {
+ vmcs_writel(HOST_GS_BASE, gs_base);
+ host_state->gs_base = gs_base;
+ }
+
for (i = 0; i < vmx->save_nmsrs; ++i)
kvm_set_shared_msr(vmx->guest_msrs[i].index,
vmx->guest_msrs[i].data,
vmx->guest_msrs[i].mask);
}
-static void __vmx_load_host_state(struct vcpu_vmx *vmx)
+static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx)
{
- if (!vmx->host_state.loaded)
+ struct vmcs_host_state *host_state;
+
+ if (!vmx->loaded_cpu_state)
return;
+ WARN_ON_ONCE(vmx->loaded_cpu_state != vmx->loaded_vmcs);
+ host_state = &vmx->loaded_cpu_state->host_state;
+
++vmx->vcpu.stat.host_state_reload;
- vmx->host_state.loaded = 0;
+ vmx->loaded_cpu_state = NULL;
+
#ifdef CONFIG_X86_64
if (is_long_mode(&vmx->vcpu))
rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
#endif
- if (vmx->host_state.gs_ldt_reload_needed) {
- kvm_load_ldt(vmx->host_state.ldt_sel);
+ if (host_state->ldt_sel || (host_state->gs_sel & 7)) {
+ kvm_load_ldt(host_state->ldt_sel);
#ifdef CONFIG_X86_64
- load_gs_index(vmx->host_state.gs_sel);
+ load_gs_index(host_state->gs_sel);
#else
- loadsegment(gs, vmx->host_state.gs_sel);
+ loadsegment(gs, host_state->gs_sel);
#endif
}
- if (vmx->host_state.fs_reload_needed)
- loadsegment(fs, vmx->host_state.fs_sel);
+ if (host_state->fs_sel & 7)
+ loadsegment(fs, host_state->fs_sel);
#ifdef CONFIG_X86_64
- if (unlikely(vmx->host_state.ds_sel | vmx->host_state.es_sel)) {
- loadsegment(ds, vmx->host_state.ds_sel);
- loadsegment(es, vmx->host_state.es_sel);
+ if (unlikely(host_state->ds_sel | host_state->es_sel)) {
+ loadsegment(ds, host_state->ds_sel);
+ loadsegment(es, host_state->es_sel);
}
#endif
invalidate_tss_limit();
#ifdef CONFIG_X86_64
wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
#endif
- if (vmx->host_state.msr_host_bndcfgs)
- wrmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs);
load_fixmap_gdt(raw_smp_processor_id());
}
-static void vmx_load_host_state(struct vcpu_vmx *vmx)
+#ifdef CONFIG_X86_64
+static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx)
{
- preempt_disable();
- __vmx_load_host_state(vmx);
- preempt_enable();
+ if (is_long_mode(&vmx->vcpu)) {
+ preempt_disable();
+ if (vmx->loaded_cpu_state)
+ rdmsrl(MSR_KERNEL_GS_BASE,
+ vmx->msr_guest_kernel_gs_base);
+ preempt_enable();
+ }
+ return vmx->msr_guest_kernel_gs_base;
}
+static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data)
+{
+ if (is_long_mode(&vmx->vcpu)) {
+ preempt_disable();
+ if (vmx->loaded_cpu_state)
+ wrmsrl(MSR_KERNEL_GS_BASE, data);
+ preempt_enable();
+ }
+ vmx->msr_guest_kernel_gs_base = data;
+}
+#endif
+
static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
{
struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
@@ -2991,7 +3138,7 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
{
vmx_vcpu_pi_put(vcpu);
- __vmx_load_host_state(to_vmx(vcpu));
+ vmx_prepare_switch_to_host(to_vmx(vcpu));
}
static bool emulation_required(struct kvm_vcpu *vcpu)
@@ -3212,7 +3359,7 @@ static bool vmx_rdtscp_supported(void)
static bool vmx_invpcid_supported(void)
{
- return cpu_has_vmx_invpcid() && enable_ept;
+ return cpu_has_vmx_invpcid();
}
/*
@@ -3455,6 +3602,12 @@ static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv)
SECONDARY_EXEC_APIC_REGISTER_VIRT |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
SECONDARY_EXEC_WBINVD_EXITING;
+ /*
+ * We can emulate "VMCS shadowing," even if the hardware
+ * doesn't support it.
+ */
+ msrs->secondary_ctls_high |=
+ SECONDARY_EXEC_SHADOW_VMCS;
if (enable_ept) {
/* nested EPT: emulate EPT also to L1 */
@@ -3922,8 +4075,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = vmcs_readl(GUEST_GS_BASE);
break;
case MSR_KERNEL_GS_BASE:
- vmx_load_host_state(vmx);
- msr_info->data = vmx->msr_guest_kernel_gs_base;
+ msr_info->data = vmx_read_guest_kernel_gs_base(vmx);
break;
#endif
case MSR_EFER:
@@ -4023,8 +4175,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vmcs_writel(GUEST_GS_BASE, data);
break;
case MSR_KERNEL_GS_BASE:
- vmx_load_host_state(vmx);
- vmx->msr_guest_kernel_gs_base = data;
+ vmx_write_guest_kernel_gs_base(vmx, data);
break;
#endif
case MSR_IA32_SYSENTER_CS:
@@ -4412,7 +4563,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
SECONDARY_EXEC_RDRAND_EXITING |
SECONDARY_EXEC_ENABLE_PML |
SECONDARY_EXEC_TSC_SCALING |
- SECONDARY_EXEC_ENABLE_VMFUNC;
+ SECONDARY_EXEC_ENABLE_VMFUNC |
+ SECONDARY_EXEC_ENCLS_EXITING;
if (adjust_vmx_controls(min2, opt2,
MSR_IA32_VMX_PROCBASED_CTLS2,
&_cpu_based_2nd_exec_control) < 0)
@@ -4559,7 +4711,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
return 0;
}
-static struct vmcs *alloc_vmcs_cpu(int cpu)
+static struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu)
{
int node = cpu_to_node(cpu);
struct page *pages;
@@ -4573,10 +4725,12 @@ static struct vmcs *alloc_vmcs_cpu(int cpu)
/* KVM supports Enlightened VMCS v1 only */
if (static_branch_unlikely(&enable_evmcs))
- vmcs->revision_id = KVM_EVMCS_VERSION;
+ vmcs->hdr.revision_id = KVM_EVMCS_VERSION;
else
- vmcs->revision_id = vmcs_config.revision_id;
+ vmcs->hdr.revision_id = vmcs_config.revision_id;
+ if (shadow)
+ vmcs->hdr.shadow_vmcs = 1;
return vmcs;
}
@@ -4600,14 +4754,14 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
WARN_ON(loaded_vmcs->shadow_vmcs != NULL);
}
-static struct vmcs *alloc_vmcs(void)
+static struct vmcs *alloc_vmcs(bool shadow)
{
- return alloc_vmcs_cpu(raw_smp_processor_id());
+ return alloc_vmcs_cpu(shadow, raw_smp_processor_id());
}
static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
{
- loaded_vmcs->vmcs = alloc_vmcs();
+ loaded_vmcs->vmcs = alloc_vmcs(false);
if (!loaded_vmcs->vmcs)
return -ENOMEM;
@@ -4629,6 +4783,9 @@ static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
evmcs->hv_enlightenments_control.msr_bitmap = 1;
}
}
+
+ memset(&loaded_vmcs->host_state, 0, sizeof(struct vmcs_host_state));
+
return 0;
out_vmcs:
@@ -4738,7 +4895,7 @@ static __init int alloc_kvm_area(void)
for_each_possible_cpu(cpu) {
struct vmcs *vmcs;
- vmcs = alloc_vmcs_cpu(cpu);
+ vmcs = alloc_vmcs_cpu(false, cpu);
if (!vmcs) {
free_kvm_area();
return -ENOMEM;
@@ -4755,7 +4912,7 @@ static __init int alloc_kvm_area(void)
* physical CPU.
*/
if (static_branch_unlikely(&enable_evmcs))
- vmcs->revision_id = vmcs_config.revision_id;
+ vmcs->hdr.revision_id = vmcs_config.revision_id;
per_cpu(vmxarea, cpu) = vmcs;
}
@@ -4912,10 +5069,18 @@ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
return;
/*
- * Force kernel_gs_base reloading before EFER changes, as control
- * of this msr depends on is_long_mode().
+ * MSR_KERNEL_GS_BASE is not intercepted when the guest is in
+ * 64-bit mode as a 64-bit kernel may frequently access the
+ * MSR. This means we need to manually save/restore the MSR
+ * when switching between guest and host state, but only if
+ * the guest is in 64-bit mode. Sync our cached value if the
+ * guest is transitioning to 32-bit mode and the CPU contains
+ * guest state, i.e. the cache is stale.
*/
- vmx_load_host_state(to_vmx(vcpu));
+#ifdef CONFIG_X86_64
+ if (!(efer & EFER_LMA))
+ (void)vmx_read_guest_kernel_gs_base(vmx);
+#endif
vcpu->arch.efer = efer;
if (efer & EFER_LMA) {
vm_entry_controls_setbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
@@ -4972,6 +5137,20 @@ static void vmx_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
__vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid, invalidate_gpa);
}
+static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr)
+{
+ int vpid = to_vmx(vcpu)->vpid;
+
+ if (!vpid_sync_vcpu_addr(vpid, addr))
+ vpid_sync_context(vpid);
+
+ /*
+ * If VPIDs are not supported or enabled, then the above is a no-op.
+ * But we don't really need a TLB flush in that case anyway, because
+ * each VM entry/exit includes an implicit flush when VPID is 0.
+ */
+}
+
static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
{
ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits;
@@ -5153,6 +5332,7 @@ static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa)
static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
{
+ struct kvm *kvm = vcpu->kvm;
unsigned long guest_cr3;
u64 eptp;
@@ -5160,15 +5340,23 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
if (enable_ept) {
eptp = construct_eptp(vcpu, cr3);
vmcs_write64(EPT_POINTER, eptp);
+
+ if (kvm_x86_ops->tlb_remote_flush) {
+ spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock);
+ to_vmx(vcpu)->ept_pointer = eptp;
+ to_kvm_vmx(kvm)->ept_pointers_match
+ = EPT_POINTERS_CHECK;
+ spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
+ }
+
if (enable_unrestricted_guest || is_paging(vcpu) ||
is_guest_mode(vcpu))
guest_cr3 = kvm_read_cr3(vcpu);
else
- guest_cr3 = to_kvm_vmx(vcpu->kvm)->ept_identity_map_addr;
+ guest_cr3 = to_kvm_vmx(kvm)->ept_identity_map_addr;
ept_load_pdptrs(vcpu);
}
- vmx_flush_tlb(vcpu, true);
vmcs_writel(GUEST_CR3, guest_cr3);
}
@@ -6104,19 +6292,19 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
*/
cr3 = __read_cr3();
vmcs_writel(HOST_CR3, cr3); /* 22.2.3 FIXME: shadow tables */
- vmx->loaded_vmcs->vmcs_host_cr3 = cr3;
+ vmx->loaded_vmcs->host_state.cr3 = cr3;
/* Save the most likely value for this task's CR4 in the VMCS. */
cr4 = cr4_read_shadow();
vmcs_writel(HOST_CR4, cr4); /* 22.2.3, 22.2.5 */
- vmx->loaded_vmcs->vmcs_host_cr4 = cr4;
+ vmx->loaded_vmcs->host_state.cr4 = cr4;
vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */
#ifdef CONFIG_X86_64
/*
* Load null selectors, so we can avoid reloading them in
- * __vmx_load_host_state(), in case userspace uses the null selectors
- * too (the expected case).
+ * vmx_prepare_switch_to_host(), in case userspace uses
+ * the null selectors too (the expected case).
*/
vmcs_write16(HOST_DS_SELECTOR, 0);
vmcs_write16(HOST_ES_SELECTOR, 0);
@@ -6241,8 +6429,6 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
if (!enable_ept) {
exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
enable_unrestricted_guest = 0;
- /* Enable INVPCID for non-ept guests may cause performance regression. */
- exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
}
if (!enable_unrestricted_guest)
exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
@@ -6371,9 +6557,6 @@ static void ept_set_mmio_spte_mask(void)
*/
static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
{
-#ifdef CONFIG_X86_64
- unsigned long a;
-#endif
int i;
if (enable_shadow_vmcs) {
@@ -6428,15 +6611,8 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
vmcs_write16(HOST_FS_SELECTOR, 0); /* 22.2.4 */
vmcs_write16(HOST_GS_SELECTOR, 0); /* 22.2.4 */
vmx_set_constant_host_state(vmx);
-#ifdef CONFIG_X86_64
- rdmsrl(MSR_FS_BASE, a);
- vmcs_writel(HOST_FS_BASE, a); /* 22.2.4 */
- rdmsrl(MSR_GS_BASE, a);
- vmcs_writel(HOST_GS_BASE, a); /* 22.2.4 */
-#else
vmcs_writel(HOST_FS_BASE, 0); /* 22.2.4 */
vmcs_writel(HOST_GS_BASE, 0); /* 22.2.4 */
-#endif
if (cpu_has_vmx_vmfunc())
vmcs_write64(VM_FUNCTION_CONTROL, 0);
@@ -6485,6 +6661,9 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
}
+
+ if (cpu_has_vmx_encls_vmexit())
+ vmcs_write64(ENCLS_EXITING_BITMAP, -1ull);
}
static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
@@ -7670,6 +7849,7 @@ static void vmx_enable_tdp(void)
static __init int hardware_setup(void)
{
+ unsigned long host_bndcfgs;
int r = -ENOMEM, i;
rdmsrl_safe(MSR_EFER, &host_efer);
@@ -7694,6 +7874,11 @@ static __init int hardware_setup(void)
if (boot_cpu_has(X86_FEATURE_NX))
kvm_enable_efer_bits(EFER_NX);
+ if (boot_cpu_has(X86_FEATURE_MPX)) {
+ rdmsrl(MSR_IA32_BNDCFGS, host_bndcfgs);
+ WARN_ONCE(host_bndcfgs, "KVM: BNDCFGS in host will be lost");
+ }
+
if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() ||
!(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global()))
enable_vpid = 0;
@@ -7730,6 +7915,12 @@ static __init int hardware_setup(void)
if (enable_ept && !cpu_has_vmx_ept_2m_page())
kvm_disable_largepages();
+#if IS_ENABLED(CONFIG_HYPERV)
+ if (ms_hyperv.nested_features & HV_X64_NESTED_GUEST_MAPPING_FLUSH
+ && enable_ept)
+ kvm_x86_ops->tlb_remote_flush = vmx_hv_remote_flush_tlb;
+#endif
+
if (!cpu_has_vmx_ple()) {
ple_gap = 0;
ple_window = 0;
@@ -7756,6 +7947,11 @@ static __init int hardware_setup(void)
else
kvm_disable_tdp();
+ if (!nested) {
+ kvm_x86_ops->get_nested_state = NULL;
+ kvm_x86_ops->set_nested_state = NULL;
+ }
+
/*
* Only enable PML when hardware supports PML feature, and both EPT
* and EPT A/D bit features are enabled -- PML depends on them to work.
@@ -8032,10 +8228,35 @@ static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer)
return 0;
}
+/*
+ * Allocate a shadow VMCS and associate it with the currently loaded
+ * VMCS, unless such a shadow VMCS already exists. The newly allocated
+ * VMCS is also VMCLEARed, so that it is ready for use.
+ */
+static struct vmcs *alloc_shadow_vmcs(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct loaded_vmcs *loaded_vmcs = vmx->loaded_vmcs;
+
+ /*
+ * We should allocate a shadow vmcs for vmcs01 only when L1
+ * executes VMXON and free it when L1 executes VMXOFF.
+ * As it is invalid to execute VMXON twice, we shouldn't reach
+ * here when vmcs01 already have an allocated shadow vmcs.
+ */
+ WARN_ON(loaded_vmcs == &vmx->vmcs01 && loaded_vmcs->shadow_vmcs);
+
+ if (!loaded_vmcs->shadow_vmcs) {
+ loaded_vmcs->shadow_vmcs = alloc_vmcs(true);
+ if (loaded_vmcs->shadow_vmcs)
+ vmcs_clear(loaded_vmcs->shadow_vmcs);
+ }
+ return loaded_vmcs->shadow_vmcs;
+}
+
static int enter_vmx_operation(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- struct vmcs *shadow_vmcs;
int r;
r = alloc_loaded_vmcs(&vmx->nested.vmcs02);
@@ -8046,16 +8267,12 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
if (!vmx->nested.cached_vmcs12)
goto out_cached_vmcs12;
- if (enable_shadow_vmcs) {
- shadow_vmcs = alloc_vmcs();
- if (!shadow_vmcs)
- goto out_shadow_vmcs;
- /* mark vmcs as shadow */
- shadow_vmcs->revision_id |= (1u << 31);
- /* init shadow vmcs */
- vmcs_clear(shadow_vmcs);
- vmx->vmcs01.shadow_vmcs = shadow_vmcs;
- }
+ vmx->nested.cached_shadow_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL);
+ if (!vmx->nested.cached_shadow_vmcs12)
+ goto out_cached_shadow_vmcs12;
+
+ if (enable_shadow_vmcs && !alloc_shadow_vmcs(vcpu))
+ goto out_shadow_vmcs;
hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
HRTIMER_MODE_REL_PINNED);
@@ -8067,6 +8284,9 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
return 0;
out_shadow_vmcs:
+ kfree(vmx->nested.cached_shadow_vmcs12);
+
+out_cached_shadow_vmcs12:
kfree(vmx->nested.cached_vmcs12);
out_cached_vmcs12:
@@ -8109,7 +8329,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
/* CPL=0 must be checked manually. */
if (vmx_get_cpl(vcpu)) {
- kvm_queue_exception(vcpu, UD_VECTOR);
+ kvm_inject_gp(vcpu, 0);
return 1;
}
@@ -8172,15 +8392,16 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
*/
static int nested_vmx_check_permission(struct kvm_vcpu *vcpu)
{
- if (vmx_get_cpl(vcpu)) {
+ if (!to_vmx(vcpu)->nested.vmxon) {
kvm_queue_exception(vcpu, UD_VECTOR);
return 0;
}
- if (!to_vmx(vcpu)->nested.vmxon) {
- kvm_queue_exception(vcpu, UD_VECTOR);
+ if (vmx_get_cpl(vcpu)) {
+ kvm_inject_gp(vcpu, 0);
return 0;
}
+
return 1;
}
@@ -8233,6 +8454,7 @@ static void free_nested(struct vcpu_vmx *vmx)
vmx->vmcs01.shadow_vmcs = NULL;
}
kfree(vmx->nested.cached_vmcs12);
+ kfree(vmx->nested.cached_shadow_vmcs12);
/* Unpin physical memory we referred to in the vmcs02 */
if (vmx->nested.apic_access_page) {
kvm_release_page_dirty(vmx->nested.apic_access_page);
@@ -8318,7 +8540,7 @@ static int handle_vmresume(struct kvm_vcpu *vcpu)
* some of the bits we return here (e.g., on 32-bit guests, only 32 bits of
* 64-bit fields are to be returned).
*/
-static inline int vmcs12_read_any(struct kvm_vcpu *vcpu,
+static inline int vmcs12_read_any(struct vmcs12 *vmcs12,
unsigned long field, u64 *ret)
{
short offset = vmcs_field_to_offset(field);
@@ -8327,7 +8549,7 @@ static inline int vmcs12_read_any(struct kvm_vcpu *vcpu,
if (offset < 0)
return offset;
- p = ((char *)(get_vmcs12(vcpu))) + offset;
+ p = (char *)vmcs12 + offset;
switch (vmcs_field_width(field)) {
case VMCS_FIELD_WIDTH_NATURAL_WIDTH:
@@ -8349,10 +8571,10 @@ static inline int vmcs12_read_any(struct kvm_vcpu *vcpu,
}
-static inline int vmcs12_write_any(struct kvm_vcpu *vcpu,
+static inline int vmcs12_write_any(struct vmcs12 *vmcs12,
unsigned long field, u64 field_value){
short offset = vmcs_field_to_offset(field);
- char *p = ((char *) get_vmcs12(vcpu)) + offset;
+ char *p = (char *)vmcs12 + offset;
if (offset < 0)
return offset;
@@ -8405,7 +8627,7 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
for (i = 0; i < max_fields[q]; i++) {
field = fields[q][i];
field_value = __vmcs_readl(field);
- vmcs12_write_any(&vmx->vcpu, field, field_value);
+ vmcs12_write_any(get_vmcs12(&vmx->vcpu), field, field_value);
}
/*
* Skip the VM-exit information fields if they are read-only.
@@ -8440,7 +8662,7 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
for (q = 0; q < ARRAY_SIZE(fields); q++) {
for (i = 0; i < max_fields[q]; i++) {
field = fields[q][i];
- vmcs12_read_any(&vmx->vcpu, field, &field_value);
+ vmcs12_read_any(get_vmcs12(&vmx->vcpu), field, &field_value);
__vmcs_writel(field, field_value);
}
}
@@ -8470,6 +8692,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
gva_t gva = 0;
+ struct vmcs12 *vmcs12;
if (!nested_vmx_check_permission(vcpu))
return 1;
@@ -8477,10 +8700,24 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
if (!nested_vmx_check_vmcs12(vcpu))
return kvm_skip_emulated_instruction(vcpu);
+ if (!is_guest_mode(vcpu))
+ vmcs12 = get_vmcs12(vcpu);
+ else {
+ /*
+ * When vmcs->vmcs_link_pointer is -1ull, any VMREAD
+ * to shadowed-field sets the ALU flags for VMfailInvalid.
+ */
+ if (get_vmcs12(vcpu)->vmcs_link_pointer == -1ull) {
+ nested_vmx_failInvalid(vcpu);
+ return kvm_skip_emulated_instruction(vcpu);
+ }
+ vmcs12 = get_shadow_vmcs12(vcpu);
+ }
+
/* Decode instruction info and find the field to read */
field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
/* Read the field, zero-extended to a u64 field_value */
- if (vmcs12_read_any(vcpu, field, &field_value) < 0) {
+ if (vmcs12_read_any(vmcs12, field, &field_value) < 0) {
nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
return kvm_skip_emulated_instruction(vcpu);
}
@@ -8522,6 +8759,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
*/
u64 field_value = 0;
struct x86_exception e;
+ struct vmcs12 *vmcs12;
if (!nested_vmx_check_permission(vcpu))
return 1;
@@ -8556,23 +8794,44 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
return kvm_skip_emulated_instruction(vcpu);
}
- if (vmcs12_write_any(vcpu, field, field_value) < 0) {
+ if (!is_guest_mode(vcpu))
+ vmcs12 = get_vmcs12(vcpu);
+ else {
+ /*
+ * When vmcs->vmcs_link_pointer is -1ull, any VMWRITE
+ * to shadowed-field sets the ALU flags for VMfailInvalid.
+ */
+ if (get_vmcs12(vcpu)->vmcs_link_pointer == -1ull) {
+ nested_vmx_failInvalid(vcpu);
+ return kvm_skip_emulated_instruction(vcpu);
+ }
+ vmcs12 = get_shadow_vmcs12(vcpu);
+
+ }
+
+ if (vmcs12_write_any(vmcs12, field, field_value) < 0) {
nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
return kvm_skip_emulated_instruction(vcpu);
}
- switch (field) {
+ /*
+ * Do not track vmcs12 dirty-state if in guest-mode
+ * as we actually dirty shadow vmcs12 instead of vmcs12.
+ */
+ if (!is_guest_mode(vcpu)) {
+ switch (field) {
#define SHADOW_FIELD_RW(x) case x:
#include "vmx_shadow_fields.h"
- /*
- * The fields that can be updated by L1 without a vmexit are
- * always updated in the vmcs02, the others go down the slow
- * path of prepare_vmcs02.
- */
- break;
- default:
- vmx->nested.dirty_vmcs12 = true;
- break;
+ /*
+ * The fields that can be updated by L1 without a vmexit are
+ * always updated in the vmcs02, the others go down the slow
+ * path of prepare_vmcs02.
+ */
+ break;
+ default:
+ vmx->nested.dirty_vmcs12 = true;
+ break;
+ }
}
nested_vmx_succeed(vcpu);
@@ -8623,7 +8882,9 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
return kvm_skip_emulated_instruction(vcpu);
}
new_vmcs12 = kmap(page);
- if (new_vmcs12->revision_id != VMCS12_REVISION) {
+ if (new_vmcs12->hdr.revision_id != VMCS12_REVISION ||
+ (new_vmcs12->hdr.shadow_vmcs &&
+ !nested_cpu_has_vmx_shadow_vmcs(vcpu))) {
kunmap(page);
kvm_release_page_clean(page);
nested_vmx_failValid(vcpu,
@@ -8821,6 +9082,105 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
return kvm_skip_emulated_instruction(vcpu);
}
+static int handle_invpcid(struct kvm_vcpu *vcpu)
+{
+ u32 vmx_instruction_info;
+ unsigned long type;
+ bool pcid_enabled;
+ gva_t gva;
+ struct x86_exception e;
+ unsigned i;
+ unsigned long roots_to_free = 0;
+ struct {
+ u64 pcid;
+ u64 gla;
+ } operand;
+
+ if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) {
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return 1;
+ }
+
+ vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+ type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
+
+ if (type > 3) {
+ kvm_inject_gp(vcpu, 0);
+ return 1;
+ }
+
+ /* According to the Intel instruction reference, the memory operand
+ * is read even if it isn't needed (e.g., for type==all)
+ */
+ if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
+ vmx_instruction_info, false, &gva))
+ return 1;
+
+ if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
+ kvm_inject_page_fault(vcpu, &e);
+ return 1;
+ }
+
+ if (operand.pcid >> 12 != 0) {
+ kvm_inject_gp(vcpu, 0);
+ return 1;
+ }
+
+ pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
+
+ switch (type) {
+ case INVPCID_TYPE_INDIV_ADDR:
+ if ((!pcid_enabled && (operand.pcid != 0)) ||
+ is_noncanonical_address(operand.gla, vcpu)) {
+ kvm_inject_gp(vcpu, 0);
+ return 1;
+ }
+ kvm_mmu_invpcid_gva(vcpu, operand.gla, operand.pcid);
+ return kvm_skip_emulated_instruction(vcpu);
+
+ case INVPCID_TYPE_SINGLE_CTXT:
+ if (!pcid_enabled && (operand.pcid != 0)) {
+ kvm_inject_gp(vcpu, 0);
+ return 1;
+ }
+
+ if (kvm_get_active_pcid(vcpu) == operand.pcid) {
+ kvm_mmu_sync_roots(vcpu);
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+ }
+
+ for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
+ if (kvm_get_pcid(vcpu, vcpu->arch.mmu.prev_roots[i].cr3)
+ == operand.pcid)
+ roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
+
+ kvm_mmu_free_roots(vcpu, roots_to_free);
+ /*
+ * If neither the current cr3 nor any of the prev_roots use the
+ * given PCID, then nothing needs to be done here because a
+ * resync will happen anyway before switching to any other CR3.
+ */
+
+ return kvm_skip_emulated_instruction(vcpu);
+
+ case INVPCID_TYPE_ALL_NON_GLOBAL:
+ /*
+ * Currently, KVM doesn't mark global entries in the shadow
+ * page tables, so a non-global flush just degenerates to a
+ * global flush. If needed, we could optimize this later by
+ * keeping track of global entries in shadow page tables.
+ */
+
+ /* fall-through */
+ case INVPCID_TYPE_ALL_INCL_GLOBAL:
+ kvm_mmu_unload(vcpu);
+ return kvm_skip_emulated_instruction(vcpu);
+
+ default:
+ BUG(); /* We have already checked above that type <= 3 */
+ }
+}
+
static int handle_pml_full(struct kvm_vcpu *vcpu)
{
unsigned long exit_qualification;
@@ -8970,6 +9330,17 @@ fail:
return 1;
}
+static int handle_encls(struct kvm_vcpu *vcpu)
+{
+ /*
+ * SGX virtualization is not yet supported. There is no software
+ * enable bit for SGX, so we have to trap ENCLS and inject a #UD
+ * to prevent the guest from executing ENCLS.
+ */
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return 1;
+}
+
/*
* The exit handlers return 1 if the exit was handled fully and guest execution
* may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -9024,8 +9395,10 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_XSAVES] = handle_xsaves,
[EXIT_REASON_XRSTORS] = handle_xrstors,
[EXIT_REASON_PML_FULL] = handle_pml_full,
+ [EXIT_REASON_INVPCID] = handle_invpcid,
[EXIT_REASON_VMFUNC] = handle_vmfunc,
[EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer,
+ [EXIT_REASON_ENCLS] = handle_encls,
};
static const int kvm_vmx_max_exit_handlers =
@@ -9196,6 +9569,30 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
return false;
}
+static bool nested_vmx_exit_handled_vmcs_access(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12, gpa_t bitmap)
+{
+ u32 vmx_instruction_info;
+ unsigned long field;
+ u8 b;
+
+ if (!nested_cpu_has_shadow_vmcs(vmcs12))
+ return true;
+
+ /* Decode instruction info and find the field to access */
+ vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+ field = kvm_register_read(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
+
+ /* Out-of-range fields always cause a VM exit from L2 to L1 */
+ if (field >> 15)
+ return true;
+
+ if (kvm_vcpu_read_guest(vcpu, bitmap + field/8, &b, 1))
+ return true;
+
+ return 1 & (b >> (field & 7));
+}
+
/*
* Return 1 if we should exit from L2 to L1 to handle an exit, or 0 if we
* should handle it ourselves in L0 (and then continue L2). Only call this
@@ -9280,10 +9677,15 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDSEED_EXITING);
case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP:
return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING);
+ case EXIT_REASON_VMREAD:
+ return nested_vmx_exit_handled_vmcs_access(vcpu, vmcs12,
+ vmcs12->vmread_bitmap);
+ case EXIT_REASON_VMWRITE:
+ return nested_vmx_exit_handled_vmcs_access(vcpu, vmcs12,
+ vmcs12->vmwrite_bitmap);
case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR:
case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD:
- case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD:
- case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE:
+ case EXIT_REASON_VMPTRST: case EXIT_REASON_VMRESUME:
case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
case EXIT_REASON_INVEPT: case EXIT_REASON_INVVPID:
/*
@@ -9367,6 +9769,9 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
case EXIT_REASON_VMFUNC:
/* VM functions are emulated through L2->L0 vmexits. */
return false;
+ case EXIT_REASON_ENCLS:
+ /* SGX is never exposed to L1 */
+ return false;
default:
return true;
}
@@ -10244,15 +10649,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
cr3 = __get_current_cr3_fast();
- if (unlikely(cr3 != vmx->loaded_vmcs->vmcs_host_cr3)) {
+ if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
vmcs_writel(HOST_CR3, cr3);
- vmx->loaded_vmcs->vmcs_host_cr3 = cr3;
+ vmx->loaded_vmcs->host_state.cr3 = cr3;
}
cr4 = cr4_read_shadow();
- if (unlikely(cr4 != vmx->loaded_vmcs->vmcs_host_cr4)) {
+ if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
vmcs_writel(HOST_CR4, cr4);
- vmx->loaded_vmcs->vmcs_host_cr4 = cr4;
+ vmx->loaded_vmcs->host_state.cr4 = cr4;
}
/* When single-stepping over STI and MOV SS, we must clear the
@@ -10448,9 +10853,9 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
* The sysexit path does not restore ds/es, so we must set them to
* a reasonable value ourselves.
*
- * We can't defer this to vmx_load_host_state() since that function
- * may be executed in interrupt context, which saves and restore segments
- * around it, nullifying its effect.
+ * We can't defer this to vmx_prepare_switch_to_host() since that
+ * function may be executed in interrupt context, which saves and
+ * restore segments around it, nullifying its effect.
*/
loadsegment(ds, __USER_DS);
loadsegment(es, __USER_DS);
@@ -10511,8 +10916,8 @@ static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
return;
cpu = get_cpu();
- vmx->loaded_vmcs = vmcs;
vmx_vcpu_put(vcpu);
+ vmx->loaded_vmcs = vmcs;
vmx_vcpu_load(vcpu, cpu);
put_cpu();
}
@@ -10652,6 +11057,8 @@ free_vcpu:
static int vmx_vm_init(struct kvm *kvm)
{
+ spin_lock_init(&to_kvm_vmx(kvm)->ept_pointer_lock);
+
if (!ple_gap)
kvm->arch.pause_in_guest = true;
@@ -10876,11 +11283,11 @@ static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
if (!valid_ept_address(vcpu, nested_ept_get_cr3(vcpu)))
return 1;
- kvm_mmu_unload(vcpu);
kvm_init_shadow_ept_mmu(vcpu,
to_vmx(vcpu)->nested.msrs.ept_caps &
VMX_EPT_EXECUTE_ONLY_BIT,
- nested_ept_ad_enabled(vcpu));
+ nested_ept_ad_enabled(vcpu),
+ nested_ept_get_cr3(vcpu));
vcpu->arch.mmu.set_cr3 = vmx_set_cr3;
vcpu->arch.mmu.get_cr3 = nested_ept_get_cr3;
vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault;
@@ -10928,9 +11335,9 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12);
-static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
- struct vmcs12 *vmcs12)
+static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
{
+ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct page *page;
u64 hpa;
@@ -11171,6 +11578,38 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
return true;
}
+static void nested_cache_shadow_vmcs12(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12)
+{
+ struct vmcs12 *shadow;
+ struct page *page;
+
+ if (!nested_cpu_has_shadow_vmcs(vmcs12) ||
+ vmcs12->vmcs_link_pointer == -1ull)
+ return;
+
+ shadow = get_shadow_vmcs12(vcpu);
+ page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->vmcs_link_pointer);
+
+ memcpy(shadow, kmap(page), VMCS12_SIZE);
+
+ kunmap(page);
+ kvm_release_page_clean(page);
+}
+
+static void nested_flush_cached_shadow_vmcs12(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ if (!nested_cpu_has_shadow_vmcs(vmcs12) ||
+ vmcs12->vmcs_link_pointer == -1ull)
+ return;
+
+ kvm_write_guest(vmx->vcpu.kvm, vmcs12->vmcs_link_pointer,
+ get_shadow_vmcs12(vcpu), VMCS12_SIZE);
+}
+
static int nested_vmx_check_apic_access_controls(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
@@ -11228,11 +11667,12 @@ static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu,
unsigned long count_field,
unsigned long addr_field)
{
+ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
int maxphyaddr;
u64 count, addr;
- if (vmcs12_read_any(vcpu, count_field, &count) ||
- vmcs12_read_any(vcpu, addr_field, &addr)) {
+ if (vmcs12_read_any(vmcs12, count_field, &count) ||
+ vmcs12_read_any(vmcs12, addr_field, &addr)) {
WARN_ON(1);
return -EINVAL;
}
@@ -11282,6 +11722,19 @@ static int nested_vmx_check_pml_controls(struct kvm_vcpu *vcpu,
return 0;
}
+static int nested_vmx_check_shadow_vmcs_controls(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12)
+{
+ if (!nested_cpu_has_shadow_vmcs(vmcs12))
+ return 0;
+
+ if (!page_address_valid(vcpu, vmcs12->vmread_bitmap) ||
+ !page_address_valid(vcpu, vmcs12->vmwrite_bitmap))
+ return -EINVAL;
+
+ return 0;
+}
+
static int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu,
struct vmx_msr_entry *e)
{
@@ -11431,12 +11884,16 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne
return 1;
}
}
-
- vcpu->arch.cr3 = cr3;
- __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
}
- kvm_mmu_reset_context(vcpu);
+ if (!nested_ept)
+ kvm_mmu_new_cr3(vcpu, cr3, false);
+
+ vcpu->arch.cr3 = cr3;
+ __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
+
+ kvm_init_mmu(vcpu, false);
+
return 0;
}
@@ -11523,7 +11980,8 @@ static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
* Set host-state according to L0's settings (vmcs12 is irrelevant here)
* Some constant fields are set here by vmx_set_constant_host_state().
* Other fields are different per CPU, and will be set later when
- * vmx_vcpu_load() is called, and when vmx_save_host_state() is called.
+ * vmx_vcpu_load() is called, and when vmx_prepare_switch_to_guest()
+ * is called.
*/
vmx_set_constant_host_state(vmx);
@@ -11595,11 +12053,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base);
vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base);
- /*
- * Not in vmcs02: GUEST_PML_INDEX, HOST_FS_SELECTOR, HOST_GS_SELECTOR,
- * HOST_FS_BASE, HOST_GS_BASE.
- */
-
if (vmx->nested.nested_run_pending &&
(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
@@ -11664,6 +12117,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
exec_control |= vmcs12_exec_ctrl;
}
+ /* VMCS shadowing for L2 is emulated for now */
+ exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
+
if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
vmcs_write16(GUEST_INTR_STATUS,
vmcs12->guest_intr_status);
@@ -11676,6 +12132,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
if (exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)
vmcs_write64(APIC_ACCESS_ADDR, -1ull);
+ if (exec_control & SECONDARY_EXEC_ENCLS_EXITING)
+ vmcs_write64(ENCLS_EXITING_BITMAP, -1ull);
+
vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
}
@@ -11883,6 +12342,9 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
if (nested_vmx_check_pml_controls(vcpu, vmcs12))
return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+ if (nested_vmx_check_shadow_vmcs_controls(vcpu, vmcs12))
+ return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
vmx->nested.msrs.procbased_ctls_low,
vmx->nested.msrs.procbased_ctls_high) ||
@@ -11983,6 +12445,33 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
return 0;
}
+static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12)
+{
+ int r;
+ struct page *page;
+ struct vmcs12 *shadow;
+
+ if (vmcs12->vmcs_link_pointer == -1ull)
+ return 0;
+
+ if (!page_address_valid(vcpu, vmcs12->vmcs_link_pointer))
+ return -EINVAL;
+
+ page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->vmcs_link_pointer);
+ if (is_error_page(page))
+ return -EINVAL;
+
+ r = 0;
+ shadow = kmap(page);
+ if (shadow->hdr.revision_id != VMCS12_REVISION ||
+ shadow->hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12))
+ r = -EINVAL;
+ kunmap(page);
+ kvm_release_page_clean(page);
+ return r;
+}
+
static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
u32 *exit_qual)
{
@@ -11994,8 +12483,7 @@ static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
!nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4))
return 1;
- if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_SHADOW_VMCS) &&
- vmcs12->vmcs_link_pointer != -1ull) {
+ if (nested_vmx_check_vmcs_link_ptr(vcpu, vmcs12)) {
*exit_qual = ENTRY_FAIL_VMCS_LINK_PTR;
return 1;
}
@@ -12042,12 +12530,17 @@ static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
return 0;
}
-static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu)
+/*
+ * If exit_qual is NULL, this is being called from state restore (either RSM
+ * or KVM_SET_NESTED_STATE). Otherwise it's called from vmlaunch/vmresume.
+ */
+static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
- u32 exit_qual;
- int r;
+ bool from_vmentry = !!exit_qual;
+ u32 dummy_exit_qual;
+ int r = 0;
enter_guest_mode(vcpu);
@@ -12061,17 +12554,28 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu)
vcpu->arch.tsc_offset += vmcs12->tsc_offset;
r = EXIT_REASON_INVALID_STATE;
- if (prepare_vmcs02(vcpu, vmcs12, &exit_qual))
+ if (prepare_vmcs02(vcpu, vmcs12, from_vmentry ? exit_qual : &dummy_exit_qual))
goto fail;
- nested_get_vmcs12_pages(vcpu, vmcs12);
+ if (from_vmentry) {
+ nested_get_vmcs12_pages(vcpu);
- r = EXIT_REASON_MSR_LOAD_FAIL;
- exit_qual = nested_vmx_load_msr(vcpu,
- vmcs12->vm_entry_msr_load_addr,
- vmcs12->vm_entry_msr_load_count);
- if (exit_qual)
- goto fail;
+ r = EXIT_REASON_MSR_LOAD_FAIL;
+ *exit_qual = nested_vmx_load_msr(vcpu,
+ vmcs12->vm_entry_msr_load_addr,
+ vmcs12->vm_entry_msr_load_count);
+ if (*exit_qual)
+ goto fail;
+ } else {
+ /*
+ * The MMU is not initialized to point at the right entities yet and
+ * "get pages" would need to read data from the guest (i.e. we will
+ * need to perform gpa to hpa translation). Request a call
+ * to nested_get_vmcs12_pages before the next VM-entry. The MSRs
+ * have already been set at vmentry time and should not be reset.
+ */
+ kvm_make_request(KVM_REQ_GET_VMCS12_PAGES, vcpu);
+ }
/*
* Note no nested_vmx_succeed or nested_vmx_fail here. At this point
@@ -12086,8 +12590,7 @@ fail:
vcpu->arch.tsc_offset -= vmcs12->tsc_offset;
leave_guest_mode(vcpu);
vmx_switch_vmcs(vcpu, &vmx->vmcs01);
- nested_vmx_entry_failure(vcpu, vmcs12, r, exit_qual);
- return 1;
+ return r;
}
/*
@@ -12110,6 +12613,17 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
vmcs12 = get_vmcs12(vcpu);
+ /*
+ * Can't VMLAUNCH or VMRESUME a shadow VMCS. Despite the fact
+ * that there *is* a valid VMCS pointer, RFLAGS.CF is set
+ * rather than RFLAGS.ZF, and no error number is stored to the
+ * VM-instruction error field.
+ */
+ if (vmcs12->hdr.shadow_vmcs) {
+ nested_vmx_failInvalid(vcpu);
+ goto out;
+ }
+
if (enable_shadow_vmcs)
copy_shadow_to_vmcs12(vmx);
@@ -12164,16 +12678,29 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
*/
vmx->nested.nested_run_pending = 1;
- ret = enter_vmx_non_root_mode(vcpu);
+ ret = enter_vmx_non_root_mode(vcpu, &exit_qual);
if (ret) {
+ nested_vmx_entry_failure(vcpu, vmcs12, ret, exit_qual);
vmx->nested.nested_run_pending = 0;
- return ret;
+ return 1;
}
/* Hide L1D cache contents from the nested guest. */
vmx->vcpu.arch.l1tf_flush_l1d = true;
/*
+ * Must happen outside of enter_vmx_non_root_mode() as it will
+ * also be used as part of restoring nVMX state for
+ * snapshot restore (migration).
+ *
+ * In this flow, it is assumed that vmcs12 cache was
+ * trasferred as part of captured nVMX state and should
+ * therefore not be read from guest memory (which may not
+ * exist on destination host yet).
+ */
+ nested_cache_shadow_vmcs12(vcpu, vmcs12);
+
+ /*
* If we're entering a halted L2 vcpu and the L2 vcpu won't be woken
* by event injection, halt vcpu.
*/
@@ -12682,6 +13209,17 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info,
exit_qualification);
+ /*
+ * Must happen outside of sync_vmcs12() as it will
+ * also be used to capture vmcs12 cache as part of
+ * capturing nVMX state for snapshot (migration).
+ *
+ * Otherwise, this flush will dirty guest memory at a
+ * point it is already assumed by user-space to be
+ * immutable.
+ */
+ nested_flush_cached_shadow_vmcs12(vcpu, vmcs12);
+
if (nested_vmx_store_msr(vcpu, vmcs12->vm_exit_msr_store_addr,
vmcs12->vm_exit_msr_store_count))
nested_vmx_abort(vcpu, VMX_ABORT_SAVE_GUEST_MSR_FAIL);
@@ -13256,7 +13794,7 @@ static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase)
if (vmx->nested.smm.guest_mode) {
vcpu->arch.hflags &= ~HF_SMM_MASK;
- ret = enter_vmx_non_root_mode(vcpu);
+ ret = enter_vmx_non_root_mode(vcpu, NULL);
vcpu->arch.hflags |= HF_SMM_MASK;
if (ret)
return ret;
@@ -13271,6 +13809,199 @@ static int enable_smi_window(struct kvm_vcpu *vcpu)
return 0;
}
+static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
+ struct kvm_nested_state __user *user_kvm_nested_state,
+ u32 user_data_size)
+{
+ struct vcpu_vmx *vmx;
+ struct vmcs12 *vmcs12;
+ struct kvm_nested_state kvm_state = {
+ .flags = 0,
+ .format = 0,
+ .size = sizeof(kvm_state),
+ .vmx.vmxon_pa = -1ull,
+ .vmx.vmcs_pa = -1ull,
+ };
+
+ if (!vcpu)
+ return kvm_state.size + 2 * VMCS12_SIZE;
+
+ vmx = to_vmx(vcpu);
+ vmcs12 = get_vmcs12(vcpu);
+ if (nested_vmx_allowed(vcpu) &&
+ (vmx->nested.vmxon || vmx->nested.smm.vmxon)) {
+ kvm_state.vmx.vmxon_pa = vmx->nested.vmxon_ptr;
+ kvm_state.vmx.vmcs_pa = vmx->nested.current_vmptr;
+
+ if (vmx->nested.current_vmptr != -1ull) {
+ kvm_state.size += VMCS12_SIZE;
+
+ if (is_guest_mode(vcpu) &&
+ nested_cpu_has_shadow_vmcs(vmcs12) &&
+ vmcs12->vmcs_link_pointer != -1ull)
+ kvm_state.size += VMCS12_SIZE;
+ }
+
+ if (vmx->nested.smm.vmxon)
+ kvm_state.vmx.smm.flags |= KVM_STATE_NESTED_SMM_VMXON;
+
+ if (vmx->nested.smm.guest_mode)
+ kvm_state.vmx.smm.flags |= KVM_STATE_NESTED_SMM_GUEST_MODE;
+
+ if (is_guest_mode(vcpu)) {
+ kvm_state.flags |= KVM_STATE_NESTED_GUEST_MODE;
+
+ if (vmx->nested.nested_run_pending)
+ kvm_state.flags |= KVM_STATE_NESTED_RUN_PENDING;
+ }
+ }
+
+ if (user_data_size < kvm_state.size)
+ goto out;
+
+ if (copy_to_user(user_kvm_nested_state, &kvm_state, sizeof(kvm_state)))
+ return -EFAULT;
+
+ if (vmx->nested.current_vmptr == -1ull)
+ goto out;
+
+ /*
+ * When running L2, the authoritative vmcs12 state is in the
+ * vmcs02. When running L1, the authoritative vmcs12 state is
+ * in the shadow vmcs linked to vmcs01, unless
+ * sync_shadow_vmcs is set, in which case, the authoritative
+ * vmcs12 state is in the vmcs12 already.
+ */
+ if (is_guest_mode(vcpu))
+ sync_vmcs12(vcpu, vmcs12);
+ else if (enable_shadow_vmcs && !vmx->nested.sync_shadow_vmcs)
+ copy_shadow_to_vmcs12(vmx);
+
+ if (copy_to_user(user_kvm_nested_state->data, vmcs12, sizeof(*vmcs12)))
+ return -EFAULT;
+
+ if (nested_cpu_has_shadow_vmcs(vmcs12) &&
+ vmcs12->vmcs_link_pointer != -1ull) {
+ if (copy_to_user(user_kvm_nested_state->data + VMCS12_SIZE,
+ get_shadow_vmcs12(vcpu), sizeof(*vmcs12)))
+ return -EFAULT;
+ }
+
+out:
+ return kvm_state.size;
+}
+
+static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
+ struct kvm_nested_state __user *user_kvm_nested_state,
+ struct kvm_nested_state *kvm_state)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct vmcs12 *vmcs12;
+ u32 exit_qual;
+ int ret;
+
+ if (kvm_state->format != 0)
+ return -EINVAL;
+
+ if (!nested_vmx_allowed(vcpu))
+ return kvm_state->vmx.vmxon_pa == -1ull ? 0 : -EINVAL;
+
+ if (kvm_state->vmx.vmxon_pa == -1ull) {
+ if (kvm_state->vmx.smm.flags)
+ return -EINVAL;
+
+ if (kvm_state->vmx.vmcs_pa != -1ull)
+ return -EINVAL;
+
+ vmx_leave_nested(vcpu);
+ return 0;
+ }
+
+ if (!page_address_valid(vcpu, kvm_state->vmx.vmxon_pa))
+ return -EINVAL;
+
+ if (kvm_state->size < sizeof(kvm_state) + sizeof(*vmcs12))
+ return -EINVAL;
+
+ if (kvm_state->vmx.vmcs_pa == kvm_state->vmx.vmxon_pa ||
+ !page_address_valid(vcpu, kvm_state->vmx.vmcs_pa))
+ return -EINVAL;
+
+ if ((kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
+ (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
+ return -EINVAL;
+
+ if (kvm_state->vmx.smm.flags &
+ ~(KVM_STATE_NESTED_SMM_GUEST_MODE | KVM_STATE_NESTED_SMM_VMXON))
+ return -EINVAL;
+
+ if ((kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
+ !(kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON))
+ return -EINVAL;
+
+ vmx_leave_nested(vcpu);
+ if (kvm_state->vmx.vmxon_pa == -1ull)
+ return 0;
+
+ vmx->nested.vmxon_ptr = kvm_state->vmx.vmxon_pa;
+ ret = enter_vmx_operation(vcpu);
+ if (ret)
+ return ret;
+
+ set_current_vmptr(vmx, kvm_state->vmx.vmcs_pa);
+
+ if (kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON) {
+ vmx->nested.smm.vmxon = true;
+ vmx->nested.vmxon = false;
+
+ if (kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE)
+ vmx->nested.smm.guest_mode = true;
+ }
+
+ vmcs12 = get_vmcs12(vcpu);
+ if (copy_from_user(vmcs12, user_kvm_nested_state->data, sizeof(*vmcs12)))
+ return -EFAULT;
+
+ if (vmcs12->hdr.revision_id != VMCS12_REVISION)
+ return -EINVAL;
+
+ if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
+ return 0;
+
+ vmx->nested.nested_run_pending =
+ !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
+
+ if (nested_cpu_has_shadow_vmcs(vmcs12) &&
+ vmcs12->vmcs_link_pointer != -1ull) {
+ struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu);
+ if (kvm_state->size < sizeof(kvm_state) + 2 * sizeof(*vmcs12))
+ return -EINVAL;
+
+ if (copy_from_user(shadow_vmcs12,
+ user_kvm_nested_state->data + VMCS12_SIZE,
+ sizeof(*vmcs12)))
+ return -EFAULT;
+
+ if (shadow_vmcs12->hdr.revision_id != VMCS12_REVISION ||
+ !shadow_vmcs12->hdr.shadow_vmcs)
+ return -EINVAL;
+ }
+
+ if (check_vmentry_prereqs(vcpu, vmcs12) ||
+ check_vmentry_postreqs(vcpu, vmcs12, &exit_qual))
+ return -EINVAL;
+
+ if (kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING)
+ vmx->nested.nested_run_pending = 1;
+
+ vmx->nested.dirty_vmcs12 = true;
+ ret = enter_vmx_non_root_mode(vcpu, NULL);
+ if (ret)
+ return -EINVAL;
+
+ return 0;
+}
+
static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.cpu_has_kvm_support = cpu_has_kvm_support,
.disabled_by_bios = vmx_disabled_by_bios,
@@ -13290,7 +14021,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.vcpu_free = vmx_free_vcpu,
.vcpu_reset = vmx_vcpu_reset,
- .prepare_guest_switch = vmx_save_host_state,
+ .prepare_guest_switch = vmx_prepare_switch_to_guest,
.vcpu_load = vmx_vcpu_load,
.vcpu_put = vmx_vcpu_put,
@@ -13323,6 +14054,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.set_rflags = vmx_set_rflags,
.tlb_flush = vmx_flush_tlb,
+ .tlb_flush_gva = vmx_flush_tlb_gva,
.run = vmx_vcpu_run,
.handle_exit = vmx_handle_exit,
@@ -13405,6 +14137,10 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.setup_mce = vmx_setup_mce,
+ .get_nested_state = vmx_get_nested_state,
+ .set_nested_state = vmx_set_nested_state,
+ .get_vmcs12_pages = nested_get_vmcs12_pages,
+
.smi_allowed = vmx_smi_allowed,
.pre_enter_smm = vmx_pre_enter_smm,
.pre_leave_smm = vmx_pre_leave_smm,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a5caa5e5480c..506bd2b4b8bb 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -848,16 +848,21 @@ EXPORT_SYMBOL_GPL(kvm_set_cr4);
int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
{
+ bool skip_tlb_flush = false;
#ifdef CONFIG_X86_64
bool pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
- if (pcid_enabled)
- cr3 &= ~CR3_PCID_INVD;
+ if (pcid_enabled) {
+ skip_tlb_flush = cr3 & X86_CR3_PCID_NOFLUSH;
+ cr3 &= ~X86_CR3_PCID_NOFLUSH;
+ }
#endif
if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
- kvm_mmu_sync_roots(vcpu);
- kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+ if (!skip_tlb_flush) {
+ kvm_mmu_sync_roots(vcpu);
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+ }
return 0;
}
@@ -868,9 +873,10 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
return 1;
+ kvm_mmu_new_cr3(vcpu, cr3, skip_tlb_flush);
vcpu->arch.cr3 = cr3;
__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
- kvm_mmu_new_cr3(vcpu);
+
return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_cr3);
@@ -2185,10 +2191,11 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vcpu->arch.mcg_status = data;
break;
case MSR_IA32_MCG_CTL:
- if (!(mcg_cap & MCG_CTL_P))
+ if (!(mcg_cap & MCG_CTL_P) &&
+ (data || !msr_info->host_initiated))
return 1;
if (data != 0 && data != ~(u64)0)
- return -1;
+ return 1;
vcpu->arch.mcg_ctl = data;
break;
default:
@@ -2576,7 +2583,7 @@ int kvm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
}
EXPORT_SYMBOL_GPL(kvm_get_msr);
-static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
{
u64 data;
u64 mcg_cap = vcpu->arch.mcg_cap;
@@ -2591,7 +2598,7 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
data = vcpu->arch.mcg_cap;
break;
case MSR_IA32_MCG_CTL:
- if (!(mcg_cap & MCG_CTL_P))
+ if (!(mcg_cap & MCG_CTL_P) && !host)
return 1;
data = vcpu->arch.mcg_ctl;
break;
@@ -2724,7 +2731,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_MCG_CTL:
case MSR_IA32_MCG_STATUS:
case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
- return get_msr_mce(vcpu, msr_info->index, &msr_info->data);
+ return get_msr_mce(vcpu, msr_info->index, &msr_info->data,
+ msr_info->host_initiated);
case MSR_K7_CLK_CTL:
/*
* Provide expected ramp-up count for K7. All other
@@ -2745,7 +2753,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case HV_X64_MSR_TSC_EMULATION_CONTROL:
case HV_X64_MSR_TSC_EMULATION_STATUS:
return kvm_hv_get_msr_common(vcpu,
- msr_info->index, &msr_info->data);
+ msr_info->index, &msr_info->data,
+ msr_info->host_initiated);
break;
case MSR_IA32_BBL_CR_CTL3:
/* This legacy MSR exists but isn't fully documented in current
@@ -2969,6 +2978,10 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_X2APIC_API:
r = KVM_X2APIC_API_VALID_FLAGS;
break;
+ case KVM_CAP_NESTED_STATE:
+ r = kvm_x86_ops->get_nested_state ?
+ kvm_x86_ops->get_nested_state(NULL, 0, 0) : 0;
+ break;
default:
break;
}
@@ -3985,6 +3998,56 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
break;
}
+ case KVM_GET_NESTED_STATE: {
+ struct kvm_nested_state __user *user_kvm_nested_state = argp;
+ u32 user_data_size;
+
+ r = -EINVAL;
+ if (!kvm_x86_ops->get_nested_state)
+ break;
+
+ BUILD_BUG_ON(sizeof(user_data_size) != sizeof(user_kvm_nested_state->size));
+ if (get_user(user_data_size, &user_kvm_nested_state->size))
+ return -EFAULT;
+
+ r = kvm_x86_ops->get_nested_state(vcpu, user_kvm_nested_state,
+ user_data_size);
+ if (r < 0)
+ return r;
+
+ if (r > user_data_size) {
+ if (put_user(r, &user_kvm_nested_state->size))
+ return -EFAULT;
+ return -E2BIG;
+ }
+ r = 0;
+ break;
+ }
+ case KVM_SET_NESTED_STATE: {
+ struct kvm_nested_state __user *user_kvm_nested_state = argp;
+ struct kvm_nested_state kvm_state;
+
+ r = -EINVAL;
+ if (!kvm_x86_ops->set_nested_state)
+ break;
+
+ if (copy_from_user(&kvm_state, user_kvm_nested_state, sizeof(kvm_state)))
+ return -EFAULT;
+
+ if (kvm_state.size < sizeof(kvm_state))
+ return -EINVAL;
+
+ if (kvm_state.flags &
+ ~(KVM_STATE_NESTED_RUN_PENDING | KVM_STATE_NESTED_GUEST_MODE))
+ return -EINVAL;
+
+ /* nested_run_pending implies guest_mode. */
+ if (kvm_state.flags == KVM_STATE_NESTED_RUN_PENDING)
+ return -EINVAL;
+
+ r = kvm_x86_ops->set_nested_state(vcpu, user_kvm_nested_state, &kvm_state);
+ break;
+ }
default:
r = -EINVAL;
}
@@ -6503,20 +6566,22 @@ static void kvm_set_mmio_spte_mask(void)
* Set the reserved bits and the present bit of an paging-structure
* entry to generate page fault with PFER.RSV = 1.
*/
- /* Mask the reserved physical address bits. */
- mask = rsvd_bits(maxphyaddr, 51);
+
+ /*
+ * Mask the uppermost physical address bit, which would be reserved as
+ * long as the supported physical address width is less than 52.
+ */
+ mask = 1ull << 51;
/* Set the present bit. */
mask |= 1ull;
-#ifdef CONFIG_X86_64
/*
* If reserved bit is not supported, clear the present bit to disable
* mmio page fault.
*/
- if (maxphyaddr == 52)
+ if (IS_ENABLED(CONFIG_X86_64) && maxphyaddr == 52)
mask &= ~1ull;
-#endif
kvm_mmu_set_mmio_spte_mask(mask, mask);
}
@@ -6769,6 +6834,9 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
case KVM_HC_CLOCK_PAIRING:
ret = kvm_pv_clock_pairing(vcpu, a0, a1);
break;
+ case KVM_HC_SEND_IPI:
+ ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
+ break;
#endif
default:
ret = -KVM_ENOSYS;
@@ -7235,8 +7303,9 @@ static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu)
kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
}
-void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
- unsigned long start, unsigned long end)
+int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+ unsigned long start, unsigned long end,
+ bool blockable)
{
unsigned long apic_address;
@@ -7247,6 +7316,8 @@ void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
if (start <= apic_address && apic_address < end)
kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
+
+ return 0;
}
void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
@@ -7287,6 +7358,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
bool req_immediate_exit = false;
if (kvm_request_pending(vcpu)) {
+ if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu))
+ kvm_x86_ops->get_vmcs12_pages(vcpu);
if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
kvm_mmu_unload(vcpu);
if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
@@ -7302,6 +7375,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
}
if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
kvm_mmu_sync_roots(vcpu);
+ if (kvm_check_request(KVM_REQ_LOAD_CR3, vcpu))
+ kvm_mmu_load_cr3(vcpu);
if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
kvm_vcpu_flush_tlb(vcpu, true);
if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
@@ -8013,6 +8088,10 @@ EXPORT_SYMBOL_GPL(kvm_task_switch);
static int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
{
+ if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
+ (sregs->cr4 & X86_CR4_OSXSAVE))
+ return -EINVAL;
+
if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) {
/*
* When EFER.LME and CR0.PG are set, the processor is in
@@ -8043,10 +8122,6 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
struct desc_ptr dt;
int ret = -EINVAL;
- if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
- (sregs->cr4 & X86_CR4_OSXSAVE))
- goto out;
-
if (kvm_valid_sregs(vcpu, sregs))
goto out;
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 801491e98890..04d038f3b6fa 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -4,12 +4,15 @@ config ZONE_DMA
config XTENSA
def_bool y
+ select ARCH_HAS_SYNC_DMA_FOR_CPU
+ select ARCH_HAS_SYNC_DMA_FOR_DEVICE
select ARCH_NO_COHERENT_DMA_MMAP if !MMU
select ARCH_WANT_FRAME_POINTERS
select ARCH_WANT_IPC_PARSE_VERSION
select BUILDTIME_EXTABLE_SORT
select CLONE_BACKWARDS
select COMMON_CLK
+ select DMA_NONCOHERENT_OPS
select GENERIC_ATOMIC64
select GENERIC_CLOCKEVENTS
select GENERIC_IRQ_SHOW
@@ -72,9 +75,6 @@ config TRACE_IRQFLAGS_SUPPORT
config MMU
def_bool n
-config VARIANT_IRQ_SWITCH
- def_bool n
-
config HAVE_XTENSA_GPIO32
def_bool n
@@ -244,6 +244,23 @@ config INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX
If in doubt, say Y.
+config MEMMAP_CACHEATTR
+ hex "Cache attributes for the memory address space"
+ depends on !MMU
+ default 0x22222222
+ help
+ These cache attributes are set up for noMMU systems. Each hex digit
+ specifies cache attributes for the corresponding 512MB memory
+ region: bits 0..3 -- for addresses 0x00000000..0x1fffffff,
+ bits 4..7 -- for addresses 0x20000000..0x3fffffff, and so on.
+
+ Cache attribute values are specific for the MMU type, so e.g.
+ for region protection MMUs: 2 is cache bypass, 4 is WB cached,
+ 1 is WT cached, f is illegal. For ful MMU: bit 0 makes it executable,
+ bit 1 makes it writable, bits 2..3 meaning is 0: cache bypass,
+ 1: WB cache, 2: WT cache, 3: special (c and e are illegal, f is
+ reserved).
+
config KSEG_PADDR
hex "Physical address of the KSEG mapping"
depends on INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX && MMU
@@ -413,6 +430,10 @@ config XTENSA_PLATFORM_XTFPGA
endchoice
+config PLATFORM_NR_IRQS
+ int
+ default 3 if XTENSA_PLATFORM_XT2000
+ default 0
config XTENSA_CPU_CLOCK
int "CPU clock rate [MHz]"
@@ -450,6 +471,15 @@ config BUILTIN_DTB
string "DTB to build into the kernel image"
depends on OF
+config PARSE_BOOTPARAM
+ bool "Parse bootparam block"
+ default y
+ help
+ Parse parameters passed to the kernel from the bootloader. It may
+ be disabled if the kernel is known to run without the bootloader.
+
+ If unsure, say Y.
+
config BLK_DEV_SIMDISK
tristate "Host file-based simulated block device support"
default n
@@ -506,25 +536,13 @@ config PLATFORM_WANT_DEFAULT_MEM
def_bool n
config DEFAULT_MEM_START
- hex "Physical address of the default memory area start"
- depends on PLATFORM_WANT_DEFAULT_MEM
- default 0x00000000 if MMU
- default 0x60000000 if !MMU
- help
- This is the base address of the default memory area.
- Default memory area has platform-specific meaning, it may be used
- for e.g. early cache initialization.
-
- If unsure, leave the default value here.
-
-config DEFAULT_MEM_SIZE
- hex "Maximal size of the default memory area"
- depends on PLATFORM_WANT_DEFAULT_MEM
- default 0x04000000
+ hex
+ prompt "PAGE_OFFSET/PHYS_OFFSET" if !MMU && PLATFORM_WANT_DEFAULT_MEM
+ default 0x60000000 if PLATFORM_WANT_DEFAULT_MEM
+ default 0x00000000
help
- This is the size of the default memory area.
- Default memory area has platform-specific meaning, it may be used
- for e.g. early cache initialization.
+ This is the base address used for both PAGE_OFFSET and PHYS_OFFSET
+ in noMMU configurations.
If unsure, leave the default value here.
diff --git a/arch/xtensa/boot/boot-elf/bootstrap.S b/arch/xtensa/boot/boot-elf/bootstrap.S
index b6aa85328ac0..29c68426ab56 100644
--- a/arch/xtensa/boot/boot-elf/bootstrap.S
+++ b/arch/xtensa/boot/boot-elf/bootstrap.S
@@ -15,10 +15,6 @@
*/
#include <asm/bootparam.h>
-#include <asm/processor.h>
-#include <asm/pgtable.h>
-#include <asm/page.h>
-#include <asm/cacheasm.h>
#include <asm/initialize_mmu.h>
#include <asm/vectors.h>
#include <linux/linkage.h>
@@ -33,16 +29,18 @@ _ResetVector:
.begin no-absolute-literals
.literal_position
- .align 4
-RomInitAddr:
#if defined(CONFIG_INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX) && \
XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY
- .word CONFIG_KERNEL_LOAD_ADDRESS
+ .literal RomInitAddr, CONFIG_KERNEL_LOAD_ADDRESS
#else
- .word KERNELOFFSET
+ .literal RomInitAddr, KERNELOFFSET
#endif
-RomBootParam:
- .word _bootparam
+#ifndef CONFIG_PARSE_BOOTPARAM
+ .literal RomBootParam, 0
+#else
+ .literal RomBootParam, _bootparam
+
+ .align 4
_bootparam:
.short BP_TAG_FIRST
.short 4
@@ -50,6 +48,7 @@ _bootparam:
.short BP_TAG_LAST
.short 0
.long 0
+#endif
.align 4
_SetupMMU:
diff --git a/arch/xtensa/configs/nommu_kc705_defconfig b/arch/xtensa/configs/nommu_kc705_defconfig
index 624f9b3a3878..f3fc4f970ca8 100644
--- a/arch/xtensa/configs/nommu_kc705_defconfig
+++ b/arch/xtensa/configs/nommu_kc705_defconfig
@@ -33,13 +33,13 @@ CONFIG_XTENSA_VARIANT_CUSTOM_NAME="de212"
# CONFIG_XTENSA_VARIANT_MMU is not set
CONFIG_XTENSA_UNALIGNED_USER=y
CONFIG_PREEMPT=y
+CONFIG_MEMMAP_CACHEATTR=0xfff2442f
# CONFIG_PCI is not set
CONFIG_XTENSA_PLATFORM_XTFPGA=y
CONFIG_CMDLINE_BOOL=y
CONFIG_CMDLINE="earlycon=uart8250,mmio32native,0x9d050020,115200n8 console=ttyS0,115200n8 ip=dhcp root=/dev/nfs rw debug memmap=256M@0x60000000"
CONFIG_USE_OF=y
CONFIG_BUILTIN_DTB="kc705_nommu"
-CONFIG_DEFAULT_MEM_SIZE=0x10000000
CONFIG_BINFMT_FLAT=y
CONFIG_NET=y
CONFIG_PACKET=y
diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index e5e1e61c538c..82c756431b49 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -3,6 +3,7 @@ generic-y += compat.h
generic-y += device.h
generic-y += div64.h
generic-y += dma-contiguous.h
+generic-y += dma-mapping.h
generic-y += emergency-restart.h
generic-y += exec.h
generic-y += extable.h
diff --git a/arch/xtensa/include/asm/cacheasm.h b/arch/xtensa/include/asm/cacheasm.h
index 2041abb10a23..34545ecfdd6b 100644
--- a/arch/xtensa/include/asm/cacheasm.h
+++ b/arch/xtensa/include/asm/cacheasm.h
@@ -31,16 +31,32 @@
*
*/
- .macro __loop_cache_all ar at insn size line_width
- movi \ar, 0
+ .macro __loop_cache_unroll ar at insn size line_width max_immed
+
+ .if (1 << (\line_width)) > (\max_immed)
+ .set _reps, 1
+ .elseif (2 << (\line_width)) > (\max_immed)
+ .set _reps, 2
+ .else
+ .set _reps, 4
+ .endif
+
+ __loopi \ar, \at, \size, (_reps << (\line_width))
+ .set _index, 0
+ .rep _reps
+ \insn \ar, _index << (\line_width)
+ .set _index, _index + 1
+ .endr
+ __endla \ar, \at, _reps << (\line_width)
+
+ .endm
+
- __loopi \ar, \at, \size, (4 << (\line_width))
- \insn \ar, 0 << (\line_width)
- \insn \ar, 1 << (\line_width)
- \insn \ar, 2 << (\line_width)
- \insn \ar, 3 << (\line_width)
- __endla \ar, \at, 4 << (\line_width)
+ .macro __loop_cache_all ar at insn size line_width max_immed
+
+ movi \ar, 0
+ __loop_cache_unroll \ar, \at, \insn, \size, \line_width, \max_immed
.endm
@@ -57,14 +73,9 @@
.endm
- .macro __loop_cache_page ar at insn line_width
+ .macro __loop_cache_page ar at insn line_width max_immed
- __loopi \ar, \at, PAGE_SIZE, 4 << (\line_width)
- \insn \ar, 0 << (\line_width)
- \insn \ar, 1 << (\line_width)
- \insn \ar, 2 << (\line_width)
- \insn \ar, 3 << (\line_width)
- __endla \ar, \at, 4 << (\line_width)
+ __loop_cache_unroll \ar, \at, \insn, PAGE_SIZE, \line_width, \max_immed
.endm
@@ -72,7 +83,8 @@
.macro ___unlock_dcache_all ar at
#if XCHAL_DCACHE_LINE_LOCKABLE && XCHAL_DCACHE_SIZE
- __loop_cache_all \ar \at diu XCHAL_DCACHE_SIZE XCHAL_DCACHE_LINEWIDTH
+ __loop_cache_all \ar \at diu XCHAL_DCACHE_SIZE \
+ XCHAL_DCACHE_LINEWIDTH 240
#endif
.endm
@@ -81,7 +93,8 @@
.macro ___unlock_icache_all ar at
#if XCHAL_ICACHE_LINE_LOCKABLE && XCHAL_ICACHE_SIZE
- __loop_cache_all \ar \at iiu XCHAL_ICACHE_SIZE XCHAL_ICACHE_LINEWIDTH
+ __loop_cache_all \ar \at iiu XCHAL_ICACHE_SIZE \
+ XCHAL_ICACHE_LINEWIDTH 240
#endif
.endm
@@ -90,7 +103,8 @@
.macro ___flush_invalidate_dcache_all ar at
#if XCHAL_DCACHE_SIZE
- __loop_cache_all \ar \at diwbi XCHAL_DCACHE_SIZE XCHAL_DCACHE_LINEWIDTH
+ __loop_cache_all \ar \at diwbi XCHAL_DCACHE_SIZE \
+ XCHAL_DCACHE_LINEWIDTH 240
#endif
.endm
@@ -99,7 +113,8 @@
.macro ___flush_dcache_all ar at
#if XCHAL_DCACHE_SIZE
- __loop_cache_all \ar \at diwb XCHAL_DCACHE_SIZE XCHAL_DCACHE_LINEWIDTH
+ __loop_cache_all \ar \at diwb XCHAL_DCACHE_SIZE \
+ XCHAL_DCACHE_LINEWIDTH 240
#endif
.endm
@@ -108,8 +123,8 @@
.macro ___invalidate_dcache_all ar at
#if XCHAL_DCACHE_SIZE
- __loop_cache_all \ar \at dii __stringify(DCACHE_WAY_SIZE) \
- XCHAL_DCACHE_LINEWIDTH
+ __loop_cache_all \ar \at dii XCHAL_DCACHE_SIZE \
+ XCHAL_DCACHE_LINEWIDTH 1020
#endif
.endm
@@ -118,8 +133,8 @@
.macro ___invalidate_icache_all ar at
#if XCHAL_ICACHE_SIZE
- __loop_cache_all \ar \at iii __stringify(ICACHE_WAY_SIZE) \
- XCHAL_ICACHE_LINEWIDTH
+ __loop_cache_all \ar \at iii XCHAL_ICACHE_SIZE \
+ XCHAL_ICACHE_LINEWIDTH 1020
#endif
.endm
@@ -166,7 +181,7 @@
.macro ___flush_invalidate_dcache_page ar as
#if XCHAL_DCACHE_SIZE
- __loop_cache_page \ar \as dhwbi XCHAL_DCACHE_LINEWIDTH
+ __loop_cache_page \ar \as dhwbi XCHAL_DCACHE_LINEWIDTH 1020
#endif
.endm
@@ -175,7 +190,7 @@
.macro ___flush_dcache_page ar as
#if XCHAL_DCACHE_SIZE
- __loop_cache_page \ar \as dhwb XCHAL_DCACHE_LINEWIDTH
+ __loop_cache_page \ar \as dhwb XCHAL_DCACHE_LINEWIDTH 1020
#endif
.endm
@@ -184,7 +199,7 @@
.macro ___invalidate_dcache_page ar as
#if XCHAL_DCACHE_SIZE
- __loop_cache_page \ar \as dhi XCHAL_DCACHE_LINEWIDTH
+ __loop_cache_page \ar \as dhi XCHAL_DCACHE_LINEWIDTH 1020
#endif
.endm
@@ -193,7 +208,7 @@
.macro ___invalidate_icache_page ar as
#if XCHAL_ICACHE_SIZE
- __loop_cache_page \ar \as ihi XCHAL_ICACHE_LINEWIDTH
+ __loop_cache_page \ar \as ihi XCHAL_ICACHE_LINEWIDTH 1020
#endif
.endm
diff --git a/arch/xtensa/include/asm/dma-mapping.h b/arch/xtensa/include/asm/dma-mapping.h
deleted file mode 100644
index 44098800dad7..000000000000
--- a/arch/xtensa/include/asm/dma-mapping.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2003 - 2005 Tensilica Inc.
- * Copyright (C) 2015 Cadence Design Systems Inc.
- */
-
-#ifndef _XTENSA_DMA_MAPPING_H
-#define _XTENSA_DMA_MAPPING_H
-
-#include <asm/cache.h>
-#include <asm/io.h>
-
-#include <linux/mm.h>
-#include <linux/scatterlist.h>
-
-extern const struct dma_map_ops xtensa_dma_map_ops;
-
-static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
-{
- return &xtensa_dma_map_ops;
-}
-
-#endif /* _XTENSA_DMA_MAPPING_H */
diff --git a/arch/xtensa/include/asm/initialize_mmu.h b/arch/xtensa/include/asm/initialize_mmu.h
index 42410f253597..10e9852b2fb4 100644
--- a/arch/xtensa/include/asm/initialize_mmu.h
+++ b/arch/xtensa/include/asm/initialize_mmu.h
@@ -177,36 +177,36 @@
#endif /* defined(CONFIG_MMU) && XCHAL_HAVE_PTP_MMU &&
XCHAL_HAVE_SPANNING_WAY */
-#if !defined(CONFIG_MMU) && XCHAL_HAVE_TLBS && \
- (XCHAL_DCACHE_SIZE || XCHAL_ICACHE_SIZE)
- /* Enable data and instruction cache in the DEFAULT_MEMORY region
- * if the processor has DTLB and ITLB.
- */
+ .endm
+
+ .macro initialize_cacheattr
- movi a5, PLATFORM_DEFAULT_MEM_START | XCHAL_SPANNING_WAY
+#if !defined(CONFIG_MMU) && XCHAL_HAVE_TLBS
+#if CONFIG_MEMMAP_CACHEATTR == 0x22222222 && XCHAL_HAVE_PTP_MMU
+#error Default MEMMAP_CACHEATTR of 0x22222222 does not work with full MMU.
+#endif
+
+ movi a5, XCHAL_SPANNING_WAY
movi a6, ~_PAGE_ATTRIB_MASK
- movi a7, CA_WRITEBACK
+ movi a4, CONFIG_MEMMAP_CACHEATTR
movi a8, 0x20000000
- movi a9, PLATFORM_DEFAULT_MEM_SIZE
- j 2f
1:
- sub a9, a9, a8
-2:
-#if XCHAL_DCACHE_SIZE
rdtlb1 a3, a5
+ xor a3, a3, a4
and a3, a3, a6
- or a3, a3, a7
+ xor a3, a3, a4
wdtlb a3, a5
-#endif
-#if XCHAL_ICACHE_SIZE
- ritlb1 a4, a5
- and a4, a4, a6
- or a4, a4, a7
- witlb a4, a5
-#endif
+ ritlb1 a3, a5
+ xor a3, a3, a4
+ and a3, a3, a6
+ xor a3, a3, a4
+ witlb a3, a5
+
add a5, a5, a8
- bltu a8, a9, 1b
+ srli a4, a4, 4
+ bgeu a5, a8, 1b
+ isync
#endif
.endm
diff --git a/arch/xtensa/include/asm/irq.h b/arch/xtensa/include/asm/irq.h
index 19707db966f1..6c6ed23e0c79 100644
--- a/arch/xtensa/include/asm/irq.h
+++ b/arch/xtensa/include/asm/irq.h
@@ -12,32 +12,17 @@
#define _XTENSA_IRQ_H
#include <linux/init.h>
-#include <platform/hardware.h>
#include <variant/core.h>
-#ifdef CONFIG_VARIANT_IRQ_SWITCH
-#include <variant/irq.h>
+#ifdef CONFIG_PLATFORM_NR_IRQS
+# define PLATFORM_NR_IRQS CONFIG_PLATFORM_NR_IRQS
#else
-static inline void variant_irq_enable(unsigned int irq) { }
-static inline void variant_irq_disable(unsigned int irq) { }
-#endif
-
-#ifndef VARIANT_NR_IRQS
-# define VARIANT_NR_IRQS 0
-#endif
-#ifndef PLATFORM_NR_IRQS
# define PLATFORM_NR_IRQS 0
#endif
#define XTENSA_NR_IRQS XCHAL_NUM_INTERRUPTS
-#define NR_IRQS (XTENSA_NR_IRQS + VARIANT_NR_IRQS + PLATFORM_NR_IRQS + 1)
+#define NR_IRQS (XTENSA_NR_IRQS + PLATFORM_NR_IRQS + 1)
#define XTENSA_PIC_LINUX_IRQ(hwirq) ((hwirq) + 1)
-#if VARIANT_NR_IRQS == 0
-static inline void variant_init_irq(void) { }
-#else
-void variant_init_irq(void) __init;
-#endif
-
static __inline__ int irq_canonicalize(int irq)
{
return (irq);
diff --git a/arch/xtensa/include/asm/kmem_layout.h b/arch/xtensa/include/asm/kmem_layout.h
index 2317c835a4db..9c12babc016c 100644
--- a/arch/xtensa/include/asm/kmem_layout.h
+++ b/arch/xtensa/include/asm/kmem_layout.h
@@ -63,12 +63,6 @@
#error XCHAL_KSEG_PADDR is not properly aligned to XCHAL_KSEG_ALIGNMENT
#endif
-#else
-
-#define XCHAL_KSEG_CACHED_VADDR __XTENSA_UL_CONST(0xd0000000)
-#define XCHAL_KSEG_BYPASS_VADDR __XTENSA_UL_CONST(0xd8000000)
-#define XCHAL_KSEG_SIZE __XTENSA_UL_CONST(0x08000000)
-
#endif
#ifndef CONFIG_KASAN
diff --git a/arch/xtensa/include/asm/page.h b/arch/xtensa/include/asm/page.h
index 5d69c11c01b8..09c56cba442e 100644
--- a/arch/xtensa/include/asm/page.h
+++ b/arch/xtensa/include/asm/page.h
@@ -14,7 +14,6 @@
#include <asm/processor.h>
#include <asm/types.h>
#include <asm/cache.h>
-#include <platform/hardware.h>
#include <asm/kmem_layout.h>
/*
@@ -31,8 +30,8 @@
#define MAX_LOW_PFN (PHYS_PFN(XCHAL_KSEG_PADDR) + \
PHYS_PFN(XCHAL_KSEG_SIZE))
#else
-#define PAGE_OFFSET PLATFORM_DEFAULT_MEM_START
-#define PHYS_OFFSET PLATFORM_DEFAULT_MEM_START
+#define PAGE_OFFSET _AC(CONFIG_DEFAULT_MEM_START, UL)
+#define PHYS_OFFSET _AC(CONFIG_DEFAULT_MEM_START, UL)
#define MAX_LOW_PFN PHYS_PFN(0xfffffffful)
#endif
diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h
index 38802259978f..29cfe421cf41 100644
--- a/arch/xtensa/include/asm/pgtable.h
+++ b/arch/xtensa/include/asm/pgtable.h
@@ -66,6 +66,7 @@
#define FIRST_USER_ADDRESS 0UL
#define FIRST_USER_PGD_NR (FIRST_USER_ADDRESS >> PGDIR_SHIFT)
+#ifdef CONFIG_MMU
/*
* Virtual memory area. We keep a distance to other memory regions to be
* on the safe side. We also use this area for cache aliasing.
@@ -80,6 +81,13 @@
#define TLBTEMP_SIZE ICACHE_WAY_SIZE
#endif
+#else
+
+#define VMALLOC_START __XTENSA_UL_CONST(0)
+#define VMALLOC_END __XTENSA_UL_CONST(0xffffffff)
+
+#endif
+
/*
* For the Xtensa architecture, the PTE layout is as follows:
*
diff --git a/arch/xtensa/include/asm/platform.h b/arch/xtensa/include/asm/platform.h
index f8fbef67bc5f..560483356a06 100644
--- a/arch/xtensa/include/asm/platform.h
+++ b/arch/xtensa/include/asm/platform.h
@@ -75,4 +75,31 @@ extern void platform_calibrate_ccount (void);
*/
void cpu_reset(void) __attribute__((noreturn));
+/*
+ * Memory caching is platform-dependent in noMMU xtensa configurations.
+ * The following set of functions should be implemented in platform code
+ * in order to enable coherent DMA memory operations when CONFIG_MMU is not
+ * enabled. Default implementations do nothing and issue a warning.
+ */
+
+/*
+ * Check whether p points to a cached memory.
+ */
+bool platform_vaddr_cached(const void *p);
+
+/*
+ * Check whether p points to an uncached memory.
+ */
+bool platform_vaddr_uncached(const void *p);
+
+/*
+ * Return pointer to an uncached view of the cached sddress p.
+ */
+void *platform_vaddr_to_uncached(void *p);
+
+/*
+ * Return pointer to a cached view of the uncached sddress p.
+ */
+void *platform_vaddr_to_cached(void *p);
+
#endif /* _XTENSA_PLATFORM_H */
diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h
index 5b0027d4ecc0..e4ccb88b7996 100644
--- a/arch/xtensa/include/asm/processor.h
+++ b/arch/xtensa/include/asm/processor.h
@@ -11,7 +11,6 @@
#define _XTENSA_PROCESSOR_H
#include <variant/core.h>
-#include <platform/hardware.h>
#include <linux/compiler.h>
#include <asm/ptrace.h>
diff --git a/arch/xtensa/include/asm/vectors.h b/arch/xtensa/include/asm/vectors.h
index 65d3da9db19b..7111280c8842 100644
--- a/arch/xtensa/include/asm/vectors.h
+++ b/arch/xtensa/include/asm/vectors.h
@@ -19,7 +19,6 @@
#define _XTENSA_VECTORS_H
#include <variant/core.h>
-#include <platform/hardware.h>
#include <asm/kmem_layout.h>
#if XCHAL_HAVE_PTP_MMU
diff --git a/arch/xtensa/kernel/head.S b/arch/xtensa/kernel/head.S
index 9c4e9433e536..2f76118ecf62 100644
--- a/arch/xtensa/kernel/head.S
+++ b/arch/xtensa/kernel/head.S
@@ -181,6 +181,8 @@ ENTRY(_startup)
isync
+ initialize_cacheattr
+
#ifdef CONFIG_HAVE_SMP
movi a2, CCON # MX External Register to Configure Cache
movi a3, 1
diff --git a/arch/xtensa/kernel/irq.c b/arch/xtensa/kernel/irq.c
index 18e4ef34ac45..a48bf2d10ac2 100644
--- a/arch/xtensa/kernel/irq.c
+++ b/arch/xtensa/kernel/irq.c
@@ -158,7 +158,6 @@ void __init init_IRQ(void)
#ifdef CONFIG_SMP
ipi_init();
#endif
- variant_init_irq();
}
#ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c
index a02dc563d290..1fc138b6bc0a 100644
--- a/arch/xtensa/kernel/pci-dma.c
+++ b/arch/xtensa/kernel/pci-dma.c
@@ -16,26 +16,25 @@
*/
#include <linux/dma-contiguous.h>
+#include <linux/dma-noncoherent.h>
#include <linux/dma-direct.h>
#include <linux/gfp.h>
#include <linux/highmem.h>
#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/string.h>
#include <linux/types.h>
#include <asm/cacheflush.h>
#include <asm/io.h>
+#include <asm/platform.h>
-static void do_cache_op(dma_addr_t dma_handle, size_t size,
+static void do_cache_op(phys_addr_t paddr, size_t size,
void (*fn)(unsigned long, unsigned long))
{
- unsigned long off = dma_handle & (PAGE_SIZE - 1);
- unsigned long pfn = PFN_DOWN(dma_handle);
+ unsigned long off = paddr & (PAGE_SIZE - 1);
+ unsigned long pfn = PFN_DOWN(paddr);
struct page *page = pfn_to_page(pfn);
if (!PageHighMem(page))
- fn((unsigned long)bus_to_virt(dma_handle), size);
+ fn((unsigned long)phys_to_virt(paddr), size);
else
while (size > 0) {
size_t sz = min_t(size_t, size, PAGE_SIZE - off);
@@ -49,14 +48,13 @@ static void do_cache_op(dma_addr_t dma_handle, size_t size,
}
}
-static void xtensa_sync_single_for_cpu(struct device *dev,
- dma_addr_t dma_handle, size_t size,
- enum dma_data_direction dir)
+void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
+ size_t size, enum dma_data_direction dir)
{
switch (dir) {
case DMA_BIDIRECTIONAL:
case DMA_FROM_DEVICE:
- do_cache_op(dma_handle, size, __invalidate_dcache_range);
+ do_cache_op(paddr, size, __invalidate_dcache_range);
break;
case DMA_NONE:
@@ -68,15 +66,14 @@ static void xtensa_sync_single_for_cpu(struct device *dev,
}
}
-static void xtensa_sync_single_for_device(struct device *dev,
- dma_addr_t dma_handle, size_t size,
- enum dma_data_direction dir)
+void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
+ size_t size, enum dma_data_direction dir)
{
switch (dir) {
case DMA_BIDIRECTIONAL:
case DMA_TO_DEVICE:
if (XCHAL_DCACHE_IS_WRITEBACK)
- do_cache_op(dma_handle, size, __flush_dcache_range);
+ do_cache_op(paddr, size, __flush_dcache_range);
break;
case DMA_NONE:
@@ -88,43 +85,66 @@ static void xtensa_sync_single_for_device(struct device *dev,
}
}
-static void xtensa_sync_sg_for_cpu(struct device *dev,
- struct scatterlist *sg, int nents,
- enum dma_data_direction dir)
+#ifdef CONFIG_MMU
+bool platform_vaddr_cached(const void *p)
{
- struct scatterlist *s;
- int i;
+ unsigned long addr = (unsigned long)p;
- for_each_sg(sg, s, nents, i) {
- xtensa_sync_single_for_cpu(dev, sg_dma_address(s),
- sg_dma_len(s), dir);
- }
+ return addr >= XCHAL_KSEG_CACHED_VADDR &&
+ addr - XCHAL_KSEG_CACHED_VADDR < XCHAL_KSEG_SIZE;
}
-static void xtensa_sync_sg_for_device(struct device *dev,
- struct scatterlist *sg, int nents,
- enum dma_data_direction dir)
+bool platform_vaddr_uncached(const void *p)
{
- struct scatterlist *s;
- int i;
+ unsigned long addr = (unsigned long)p;
- for_each_sg(sg, s, nents, i) {
- xtensa_sync_single_for_device(dev, sg_dma_address(s),
- sg_dma_len(s), dir);
- }
+ return addr >= XCHAL_KSEG_BYPASS_VADDR &&
+ addr - XCHAL_KSEG_BYPASS_VADDR < XCHAL_KSEG_SIZE;
+}
+
+void *platform_vaddr_to_uncached(void *p)
+{
+ return p + XCHAL_KSEG_BYPASS_VADDR - XCHAL_KSEG_CACHED_VADDR;
+}
+
+void *platform_vaddr_to_cached(void *p)
+{
+ return p + XCHAL_KSEG_CACHED_VADDR - XCHAL_KSEG_BYPASS_VADDR;
+}
+#else
+bool __attribute__((weak)) platform_vaddr_cached(const void *p)
+{
+ WARN_ONCE(1, "Default %s implementation is used\n", __func__);
+ return true;
+}
+
+bool __attribute__((weak)) platform_vaddr_uncached(const void *p)
+{
+ WARN_ONCE(1, "Default %s implementation is used\n", __func__);
+ return false;
+}
+
+void __attribute__((weak)) *platform_vaddr_to_uncached(void *p)
+{
+ WARN_ONCE(1, "Default %s implementation is used\n", __func__);
+ return p;
+}
+
+void __attribute__((weak)) *platform_vaddr_to_cached(void *p)
+{
+ WARN_ONCE(1, "Default %s implementation is used\n", __func__);
+ return p;
}
+#endif
/*
* Note: We assume that the full memory space is always mapped to 'kseg'
* Otherwise we have to use page attributes (not implemented).
*/
-static void *xtensa_dma_alloc(struct device *dev, size_t size,
- dma_addr_t *handle, gfp_t flag,
- unsigned long attrs)
+void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
+ gfp_t flag, unsigned long attrs)
{
- unsigned long ret;
- unsigned long uncached;
unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
struct page *page = NULL;
@@ -147,6 +167,10 @@ static void *xtensa_dma_alloc(struct device *dev, size_t size,
*handle = phys_to_dma(dev, page_to_phys(page));
+ if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) {
+ return page;
+ }
+
#ifdef CONFIG_MMU
if (PageHighMem(page)) {
void *p;
@@ -161,27 +185,21 @@ static void *xtensa_dma_alloc(struct device *dev, size_t size,
return p;
}
#endif
- ret = (unsigned long)page_address(page);
- BUG_ON(ret < XCHAL_KSEG_CACHED_VADDR ||
- ret > XCHAL_KSEG_CACHED_VADDR + XCHAL_KSEG_SIZE - 1);
-
- uncached = ret + XCHAL_KSEG_BYPASS_VADDR - XCHAL_KSEG_CACHED_VADDR;
- __invalidate_dcache_range(ret, size);
-
- return (void *)uncached;
+ BUG_ON(!platform_vaddr_cached(page_address(page)));
+ __invalidate_dcache_range((unsigned long)page_address(page), size);
+ return platform_vaddr_to_uncached(page_address(page));
}
-static void xtensa_dma_free(struct device *dev, size_t size, void *vaddr,
- dma_addr_t dma_handle, unsigned long attrs)
+void arch_dma_free(struct device *dev, size_t size, void *vaddr,
+ dma_addr_t dma_handle, unsigned long attrs)
{
unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
- unsigned long addr = (unsigned long)vaddr;
struct page *page;
- if (addr >= XCHAL_KSEG_BYPASS_VADDR &&
- addr - XCHAL_KSEG_BYPASS_VADDR < XCHAL_KSEG_SIZE) {
- addr += XCHAL_KSEG_CACHED_VADDR - XCHAL_KSEG_BYPASS_VADDR;
- page = virt_to_page(addr);
+ if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) {
+ page = vaddr;
+ } else if (platform_vaddr_uncached(vaddr)) {
+ page = virt_to_page(platform_vaddr_to_cached(vaddr));
} else {
#ifdef CONFIG_MMU
dma_common_free_remap(vaddr, size, VM_MAP);
@@ -192,72 +210,3 @@ static void xtensa_dma_free(struct device *dev, size_t size, void *vaddr,
if (!dma_release_from_contiguous(dev, page, count))
__free_pages(page, get_order(size));
}
-
-static dma_addr_t xtensa_map_page(struct device *dev, struct page *page,
- unsigned long offset, size_t size,
- enum dma_data_direction dir,
- unsigned long attrs)
-{
- dma_addr_t dma_handle = page_to_phys(page) + offset;
-
- if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
- xtensa_sync_single_for_device(dev, dma_handle, size, dir);
-
- return dma_handle;
-}
-
-static void xtensa_unmap_page(struct device *dev, dma_addr_t dma_handle,
- size_t size, enum dma_data_direction dir,
- unsigned long attrs)
-{
- if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
- xtensa_sync_single_for_cpu(dev, dma_handle, size, dir);
-}
-
-static int xtensa_map_sg(struct device *dev, struct scatterlist *sg,
- int nents, enum dma_data_direction dir,
- unsigned long attrs)
-{
- struct scatterlist *s;
- int i;
-
- for_each_sg(sg, s, nents, i) {
- s->dma_address = xtensa_map_page(dev, sg_page(s), s->offset,
- s->length, dir, attrs);
- }
- return nents;
-}
-
-static void xtensa_unmap_sg(struct device *dev,
- struct scatterlist *sg, int nents,
- enum dma_data_direction dir,
- unsigned long attrs)
-{
- struct scatterlist *s;
- int i;
-
- for_each_sg(sg, s, nents, i) {
- xtensa_unmap_page(dev, sg_dma_address(s),
- sg_dma_len(s), dir, attrs);
- }
-}
-
-int xtensa_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
- return 0;
-}
-
-const struct dma_map_ops xtensa_dma_map_ops = {
- .alloc = xtensa_dma_alloc,
- .free = xtensa_dma_free,
- .map_page = xtensa_map_page,
- .unmap_page = xtensa_unmap_page,
- .map_sg = xtensa_map_sg,
- .unmap_sg = xtensa_unmap_sg,
- .sync_single_for_cpu = xtensa_sync_single_for_cpu,
- .sync_single_for_device = xtensa_sync_single_for_device,
- .sync_sg_for_cpu = xtensa_sync_sg_for_cpu,
- .sync_sg_for_device = xtensa_sync_sg_for_device,
- .mapping_error = xtensa_dma_mapping_error,
-};
-EXPORT_SYMBOL(xtensa_dma_map_ops);
diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c
index 686a27444bba..351283b60df6 100644
--- a/arch/xtensa/kernel/setup.c
+++ b/arch/xtensa/kernel/setup.c
@@ -47,8 +47,6 @@
#include <asm/smp.h>
#include <asm/sysmem.h>
-#include <platform/hardware.h>
-
#if defined(CONFIG_VGA_CONSOLE) || defined(CONFIG_DUMMY_CONSOLE)
struct screen_info screen_info = {
.orig_x = 0,
@@ -81,6 +79,7 @@ static char __initdata command_line[COMMAND_LINE_SIZE];
static char default_command_line[COMMAND_LINE_SIZE] __initdata = CONFIG_CMDLINE;
#endif
+#ifdef CONFIG_PARSE_BOOTPARAM
/*
* Boot parameter parsing.
*
@@ -178,6 +177,13 @@ static int __init parse_bootparam(const bp_tag_t* tag)
return 0;
}
+#else
+static int __init parse_bootparam(const bp_tag_t *tag)
+{
+ pr_info("Ignoring boot parameters at %p\n", tag);
+ return 0;
+}
+#endif
#ifdef CONFIG_OF
diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S
index 70b731edc7b8..a1c3edb8ad56 100644
--- a/arch/xtensa/kernel/vmlinux.lds.S
+++ b/arch/xtensa/kernel/vmlinux.lds.S
@@ -20,7 +20,7 @@
#include <asm/vectors.h>
#include <variant/core.h>
-#include <platform/hardware.h>
+
OUTPUT_ARCH(xtensa)
ENTRY(_start)
diff --git a/arch/xtensa/platforms/iss/include/platform/hardware.h b/arch/xtensa/platforms/iss/include/platform/hardware.h
deleted file mode 100644
index 6930c12adc16..000000000000
--- a/arch/xtensa/platforms/iss/include/platform/hardware.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * include/asm-xtensa/platform-iss/hardware.h
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2001 Tensilica Inc.
- */
-
-/*
- * This file contains the default configuration of ISS.
- */
-
-#ifndef _XTENSA_PLATFORM_ISS_HARDWARE_H
-#define _XTENSA_PLATFORM_ISS_HARDWARE_H
-
-/*
- * Memory configuration.
- */
-
-#define PLATFORM_DEFAULT_MEM_START 0x00000000
-#define PLATFORM_DEFAULT_MEM_SIZE 0x08000000
-
-/*
- * Interrupt configuration.
- */
-
-#endif /* _XTENSA_PLATFORM_ISS_HARDWARE_H */
diff --git a/arch/xtensa/platforms/xt2000/include/platform/hardware.h b/arch/xtensa/platforms/xt2000/include/platform/hardware.h
index 886ef156ded3..8e5e0d6a81ec 100644
--- a/arch/xtensa/platforms/xt2000/include/platform/hardware.h
+++ b/arch/xtensa/platforms/xt2000/include/platform/hardware.h
@@ -17,17 +17,6 @@
#include <variant/core.h>
-/*
- * Memory configuration.
- */
-
-#define PLATFORM_DEFAULT_MEM_START 0x00000000
-#define PLATFORM_DEFAULT_MEM_SIZE 0x08000000
-
-/*
- * Number of platform IRQs
- */
-#define PLATFORM_NR_IRQS 3
/*
* On-board components.
*/
diff --git a/arch/xtensa/platforms/xtfpga/include/platform/hardware.h b/arch/xtensa/platforms/xtfpga/include/platform/hardware.h
index 1fda7e20dfcb..30d9cb6cf168 100644
--- a/arch/xtensa/platforms/xtfpga/include/platform/hardware.h
+++ b/arch/xtensa/platforms/xtfpga/include/platform/hardware.h
@@ -17,15 +17,6 @@
#ifndef __XTENSA_XTAVNET_HARDWARE_H
#define __XTENSA_XTAVNET_HARDWARE_H
-/* Memory configuration. */
-
-#define PLATFORM_DEFAULT_MEM_START __XTENSA_UL(CONFIG_DEFAULT_MEM_START)
-#define PLATFORM_DEFAULT_MEM_SIZE __XTENSA_UL(CONFIG_DEFAULT_MEM_SIZE)
-
-/* Interrupt configuration. */
-
-#define PLATFORM_NR_IRQS 0
-
/* Default assignment of LX60 devices to external interrupts. */
#ifdef CONFIG_XTENSA_MX
diff --git a/arch/xtensa/variants/test_kc705_be/include/variant/core.h b/arch/xtensa/variants/test_kc705_be/include/variant/core.h
new file mode 100644
index 000000000000..a4ebdf7197d7
--- /dev/null
+++ b/arch/xtensa/variants/test_kc705_be/include/variant/core.h
@@ -0,0 +1,575 @@
+/*
+ * xtensa/config/core-isa.h -- HAL definitions that are dependent on Xtensa
+ * processor CORE configuration
+ *
+ * See <xtensa/config/core.h>, which includes this file, for more details.
+ */
+
+/* Xtensa processor core configuration information.
+
+ Copyright (c) 1999-2015 Tensilica Inc.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef _XTENSA_CORE_CONFIGURATION_H
+#define _XTENSA_CORE_CONFIGURATION_H
+
+
+/****************************************************************************
+ Parameters Useful for Any Code, USER or PRIVILEGED
+ ****************************************************************************/
+
+/*
+ * Note: Macros of the form XCHAL_HAVE_*** have a value of 1 if the option is
+ * configured, and a value of 0 otherwise. These macros are always defined.
+ */
+
+
+/*----------------------------------------------------------------------
+ ISA
+ ----------------------------------------------------------------------*/
+
+#define XCHAL_HAVE_BE 1 /* big-endian byte ordering */
+#define XCHAL_HAVE_WINDOWED 1 /* windowed registers option */
+#define XCHAL_NUM_AREGS 32 /* num of physical addr regs */
+#define XCHAL_NUM_AREGS_LOG2 5 /* log2(XCHAL_NUM_AREGS) */
+#define XCHAL_MAX_INSTRUCTION_SIZE 8 /* max instr bytes (3..8) */
+#define XCHAL_HAVE_DEBUG 1 /* debug option */
+#define XCHAL_HAVE_DENSITY 1 /* 16-bit instructions */
+#define XCHAL_HAVE_LOOPS 1 /* zero-overhead loops */
+#define XCHAL_LOOP_BUFFER_SIZE 0 /* zero-ov. loop instr buffer size */
+#define XCHAL_HAVE_NSA 1 /* NSA/NSAU instructions */
+#define XCHAL_HAVE_MINMAX 1 /* MIN/MAX instructions */
+#define XCHAL_HAVE_SEXT 1 /* SEXT instruction */
+#define XCHAL_HAVE_DEPBITS 0 /* DEPBITS instruction */
+#define XCHAL_HAVE_CLAMPS 1 /* CLAMPS instruction */
+#define XCHAL_HAVE_MUL16 1 /* MUL16S/MUL16U instructions */
+#define XCHAL_HAVE_MUL32 1 /* MULL instruction */
+#define XCHAL_HAVE_MUL32_HIGH 1 /* MULUH/MULSH instructions */
+#define XCHAL_HAVE_DIV32 1 /* QUOS/QUOU/REMS/REMU instructions */
+#define XCHAL_HAVE_L32R 1 /* L32R instruction */
+#define XCHAL_HAVE_ABSOLUTE_LITERALS 0 /* non-PC-rel (extended) L32R */
+#define XCHAL_HAVE_CONST16 0 /* CONST16 instruction */
+#define XCHAL_HAVE_ADDX 1 /* ADDX#/SUBX# instructions */
+#define XCHAL_HAVE_WIDE_BRANCHES 0 /* B*.W18 or B*.W15 instr's */
+#define XCHAL_HAVE_PREDICTED_BRANCHES 0 /* B[EQ/EQZ/NE/NEZ]T instr's */
+#define XCHAL_HAVE_CALL4AND12 1 /* (obsolete option) */
+#define XCHAL_HAVE_ABS 1 /* ABS instruction */
+/*#define XCHAL_HAVE_POPC 0*/ /* POPC instruction */
+/*#define XCHAL_HAVE_CRC 0*/ /* CRC instruction */
+#define XCHAL_HAVE_RELEASE_SYNC 1 /* L32AI/S32RI instructions */
+#define XCHAL_HAVE_S32C1I 1 /* S32C1I instruction */
+#define XCHAL_HAVE_SPECULATION 0 /* speculation */
+#define XCHAL_HAVE_FULL_RESET 1 /* all regs/state reset */
+#define XCHAL_NUM_CONTEXTS 1 /* */
+#define XCHAL_NUM_MISC_REGS 2 /* num of scratch regs (0..4) */
+#define XCHAL_HAVE_TAP_MASTER 0 /* JTAG TAP control instr's */
+#define XCHAL_HAVE_PRID 1 /* processor ID register */
+#define XCHAL_HAVE_EXTERN_REGS 1 /* WER/RER instructions */
+#define XCHAL_HAVE_MX 0 /* MX core (Tensilica internal) */
+#define XCHAL_HAVE_MP_INTERRUPTS 0 /* interrupt distributor port */
+#define XCHAL_HAVE_MP_RUNSTALL 0 /* core RunStall control port */
+#define XCHAL_HAVE_PSO 0 /* Power Shut-Off */
+#define XCHAL_HAVE_PSO_CDM 0 /* core/debug/mem pwr domains */
+#define XCHAL_HAVE_PSO_FULL_RETENTION 0 /* all regs preserved on PSO */
+#define XCHAL_HAVE_THREADPTR 1 /* THREADPTR register */
+#define XCHAL_HAVE_BOOLEANS 1 /* boolean registers */
+#define XCHAL_HAVE_CP 1 /* CPENABLE reg (coprocessor) */
+#define XCHAL_CP_MAXCFG 8 /* max allowed cp id plus one */
+#define XCHAL_HAVE_MAC16 1 /* MAC16 package */
+
+#define XCHAL_HAVE_FUSION 0 /* Fusion*/
+#define XCHAL_HAVE_FUSION_FP 0 /* Fusion FP option */
+#define XCHAL_HAVE_FUSION_LOW_POWER 0 /* Fusion Low Power option */
+#define XCHAL_HAVE_FUSION_AES 0 /* Fusion BLE/Wifi AES-128 CCM option */
+#define XCHAL_HAVE_FUSION_CONVENC 0 /* Fusion Conv Encode option */
+#define XCHAL_HAVE_FUSION_LFSR_CRC 0 /* Fusion LFSR-CRC option */
+#define XCHAL_HAVE_FUSION_BITOPS 0 /* Fusion Bit Operations Support option */
+#define XCHAL_HAVE_FUSION_AVS 0 /* Fusion AVS option */
+#define XCHAL_HAVE_FUSION_16BIT_BASEBAND 0 /* Fusion 16-bit Baseband option */
+#define XCHAL_HAVE_HIFIPRO 0 /* HiFiPro Audio Engine pkg */
+#define XCHAL_HAVE_HIFI4 0 /* HiFi4 Audio Engine pkg */
+#define XCHAL_HAVE_HIFI4_VFPU 0 /* HiFi4 Audio Engine VFPU option */
+#define XCHAL_HAVE_HIFI3 0 /* HiFi3 Audio Engine pkg */
+#define XCHAL_HAVE_HIFI3_VFPU 0 /* HiFi3 Audio Engine VFPU option */
+#define XCHAL_HAVE_HIFI2 1 /* HiFi2 Audio Engine pkg */
+#define XCHAL_HAVE_HIFI2_MUL32X24 1 /* HiFi2 and 32x24 MACs */
+#define XCHAL_HAVE_HIFI2EP 1 /* HiFi2EP */
+#define XCHAL_HAVE_HIFI_MINI 0
+
+
+#define XCHAL_HAVE_VECTORFPU2005 0 /* vector or user floating-point pkg */
+#define XCHAL_HAVE_USER_DPFPU 0 /* user DP floating-point pkg */
+#define XCHAL_HAVE_USER_SPFPU 0 /* user DP floating-point pkg */
+#define XCHAL_HAVE_FP 0 /* single prec floating point */
+#define XCHAL_HAVE_FP_DIV 0 /* FP with DIV instructions */
+#define XCHAL_HAVE_FP_RECIP 0 /* FP with RECIP instructions */
+#define XCHAL_HAVE_FP_SQRT 0 /* FP with SQRT instructions */
+#define XCHAL_HAVE_FP_RSQRT 0 /* FP with RSQRT instructions */
+#define XCHAL_HAVE_DFP 0 /* double precision FP pkg */
+#define XCHAL_HAVE_DFP_DIV 0 /* DFP with DIV instructions */
+#define XCHAL_HAVE_DFP_RECIP 0 /* DFP with RECIP instructions*/
+#define XCHAL_HAVE_DFP_SQRT 0 /* DFP with SQRT instructions */
+#define XCHAL_HAVE_DFP_RSQRT 0 /* DFP with RSQRT instructions*/
+#define XCHAL_HAVE_DFP_ACCEL 0 /* double precision FP acceleration pkg */
+#define XCHAL_HAVE_DFP_accel XCHAL_HAVE_DFP_ACCEL /* for backward compatibility */
+
+#define XCHAL_HAVE_DFPU_SINGLE_ONLY 0 /* DFPU Coprocessor, single precision only */
+#define XCHAL_HAVE_DFPU_SINGLE_DOUBLE 0 /* DFPU Coprocessor, single and double precision */
+#define XCHAL_HAVE_VECTRA1 0 /* Vectra I pkg */
+#define XCHAL_HAVE_VECTRALX 0 /* Vectra LX pkg */
+#define XCHAL_HAVE_PDX4 0 /* PDX4 */
+#define XCHAL_HAVE_CONNXD2 0 /* ConnX D2 pkg */
+#define XCHAL_HAVE_CONNXD2_DUALLSFLIX 0 /* ConnX D2 & Dual LoadStore Flix */
+#define XCHAL_HAVE_BBE16 0 /* ConnX BBE16 pkg */
+#define XCHAL_HAVE_BBE16_RSQRT 0 /* BBE16 & vector recip sqrt */
+#define XCHAL_HAVE_BBE16_VECDIV 0 /* BBE16 & vector divide */
+#define XCHAL_HAVE_BBE16_DESPREAD 0 /* BBE16 & despread */
+#define XCHAL_HAVE_BBENEP 0 /* ConnX BBENEP pkgs */
+#define XCHAL_HAVE_BSP3 0 /* ConnX BSP3 pkg */
+#define XCHAL_HAVE_BSP3_TRANSPOSE 0 /* BSP3 & transpose32x32 */
+#define XCHAL_HAVE_SSP16 0 /* ConnX SSP16 pkg */
+#define XCHAL_HAVE_SSP16_VITERBI 0 /* SSP16 & viterbi */
+#define XCHAL_HAVE_TURBO16 0 /* ConnX Turbo16 pkg */
+#define XCHAL_HAVE_BBP16 0 /* ConnX BBP16 pkg */
+#define XCHAL_HAVE_FLIX3 0 /* basic 3-way FLIX option */
+#define XCHAL_HAVE_GRIVPEP 0 /* GRIVPEP is General Release of IVPEP */
+#define XCHAL_HAVE_GRIVPEP_HISTOGRAM 0 /* Histogram option on GRIVPEP */
+
+
+/*----------------------------------------------------------------------
+ MISC
+ ----------------------------------------------------------------------*/
+
+#define XCHAL_NUM_LOADSTORE_UNITS 1 /* load/store units */
+#define XCHAL_NUM_WRITEBUFFER_ENTRIES 8 /* size of write buffer */
+#define XCHAL_INST_FETCH_WIDTH 8 /* instr-fetch width in bytes */
+#define XCHAL_DATA_WIDTH 8 /* data width in bytes */
+#define XCHAL_DATA_PIPE_DELAY 1 /* d-side pipeline delay
+ (1 = 5-stage, 2 = 7-stage) */
+#define XCHAL_CLOCK_GATING_GLOBAL 0 /* global clock gating */
+#define XCHAL_CLOCK_GATING_FUNCUNIT 0 /* funct. unit clock gating */
+/* In T1050, applies to selected core load and store instructions (see ISA): */
+#define XCHAL_UNALIGNED_LOAD_EXCEPTION 1 /* unaligned loads cause exc. */
+#define XCHAL_UNALIGNED_STORE_EXCEPTION 1 /* unaligned stores cause exc.*/
+#define XCHAL_UNALIGNED_LOAD_HW 0 /* unaligned loads work in hw */
+#define XCHAL_UNALIGNED_STORE_HW 0 /* unaligned stores work in hw*/
+
+#define XCHAL_SW_VERSION 1100002 /* sw version of this header */
+
+#define XCHAL_CORE_ID "test_kc705_be" /* alphanum core name
+ (CoreID) set in the Xtensa
+ Processor Generator */
+
+#define XCHAL_BUILD_UNIQUE_ID 0x00058D8C /* 22-bit sw build ID */
+
+/*
+ * These definitions describe the hardware targeted by this software.
+ */
+#define XCHAL_HW_CONFIGID0 0xC1B3FFFF /* ConfigID hi 32 bits*/
+#define XCHAL_HW_CONFIGID1 0x1C858D8C /* ConfigID lo 32 bits*/
+#define XCHAL_HW_VERSION_NAME "LX6.0.2" /* full version name */
+#define XCHAL_HW_VERSION_MAJOR 2600 /* major ver# of targeted hw */
+#define XCHAL_HW_VERSION_MINOR 2 /* minor ver# of targeted hw */
+#define XCHAL_HW_VERSION 260002 /* major*100+minor */
+#define XCHAL_HW_REL_LX6 1
+#define XCHAL_HW_REL_LX6_0 1
+#define XCHAL_HW_REL_LX6_0_2 1
+#define XCHAL_HW_CONFIGID_RELIABLE 1
+/* If software targets a *range* of hardware versions, these are the bounds: */
+#define XCHAL_HW_MIN_VERSION_MAJOR 2600 /* major v of earliest tgt hw */
+#define XCHAL_HW_MIN_VERSION_MINOR 2 /* minor v of earliest tgt hw */
+#define XCHAL_HW_MIN_VERSION 260002 /* earliest targeted hw */
+#define XCHAL_HW_MAX_VERSION_MAJOR 2600 /* major v of latest tgt hw */
+#define XCHAL_HW_MAX_VERSION_MINOR 2 /* minor v of latest tgt hw */
+#define XCHAL_HW_MAX_VERSION 260002 /* latest targeted hw */
+
+
+/*----------------------------------------------------------------------
+ CACHE
+ ----------------------------------------------------------------------*/
+
+#define XCHAL_ICACHE_LINESIZE 32 /* I-cache line size in bytes */
+#define XCHAL_DCACHE_LINESIZE 32 /* D-cache line size in bytes */
+#define XCHAL_ICACHE_LINEWIDTH 5 /* log2(I line size in bytes) */
+#define XCHAL_DCACHE_LINEWIDTH 5 /* log2(D line size in bytes) */
+
+#define XCHAL_ICACHE_SIZE 16384 /* I-cache size in bytes or 0 */
+#define XCHAL_DCACHE_SIZE 16384 /* D-cache size in bytes or 0 */
+
+#define XCHAL_DCACHE_IS_WRITEBACK 1 /* writeback feature */
+#define XCHAL_DCACHE_IS_COHERENT 0 /* MP coherence feature */
+
+#define XCHAL_HAVE_PREFETCH 1 /* PREFCTL register */
+#define XCHAL_HAVE_PREFETCH_L1 0 /* prefetch to L1 dcache */
+#define XCHAL_PREFETCH_CASTOUT_LINES 1 /* dcache pref. castout bufsz */
+#define XCHAL_PREFETCH_ENTRIES 8 /* cache prefetch entries */
+#define XCHAL_PREFETCH_BLOCK_ENTRIES 0 /* prefetch block streams */
+#define XCHAL_HAVE_CACHE_BLOCKOPS 0 /* block prefetch for caches */
+#define XCHAL_HAVE_ICACHE_TEST 1 /* Icache test instructions */
+#define XCHAL_HAVE_DCACHE_TEST 1 /* Dcache test instructions */
+#define XCHAL_HAVE_ICACHE_DYN_WAYS 0 /* Icache dynamic way support */
+#define XCHAL_HAVE_DCACHE_DYN_WAYS 0 /* Dcache dynamic way support */
+
+
+
+
+/****************************************************************************
+ Parameters Useful for PRIVILEGED (Supervisory or Non-Virtualized) Code
+ ****************************************************************************/
+
+
+#ifndef XTENSA_HAL_NON_PRIVILEGED_ONLY
+
+/*----------------------------------------------------------------------
+ CACHE
+ ----------------------------------------------------------------------*/
+
+#define XCHAL_HAVE_PIF 1 /* any outbound PIF present */
+
+/* If present, cache size in bytes == (ways * 2^(linewidth + setwidth)). */
+
+/* Number of cache sets in log2(lines per way): */
+#define XCHAL_ICACHE_SETWIDTH 7
+#define XCHAL_DCACHE_SETWIDTH 7
+
+/* Cache set associativity (number of ways): */
+#define XCHAL_ICACHE_WAYS 4
+#define XCHAL_DCACHE_WAYS 4
+
+/* Cache features: */
+#define XCHAL_ICACHE_LINE_LOCKABLE 1
+#define XCHAL_DCACHE_LINE_LOCKABLE 1
+#define XCHAL_ICACHE_ECC_PARITY 0
+#define XCHAL_DCACHE_ECC_PARITY 0
+
+/* Cache access size in bytes (affects operation of SICW instruction): */
+#define XCHAL_ICACHE_ACCESS_SIZE 8
+#define XCHAL_DCACHE_ACCESS_SIZE 8
+
+#define XCHAL_DCACHE_BANKS 1 /* number of banks */
+
+/* Number of encoded cache attr bits (see <xtensa/hal.h> for decoded bits): */
+#define XCHAL_CA_BITS 4
+
+/* Whether MEMCTL register has anything useful */
+#define XCHAL_USE_MEMCTL (((XCHAL_LOOP_BUFFER_SIZE > 0) || \
+ XCHAL_DCACHE_IS_COHERENT || \
+ XCHAL_HAVE_ICACHE_DYN_WAYS || \
+ XCHAL_HAVE_DCACHE_DYN_WAYS) && \
+ (XCHAL_HW_MIN_VERSION >= XTENSA_HWVERSION_RE_2012_0))
+
+
+/*----------------------------------------------------------------------
+ INTERNAL I/D RAM/ROMs and XLMI
+ ----------------------------------------------------------------------*/
+
+#define XCHAL_NUM_INSTROM 0 /* number of core instr. ROMs */
+#define XCHAL_NUM_INSTRAM 0 /* number of core instr. RAMs */
+#define XCHAL_NUM_DATAROM 0 /* number of core data ROMs */
+#define XCHAL_NUM_DATARAM 0 /* number of core data RAMs */
+#define XCHAL_NUM_URAM 0 /* number of core unified RAMs*/
+#define XCHAL_NUM_XLMI 0 /* number of core XLMI ports */
+
+#define XCHAL_HAVE_IMEM_LOADSTORE 1 /* can load/store to IROM/IRAM*/
+
+
+/*----------------------------------------------------------------------
+ INTERRUPTS and TIMERS
+ ----------------------------------------------------------------------*/
+
+#define XCHAL_HAVE_INTERRUPTS 1 /* interrupt option */
+#define XCHAL_HAVE_HIGHPRI_INTERRUPTS 1 /* med/high-pri. interrupts */
+#define XCHAL_HAVE_NMI 1 /* non-maskable interrupt */
+#define XCHAL_HAVE_CCOUNT 1 /* CCOUNT reg. (timer option) */
+#define XCHAL_NUM_TIMERS 3 /* number of CCOMPAREn regs */
+#define XCHAL_NUM_INTERRUPTS 22 /* number of interrupts */
+#define XCHAL_NUM_INTERRUPTS_LOG2 5 /* ceil(log2(NUM_INTERRUPTS)) */
+#define XCHAL_NUM_EXTINTERRUPTS 16 /* num of external interrupts */
+#define XCHAL_NUM_INTLEVELS 6 /* number of interrupt levels
+ (not including level zero) */
+#define XCHAL_EXCM_LEVEL 4 /* level masked by PS.EXCM */
+ /* (always 1 in XEA1; levels 2 .. EXCM_LEVEL are "medium priority") */
+
+/* Masks of interrupts at each interrupt level: */
+#define XCHAL_INTLEVEL1_MASK 0x001F00BF
+#define XCHAL_INTLEVEL2_MASK 0x00000140
+#define XCHAL_INTLEVEL3_MASK 0x00200E00
+#define XCHAL_INTLEVEL4_MASK 0x00008000
+#define XCHAL_INTLEVEL5_MASK 0x00003000
+#define XCHAL_INTLEVEL6_MASK 0x00000000
+#define XCHAL_INTLEVEL7_MASK 0x00004000
+
+/* Masks of interrupts at each range 1..n of interrupt levels: */
+#define XCHAL_INTLEVEL1_ANDBELOW_MASK 0x001F00BF
+#define XCHAL_INTLEVEL2_ANDBELOW_MASK 0x001F01FF
+#define XCHAL_INTLEVEL3_ANDBELOW_MASK 0x003F0FFF
+#define XCHAL_INTLEVEL4_ANDBELOW_MASK 0x003F8FFF
+#define XCHAL_INTLEVEL5_ANDBELOW_MASK 0x003FBFFF
+#define XCHAL_INTLEVEL6_ANDBELOW_MASK 0x003FBFFF
+#define XCHAL_INTLEVEL7_ANDBELOW_MASK 0x003FFFFF
+
+/* Level of each interrupt: */
+#define XCHAL_INT0_LEVEL 1
+#define XCHAL_INT1_LEVEL 1
+#define XCHAL_INT2_LEVEL 1
+#define XCHAL_INT3_LEVEL 1
+#define XCHAL_INT4_LEVEL 1
+#define XCHAL_INT5_LEVEL 1
+#define XCHAL_INT6_LEVEL 2
+#define XCHAL_INT7_LEVEL 1
+#define XCHAL_INT8_LEVEL 2
+#define XCHAL_INT9_LEVEL 3
+#define XCHAL_INT10_LEVEL 3
+#define XCHAL_INT11_LEVEL 3
+#define XCHAL_INT12_LEVEL 5
+#define XCHAL_INT13_LEVEL 5
+#define XCHAL_INT14_LEVEL 7
+#define XCHAL_INT15_LEVEL 4
+#define XCHAL_INT16_LEVEL 1
+#define XCHAL_INT17_LEVEL 1
+#define XCHAL_INT18_LEVEL 1
+#define XCHAL_INT19_LEVEL 1
+#define XCHAL_INT20_LEVEL 1
+#define XCHAL_INT21_LEVEL 3
+#define XCHAL_DEBUGLEVEL 6 /* debug interrupt level */
+#define XCHAL_HAVE_DEBUG_EXTERN_INT 1 /* OCD external db interrupt */
+#define XCHAL_NMILEVEL 7 /* NMI "level" (for use with
+ EXCSAVE/EPS/EPC_n, RFI n) */
+
+/* Type of each interrupt: */
+#define XCHAL_INT0_TYPE XTHAL_INTTYPE_EXTERN_LEVEL
+#define XCHAL_INT1_TYPE XTHAL_INTTYPE_EXTERN_LEVEL
+#define XCHAL_INT2_TYPE XTHAL_INTTYPE_EXTERN_LEVEL
+#define XCHAL_INT3_TYPE XTHAL_INTTYPE_EXTERN_LEVEL
+#define XCHAL_INT4_TYPE XTHAL_INTTYPE_EXTERN_LEVEL
+#define XCHAL_INT5_TYPE XTHAL_INTTYPE_EXTERN_LEVEL
+#define XCHAL_INT6_TYPE XTHAL_INTTYPE_TIMER
+#define XCHAL_INT7_TYPE XTHAL_INTTYPE_SOFTWARE
+#define XCHAL_INT8_TYPE XTHAL_INTTYPE_EXTERN_LEVEL
+#define XCHAL_INT9_TYPE XTHAL_INTTYPE_EXTERN_LEVEL
+#define XCHAL_INT10_TYPE XTHAL_INTTYPE_TIMER
+#define XCHAL_INT11_TYPE XTHAL_INTTYPE_SOFTWARE
+#define XCHAL_INT12_TYPE XTHAL_INTTYPE_EXTERN_LEVEL
+#define XCHAL_INT13_TYPE XTHAL_INTTYPE_TIMER
+#define XCHAL_INT14_TYPE XTHAL_INTTYPE_NMI
+#define XCHAL_INT15_TYPE XTHAL_INTTYPE_PROFILING
+#define XCHAL_INT16_TYPE XTHAL_INTTYPE_EXTERN_EDGE
+#define XCHAL_INT17_TYPE XTHAL_INTTYPE_EXTERN_EDGE
+#define XCHAL_INT18_TYPE XTHAL_INTTYPE_EXTERN_EDGE
+#define XCHAL_INT19_TYPE XTHAL_INTTYPE_EXTERN_EDGE
+#define XCHAL_INT20_TYPE XTHAL_INTTYPE_EXTERN_EDGE
+#define XCHAL_INT21_TYPE XTHAL_INTTYPE_EXTERN_EDGE
+
+/* Masks of interrupts for each type of interrupt: */
+#define XCHAL_INTTYPE_MASK_UNCONFIGURED 0xFFC00000
+#define XCHAL_INTTYPE_MASK_SOFTWARE 0x00000880
+#define XCHAL_INTTYPE_MASK_EXTERN_EDGE 0x003F0000
+#define XCHAL_INTTYPE_MASK_EXTERN_LEVEL 0x0000133F
+#define XCHAL_INTTYPE_MASK_TIMER 0x00002440
+#define XCHAL_INTTYPE_MASK_NMI 0x00004000
+#define XCHAL_INTTYPE_MASK_WRITE_ERROR 0x00000000
+#define XCHAL_INTTYPE_MASK_PROFILING 0x00008000
+
+/* Interrupt numbers assigned to specific interrupt sources: */
+#define XCHAL_TIMER0_INTERRUPT 6 /* CCOMPARE0 */
+#define XCHAL_TIMER1_INTERRUPT 10 /* CCOMPARE1 */
+#define XCHAL_TIMER2_INTERRUPT 13 /* CCOMPARE2 */
+#define XCHAL_TIMER3_INTERRUPT XTHAL_TIMER_UNCONFIGURED
+#define XCHAL_NMI_INTERRUPT 14 /* non-maskable interrupt */
+#define XCHAL_PROFILING_INTERRUPT 15 /* profiling interrupt */
+
+/* Interrupt numbers for levels at which only one interrupt is configured: */
+#define XCHAL_INTLEVEL4_NUM 15
+#define XCHAL_INTLEVEL7_NUM 14
+/* (There are many interrupts each at level(s) 1, 2, 3, 5.) */
+
+
+/*
+ * External interrupt mapping.
+ * These macros describe how Xtensa processor interrupt numbers
+ * (as numbered internally, eg. in INTERRUPT and INTENABLE registers)
+ * map to external BInterrupt<n> pins, for those interrupts
+ * configured as external (level-triggered, edge-triggered, or NMI).
+ * See the Xtensa processor databook for more details.
+ */
+
+/* Core interrupt numbers mapped to each EXTERNAL BInterrupt pin number: */
+#define XCHAL_EXTINT0_NUM 0 /* (intlevel 1) */
+#define XCHAL_EXTINT1_NUM 1 /* (intlevel 1) */
+#define XCHAL_EXTINT2_NUM 2 /* (intlevel 1) */
+#define XCHAL_EXTINT3_NUM 3 /* (intlevel 1) */
+#define XCHAL_EXTINT4_NUM 4 /* (intlevel 1) */
+#define XCHAL_EXTINT5_NUM 5 /* (intlevel 1) */
+#define XCHAL_EXTINT6_NUM 8 /* (intlevel 2) */
+#define XCHAL_EXTINT7_NUM 9 /* (intlevel 3) */
+#define XCHAL_EXTINT8_NUM 12 /* (intlevel 5) */
+#define XCHAL_EXTINT9_NUM 14 /* (intlevel 7) */
+#define XCHAL_EXTINT10_NUM 16 /* (intlevel 1) */
+#define XCHAL_EXTINT11_NUM 17 /* (intlevel 1) */
+#define XCHAL_EXTINT12_NUM 18 /* (intlevel 1) */
+#define XCHAL_EXTINT13_NUM 19 /* (intlevel 1) */
+#define XCHAL_EXTINT14_NUM 20 /* (intlevel 1) */
+#define XCHAL_EXTINT15_NUM 21 /* (intlevel 3) */
+/* EXTERNAL BInterrupt pin numbers mapped to each core interrupt number: */
+#define XCHAL_INT0_EXTNUM 0 /* (intlevel 1) */
+#define XCHAL_INT1_EXTNUM 1 /* (intlevel 1) */
+#define XCHAL_INT2_EXTNUM 2 /* (intlevel 1) */
+#define XCHAL_INT3_EXTNUM 3 /* (intlevel 1) */
+#define XCHAL_INT4_EXTNUM 4 /* (intlevel 1) */
+#define XCHAL_INT5_EXTNUM 5 /* (intlevel 1) */
+#define XCHAL_INT8_EXTNUM 6 /* (intlevel 2) */
+#define XCHAL_INT9_EXTNUM 7 /* (intlevel 3) */
+#define XCHAL_INT12_EXTNUM 8 /* (intlevel 5) */
+#define XCHAL_INT14_EXTNUM 9 /* (intlevel 7) */
+#define XCHAL_INT16_EXTNUM 10 /* (intlevel 1) */
+#define XCHAL_INT17_EXTNUM 11 /* (intlevel 1) */
+#define XCHAL_INT18_EXTNUM 12 /* (intlevel 1) */
+#define XCHAL_INT19_EXTNUM 13 /* (intlevel 1) */
+#define XCHAL_INT20_EXTNUM 14 /* (intlevel 1) */
+#define XCHAL_INT21_EXTNUM 15 /* (intlevel 3) */
+
+
+/*----------------------------------------------------------------------
+ EXCEPTIONS and VECTORS
+ ----------------------------------------------------------------------*/
+
+#define XCHAL_XEA_VERSION 2 /* Xtensa Exception Architecture
+ number: 1 == XEA1 (old)
+ 2 == XEA2 (new)
+ 0 == XEAX (extern) or TX */
+#define XCHAL_HAVE_XEA1 0 /* Exception Architecture 1 */
+#define XCHAL_HAVE_XEA2 1 /* Exception Architecture 2 */
+#define XCHAL_HAVE_XEAX 0 /* External Exception Arch. */
+#define XCHAL_HAVE_EXCEPTIONS 1 /* exception option */
+#define XCHAL_HAVE_HALT 0 /* halt architecture option */
+#define XCHAL_HAVE_BOOTLOADER 0 /* boot loader (for TX) */
+#define XCHAL_HAVE_MEM_ECC_PARITY 0 /* local memory ECC/parity */
+#define XCHAL_HAVE_VECTOR_SELECT 1 /* relocatable vectors */
+#define XCHAL_HAVE_VECBASE 1 /* relocatable vectors */
+#define XCHAL_VECBASE_RESET_VADDR 0x00002000 /* VECBASE reset value */
+#define XCHAL_VECBASE_RESET_PADDR 0x00002000
+#define XCHAL_RESET_VECBASE_OVERLAP 0
+
+#define XCHAL_RESET_VECTOR0_VADDR 0xFE000000
+#define XCHAL_RESET_VECTOR0_PADDR 0xFE000000
+#define XCHAL_RESET_VECTOR1_VADDR 0x00001000
+#define XCHAL_RESET_VECTOR1_PADDR 0x00001000
+#define XCHAL_RESET_VECTOR_VADDR 0xFE000000
+#define XCHAL_RESET_VECTOR_PADDR 0xFE000000
+#define XCHAL_USER_VECOFS 0x00000340
+#define XCHAL_USER_VECTOR_VADDR 0x00002340
+#define XCHAL_USER_VECTOR_PADDR 0x00002340
+#define XCHAL_KERNEL_VECOFS 0x00000300
+#define XCHAL_KERNEL_VECTOR_VADDR 0x00002300
+#define XCHAL_KERNEL_VECTOR_PADDR 0x00002300
+#define XCHAL_DOUBLEEXC_VECOFS 0x000003C0
+#define XCHAL_DOUBLEEXC_VECTOR_VADDR 0x000023C0
+#define XCHAL_DOUBLEEXC_VECTOR_PADDR 0x000023C0
+#define XCHAL_WINDOW_OF4_VECOFS 0x00000000
+#define XCHAL_WINDOW_UF4_VECOFS 0x00000040
+#define XCHAL_WINDOW_OF8_VECOFS 0x00000080
+#define XCHAL_WINDOW_UF8_VECOFS 0x000000C0
+#define XCHAL_WINDOW_OF12_VECOFS 0x00000100
+#define XCHAL_WINDOW_UF12_VECOFS 0x00000140
+#define XCHAL_WINDOW_VECTORS_VADDR 0x00002000
+#define XCHAL_WINDOW_VECTORS_PADDR 0x00002000
+#define XCHAL_INTLEVEL2_VECOFS 0x00000180
+#define XCHAL_INTLEVEL2_VECTOR_VADDR 0x00002180
+#define XCHAL_INTLEVEL2_VECTOR_PADDR 0x00002180
+#define XCHAL_INTLEVEL3_VECOFS 0x000001C0
+#define XCHAL_INTLEVEL3_VECTOR_VADDR 0x000021C0
+#define XCHAL_INTLEVEL3_VECTOR_PADDR 0x000021C0
+#define XCHAL_INTLEVEL4_VECOFS 0x00000200
+#define XCHAL_INTLEVEL4_VECTOR_VADDR 0x00002200
+#define XCHAL_INTLEVEL4_VECTOR_PADDR 0x00002200
+#define XCHAL_INTLEVEL5_VECOFS 0x00000240
+#define XCHAL_INTLEVEL5_VECTOR_VADDR 0x00002240
+#define XCHAL_INTLEVEL5_VECTOR_PADDR 0x00002240
+#define XCHAL_INTLEVEL6_VECOFS 0x00000280
+#define XCHAL_INTLEVEL6_VECTOR_VADDR 0x00002280
+#define XCHAL_INTLEVEL6_VECTOR_PADDR 0x00002280
+#define XCHAL_DEBUG_VECOFS XCHAL_INTLEVEL6_VECOFS
+#define XCHAL_DEBUG_VECTOR_VADDR XCHAL_INTLEVEL6_VECTOR_VADDR
+#define XCHAL_DEBUG_VECTOR_PADDR XCHAL_INTLEVEL6_VECTOR_PADDR
+#define XCHAL_NMI_VECOFS 0x000002C0
+#define XCHAL_NMI_VECTOR_VADDR 0x000022C0
+#define XCHAL_NMI_VECTOR_PADDR 0x000022C0
+#define XCHAL_INTLEVEL7_VECOFS XCHAL_NMI_VECOFS
+#define XCHAL_INTLEVEL7_VECTOR_VADDR XCHAL_NMI_VECTOR_VADDR
+#define XCHAL_INTLEVEL7_VECTOR_PADDR XCHAL_NMI_VECTOR_PADDR
+
+
+/*----------------------------------------------------------------------
+ DEBUG MODULE
+ ----------------------------------------------------------------------*/
+
+/* Misc */
+#define XCHAL_HAVE_DEBUG_ERI 1 /* ERI to debug module */
+#define XCHAL_HAVE_DEBUG_APB 0 /* APB to debug module */
+#define XCHAL_HAVE_DEBUG_JTAG 1 /* JTAG to debug module */
+
+/* On-Chip Debug (OCD) */
+#define XCHAL_HAVE_OCD 1 /* OnChipDebug option */
+#define XCHAL_NUM_IBREAK 2 /* number of IBREAKn regs */
+#define XCHAL_NUM_DBREAK 2 /* number of DBREAKn regs */
+#define XCHAL_HAVE_OCD_DIR_ARRAY 0 /* faster OCD option (to LX4) */
+#define XCHAL_HAVE_OCD_LS32DDR 1 /* L32DDR/S32DDR (faster OCD) */
+
+/* TRAX (in core) */
+#define XCHAL_HAVE_TRAX 1 /* TRAX in debug module */
+#define XCHAL_TRAX_MEM_SIZE 262144 /* TRAX memory size in bytes */
+#define XCHAL_TRAX_MEM_SHAREABLE 1 /* start/end regs; ready sig. */
+#define XCHAL_TRAX_ATB_WIDTH 0 /* ATB width (bits), 0=no ATB */
+#define XCHAL_TRAX_TIME_WIDTH 0 /* timestamp bitwidth, 0=none */
+
+/* Perf counters */
+#define XCHAL_NUM_PERF_COUNTERS 8 /* performance counters */
+
+
+/*----------------------------------------------------------------------
+ MMU
+ ----------------------------------------------------------------------*/
+
+/* See core-matmap.h header file for more details. */
+
+#define XCHAL_HAVE_TLBS 1 /* inverse of HAVE_CACHEATTR */
+#define XCHAL_HAVE_SPANNING_WAY 1 /* one way maps I+D 4GB vaddr */
+#define XCHAL_SPANNING_WAY 6 /* TLB spanning way number */
+#define XCHAL_HAVE_IDENTITY_MAP 0 /* vaddr == paddr always */
+#define XCHAL_HAVE_CACHEATTR 0 /* CACHEATTR register present */
+#define XCHAL_HAVE_MIMIC_CACHEATTR 0 /* region protection */
+#define XCHAL_HAVE_XLT_CACHEATTR 0 /* region prot. w/translation */
+#define XCHAL_HAVE_PTP_MMU 1 /* full MMU (with page table
+ [autorefill] and protection)
+ usable for an MMU-based OS */
+/* If none of the above last 4 are set, it's a custom TLB configuration. */
+#define XCHAL_ITLB_ARF_ENTRIES_LOG2 2 /* log2(autorefill way size) */
+#define XCHAL_DTLB_ARF_ENTRIES_LOG2 2 /* log2(autorefill way size) */
+
+#define XCHAL_MMU_ASID_BITS 8 /* number of bits in ASIDs */
+#define XCHAL_MMU_RINGS 4 /* number of rings (1..4) */
+#define XCHAL_MMU_RING_BITS 2 /* num of bits in RING field */
+
+#endif /* !XTENSA_HAL_NON_PRIVILEGED_ONLY */
+
+
+#endif /* _XTENSA_CORE_CONFIGURATION_H */
+
diff --git a/arch/xtensa/variants/test_kc705_be/include/variant/tie-asm.h b/arch/xtensa/variants/test_kc705_be/include/variant/tie-asm.h
new file mode 100644
index 000000000000..b87fe1a5177b
--- /dev/null
+++ b/arch/xtensa/variants/test_kc705_be/include/variant/tie-asm.h
@@ -0,0 +1,308 @@
+/*
+ * tie-asm.h -- compile-time HAL assembler definitions dependent on CORE & TIE
+ *
+ * NOTE: This header file is not meant to be included directly.
+ */
+
+/* This header file contains assembly-language definitions (assembly
+ macros, etc.) for this specific Xtensa processor's TIE extensions
+ and options. It is customized to this Xtensa processor configuration.
+
+ Copyright (c) 1999-2015 Cadence Design Systems Inc.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef _XTENSA_CORE_TIE_ASM_H
+#define _XTENSA_CORE_TIE_ASM_H
+
+/* Selection parameter values for save-area save/restore macros: */
+/* Option vs. TIE: */
+#define XTHAL_SAS_TIE 0x0001 /* custom extension or coprocessor */
+#define XTHAL_SAS_OPT 0x0002 /* optional (and not a coprocessor) */
+#define XTHAL_SAS_ANYOT 0x0003 /* both of the above */
+/* Whether used automatically by compiler: */
+#define XTHAL_SAS_NOCC 0x0004 /* not used by compiler w/o special opts/code */
+#define XTHAL_SAS_CC 0x0008 /* used by compiler without special opts/code */
+#define XTHAL_SAS_ANYCC 0x000C /* both of the above */
+/* ABI handling across function calls: */
+#define XTHAL_SAS_CALR 0x0010 /* caller-saved */
+#define XTHAL_SAS_CALE 0x0020 /* callee-saved */
+#define XTHAL_SAS_GLOB 0x0040 /* global across function calls (in thread) */
+#define XTHAL_SAS_ANYABI 0x0070 /* all of the above three */
+/* Misc */
+#define XTHAL_SAS_ALL 0xFFFF /* include all default NCP contents */
+#define XTHAL_SAS3(optie,ccuse,abi) ( ((optie) & XTHAL_SAS_ANYOT) \
+ | ((ccuse) & XTHAL_SAS_ANYCC) \
+ | ((abi) & XTHAL_SAS_ANYABI) )
+
+
+ /*
+ * Macro to store all non-coprocessor (extra) custom TIE and optional state
+ * (not including zero-overhead loop registers).
+ * Required parameters:
+ * ptr Save area pointer address register (clobbered)
+ * (register must contain a 4 byte aligned address).
+ * at1..at4 Four temporary address registers (first XCHAL_NCP_NUM_ATMPS
+ * registers are clobbered, the remaining are unused).
+ * Optional parameters:
+ * continue If macro invoked as part of a larger store sequence, set to 1
+ * if this is not the first in the sequence. Defaults to 0.
+ * ofs Offset from start of larger sequence (from value of first ptr
+ * in sequence) at which to store. Defaults to next available space
+ * (or 0 if <continue> is 0).
+ * select Select what category(ies) of registers to store, as a bitmask
+ * (see XTHAL_SAS_xxx constants). Defaults to all registers.
+ * alloc Select what category(ies) of registers to allocate; if any
+ * category is selected here that is not in <select>, space for
+ * the corresponding registers is skipped without doing any store.
+ */
+ .macro xchal_ncp_store ptr at1 at2 at3 at4 continue=0 ofs=-1 select=XTHAL_SAS_ALL alloc=0
+ xchal_sa_start \continue, \ofs
+ // Optional global registers used by default by the compiler:
+ .ifeq (XTHAL_SAS_OPT | XTHAL_SAS_CC | XTHAL_SAS_GLOB) & ~(\select)
+ xchal_sa_align \ptr, 0, 1020, 4, 4
+ rur.THREADPTR \at1 // threadptr option
+ s32i \at1, \ptr, .Lxchal_ofs_+0
+ .set .Lxchal_ofs_, .Lxchal_ofs_ + 4
+ .elseif ((XTHAL_SAS_OPT | XTHAL_SAS_CC | XTHAL_SAS_GLOB) & ~(\alloc)) == 0
+ xchal_sa_align \ptr, 0, 1020, 4, 4
+ .set .Lxchal_ofs_, .Lxchal_ofs_ + 4
+ .endif
+ // Optional caller-saved registers used by default by the compiler:
+ .ifeq (XTHAL_SAS_OPT | XTHAL_SAS_CC | XTHAL_SAS_CALR) & ~(\select)
+ xchal_sa_align \ptr, 0, 1016, 4, 4
+ rsr.ACCLO \at1 // MAC16 option
+ s32i \at1, \ptr, .Lxchal_ofs_+0
+ rsr.ACCHI \at1 // MAC16 option
+ s32i \at1, \ptr, .Lxchal_ofs_+4
+ .set .Lxchal_ofs_, .Lxchal_ofs_ + 8
+ .elseif ((XTHAL_SAS_OPT | XTHAL_SAS_CC | XTHAL_SAS_CALR) & ~(\alloc)) == 0
+ xchal_sa_align \ptr, 0, 1016, 4, 4
+ .set .Lxchal_ofs_, .Lxchal_ofs_ + 8
+ .endif
+ // Optional caller-saved registers not used by default by the compiler:
+ .ifeq (XTHAL_SAS_OPT | XTHAL_SAS_NOCC | XTHAL_SAS_CALR) & ~(\select)
+ xchal_sa_align \ptr, 0, 1000, 4, 4
+ rsr.BR \at1 // boolean option
+ s32i \at1, \ptr, .Lxchal_ofs_+0
+ rsr.SCOMPARE1 \at1 // conditional store option
+ s32i \at1, \ptr, .Lxchal_ofs_+4
+ rsr.M0 \at1 // MAC16 option
+ s32i \at1, \ptr, .Lxchal_ofs_+8
+ rsr.M1 \at1 // MAC16 option
+ s32i \at1, \ptr, .Lxchal_ofs_+12
+ rsr.M2 \at1 // MAC16 option
+ s32i \at1, \ptr, .Lxchal_ofs_+16
+ rsr.M3 \at1 // MAC16 option
+ s32i \at1, \ptr, .Lxchal_ofs_+20
+ .set .Lxchal_ofs_, .Lxchal_ofs_ + 24
+ .elseif ((XTHAL_SAS_OPT | XTHAL_SAS_NOCC | XTHAL_SAS_CALR) & ~(\alloc)) == 0
+ xchal_sa_align \ptr, 0, 1000, 4, 4
+ .set .Lxchal_ofs_, .Lxchal_ofs_ + 24
+ .endif
+ .endm // xchal_ncp_store
+
+ /*
+ * Macro to load all non-coprocessor (extra) custom TIE and optional state
+ * (not including zero-overhead loop registers).
+ * Required parameters:
+ * ptr Save area pointer address register (clobbered)
+ * (register must contain a 4 byte aligned address).
+ * at1..at4 Four temporary address registers (first XCHAL_NCP_NUM_ATMPS
+ * registers are clobbered, the remaining are unused).
+ * Optional parameters:
+ * continue If macro invoked as part of a larger load sequence, set to 1
+ * if this is not the first in the sequence. Defaults to 0.
+ * ofs Offset from start of larger sequence (from value of first ptr
+ * in sequence) at which to load. Defaults to next available space
+ * (or 0 if <continue> is 0).
+ * select Select what category(ies) of registers to load, as a bitmask
+ * (see XTHAL_SAS_xxx constants). Defaults to all registers.
+ * alloc Select what category(ies) of registers to allocate; if any
+ * category is selected here that is not in <select>, space for
+ * the corresponding registers is skipped without doing any load.
+ */
+ .macro xchal_ncp_load ptr at1 at2 at3 at4 continue=0 ofs=-1 select=XTHAL_SAS_ALL alloc=0
+ xchal_sa_start \continue, \ofs
+ // Optional global registers used by default by the compiler:
+ .ifeq (XTHAL_SAS_OPT | XTHAL_SAS_CC | XTHAL_SAS_GLOB) & ~(\select)
+ xchal_sa_align \ptr, 0, 1020, 4, 4
+ l32i \at1, \ptr, .Lxchal_ofs_+0
+ wur.THREADPTR \at1 // threadptr option
+ .set .Lxchal_ofs_, .Lxchal_ofs_ + 4
+ .elseif ((XTHAL_SAS_OPT | XTHAL_SAS_CC | XTHAL_SAS_GLOB) & ~(\alloc)) == 0
+ xchal_sa_align \ptr, 0, 1020, 4, 4
+ .set .Lxchal_ofs_, .Lxchal_ofs_ + 4
+ .endif
+ // Optional caller-saved registers used by default by the compiler:
+ .ifeq (XTHAL_SAS_OPT | XTHAL_SAS_CC | XTHAL_SAS_CALR) & ~(\select)
+ xchal_sa_align \ptr, 0, 1016, 4, 4
+ l32i \at1, \ptr, .Lxchal_ofs_+0
+ wsr.ACCLO \at1 // MAC16 option
+ l32i \at1, \ptr, .Lxchal_ofs_+4
+ wsr.ACCHI \at1 // MAC16 option
+ .set .Lxchal_ofs_, .Lxchal_ofs_ + 8
+ .elseif ((XTHAL_SAS_OPT | XTHAL_SAS_CC | XTHAL_SAS_CALR) & ~(\alloc)) == 0
+ xchal_sa_align \ptr, 0, 1016, 4, 4
+ .set .Lxchal_ofs_, .Lxchal_ofs_ + 8
+ .endif
+ // Optional caller-saved registers not used by default by the compiler:
+ .ifeq (XTHAL_SAS_OPT | XTHAL_SAS_NOCC | XTHAL_SAS_CALR) & ~(\select)
+ xchal_sa_align \ptr, 0, 1000, 4, 4
+ l32i \at1, \ptr, .Lxchal_ofs_+0
+ wsr.BR \at1 // boolean option
+ l32i \at1, \ptr, .Lxchal_ofs_+4
+ wsr.SCOMPARE1 \at1 // conditional store option
+ l32i \at1, \ptr, .Lxchal_ofs_+8
+ wsr.M0 \at1 // MAC16 option
+ l32i \at1, \ptr, .Lxchal_ofs_+12
+ wsr.M1 \at1 // MAC16 option
+ l32i \at1, \ptr, .Lxchal_ofs_+16
+ wsr.M2 \at1 // MAC16 option
+ l32i \at1, \ptr, .Lxchal_ofs_+20
+ wsr.M3 \at1 // MAC16 option
+ .set .Lxchal_ofs_, .Lxchal_ofs_ + 24
+ .elseif ((XTHAL_SAS_OPT | XTHAL_SAS_NOCC | XTHAL_SAS_CALR) & ~(\alloc)) == 0
+ xchal_sa_align \ptr, 0, 1000, 4, 4
+ .set .Lxchal_ofs_, .Lxchal_ofs_ + 24
+ .endif
+ .endm // xchal_ncp_load
+
+
+#define XCHAL_NCP_NUM_ATMPS 1
+
+ /*
+ * Macro to store the state of TIE coprocessor AudioEngineLX.
+ * Required parameters:
+ * ptr Save area pointer address register (clobbered)
+ * (register must contain a 8 byte aligned address).
+ * at1..at4 Four temporary address registers (first XCHAL_CP1_NUM_ATMPS
+ * registers are clobbered, the remaining are unused).
+ * Optional parameters are the same as for xchal_ncp_store.
+ */
+#define xchal_cp_AudioEngineLX_store xchal_cp1_store
+ .macro xchal_cp1_store ptr at1 at2 at3 at4 continue=0 ofs=-1 select=XTHAL_SAS_ALL alloc=0
+ xchal_sa_start \continue, \ofs
+ // Custom caller-saved registers not used by default by the compiler:
+ .ifeq (XTHAL_SAS_TIE | XTHAL_SAS_NOCC | XTHAL_SAS_CALR) & ~(\select)
+ xchal_sa_align \ptr, 0, 0, 8, 8
+ rur.AE_OVF_SAR \at1 // ureg 240
+ s32i \at1, \ptr, .Lxchal_ofs_+0
+ rur.AE_BITHEAD \at1 // ureg 241
+ s32i \at1, \ptr, .Lxchal_ofs_+4
+ rur.AE_TS_FTS_BU_BP \at1 // ureg 242
+ s32i \at1, \ptr, .Lxchal_ofs_+8
+ rur.AE_SD_NO \at1 // ureg 243
+ s32i \at1, \ptr, .Lxchal_ofs_+12
+ rur.AE_CBEGIN0 \at1 // ureg 246
+ s32i \at1, \ptr, .Lxchal_ofs_+16
+ rur.AE_CEND0 \at1 // ureg 247
+ s32i \at1, \ptr, .Lxchal_ofs_+20
+ ae_sp24x2s.i aep0, \ptr, .Lxchal_ofs_+24
+ ae_sp24x2s.i aep1, \ptr, .Lxchal_ofs_+32
+ ae_sp24x2s.i aep2, \ptr, .Lxchal_ofs_+40
+ ae_sp24x2s.i aep3, \ptr, .Lxchal_ofs_+48
+ ae_sp24x2s.i aep4, \ptr, .Lxchal_ofs_+56
+ addi \ptr, \ptr, 64
+ ae_sp24x2s.i aep5, \ptr, .Lxchal_ofs_+0
+ ae_sp24x2s.i aep6, \ptr, .Lxchal_ofs_+8
+ ae_sp24x2s.i aep7, \ptr, .Lxchal_ofs_+16
+ ae_sq56s.i aeq0, \ptr, .Lxchal_ofs_+24
+ ae_sq56s.i aeq1, \ptr, .Lxchal_ofs_+32
+ ae_sq56s.i aeq2, \ptr, .Lxchal_ofs_+40
+ ae_sq56s.i aeq3, \ptr, .Lxchal_ofs_+48
+ .set .Lxchal_pofs_, .Lxchal_pofs_ + 64
+ .set .Lxchal_ofs_, .Lxchal_ofs_ + 56
+ .elseif ((XTHAL_SAS_TIE | XTHAL_SAS_NOCC | XTHAL_SAS_CALR) & ~(\alloc)) == 0
+ xchal_sa_align \ptr, 0, 0, 8, 8
+ .set .Lxchal_ofs_, .Lxchal_ofs_ + 120
+ .endif
+ .endm // xchal_cp1_store
+
+ /*
+ * Macro to load the state of TIE coprocessor AudioEngineLX.
+ * Required parameters:
+ * ptr Save area pointer address register (clobbered)
+ * (register must contain a 8 byte aligned address).
+ * at1..at4 Four temporary address registers (first XCHAL_CP1_NUM_ATMPS
+ * registers are clobbered, the remaining are unused).
+ * Optional parameters are the same as for xchal_ncp_load.
+ */
+#define xchal_cp_AudioEngineLX_load xchal_cp1_load
+ .macro xchal_cp1_load ptr at1 at2 at3 at4 continue=0 ofs=-1 select=XTHAL_SAS_ALL alloc=0
+ xchal_sa_start \continue, \ofs
+ // Custom caller-saved registers not used by default by the compiler:
+ .ifeq (XTHAL_SAS_TIE | XTHAL_SAS_NOCC | XTHAL_SAS_CALR) & ~(\select)
+ xchal_sa_align \ptr, 0, 0, 8, 8
+ l32i \at1, \ptr, .Lxchal_ofs_+0
+ wur.AE_OVF_SAR \at1 // ureg 240
+ l32i \at1, \ptr, .Lxchal_ofs_+4
+ wur.AE_BITHEAD \at1 // ureg 241
+ l32i \at1, \ptr, .Lxchal_ofs_+8
+ wur.AE_TS_FTS_BU_BP \at1 // ureg 242
+ l32i \at1, \ptr, .Lxchal_ofs_+12
+ wur.AE_SD_NO \at1 // ureg 243
+ l32i \at1, \ptr, .Lxchal_ofs_+16
+ wur.AE_CBEGIN0 \at1 // ureg 246
+ l32i \at1, \ptr, .Lxchal_ofs_+20
+ wur.AE_CEND0 \at1 // ureg 247
+ ae_lp24x2.i aep0, \ptr, .Lxchal_ofs_+24
+ ae_lp24x2.i aep1, \ptr, .Lxchal_ofs_+32
+ ae_lp24x2.i aep2, \ptr, .Lxchal_ofs_+40
+ ae_lp24x2.i aep3, \ptr, .Lxchal_ofs_+48
+ ae_lp24x2.i aep4, \ptr, .Lxchal_ofs_+56
+ addi \ptr, \ptr, 64
+ ae_lp24x2.i aep5, \ptr, .Lxchal_ofs_+0
+ ae_lp24x2.i aep6, \ptr, .Lxchal_ofs_+8
+ ae_lp24x2.i aep7, \ptr, .Lxchal_ofs_+16
+ addi \ptr, \ptr, 24
+ ae_lq56.i aeq0, \ptr, .Lxchal_ofs_+0
+ ae_lq56.i aeq1, \ptr, .Lxchal_ofs_+8
+ ae_lq56.i aeq2, \ptr, .Lxchal_ofs_+16
+ ae_lq56.i aeq3, \ptr, .Lxchal_ofs_+24
+ .set .Lxchal_pofs_, .Lxchal_pofs_ + 88
+ .set .Lxchal_ofs_, .Lxchal_ofs_ + 32
+ .elseif ((XTHAL_SAS_TIE | XTHAL_SAS_NOCC | XTHAL_SAS_CALR) & ~(\alloc)) == 0
+ xchal_sa_align \ptr, 0, 0, 8, 8
+ .set .Lxchal_ofs_, .Lxchal_ofs_ + 120
+ .endif
+ .endm // xchal_cp1_load
+
+#define XCHAL_CP1_NUM_ATMPS 1
+#define XCHAL_SA_NUM_ATMPS 1
+
+ /* Empty macros for unconfigured coprocessors: */
+ .macro xchal_cp0_store p a b c d continue=0 ofs=-1 select=-1 ; .endm
+ .macro xchal_cp0_load p a b c d continue=0 ofs=-1 select=-1 ; .endm
+ .macro xchal_cp2_store p a b c d continue=0 ofs=-1 select=-1 ; .endm
+ .macro xchal_cp2_load p a b c d continue=0 ofs=-1 select=-1 ; .endm
+ .macro xchal_cp3_store p a b c d continue=0 ofs=-1 select=-1 ; .endm
+ .macro xchal_cp3_load p a b c d continue=0 ofs=-1 select=-1 ; .endm
+ .macro xchal_cp4_store p a b c d continue=0 ofs=-1 select=-1 ; .endm
+ .macro xchal_cp4_load p a b c d continue=0 ofs=-1 select=-1 ; .endm
+ .macro xchal_cp5_store p a b c d continue=0 ofs=-1 select=-1 ; .endm
+ .macro xchal_cp5_load p a b c d continue=0 ofs=-1 select=-1 ; .endm
+ .macro xchal_cp6_store p a b c d continue=0 ofs=-1 select=-1 ; .endm
+ .macro xchal_cp6_load p a b c d continue=0 ofs=-1 select=-1 ; .endm
+ .macro xchal_cp7_store p a b c d continue=0 ofs=-1 select=-1 ; .endm
+ .macro xchal_cp7_load p a b c d continue=0 ofs=-1 select=-1 ; .endm
+
+#endif /*_XTENSA_CORE_TIE_ASM_H*/
+
diff --git a/arch/xtensa/variants/test_kc705_be/include/variant/tie.h b/arch/xtensa/variants/test_kc705_be/include/variant/tie.h
new file mode 100644
index 000000000000..4e1b4baac8e2
--- /dev/null
+++ b/arch/xtensa/variants/test_kc705_be/include/variant/tie.h
@@ -0,0 +1,182 @@
+/*
+ * tie.h -- compile-time HAL definitions dependent on CORE & TIE configuration
+ *
+ * NOTE: This header file is not meant to be included directly.
+ */
+
+/* This header file describes this specific Xtensa processor's TIE extensions
+ that extend basic Xtensa core functionality. It is customized to this
+ Xtensa processor configuration.
+
+ Copyright (c) 1999-2015 Cadence Design Systems Inc.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef _XTENSA_CORE_TIE_H
+#define _XTENSA_CORE_TIE_H
+
+#define XCHAL_CP_NUM 2 /* number of coprocessors */
+#define XCHAL_CP_MAX 8 /* max CP ID + 1 (0 if none) */
+#define XCHAL_CP_MASK 0x82 /* bitmask of all CPs by ID */
+#define XCHAL_CP_PORT_MASK 0x80 /* bitmask of only port CPs */
+
+/* Basic parameters of each coprocessor: */
+#define XCHAL_CP1_NAME "AudioEngineLX"
+#define XCHAL_CP1_IDENT AudioEngineLX
+#define XCHAL_CP1_SA_SIZE 120 /* size of state save area */
+#define XCHAL_CP1_SA_ALIGN 8 /* min alignment of save area */
+#define XCHAL_CP_ID_AUDIOENGINELX 1 /* coprocessor ID (0..7) */
+#define XCHAL_CP7_NAME "XTIOP"
+#define XCHAL_CP7_IDENT XTIOP
+#define XCHAL_CP7_SA_SIZE 0 /* size of state save area */
+#define XCHAL_CP7_SA_ALIGN 1 /* min alignment of save area */
+#define XCHAL_CP_ID_XTIOP 7 /* coprocessor ID (0..7) */
+
+/* Filler info for unassigned coprocessors, to simplify arrays etc: */
+#define XCHAL_CP0_SA_SIZE 0
+#define XCHAL_CP0_SA_ALIGN 1
+#define XCHAL_CP2_SA_SIZE 0
+#define XCHAL_CP2_SA_ALIGN 1
+#define XCHAL_CP3_SA_SIZE 0
+#define XCHAL_CP3_SA_ALIGN 1
+#define XCHAL_CP4_SA_SIZE 0
+#define XCHAL_CP4_SA_ALIGN 1
+#define XCHAL_CP5_SA_SIZE 0
+#define XCHAL_CP5_SA_ALIGN 1
+#define XCHAL_CP6_SA_SIZE 0
+#define XCHAL_CP6_SA_ALIGN 1
+
+/* Save area for non-coprocessor optional and custom (TIE) state: */
+#define XCHAL_NCP_SA_SIZE 36
+#define XCHAL_NCP_SA_ALIGN 4
+
+/* Total save area for optional and custom state (NCP + CPn): */
+#define XCHAL_TOTAL_SA_SIZE 160 /* with 16-byte align padding */
+#define XCHAL_TOTAL_SA_ALIGN 8 /* actual minimum alignment */
+
+/*
+ * Detailed contents of save areas.
+ * NOTE: caller must define the XCHAL_SA_REG macro (not defined here)
+ * before expanding the XCHAL_xxx_SA_LIST() macros.
+ *
+ * XCHAL_SA_REG(s,ccused,abikind,kind,opt,name,galign,align,asize,
+ * dbnum,base,regnum,bitsz,gapsz,reset,x...)
+ *
+ * s = passed from XCHAL_*_LIST(s), eg. to select how to expand
+ * ccused = set if used by compiler without special options or code
+ * abikind = 0 (caller-saved), 1 (callee-saved), or 2 (thread-global)
+ * kind = 0 (special reg), 1 (TIE user reg), or 2 (TIE regfile reg)
+ * opt = 0 (custom TIE extension or coprocessor), or 1 (optional reg)
+ * name = lowercase reg name (no quotes)
+ * galign = group byte alignment (power of 2) (galign >= align)
+ * align = register byte alignment (power of 2)
+ * asize = allocated size in bytes (asize*8 == bitsz + gapsz + padsz)
+ * (not including any pad bytes required to galign this or next reg)
+ * dbnum = unique target number f/debug (see <xtensa-libdb-macros.h>)
+ * base = reg shortname w/o index (or sr=special, ur=TIE user reg)
+ * regnum = reg index in regfile, or special/TIE-user reg number
+ * bitsz = number of significant bits (regfile width, or ur/sr mask bits)
+ * gapsz = intervening bits, if bitsz bits not stored contiguously
+ * (padsz = pad bits at end [TIE regfile] or at msbits [ur,sr] of asize)
+ * reset = register reset value (or 0 if undefined at reset)
+ * x = reserved for future use (0 until then)
+ *
+ * To filter out certain registers, e.g. to expand only the non-global
+ * registers used by the compiler, you can do something like this:
+ *
+ * #define XCHAL_SA_REG(s,ccused,p...) SELCC##ccused(p)
+ * #define SELCC0(p...)
+ * #define SELCC1(abikind,p...) SELAK##abikind(p)
+ * #define SELAK0(p...) REG(p)
+ * #define SELAK1(p...) REG(p)
+ * #define SELAK2(p...)
+ * #define REG(kind,tie,name,galn,aln,asz,csz,dbnum,base,rnum,bsz,rst,x...) \
+ * ...what you want to expand...
+ */
+
+#define XCHAL_NCP_SA_NUM 9
+#define XCHAL_NCP_SA_LIST(s) \
+ XCHAL_SA_REG(s,1,2,1,1, threadptr, 4, 4, 4,0x03E7, ur,231, 32,0,0,0) \
+ XCHAL_SA_REG(s,1,0,0,1, acclo, 4, 4, 4,0x0210, sr,16 , 32,0,0,0) \
+ XCHAL_SA_REG(s,1,0,0,1, acchi, 4, 4, 4,0x0211, sr,17 , 8,0,0,0) \
+ XCHAL_SA_REG(s,0,0,0,1, br, 4, 4, 4,0x0204, sr,4 , 16,0,0,0) \
+ XCHAL_SA_REG(s,0,0,0,1, scompare1, 4, 4, 4,0x020C, sr,12 , 32,0,0,0) \
+ XCHAL_SA_REG(s,0,0,0,1, m0, 4, 4, 4,0x0220, sr,32 , 32,0,0,0) \
+ XCHAL_SA_REG(s,0,0,0,1, m1, 4, 4, 4,0x0221, sr,33 , 32,0,0,0) \
+ XCHAL_SA_REG(s,0,0,0,1, m2, 4, 4, 4,0x0222, sr,34 , 32,0,0,0) \
+ XCHAL_SA_REG(s,0,0,0,1, m3, 4, 4, 4,0x0223, sr,35 , 32,0,0,0)
+
+#define XCHAL_CP0_SA_NUM 0
+#define XCHAL_CP0_SA_LIST(s) /* empty */
+
+#define XCHAL_CP1_SA_NUM 18
+#define XCHAL_CP1_SA_LIST(s) \
+ XCHAL_SA_REG(s,0,0,1,0, ae_ovf_sar, 8, 4, 4,0x03F0, ur,240, 7,0,0,0) \
+ XCHAL_SA_REG(s,0,0,1,0, ae_bithead, 4, 4, 4,0x03F1, ur,241, 32,0,0,0) \
+ XCHAL_SA_REG(s,0,0,1,0,ae_ts_fts_bu_bp, 4, 4, 4,0x03F2, ur,242, 16,0,0,0) \
+ XCHAL_SA_REG(s,0,0,1,0, ae_sd_no, 4, 4, 4,0x03F3, ur,243, 28,0,0,0) \
+ XCHAL_SA_REG(s,0,0,1,0, ae_cbegin0, 4, 4, 4,0x03F6, ur,246, 32,0,0,0) \
+ XCHAL_SA_REG(s,0,0,1,0, ae_cend0, 4, 4, 4,0x03F7, ur,247, 32,0,0,0) \
+ XCHAL_SA_REG(s,0,0,2,0, aep0, 8, 8, 8,0x0060, aep,0 , 48,0,0,0) \
+ XCHAL_SA_REG(s,0,0,2,0, aep1, 8, 8, 8,0x0061, aep,1 , 48,0,0,0) \
+ XCHAL_SA_REG(s,0,0,2,0, aep2, 8, 8, 8,0x0062, aep,2 , 48,0,0,0) \
+ XCHAL_SA_REG(s,0,0,2,0, aep3, 8, 8, 8,0x0063, aep,3 , 48,0,0,0) \
+ XCHAL_SA_REG(s,0,0,2,0, aep4, 8, 8, 8,0x0064, aep,4 , 48,0,0,0) \
+ XCHAL_SA_REG(s,0,0,2,0, aep5, 8, 8, 8,0x0065, aep,5 , 48,0,0,0) \
+ XCHAL_SA_REG(s,0,0,2,0, aep6, 8, 8, 8,0x0066, aep,6 , 48,0,0,0) \
+ XCHAL_SA_REG(s,0,0,2,0, aep7, 8, 8, 8,0x0067, aep,7 , 48,0,0,0) \
+ XCHAL_SA_REG(s,0,0,2,0, aeq0, 8, 8, 8,0x0068, aeq,0 , 56,0,0,0) \
+ XCHAL_SA_REG(s,0,0,2,0, aeq1, 8, 8, 8,0x0069, aeq,1 , 56,0,0,0) \
+ XCHAL_SA_REG(s,0,0,2,0, aeq2, 8, 8, 8,0x006A, aeq,2 , 56,0,0,0) \
+ XCHAL_SA_REG(s,0,0,2,0, aeq3, 8, 8, 8,0x006B, aeq,3 , 56,0,0,0)
+
+#define XCHAL_CP2_SA_NUM 0
+#define XCHAL_CP2_SA_LIST(s) /* empty */
+
+#define XCHAL_CP3_SA_NUM 0
+#define XCHAL_CP3_SA_LIST(s) /* empty */
+
+#define XCHAL_CP4_SA_NUM 0
+#define XCHAL_CP4_SA_LIST(s) /* empty */
+
+#define XCHAL_CP5_SA_NUM 0
+#define XCHAL_CP5_SA_LIST(s) /* empty */
+
+#define XCHAL_CP6_SA_NUM 0
+#define XCHAL_CP6_SA_LIST(s) /* empty */
+
+#define XCHAL_CP7_SA_NUM 0
+#define XCHAL_CP7_SA_LIST(s) /* empty */
+
+/* Byte length of instruction from its first nibble (op0 field), per FLIX. */
+#define XCHAL_OP0_FORMAT_LENGTHS 3,3,3,3,3,3,3,3,2,2,2,2,2,2,3,8
+/* Byte length of instruction from its first byte, per FLIX. */
+#define XCHAL_BYTE0_FORMAT_LENGTHS \
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,\
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,\
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,\
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,\
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,\
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,\
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,\
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8
+
+#endif /*_XTENSA_CORE_TIE_H*/
+