diff options
Diffstat (limited to 'arch')
149 files changed, 4226 insertions, 1860 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index c6148166a7b4..4426e9687d89 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -841,6 +841,16 @@ config REFCOUNT_FULL against various use-after-free conditions that can be used in security flaw exploits. +config HAVE_ARCH_PREL32_RELOCATIONS + bool + help + May be selected by an architecture if it supports place-relative + 32-bit relocations, both in the toolchain and in the module loader, + in which case relative references can be used in special sections + for PCI fixup, initcalls etc which are only half the size on 64 bit + architectures, and don't require runtime relocation on relocatable + kernels. + source "kernel/gcov/Kconfig" source "scripts/gcc-plugins/Kconfig" diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig index 85b2369d6b20..27ea6dfcf2f2 100644 --- a/arch/arm/configs/exynos_defconfig +++ b/arch/arm/configs/exynos_defconfig @@ -155,8 +155,8 @@ CONFIG_THERMAL_EMULATION=y CONFIG_WATCHDOG=y CONFIG_S3C2410_WATCHDOG=y CONFIG_MFD_CROS_EC=y -CONFIG_MFD_CROS_EC_I2C=y -CONFIG_MFD_CROS_EC_SPI=y +CONFIG_CROS_EC_I2C=y +CONFIG_CROS_EC_SPI=y CONFIG_MFD_MAX14577=y CONFIG_MFD_MAX77686=y CONFIG_MFD_MAX77693=y diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index 8f6be1982545..be732f382418 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -490,8 +490,8 @@ CONFIG_MFD_AC100=y CONFIG_MFD_AXP20X_I2C=y CONFIG_MFD_AXP20X_RSB=y CONFIG_MFD_CROS_EC=m -CONFIG_MFD_CROS_EC_I2C=m -CONFIG_MFD_CROS_EC_SPI=m +CONFIG_CROS_EC_I2C=m +CONFIG_CROS_EC_SPI=m CONFIG_MFD_DA9063=m CONFIG_MFD_MAX14577=y CONFIG_MFD_MAX77686=y diff --git a/arch/arm/configs/pxa_defconfig b/arch/arm/configs/pxa_defconfig index 5655a1cee87d..6bb506edb1f5 100644 --- a/arch/arm/configs/pxa_defconfig +++ b/arch/arm/configs/pxa_defconfig @@ -398,8 +398,8 @@ CONFIG_MFD_AS3711=y CONFIG_MFD_BCM590XX=m CONFIG_MFD_AXP20X=y CONFIG_MFD_CROS_EC=m -CONFIG_MFD_CROS_EC_I2C=m -CONFIG_MFD_CROS_EC_SPI=m +CONFIG_CROS_EC_I2C=m +CONFIG_CROS_EC_SPI=m CONFIG_MFD_ASIC3=y CONFIG_PMIC_DA903X=y CONFIG_HTC_EGPIO=y diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index fe2fb1ddd771..77121b713bef 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -107,9 +107,19 @@ static inline unsigned long *vcpu_hcr(const struct kvm_vcpu *vcpu) return (unsigned long *)&vcpu->arch.hcr; } +static inline void vcpu_clear_wfe_traps(struct kvm_vcpu *vcpu) +{ + vcpu->arch.hcr &= ~HCR_TWE; +} + +static inline void vcpu_set_wfe_traps(struct kvm_vcpu *vcpu) +{ + vcpu->arch.hcr |= HCR_TWE; +} + static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu) { - return 1; + return true; } static inline unsigned long *vcpu_pc(struct kvm_vcpu *vcpu) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 1f1fe4109b02..79906cecb091 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -216,6 +216,11 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); unsigned long kvm_call_hyp(void *hypfn, ...); void force_vm_exit(const cpumask_t *mask); +int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events); + +int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events); #define KVM_ARCH_WANT_MMU_NOTIFIER int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 8553d68b7c8a..265ea9cf7df7 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -75,17 +75,9 @@ phys_addr_t kvm_get_idmap_vector(void); int kvm_mmu_init(void); void kvm_clear_hyp_idmap(void); -static inline void kvm_set_pmd(pmd_t *pmd, pmd_t new_pmd) -{ - *pmd = new_pmd; - dsb(ishst); -} - -static inline void kvm_set_pte(pte_t *pte, pte_t new_pte) -{ - *pte = new_pte; - dsb(ishst); -} +#define kvm_mk_pmd(ptep) __pmd(__pa(ptep) | PMD_TYPE_TABLE) +#define kvm_mk_pud(pmdp) __pud(__pa(pmdp) | PMD_TYPE_TABLE) +#define kvm_mk_pgd(pudp) ({ BUILD_BUG(); 0; }) static inline pte_t kvm_s2pte_mkwrite(pte_t pte) { diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index 16e006f708ca..4602464ebdfb 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -27,6 +27,7 @@ #define __KVM_HAVE_GUEST_DEBUG #define __KVM_HAVE_IRQ_LINE #define __KVM_HAVE_READONLY_MEM +#define __KVM_HAVE_VCPU_EVENTS #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 @@ -125,6 +126,18 @@ struct kvm_sync_regs { struct kvm_arch_memory_slot { }; +/* for KVM_GET/SET_VCPU_EVENTS */ +struct kvm_vcpu_events { + struct { + __u8 serror_pending; + __u8 serror_has_esr; + /* Align it to 8 bytes */ + __u8 pad[6]; + __u64 serror_esr; + } exception; + __u32 reserved[12]; +}; + /* If you need to interpret the index values, here is the key: */ #define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000 #define KVM_REG_ARM_COPROC_SHIFT 16 diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index 974d8d7d1bcd..3968d6c22455 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -38,25 +38,14 @@ #error Sorry, your compiler targets APCS-26 but this kernel requires APCS-32 #endif /* - * GCC 3.0, 3.1: general bad code generation. - * GCC 3.2.0: incorrect function argument offset calculation. - * GCC 3.2.x: miscompiles NEW_AUX_ENT in fs/binfmt_elf.c - * (http://gcc.gnu.org/PR8896) and incorrect structure - * initialisation in fs/jffs2/erase.c * GCC 4.8.0-4.8.2: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58854 * miscompiles find_get_entry(), and can result in EXT3 and EXT4 * filesystem corruption (possibly other FS too). */ -#ifdef __GNUC__ -#if (__GNUC__ == 3 && __GNUC_MINOR__ < 3) -#error Your compiler is too buggy; it is known to miscompile kernels. -#error Known good compilers: 3.3, 4.x -#endif -#if GCC_VERSION >= 40800 && GCC_VERSION < 40803 +#if defined(GCC_VERSION) && GCC_VERSION >= 40800 && GCC_VERSION < 40803 #error Your compiler is too buggy; it is known to miscompile kernels #error and result in filesystem corruption and oopses. #endif -#endif int main(void) { diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index d9c299133111..82ab015bf42b 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -330,16 +330,15 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) * atomic helpers. Insert it into the gate_vma so that it is visible * through ptrace and /proc/<pid>/mem. */ -static struct vm_area_struct gate_vma = { - .vm_start = 0xffff0000, - .vm_end = 0xffff0000 + PAGE_SIZE, - .vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC, -}; +static struct vm_area_struct gate_vma; static int __init gate_vma_init(void) { vma_init(&gate_vma, NULL); gate_vma.vm_page_prot = PAGE_READONLY_EXEC; + gate_vma.vm_start = 0xffff0000; + gate_vma.vm_end = 0xffff0000 + PAGE_SIZE; + gate_vma.vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC; return 0; } arch_initcall(gate_vma_init); diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index 3a02e76699a6..450c7a4fbc8a 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -246,6 +246,7 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu, const struct coproc_reg *r) { u64 reg; + bool g1; if (!p->is_write) return read_from_write_only(vcpu, p); @@ -253,7 +254,25 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu, reg = (u64)*vcpu_reg(vcpu, p->Rt2) << 32; reg |= *vcpu_reg(vcpu, p->Rt1) ; - vgic_v3_dispatch_sgi(vcpu, reg); + /* + * In a system where GICD_CTLR.DS=1, a ICC_SGI0R access generates + * Group0 SGIs only, while ICC_SGI1R can generate either group, + * depending on the SGI configuration. ICC_ASGI1R is effectively + * equivalent to ICC_SGI0R, as there is no "alternative" secure + * group. + */ + switch (p->Op1) { + default: /* Keep GCC quiet */ + case 0: /* ICC_SGI1R */ + g1 = true; + break; + case 1: /* ICC_ASGI1R */ + case 2: /* ICC_SGI0R */ + g1 = false; + break; + } + + vgic_v3_dispatch_sgi(vcpu, reg, g1); return true; } @@ -459,6 +478,10 @@ static const struct coproc_reg cp15_regs[] = { /* ICC_SGI1R */ { CRm64(12), Op1( 0), is64, access_gic_sgi}, + /* ICC_ASGI1R */ + { CRm64(12), Op1( 1), is64, access_gic_sgi}, + /* ICC_SGI0R */ + { CRm64(12), Op1( 2), is64, access_gic_sgi}, /* VBAR: swapped by interrupt.S. */ { CRn(12), CRm( 0), Op1( 0), Op2( 0), is32, diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index a18f33edc471..2b8de885b2bf 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -261,6 +261,29 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, return -EINVAL; } + +int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events) +{ + events->exception.serror_pending = !!(*vcpu_hcr(vcpu) & HCR_VA); + + return 0; +} + +int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events) +{ + bool serror_pending = events->exception.serror_pending; + bool has_esr = events->exception.serror_has_esr; + + if (serror_pending && has_esr) + return -EINVAL; + else if (serror_pending) + kvm_inject_vabt(vcpu); + + return 0; +} + int __attribute_const__ kvm_target_cpu(void) { switch (read_cpuid_part()) { diff --git a/arch/arm/probes/uprobes/core.c b/arch/arm/probes/uprobes/core.c index d1329f1ba4e4..bf992264060e 100644 --- a/arch/arm/probes/uprobes/core.c +++ b/arch/arm/probes/uprobes/core.c @@ -32,7 +32,7 @@ bool is_swbp_insn(uprobe_opcode_t *insn) int set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr) { - return uprobe_write_opcode(mm, vaddr, + return uprobe_write_opcode(auprobe, mm, vaddr, __opcode_to_mem_arm(auprobe->bpinsn)); } diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index d0a53cc6293a..29e75b47becd 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -108,6 +108,7 @@ config ARM64 select HAVE_ARCH_KGDB select HAVE_ARCH_MMAP_RND_BITS select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT + select HAVE_ARCH_PREL32_RELOCATIONS select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_STACKLEAK select HAVE_ARCH_THREAD_STRUCT_WHITELIST diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 2c07e233012b..514787d45dee 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -373,8 +373,8 @@ CONFIG_UNIPHIER_WATCHDOG=y CONFIG_BCM2835_WDT=y CONFIG_MFD_AXP20X_RSB=y CONFIG_MFD_CROS_EC=y -CONFIG_MFD_CROS_EC_I2C=y -CONFIG_MFD_CROS_EC_SPI=y +CONFIG_CROS_EC_I2C=y +CONFIG_CROS_EC_SPI=y CONFIG_MFD_CROS_EC_CHARDEV=m CONFIG_MFD_EXYNOS_LPASS=m CONFIG_MFD_HI6421_PMIC=y diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index be3bf3d08916..ae1f70450fb2 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -50,7 +50,8 @@ #define ARM64_HW_DBM 29 #define ARM64_SSBD 30 #define ARM64_MISMATCHED_CACHE_TYPE 31 +#define ARM64_HAS_STAGE2_FWB 32 -#define ARM64_NCAPS 32 +#define ARM64_NCAPS 33 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 6dd285e979c9..aa45df752a16 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -23,6 +23,7 @@ #include <asm/types.h> /* Hyp Configuration Register (HCR) bits */ +#define HCR_FWB (UL(1) << 46) #define HCR_TEA (UL(1) << 37) #define HCR_TERR (UL(1) << 36) #define HCR_TLOR (UL(1) << 35) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 0c97e45d1dc3..6106a85ae0be 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -63,6 +63,8 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) /* trap error record accesses */ vcpu->arch.hcr_el2 |= HCR_TERR; } + if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) + vcpu->arch.hcr_el2 |= HCR_FWB; if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) vcpu->arch.hcr_el2 &= ~HCR_RW; @@ -81,6 +83,21 @@ static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu) return (unsigned long *)&vcpu->arch.hcr_el2; } +static inline void vcpu_clear_wfe_traps(struct kvm_vcpu *vcpu) +{ + vcpu->arch.hcr_el2 &= ~HCR_TWE; +} + +static inline void vcpu_set_wfe_traps(struct kvm_vcpu *vcpu) +{ + vcpu->arch.hcr_el2 |= HCR_TWE; +} + +static inline unsigned long vcpu_get_vsesr(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.vsesr_el2; +} + static inline void vcpu_set_vsesr(struct kvm_vcpu *vcpu, u64 vsesr) { vcpu->arch.vsesr_el2 = vsesr; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index fe8777b12f86..f26055f2306e 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -350,6 +350,11 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); +int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events); + +int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events); #define KVM_ARCH_WANT_MMU_NOTIFIER int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); @@ -378,16 +383,23 @@ void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run, int kvm_perf_init(void); int kvm_perf_teardown(void); +void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome); + struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); -void __kvm_set_tpidr_el2(u64 tpidr_el2); DECLARE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state); static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, unsigned long hyp_stack_ptr, unsigned long vector_ptr) { - u64 tpidr_el2; + /* + * Calculate the raw per-cpu offset without a translation from the + * kernel's mapping to the linear mapping, and store it in tpidr_el2 + * so that we can use adr_l to access per-cpu variables in EL2. + */ + u64 tpidr_el2 = ((u64)this_cpu_ptr(&kvm_host_cpu_state) - + (u64)kvm_ksym_ref(kvm_host_cpu_state)); /* * Call initialization code, and switch to the full blown HYP code. @@ -396,17 +408,7 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, * cpus_have_const_cap() wrapper. */ BUG_ON(!static_branch_likely(&arm64_const_caps_ready)); - __kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr); - - /* - * Calculate the raw per-cpu offset without a translation from the - * kernel's mapping to the linear mapping, and store it in tpidr_el2 - * so that we can use adr_l to access per-cpu variables in EL2. - */ - tpidr_el2 = (u64)this_cpu_ptr(&kvm_host_cpu_state) - - (u64)kvm_ksym_ref(kvm_host_cpu_state); - - kvm_call_hyp(__kvm_set_tpidr_el2, tpidr_el2); + __kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr, tpidr_el2); } static inline bool kvm_arch_check_sve_has_vhe(void) diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index fb9a7127bb75..d6fff7de5539 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -169,8 +169,12 @@ phys_addr_t kvm_get_idmap_vector(void); int kvm_mmu_init(void); void kvm_clear_hyp_idmap(void); -#define kvm_set_pte(ptep, pte) set_pte(ptep, pte) -#define kvm_set_pmd(pmdp, pmd) set_pmd(pmdp, pmd) +#define kvm_mk_pmd(ptep) \ + __pmd(__phys_to_pmd_val(__pa(ptep)) | PMD_TYPE_TABLE) +#define kvm_mk_pud(pmdp) \ + __pud(__phys_to_pud_val(__pa(pmdp)) | PMD_TYPE_TABLE) +#define kvm_mk_pgd(pudp) \ + __pgd(__phys_to_pgd_val(__pa(pudp)) | PUD_TYPE_TABLE) static inline pte_t kvm_s2pte_mkwrite(pte_t pte) { @@ -267,6 +271,15 @@ static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size) { void *va = page_address(pfn_to_page(pfn)); + /* + * With FWB, we ensure that the guest always accesses memory using + * cacheable attributes, and we don't have to clean to PoC when + * faulting in pages. Furthermore, FWB implies IDC, so cleaning to + * PoU is not required either in this case. + */ + if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) + return; + kvm_flush_dcache_to_poc(va, size); } @@ -287,20 +300,26 @@ static inline void __invalidate_icache_guest_page(kvm_pfn_t pfn, static inline void __kvm_flush_dcache_pte(pte_t pte) { - struct page *page = pte_page(pte); - kvm_flush_dcache_to_poc(page_address(page), PAGE_SIZE); + if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) { + struct page *page = pte_page(pte); + kvm_flush_dcache_to_poc(page_address(page), PAGE_SIZE); + } } static inline void __kvm_flush_dcache_pmd(pmd_t pmd) { - struct page *page = pmd_page(pmd); - kvm_flush_dcache_to_poc(page_address(page), PMD_SIZE); + if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) { + struct page *page = pmd_page(pmd); + kvm_flush_dcache_to_poc(page_address(page), PMD_SIZE); + } } static inline void __kvm_flush_dcache_pud(pud_t pud) { - struct page *page = pud_page(pud); - kvm_flush_dcache_to_poc(page_address(page), PUD_SIZE); + if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) { + struct page *page = pud_page(pud); + kvm_flush_dcache_to_poc(page_address(page), PUD_SIZE); + } } #define kvm_virt_to_phys(x) __pa_symbol(x) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 49d99214f43c..b96442960aea 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -155,6 +155,13 @@ #define MT_S2_NORMAL 0xf #define MT_S2_DEVICE_nGnRE 0x1 +/* + * Memory types for Stage-2 translation when ID_AA64MMFR2_EL1.FWB is 0001 + * Stage-2 enforces Normal-WB and Device-nGnRE + */ +#define MT_S2_FWB_NORMAL 6 +#define MT_S2_FWB_DEVICE_nGnRE 1 + #ifdef CONFIG_ARM64_4K_PAGES #define IOREMAP_MAX_ORDER (PUD_SHIFT) #else diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 108ecad7acc5..78b942c1bea4 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -67,8 +67,28 @@ #define PAGE_HYP_RO __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY | PTE_HYP_XN) #define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP) -#define PAGE_S2 __pgprot(_PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY | PTE_S2_XN) -#define PAGE_S2_DEVICE __pgprot(_PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_S2_XN) +#define PAGE_S2_MEMATTR(attr) \ + ({ \ + u64 __val; \ + if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) \ + __val = PTE_S2_MEMATTR(MT_S2_FWB_ ## attr); \ + else \ + __val = PTE_S2_MEMATTR(MT_S2_ ## attr); \ + __val; \ + }) + +#define PAGE_S2_XN \ + ({ \ + u64 __val; \ + if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC)) \ + __val = 0; \ + else \ + __val = PTE_S2_XN; \ + __val; \ + }) + +#define PAGE_S2 __pgprot(_PROT_DEFAULT | PAGE_S2_MEMATTR(NORMAL) | PTE_S2_RDONLY | PAGE_S2_XN) +#define PAGE_S2_DEVICE __pgprot(_PROT_DEFAULT | PAGE_S2_MEMATTR(DEVICE_nGnRE) | PTE_S2_RDONLY | PAGE_S2_XN) #define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) #define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index e205ec8489e9..c1470931b897 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -314,6 +314,8 @@ #define SYS_ICC_DIR_EL1 sys_reg(3, 0, 12, 11, 1) #define SYS_ICC_RPR_EL1 sys_reg(3, 0, 12, 11, 3) #define SYS_ICC_SGI1R_EL1 sys_reg(3, 0, 12, 11, 5) +#define SYS_ICC_ASGI1R_EL1 sys_reg(3, 0, 12, 11, 6) +#define SYS_ICC_SGI0R_EL1 sys_reg(3, 0, 12, 11, 7) #define SYS_ICC_IAR1_EL1 sys_reg(3, 0, 12, 12, 0) #define SYS_ICC_EOIR1_EL1 sys_reg(3, 0, 12, 12, 1) #define SYS_ICC_HPPIR1_EL1 sys_reg(3, 0, 12, 12, 2) @@ -579,6 +581,7 @@ #define ID_AA64MMFR1_VMIDBITS_16 2 /* id_aa64mmfr2 */ +#define ID_AA64MMFR2_FWB_SHIFT 40 #define ID_AA64MMFR2_AT_SHIFT 32 #define ID_AA64MMFR2_LVA_SHIFT 16 #define ID_AA64MMFR2_IESB_SHIFT 12 diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 4e76630dd655..97c3478ee6e7 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -39,6 +39,7 @@ #define __KVM_HAVE_GUEST_DEBUG #define __KVM_HAVE_IRQ_LINE #define __KVM_HAVE_READONLY_MEM +#define __KVM_HAVE_VCPU_EVENTS #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 @@ -154,6 +155,18 @@ struct kvm_sync_regs { struct kvm_arch_memory_slot { }; +/* for KVM_GET/SET_VCPU_EVENTS */ +struct kvm_vcpu_events { + struct { + __u8 serror_pending; + __u8 serror_has_esr; + /* Align it to 8 bytes */ + __u8 pad[6]; + __u64 serror_esr; + } exception; + __u32 reserved[12]; +}; + /* If you need to interpret the index values, here is the key: */ #define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000 #define KVM_REG_ARM_COPROC_SHIFT 16 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 611e8921c3d4..e238b7932096 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -192,6 +192,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { }; static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_FWB_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_AT_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LVA_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_IESB_SHIFT, 4, 0), @@ -1026,6 +1027,14 @@ static void cpu_copy_el2regs(const struct arm64_cpu_capabilities *__unused) } #endif +static void cpu_has_fwb(const struct arm64_cpu_capabilities *__unused) +{ + u64 val = read_sysreg_s(SYS_CLIDR_EL1); + + /* Check that CLIDR_EL1.LOU{U,IS} are both 0 */ + WARN_ON(val & (7 << 27 | 7 << 21)); +} + static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "GIC system register CPU interface", @@ -1182,6 +1191,17 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cache_dic, }, + { + .desc = "Stage-2 Force Write-Back", + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .capability = ARM64_HAS_STAGE2_FWB, + .sys_reg = SYS_ID_AA64MMFR2_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64MMFR2_FWB_SHIFT, + .min_field_value = 1, + .matches = has_cpuid_feature, + .cpu_enable = cpu_has_fwb, + }, #ifdef CONFIG_ARM64_HW_AFDBM { /* diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index cdd4d9d6d575..07256b08226c 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -289,6 +289,39 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, return -EINVAL; } +int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events) +{ + events->exception.serror_pending = !!(vcpu->arch.hcr_el2 & HCR_VSE); + events->exception.serror_has_esr = cpus_have_const_cap(ARM64_HAS_RAS_EXTN); + + if (events->exception.serror_pending && events->exception.serror_has_esr) + events->exception.serror_esr = vcpu_get_vsesr(vcpu); + + return 0; +} + +int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events) +{ + bool serror_pending = events->exception.serror_pending; + bool has_esr = events->exception.serror_has_esr; + + if (serror_pending && has_esr) { + if (!cpus_have_const_cap(ARM64_HAS_RAS_EXTN)) + return -EINVAL; + + if (!((events->exception.serror_esr) & ~ESR_ELx_ISS_MASK)) + kvm_set_sei_esr(vcpu, events->exception.serror_esr); + else + return -EINVAL; + } else if (serror_pending) { + kvm_inject_vabt(vcpu); + } + + return 0; +} + int __attribute_const__ kvm_target_cpu(void) { unsigned long implementor = read_cpuid_implementor(); diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S index 6fd91b31a131..ea9225160786 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp-init.S @@ -57,6 +57,7 @@ __invalid: * x0: HYP pgd * x1: HYP stack * x2: HYP vectors + * x3: per-CPU offset */ __do_hyp_init: /* Check for a stub HVC call */ @@ -119,9 +120,8 @@ CPU_BE( orr x4, x4, #SCTLR_ELx_EE) mov sp, x1 msr vbar_el2, x2 - /* copy tpidr_el1 into tpidr_el2 for use by HYP */ - mrs x1, tpidr_el1 - msr tpidr_el2, x1 + /* Set tpidr_el2 for use by HYP */ + msr tpidr_el2, x3 /* Hello, World! */ eret diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c index 35bc16832efe..9ce223944983 100644 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -288,8 +288,3 @@ void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) vcpu->arch.sysregs_loaded_on_cpu = false; } - -void __hyp_text __kvm_set_tpidr_el2(u64 tpidr_el2) -{ - asm("msr tpidr_el2, %0": : "r" (tpidr_el2)); -} diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index d8e71659ba7e..a55e91dfcf8f 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -164,9 +164,9 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu) inject_undef64(vcpu); } -static void pend_guest_serror(struct kvm_vcpu *vcpu, u64 esr) +void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 esr) { - vcpu_set_vsesr(vcpu, esr); + vcpu_set_vsesr(vcpu, esr & ESR_ELx_ISS_MASK); *vcpu_hcr(vcpu) |= HCR_VSE; } @@ -184,5 +184,5 @@ static void pend_guest_serror(struct kvm_vcpu *vcpu, u64 esr) */ void kvm_inject_vabt(struct kvm_vcpu *vcpu) { - pend_guest_serror(vcpu, ESR_ELx_ISV); + kvm_set_sei_esr(vcpu, ESR_ELx_ISV); } diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 4e4aedaf7ab7..e37c78bbe1ca 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -77,8 +77,12 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ARM_PMU_V3: r = kvm_arm_support_pmu_v3(); break; + case KVM_CAP_ARM_INJECT_SERROR_ESR: + r = cpus_have_const_cap(ARM64_HAS_RAS_EXTN); + break; case KVM_CAP_SET_GUEST_DEBUG: case KVM_CAP_VCPU_ATTRIBUTES: + case KVM_CAP_VCPU_EVENTS: r = 1; break; default: diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index a4363735d3f8..22fbbdbece3c 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -194,7 +194,16 @@ static bool access_dcsw(struct kvm_vcpu *vcpu, if (!p->is_write) return read_from_write_only(vcpu, p, r); - kvm_set_way_flush(vcpu); + /* + * Only track S/W ops if we don't have FWB. It still indicates + * that the guest is a bit broken (S/W operations should only + * be done by firmware, knowing that there is only a single + * CPU left in the system, and certainly not from non-secure + * software). + */ + if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) + kvm_set_way_flush(vcpu); + return true; } @@ -243,10 +252,43 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { + bool g1; + if (!p->is_write) return read_from_write_only(vcpu, p, r); - vgic_v3_dispatch_sgi(vcpu, p->regval); + /* + * In a system where GICD_CTLR.DS=1, a ICC_SGI0R_EL1 access generates + * Group0 SGIs only, while ICC_SGI1R_EL1 can generate either group, + * depending on the SGI configuration. ICC_ASGI1R_EL1 is effectively + * equivalent to ICC_SGI0R_EL1, as there is no "alternative" secure + * group. + */ + if (p->is_aarch32) { + switch (p->Op1) { + default: /* Keep GCC quiet */ + case 0: /* ICC_SGI1R */ + g1 = true; + break; + case 1: /* ICC_ASGI1R */ + case 2: /* ICC_SGI0R */ + g1 = false; + break; + } + } else { + switch (p->Op2) { + default: /* Keep GCC quiet */ + case 5: /* ICC_SGI1R_EL1 */ + g1 = true; + break; + case 6: /* ICC_ASGI1R_EL1 */ + case 7: /* ICC_SGI0R_EL1 */ + g1 = false; + break; + } + } + + vgic_v3_dispatch_sgi(vcpu, p->regval, g1); return true; } @@ -1303,6 +1345,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_ICC_DIR_EL1), read_from_write_only }, { SYS_DESC(SYS_ICC_RPR_EL1), write_to_read_only }, { SYS_DESC(SYS_ICC_SGI1R_EL1), access_gic_sgi }, + { SYS_DESC(SYS_ICC_ASGI1R_EL1), access_gic_sgi }, + { SYS_DESC(SYS_ICC_SGI0R_EL1), access_gic_sgi }, { SYS_DESC(SYS_ICC_IAR1_EL1), write_to_read_only }, { SYS_DESC(SYS_ICC_EOIR1_EL1), read_from_write_only }, { SYS_DESC(SYS_ICC_HPPIR1_EL1), write_to_read_only }, @@ -1613,8 +1657,6 @@ static const struct sys_reg_desc cp14_64_regs[] = { * register). */ static const struct sys_reg_desc cp15_regs[] = { - { Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, - { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, c1_SCTLR }, { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 }, @@ -1737,8 +1779,10 @@ static const struct sys_reg_desc cp15_regs[] = { static const struct sys_reg_desc cp15_64_regs[] = { { Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, { Op1( 0), CRn( 0), CRm( 9), Op2( 0), access_pmu_evcntr }, - { Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, + { Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI1R */ { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 }, + { Op1( 1), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_ASGI1R */ + { Op1( 2), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI0R */ { Op1( 2), CRn( 0), CRm(14), Op2( 0), access_cntp_cval }, }; diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index 5e89d40be8cd..0b334b671e90 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -16,6 +16,7 @@ config H8300 select OF_IRQ select OF_EARLY_FLATTREE select HAVE_MEMBLOCK + select NO_BOOTMEM select TIMER_OF select H8300_TMR8 select HAVE_KERNEL_GZIP diff --git a/arch/h8300/Makefile b/arch/h8300/Makefile index e1c02ca230cb..cc12b162c222 100644 --- a/arch/h8300/Makefile +++ b/arch/h8300/Makefile @@ -8,6 +8,8 @@ # (C) Copyright 2002-2015 Yoshinori Sato <ysato@users.sourceforge.jp> # +KBUILD_DEFCONFIG := edosk2674_defconfig + cflags-$(CONFIG_CPU_H8300H) := -mh aflags-$(CONFIG_CPU_H8300H) := -mh -Wa,--mach=h8300h ldflags-$(CONFIG_CPU_H8300H) := -mh8300helf_linux @@ -22,6 +24,8 @@ KBUILD_CFLAGS += -DUTS_SYSNAME=\"uClinux\" KBUILD_AFLAGS += $(aflags-y) LDFLAGS += $(ldflags-y) +CHECKFLAGS += -msize-long + ifeq ($(CROSS_COMPILE),) CROSS_COMPILE := h8300-unknown-linux- endif diff --git a/arch/h8300/boot/dts/h8300h_sim.dts b/arch/h8300/boot/dts/h8300h_sim.dts index f1c31cecdec8..595398b9d018 100644 --- a/arch/h8300/boot/dts/h8300h_sim.dts +++ b/arch/h8300/boot/dts/h8300h_sim.dts @@ -73,7 +73,7 @@ timer16: timer@ffff68 { compatible = "renesas,16bit-timer"; reg = <0xffff68 8>, <0xffff60 8>; - interrupts = <24 0>; + interrupts = <26 0>; renesas,channel = <0>; clocks = <&fclk>; clock-names = "fck"; diff --git a/arch/h8300/include/asm/bitops.h b/arch/h8300/include/asm/bitops.h index ea0cb0cf6a8b..647a83bd40b7 100644 --- a/arch/h8300/include/asm/bitops.h +++ b/arch/h8300/include/asm/bitops.h @@ -29,11 +29,11 @@ static inline unsigned long ffz(unsigned long word) result = -1; __asm__("1:\n\t" - "shlr.l %2\n\t" + "shlr.l %1\n\t" "adds #1,%0\n\t" "bcs 1b" - : "=r"(result) - : "0"(result), "r"(word)); + : "=r"(result),"=r"(word) + : "0"(result), "1"(word)); return result; } @@ -66,7 +66,7 @@ H8300_GEN_BITOP(change_bit, "bnot") #undef H8300_GEN_BITOP -static inline int test_bit(int nr, const unsigned long *addr) +static inline int test_bit(int nr, const volatile unsigned long *addr) { int ret = 0; unsigned char *b_addr; @@ -162,11 +162,11 @@ static inline unsigned long __ffs(unsigned long word) result = -1; __asm__("1:\n\t" - "shlr.l %2\n\t" + "shlr.l %1\n\t" "adds #1,%0\n\t" "bcc 1b" - : "=r" (result) - : "0"(result), "r"(word)); + : "=r" (result),"=r"(word) + : "0"(result), "1"(word)); return result; } diff --git a/arch/h8300/include/asm/ptrace.h b/arch/h8300/include/asm/ptrace.h index 313cafa85380..66d383848ff1 100644 --- a/arch/h8300/include/asm/ptrace.h +++ b/arch/h8300/include/asm/ptrace.h @@ -4,6 +4,8 @@ #include <uapi/asm/ptrace.h> +struct task_struct; + #ifndef __ASSEMBLY__ #ifndef PS_S #define PS_S (0x10) diff --git a/arch/h8300/kernel/kgdb.c b/arch/h8300/kernel/kgdb.c index 602e478afbd5..1a1d30cb0609 100644 --- a/arch/h8300/kernel/kgdb.c +++ b/arch/h8300/kernel/kgdb.c @@ -129,7 +129,7 @@ void kgdb_arch_exit(void) /* Nothing to do */ } -const struct kgdb_arch arch_kgdb_ops = { +struct kgdb_arch arch_kgdb_ops = { /* Breakpoint instruction: trapa #2 */ .gdb_bpt_instr = { 0x57, 0x20 }, }; diff --git a/arch/h8300/kernel/setup.c b/arch/h8300/kernel/setup.c index a4d0470c10a9..34e2df5c0d6d 100644 --- a/arch/h8300/kernel/setup.c +++ b/arch/h8300/kernel/setup.c @@ -23,7 +23,6 @@ #include <linux/init.h> #include <linux/of.h> #include <linux/of_fdt.h> -#include <linux/of_platform.h> #include <linux/of_address.h> #include <linux/clk-provider.h> #include <linux/memblock.h> @@ -71,10 +70,6 @@ void __init h8300_fdt_init(void *fdt, char *bootargs) static void __init bootmem_init(void) { - int bootmap_size; - unsigned long ram_start_pfn; - unsigned long free_ram_start_pfn; - unsigned long ram_end_pfn; struct memblock_region *region; memory_end = memory_start = 0; @@ -88,33 +83,17 @@ static void __init bootmem_init(void) if (!memory_end) panic("No memory!"); - ram_start_pfn = PFN_UP(memory_start); - /* free_ram_start_pfn is first page after kernel */ - free_ram_start_pfn = PFN_UP(__pa(_end)); - ram_end_pfn = PFN_DOWN(memblock_end_of_DRAM()); + /* setup bootmem globals (we use no_bootmem, but mm still depends on this) */ + min_low_pfn = PFN_UP(memory_start); + max_low_pfn = PFN_DOWN(memblock_end_of_DRAM()); + max_pfn = max_low_pfn; - max_pfn = ram_end_pfn; + memblock_reserve(__pa(_stext), _end - _stext); - /* - * give all the memory to the bootmap allocator, tell it to put the - * boot mem_map at the start of memory - */ - bootmap_size = init_bootmem_node(NODE_DATA(0), - free_ram_start_pfn, - 0, - ram_end_pfn); - /* - * free the usable memory, we have to make sure we do not free - * the bootmem bitmap so we then reserve it after freeing it :-) - */ - free_bootmem(PFN_PHYS(free_ram_start_pfn), - (ram_end_pfn - free_ram_start_pfn) << PAGE_SHIFT); - reserve_bootmem(PFN_PHYS(free_ram_start_pfn), bootmap_size, - BOOTMEM_DEFAULT); + early_init_fdt_reserve_self(); + early_init_fdt_scan_reserved_mem(); - for_each_memblock(reserved, region) { - reserve_bootmem(region->base, region->size, BOOTMEM_DEFAULT); - } + memblock_dump_all(); } void __init setup_arch(char **cmdline_p) @@ -188,15 +167,6 @@ const struct seq_operations cpuinfo_op = { .show = show_cpuinfo, }; -static int __init device_probe(void) -{ - of_platform_populate(NULL, NULL, NULL, NULL); - - return 0; -} - -device_initcall(device_probe); - #if defined(CONFIG_CPU_H8300H) #define get_wait(base, addr) ({ \ int baddr; \ diff --git a/arch/h8300/kernel/sim-console.c b/arch/h8300/kernel/sim-console.c index 46138f55a9ea..03aa35b1a08c 100644 --- a/arch/h8300/kernel/sim-console.c +++ b/arch/h8300/kernel/sim-console.c @@ -13,12 +13,13 @@ static void sim_write(struct console *con, const char *s, unsigned n) { - register const int fd __asm__("er0") = 1; /* stdout */ register const char *_ptr __asm__("er1") = s; register const unsigned _len __asm__("er2") = n; - __asm__(".byte 0x5e,0x00,0x00,0xc7\n\t" /* jsr @0xc7 (sys_write) */ - : : "g"(fd), "g"(_ptr), "g"(_len)); + __asm__("sub.l er0,er0\n\t" /* er0 = 1 (stdout) */ + "inc.l #1,er0\n\t" + ".byte 0x5e,0x00,0x00,0xc7\n\t" /* jsr @0xc7 (sys_write) */ + : : "g"(_ptr), "g"(_len):"er0"); } static int __init sim_setup(struct earlycon_device *device, const char *opt) diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 2bf4ef792f2c..8b4a0c1748c0 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -28,6 +28,7 @@ config IA64 select HAVE_ARCH_TRACEHOOK select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP + select NO_BOOTMEM select HAVE_VIRT_CPU_ACCOUNTING select ARCH_HAS_DMA_MARK_CLEAN select ARCH_HAS_SG_CHAIN diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index ee5b652d320a..671ce1e3f6f2 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -1805,7 +1805,7 @@ static struct ioc_iommu ioc_iommu_info[] __initdata = { { SX2000_IOC_ID, "sx2000", NULL }, }; -static void ioc_init(unsigned long hpa, struct ioc *ioc) +static void __init ioc_init(unsigned long hpa, struct ioc *ioc) { struct ioc_iommu *info; @@ -2002,7 +2002,7 @@ sba_map_ioc_to_node(struct ioc *ioc, acpi_handle handle) #endif } -static void acpi_sba_ioc_add(struct ioc *ioc) +static void __init acpi_sba_ioc_add(struct ioc *ioc) { acpi_handle handle = ioc->handle; acpi_status status; diff --git a/arch/ia64/include/asm/io.h b/arch/ia64/include/asm/io.h index 6f952171abf9..1e6fef69bb01 100644 --- a/arch/ia64/include/asm/io.h +++ b/arch/ia64/include/asm/io.h @@ -454,6 +454,7 @@ extern void memset_io(volatile void __iomem *s, int c, long n); #define xlate_dev_kmem_ptr xlate_dev_kmem_ptr #define xlate_dev_mem_ptr xlate_dev_mem_ptr #include <asm-generic/io.h> +#undef PCI_IOBASE # endif /* __KERNEL__ */ diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c index f4db2168d1b8..00e8e2a1eb19 100644 --- a/arch/ia64/kernel/asm-offsets.c +++ b/arch/ia64/kernel/asm-offsets.c @@ -50,8 +50,7 @@ void foo(void) DEFINE(IA64_TASK_BLOCKED_OFFSET,offsetof (struct task_struct, blocked)); DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid)); - DEFINE(IA64_TASK_GROUP_LEADER_OFFSET, offsetof (struct task_struct, group_leader)); - DEFINE(IA64_TASK_TGIDLINK_OFFSET, offsetof (struct task_struct, pids[PIDTYPE_PID].pid)); + DEFINE(IA64_TASK_THREAD_PID_OFFSET,offsetof (struct task_struct, thread_pid)); DEFINE(IA64_PID_LEVEL_OFFSET, offsetof (struct pid, level)); DEFINE(IA64_PID_UPID_OFFSET, offsetof (struct pid, numbers[0])); DEFINE(IA64_TASK_PENDING_OFFSET,offsetof (struct task_struct, pending)); @@ -68,6 +67,7 @@ void foo(void) DEFINE(IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,offsetof (struct signal_struct, group_stop_count)); DEFINE(IA64_SIGNAL_SHARED_PENDING_OFFSET,offsetof (struct signal_struct, shared_pending)); + DEFINE(IA64_SIGNAL_PIDS_TGID_OFFSET, offsetof (struct signal_struct, pids[PIDTYPE_TGID])); BLANK(); diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S index fe742ffafc7a..d80c99a5f55d 100644 --- a/arch/ia64/kernel/fsys.S +++ b/arch/ia64/kernel/fsys.S @@ -62,16 +62,16 @@ ENTRY(fsys_getpid) .prologue .altrp b6 .body - add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16 + add r17=IA64_TASK_SIGNAL_OFFSET,r16 ;; - ld8 r17=[r17] // r17 = current->group_leader + ld8 r17=[r17] // r17 = current->signal add r9=TI_FLAGS+IA64_TASK_SIZE,r16 ;; ld4 r9=[r9] - add r17=IA64_TASK_TGIDLINK_OFFSET,r17 + add r17=IA64_SIGNAL_PIDS_TGID_OFFSET,r17 ;; and r9=TIF_ALLWORK_MASK,r9 - ld8 r17=[r17] // r17 = current->group_leader->pids[PIDTYPE_PID].pid + ld8 r17=[r17] // r17 = current->signal->pids[PIDTYPE_TGID] ;; add r8=IA64_PID_LEVEL_OFFSET,r17 ;; @@ -96,11 +96,11 @@ ENTRY(fsys_set_tid_address) .altrp b6 .body add r9=TI_FLAGS+IA64_TASK_SIZE,r16 - add r17=IA64_TASK_TGIDLINK_OFFSET,r16 + add r17=IA64_TASK_THREAD_PID_OFFSET,r16 ;; ld4 r9=[r9] tnat.z p6,p7=r32 // check argument register for being NaT - ld8 r17=[r17] // r17 = current->pids[PIDTYPE_PID].pid + ld8 r17=[r17] // r17 = current->thread_pid ;; and r9=TIF_ALLWORK_MASK,r9 add r8=IA64_PID_LEVEL_OFFSET,r17 diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index ad43cbf70628..0e6c2d9fb498 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -32,6 +32,7 @@ #include <linux/delay.h> #include <linux/cpu.h> #include <linux/kernel.h> +#include <linux/memblock.h> #include <linux/reboot.h> #include <linux/sched/mm.h> #include <linux/sched/clock.h> @@ -383,8 +384,16 @@ reserve_memory (void) sort_regions(rsvd_region, num_rsvd_regions); num_rsvd_regions = merge_regions(rsvd_region, num_rsvd_regions); -} + /* reserve all regions except the end of memory marker with memblock */ + for (n = 0; n < num_rsvd_regions - 1; n++) { + struct rsvd_region *region = &rsvd_region[n]; + phys_addr_t addr = __pa(region->start); + phys_addr_t size = region->end - region->start; + + memblock_reserve(addr, size); + } +} /** * find_initrd - get initrd parameters from the boot parameter structure diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c index 7d64b30913d1..e2e40bbd391c 100644 --- a/arch/ia64/mm/contig.c +++ b/arch/ia64/mm/contig.c @@ -34,53 +34,6 @@ static unsigned long max_gap; /* physical address where the bootmem map is located */ unsigned long bootmap_start; -/** - * find_bootmap_location - callback to find a memory area for the bootmap - * @start: start of region - * @end: end of region - * @arg: unused callback data - * - * Find a place to put the bootmap and return its starting address in - * bootmap_start. This address must be page-aligned. - */ -static int __init -find_bootmap_location (u64 start, u64 end, void *arg) -{ - u64 needed = *(unsigned long *)arg; - u64 range_start, range_end, free_start; - int i; - -#if IGNORE_PFN0 - if (start == PAGE_OFFSET) { - start += PAGE_SIZE; - if (start >= end) - return 0; - } -#endif - - free_start = PAGE_OFFSET; - - for (i = 0; i < num_rsvd_regions; i++) { - range_start = max(start, free_start); - range_end = min(end, rsvd_region[i].start & PAGE_MASK); - - free_start = PAGE_ALIGN(rsvd_region[i].end); - - if (range_end <= range_start) - continue; /* skip over empty range */ - - if (range_end - range_start >= needed) { - bootmap_start = __pa(range_start); - return -1; /* done */ - } - - /* nothing more available in this segment */ - if (range_end == end) - return 0; - } - return 0; -} - #ifdef CONFIG_SMP static void *cpu_data; /** @@ -196,8 +149,6 @@ setup_per_cpu_areas(void) void __init find_memory (void) { - unsigned long bootmap_size; - reserve_memory(); /* first find highest page frame number */ @@ -205,21 +156,12 @@ find_memory (void) max_low_pfn = 0; efi_memmap_walk(find_max_min_low_pfn, NULL); max_pfn = max_low_pfn; - /* how many bytes to cover all the pages */ - bootmap_size = bootmem_bootmap_pages(max_pfn) << PAGE_SHIFT; - - /* look for a location to hold the bootmap */ - bootmap_start = ~0UL; - efi_memmap_walk(find_bootmap_location, &bootmap_size); - if (bootmap_start == ~0UL) - panic("Cannot find %ld bytes for bootmap\n", bootmap_size); - bootmap_size = init_bootmem_node(NODE_DATA(0), - (bootmap_start >> PAGE_SHIFT), 0, max_pfn); - - /* Free all available memory, then mark bootmem-map as being in use. */ - efi_memmap_walk(filter_rsvd_memory, free_bootmem); - reserve_bootmem(bootmap_start, bootmap_size, BOOTMEM_DEFAULT); +#ifdef CONFIG_VIRTUAL_MEM_MAP + efi_memmap_walk(filter_memory, register_active_ranges); +#else + memblock_add_node(0, PFN_PHYS(max_low_pfn), 0); +#endif find_initrd(); @@ -244,11 +186,9 @@ paging_init (void) max_zone_pfns[ZONE_NORMAL] = max_low_pfn; #ifdef CONFIG_VIRTUAL_MEM_MAP - efi_memmap_walk(filter_memory, register_active_ranges); efi_memmap_walk(find_largest_hole, (u64 *)&max_gap); if (max_gap < LARGE_GAP) { vmem_map = (struct page *) 0; - free_area_init_nodes(max_zone_pfns); } else { unsigned long map_size; @@ -266,13 +206,10 @@ paging_init (void) */ NODE_DATA(0)->node_mem_map = vmem_map + find_min_pfn_with_active_regions(); - free_area_init_nodes(max_zone_pfns); printk("Virtual mem_map starts at 0x%p\n", mem_map); } -#else /* !CONFIG_VIRTUAL_MEM_MAP */ - memblock_add_node(0, PFN_PHYS(max_low_pfn), 0); - free_area_init_nodes(max_zone_pfns); #endif /* !CONFIG_VIRTUAL_MEM_MAP */ + free_area_init_nodes(max_zone_pfns); zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page)); } diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index 7d9bd20319ff..1928d5719e41 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -20,6 +20,7 @@ #include <linux/nmi.h> #include <linux/swap.h> #include <linux/bootmem.h> +#include <linux/memblock.h> #include <linux/acpi.h> #include <linux/efi.h> #include <linux/nodemask.h> @@ -38,9 +39,6 @@ struct early_node_data { struct ia64_node_data *node_data; unsigned long pernode_addr; unsigned long pernode_size; -#ifdef CONFIG_ZONE_DMA32 - unsigned long num_dma_physpages; -#endif unsigned long min_pfn; unsigned long max_pfn; }; @@ -60,33 +58,31 @@ pg_data_t *pgdat_list[MAX_NUMNODES]; (((node)*PERCPU_PAGE_SIZE) & (MAX_NODE_ALIGN_OFFSET - 1))) /** - * build_node_maps - callback to setup bootmem structs for each node + * build_node_maps - callback to setup mem_data structs for each node * @start: physical start of range * @len: length of range * @node: node where this range resides * - * We allocate a struct bootmem_data for each piece of memory that we wish to + * Detect extents of each piece of memory that we wish to * treat as a virtually contiguous block (i.e. each node). Each such block * must start on an %IA64_GRANULE_SIZE boundary, so we round the address down * if necessary. Any non-existent pages will simply be part of the virtual - * memmap. We also update min_low_pfn and max_low_pfn here as we receive - * memory ranges from the caller. + * memmap. */ static int __init build_node_maps(unsigned long start, unsigned long len, int node) { unsigned long spfn, epfn, end = start + len; - struct bootmem_data *bdp = &bootmem_node_data[node]; epfn = GRANULEROUNDUP(end) >> PAGE_SHIFT; spfn = GRANULEROUNDDOWN(start) >> PAGE_SHIFT; - if (!bdp->node_low_pfn) { - bdp->node_min_pfn = spfn; - bdp->node_low_pfn = epfn; + if (!mem_data[node].min_pfn) { + mem_data[node].min_pfn = spfn; + mem_data[node].max_pfn = epfn; } else { - bdp->node_min_pfn = min(spfn, bdp->node_min_pfn); - bdp->node_low_pfn = max(epfn, bdp->node_low_pfn); + mem_data[node].min_pfn = min(spfn, mem_data[node].min_pfn); + mem_data[node].max_pfn = max(epfn, mem_data[node].max_pfn); } return 0; @@ -269,7 +265,6 @@ static void __init fill_pernode(int node, unsigned long pernode, { void *cpu_data; int cpus = early_nr_cpus_node(node); - struct bootmem_data *bdp = &bootmem_node_data[node]; mem_data[node].pernode_addr = pernode; mem_data[node].pernode_size = pernodesize; @@ -284,8 +279,6 @@ static void __init fill_pernode(int node, unsigned long pernode, mem_data[node].node_data = __va(pernode); pernode += L1_CACHE_ALIGN(sizeof(struct ia64_node_data)); - - pgdat_list[node]->bdata = bdp; pernode += L1_CACHE_ALIGN(sizeof(pg_data_t)); cpu_data = per_cpu_node_setup(cpu_data, node); @@ -325,20 +318,16 @@ static int __init find_pernode_space(unsigned long start, unsigned long len, int node) { unsigned long spfn, epfn; - unsigned long pernodesize = 0, pernode, pages, mapsize; - struct bootmem_data *bdp = &bootmem_node_data[node]; + unsigned long pernodesize = 0, pernode; spfn = start >> PAGE_SHIFT; epfn = (start + len) >> PAGE_SHIFT; - pages = bdp->node_low_pfn - bdp->node_min_pfn; - mapsize = bootmem_bootmap_pages(pages) << PAGE_SHIFT; - /* * Make sure this memory falls within this node's usable memory * since we may have thrown some away in build_maps(). */ - if (spfn < bdp->node_min_pfn || epfn > bdp->node_low_pfn) + if (spfn < mem_data[node].min_pfn || epfn > mem_data[node].max_pfn) return 0; /* Don't setup this node's local space twice... */ @@ -353,32 +342,13 @@ static int __init find_pernode_space(unsigned long start, unsigned long len, pernode = NODEDATA_ALIGN(start, node); /* Is this range big enough for what we want to store here? */ - if (start + len > (pernode + pernodesize + mapsize)) + if (start + len > (pernode + pernodesize)) fill_pernode(node, pernode, pernodesize); return 0; } /** - * free_node_bootmem - free bootmem allocator memory for use - * @start: physical start of range - * @len: length of range - * @node: node where this range resides - * - * Simply calls the bootmem allocator to free the specified ranged from - * the given pg_data_t's bdata struct. After this function has been called - * for all the entries in the EFI memory map, the bootmem allocator will - * be ready to service allocation requests. - */ -static int __init free_node_bootmem(unsigned long start, unsigned long len, - int node) -{ - free_bootmem_node(pgdat_list[node], start, len); - - return 0; -} - -/** * reserve_pernode_space - reserve memory for per-node space * * Reserve the space used by the bootmem maps & per-node space in the boot @@ -387,28 +357,17 @@ static int __init free_node_bootmem(unsigned long start, unsigned long len, */ static void __init reserve_pernode_space(void) { - unsigned long base, size, pages; - struct bootmem_data *bdp; + unsigned long base, size; int node; for_each_online_node(node) { - pg_data_t *pdp = pgdat_list[node]; - if (node_isset(node, memory_less_mask)) continue; - bdp = pdp->bdata; - - /* First the bootmem_map itself */ - pages = bdp->node_low_pfn - bdp->node_min_pfn; - size = bootmem_bootmap_pages(pages) << PAGE_SHIFT; - base = __pa(bdp->node_bootmem_map); - reserve_bootmem_node(pdp, base, size, BOOTMEM_DEFAULT); - /* Now the per-node space */ size = mem_data[node].pernode_size; base = __pa(mem_data[node].pernode_addr); - reserve_bootmem_node(pdp, base, size, BOOTMEM_DEFAULT); + memblock_reserve(base, size); } } @@ -528,6 +487,7 @@ void __init find_memory(void) int node; reserve_memory(); + efi_memmap_walk(filter_memory, register_active_ranges); if (num_online_nodes() == 0) { printk(KERN_ERR "node info missing!\n"); @@ -544,38 +504,8 @@ void __init find_memory(void) efi_memmap_walk(find_max_min_low_pfn, NULL); for_each_online_node(node) - if (bootmem_node_data[node].node_low_pfn) { + if (mem_data[node].min_pfn) node_clear(node, memory_less_mask); - mem_data[node].min_pfn = ~0UL; - } - - efi_memmap_walk(filter_memory, register_active_ranges); - - /* - * Initialize the boot memory maps in reverse order since that's - * what the bootmem allocator expects - */ - for (node = MAX_NUMNODES - 1; node >= 0; node--) { - unsigned long pernode, pernodesize, map; - struct bootmem_data *bdp; - - if (!node_online(node)) - continue; - else if (node_isset(node, memory_less_mask)) - continue; - - bdp = &bootmem_node_data[node]; - pernode = mem_data[node].pernode_addr; - pernodesize = mem_data[node].pernode_size; - map = pernode + pernodesize; - - init_bootmem_node(pgdat_list[node], - map>>PAGE_SHIFT, - bdp->node_min_pfn, - bdp->node_low_pfn); - } - - efi_memmap_walk(filter_rsvd_memory, free_node_bootmem); reserve_pernode_space(); memory_less_nodes(); @@ -655,36 +585,6 @@ void call_pernode_memory(unsigned long start, unsigned long len, void *arg) } /** - * count_node_pages - callback to build per-node memory info structures - * @start: physical start of range - * @len: length of range - * @node: node where this range resides - * - * Each node has it's own number of physical pages, DMAable pages, start, and - * end page frame number. This routine will be called by call_pernode_memory() - * for each piece of usable memory and will setup these values for each node. - * Very similar to build_maps(). - */ -static __init int count_node_pages(unsigned long start, unsigned long len, int node) -{ - unsigned long end = start + len; - -#ifdef CONFIG_ZONE_DMA32 - if (start <= __pa(MAX_DMA_ADDRESS)) - mem_data[node].num_dma_physpages += - (min(end, __pa(MAX_DMA_ADDRESS)) - start) >>PAGE_SHIFT; -#endif - start = GRANULEROUNDDOWN(start); - end = GRANULEROUNDUP(end); - mem_data[node].max_pfn = max(mem_data[node].max_pfn, - end >> PAGE_SHIFT); - mem_data[node].min_pfn = min(mem_data[node].min_pfn, - start >> PAGE_SHIFT); - - return 0; -} - -/** * paging_init - setup page tables * * paging_init() sets up the page tables for each node of the system and frees @@ -700,8 +600,6 @@ void __init paging_init(void) max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT; - efi_memmap_walk(filter_rsvd_memory, count_node_pages); - sparse_memory_present_with_active_regions(MAX_NUMNODES); sparse_init(); diff --git a/arch/m68k/coldfire/clk.c b/arch/m68k/coldfire/clk.c index 849cd208e2ed..7bc666e482eb 100644 --- a/arch/m68k/coldfire/clk.c +++ b/arch/m68k/coldfire/clk.c @@ -129,4 +129,33 @@ unsigned long clk_get_rate(struct clk *clk) } EXPORT_SYMBOL(clk_get_rate); +/* dummy functions, should not be called */ +long clk_round_rate(struct clk *clk, unsigned long rate) +{ + WARN_ON(clk); + return 0; +} +EXPORT_SYMBOL(clk_round_rate); + +int clk_set_rate(struct clk *clk, unsigned long rate) +{ + WARN_ON(clk); + return 0; +} +EXPORT_SYMBOL(clk_set_rate); + +int clk_set_parent(struct clk *clk, struct clk *parent) +{ + WARN_ON(clk); + return 0; +} +EXPORT_SYMBOL(clk_set_parent); + +struct clk *clk_get_parent(struct clk *clk) +{ + WARN_ON(clk); + return NULL; +} +EXPORT_SYMBOL(clk_get_parent); + /***************************************************************************/ diff --git a/arch/m68k/include/asm/dma.h b/arch/m68k/include/asm/dma.h index b0978a23bad1..ae2021964e32 100644 --- a/arch/m68k/include/asm/dma.h +++ b/arch/m68k/include/asm/dma.h @@ -390,7 +390,7 @@ static __inline__ void set_dma_mode(unsigned int dmanr, char mode) #ifdef DEBUG_DMA printk("%s(%d): dmanr=%d DMR[%x]=%x DIR[%x]=%x\n", __FILE__, __LINE__, - dmanr, (int) &dmalp[MCFDMA_DMR], dmabp[MCFDMA_DMR], + dmanr, (int) &dmalp[MCFDMA_DMR], dmalp[MCFDMA_DMR], (int) &dmawp[MCFDMA_DIR], dmawp[MCFDMA_DIR]); #endif } @@ -421,7 +421,7 @@ static __inline__ void set_dma_addr(unsigned int dmanr, unsigned int a) #ifdef DEBUG_DMA printk("%s(%d): dmanr=%d DMR[%x]=%x SAR[%x]=%08x DAR[%x]=%08x\n", - __FILE__, __LINE__, dmanr, (int) &dmawp[MCFDMA_DMR], dmawp[MCFDMA_DMR], + __FILE__, __LINE__, dmanr, (int) &dmalp[MCFDMA_DMR], dmalp[MCFDMA_DMR], (int) &dmalp[MCFDMA_DSAR], dmalp[MCFDMA_DSAR], (int) &dmalp[MCFDMA_DDAR], dmalp[MCFDMA_DDAR]); #endif diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 6163a39ddeb6..ace5c5bf1836 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -2,6 +2,8 @@ config MICROBLAZE def_bool y select ARCH_NO_SWAP select ARCH_HAS_GCOV_PROFILE_ALL + select ARCH_HAS_SYNC_DMA_FOR_CPU + select ARCH_HAS_SYNC_DMA_FOR_DEVICE select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_NO_COHERENT_DMA_MMAP if !MMU select ARCH_WANT_IPC_PARSE_VERSION @@ -9,6 +11,8 @@ config MICROBLAZE select TIMER_OF select CLONE_BACKWARDS3 select COMMON_CLK + select DMA_NONCOHERENT_OPS + select DMA_NONCOHERENT_MMAP select GENERIC_ATOMIC64 select GENERIC_CLOCKEVENTS select GENERIC_CPU_DEVICES diff --git a/arch/microblaze/Makefile b/arch/microblaze/Makefile index d269dd4b8279..73330360a8e6 100644 --- a/arch/microblaze/Makefile +++ b/arch/microblaze/Makefile @@ -40,11 +40,11 @@ CPUFLAGS-$(CONFIG_XILINX_MICROBLAZE0_USE_PCMP_INSTR) += -mxl-pattern-compare ifdef CONFIG_CPU_BIG_ENDIAN KBUILD_CFLAGS += -mbig-endian KBUILD_AFLAGS += -mbig-endian -LD += -EB +LDFLAGS += -EB else KBUILD_CFLAGS += -mlittle-endian KBUILD_AFLAGS += -mlittle-endian -LD += -EL +LDFLAGS += -EL endif CPUFLAGS-1 += $(call cc-option,-mcpu=v$(CPU_VER)) diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild index fe6a6c6e5003..569ba9e670c1 100644 --- a/arch/microblaze/include/asm/Kbuild +++ b/arch/microblaze/include/asm/Kbuild @@ -5,6 +5,7 @@ generic-y += bugs.h generic-y += compat.h generic-y += device.h generic-y += div64.h +generic-y += dma-mapping.h generic-y += emergency-restart.h generic-y += exec.h generic-y += extable.h diff --git a/arch/microblaze/include/asm/dma-mapping.h b/arch/microblaze/include/asm/dma-mapping.h deleted file mode 100644 index add50c1373bf..000000000000 --- a/arch/microblaze/include/asm/dma-mapping.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Implements the generic device dma API for microblaze and the pci - * - * Copyright (C) 2009-2010 Michal Simek <monstr@monstr.eu> - * Copyright (C) 2009-2010 PetaLogix - * - * This file is subject to the terms and conditions of the GNU General - * Public License. See the file COPYING in the main directory of this - * archive for more details. - * - * This file is base on powerpc and x86 dma-mapping.h versions - * Copyright (C) 2004 IBM - */ - -#ifndef _ASM_MICROBLAZE_DMA_MAPPING_H -#define _ASM_MICROBLAZE_DMA_MAPPING_H - -/* - * Available generic sets of operations - */ -extern const struct dma_map_ops dma_nommu_ops; - -static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) -{ - return &dma_nommu_ops; -} - -#endif /* _ASM_MICROBLAZE_DMA_MAPPING_H */ diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h index db8b1fa83452..7b650ab14fa0 100644 --- a/arch/microblaze/include/asm/pgtable.h +++ b/arch/microblaze/include/asm/pgtable.h @@ -553,11 +553,6 @@ void __init *early_get_page(void); extern unsigned long ioremap_bot, ioremap_base; -void *consistent_alloc(gfp_t gfp, size_t size, dma_addr_t *dma_handle); -void consistent_free(size_t size, void *vaddr); -void consistent_sync(void *vaddr, size_t size, int direction); -void consistent_sync_page(struct page *page, unsigned long offset, - size_t size, int direction); unsigned long consistent_virt_to_pfn(void *vaddr); void setup_memory(void); diff --git a/arch/microblaze/kernel/dma.c b/arch/microblaze/kernel/dma.c index 3145e7dc8ab1..71032cf64669 100644 --- a/arch/microblaze/kernel/dma.c +++ b/arch/microblaze/kernel/dma.c @@ -8,29 +8,15 @@ */ #include <linux/device.h> -#include <linux/dma-mapping.h> +#include <linux/dma-noncoherent.h> #include <linux/gfp.h> #include <linux/dma-debug.h> #include <linux/export.h> #include <linux/bug.h> #include <asm/cacheflush.h> -static void *dma_nommu_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag, - unsigned long attrs) -{ - return consistent_alloc(flag, size, dma_handle); -} - -static void dma_nommu_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle, - unsigned long attrs) -{ - consistent_free(size, vaddr); -} - -static inline void __dma_sync(unsigned long paddr, - size_t size, enum dma_data_direction direction) +static void __dma_sync(struct device *dev, phys_addr_t paddr, size_t size, + enum dma_data_direction direction) { switch (direction) { case DMA_TO_DEVICE: @@ -45,113 +31,21 @@ static inline void __dma_sync(unsigned long paddr, } } -static int dma_nommu_map_sg(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction direction, - unsigned long attrs) +void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr, + size_t size, enum dma_data_direction dir) { - struct scatterlist *sg; - int i; - - /* FIXME this part of code is untested */ - for_each_sg(sgl, sg, nents, i) { - sg->dma_address = sg_phys(sg); - - if (attrs & DMA_ATTR_SKIP_CPU_SYNC) - continue; - - __dma_sync(sg_phys(sg), sg->length, direction); - } - - return nents; -} - -static inline dma_addr_t dma_nommu_map_page(struct device *dev, - struct page *page, - unsigned long offset, - size_t size, - enum dma_data_direction direction, - unsigned long attrs) -{ - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - __dma_sync(page_to_phys(page) + offset, size, direction); - return page_to_phys(page) + offset; + __dma_sync(dev, paddr, size, dir); } -static inline void dma_nommu_unmap_page(struct device *dev, - dma_addr_t dma_address, - size_t size, - enum dma_data_direction direction, - unsigned long attrs) +void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, + size_t size, enum dma_data_direction dir) { -/* There is not necessary to do cache cleanup - * - * phys_to_virt is here because in __dma_sync_page is __virt_to_phys and - * dma_address is physical address - */ - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - __dma_sync(dma_address, size, direction); + __dma_sync(dev, paddr, size, dir); } -static inline void -dma_nommu_sync_single_for_cpu(struct device *dev, - dma_addr_t dma_handle, size_t size, - enum dma_data_direction direction) -{ - /* - * It's pointless to flush the cache as the memory segment - * is given to the CPU - */ - - if (direction == DMA_FROM_DEVICE) - __dma_sync(dma_handle, size, direction); -} - -static inline void -dma_nommu_sync_single_for_device(struct device *dev, - dma_addr_t dma_handle, size_t size, - enum dma_data_direction direction) -{ - /* - * It's pointless to invalidate the cache if the device isn't - * supposed to write to the relevant region - */ - - if (direction == DMA_TO_DEVICE) - __dma_sync(dma_handle, size, direction); -} - -static inline void -dma_nommu_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sgl, int nents, - enum dma_data_direction direction) -{ - struct scatterlist *sg; - int i; - - /* FIXME this part of code is untested */ - if (direction == DMA_FROM_DEVICE) - for_each_sg(sgl, sg, nents, i) - __dma_sync(sg->dma_address, sg->length, direction); -} - -static inline void -dma_nommu_sync_sg_for_device(struct device *dev, - struct scatterlist *sgl, int nents, - enum dma_data_direction direction) -{ - struct scatterlist *sg; - int i; - - /* FIXME this part of code is untested */ - if (direction == DMA_TO_DEVICE) - for_each_sg(sgl, sg, nents, i) - __dma_sync(sg->dma_address, sg->length, direction); -} - -static -int dma_nommu_mmap_coherent(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t handle, size_t size, - unsigned long attrs) +int arch_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t handle, size_t size, + unsigned long attrs) { #ifdef CONFIG_MMU unsigned long user_count = vma_pages(vma); @@ -170,17 +64,3 @@ int dma_nommu_mmap_coherent(struct device *dev, struct vm_area_struct *vma, return -ENXIO; #endif } - -const struct dma_map_ops dma_nommu_ops = { - .alloc = dma_nommu_alloc_coherent, - .free = dma_nommu_free_coherent, - .mmap = dma_nommu_mmap_coherent, - .map_sg = dma_nommu_map_sg, - .map_page = dma_nommu_map_page, - .unmap_page = dma_nommu_unmap_page, - .sync_single_for_cpu = dma_nommu_sync_single_for_cpu, - .sync_single_for_device = dma_nommu_sync_single_for_device, - .sync_sg_for_cpu = dma_nommu_sync_sg_for_cpu, - .sync_sg_for_device = dma_nommu_sync_sg_for_device, -}; -EXPORT_SYMBOL(dma_nommu_ops); diff --git a/arch/microblaze/kernel/head.S b/arch/microblaze/kernel/head.S index 4655ff342c64..f264fdcf152a 100644 --- a/arch/microblaze/kernel/head.S +++ b/arch/microblaze/kernel/head.S @@ -341,11 +341,6 @@ start_here: /* Initialize r31 with current task address */ addik r31, r0, init_task - /* - * Call platform dependent initialize function. - * Please see $(ARCH)/mach-$(SUBARCH)/setup.c for - * the function. - */ addik r11, r0, machine_early_init brald r15, r11 nop diff --git a/arch/microblaze/mm/consistent.c b/arch/microblaze/mm/consistent.c index b06c3a7faf20..c9a278ac795a 100644 --- a/arch/microblaze/mm/consistent.c +++ b/arch/microblaze/mm/consistent.c @@ -33,6 +33,7 @@ #include <linux/pci.h> #include <linux/interrupt.h> #include <linux/gfp.h> +#include <linux/dma-noncoherent.h> #include <asm/pgalloc.h> #include <linux/io.h> @@ -59,7 +60,8 @@ * uncached region. This will no doubt cause big problems if memory allocated * here is not also freed properly. -- JW */ -void *consistent_alloc(gfp_t gfp, size_t size, dma_addr_t *dma_handle) +void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t gfp, unsigned long attrs) { unsigned long order, vaddr; void *ret; @@ -154,7 +156,6 @@ void *consistent_alloc(gfp_t gfp, size_t size, dma_addr_t *dma_handle) return ret; } -EXPORT_SYMBOL(consistent_alloc); #ifdef CONFIG_MMU static pte_t *consistent_virt_to_pte(void *vaddr) @@ -178,7 +179,8 @@ unsigned long consistent_virt_to_pfn(void *vaddr) /* * free page(s) as defined by the above mapping. */ -void consistent_free(size_t size, void *vaddr) +void arch_dma_free(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_addr, unsigned long attrs) { struct page *page; @@ -218,49 +220,3 @@ void consistent_free(size_t size, void *vaddr) flush_tlb_all(); #endif } -EXPORT_SYMBOL(consistent_free); - -/* - * make an area consistent. - */ -void consistent_sync(void *vaddr, size_t size, int direction) -{ - unsigned long start; - unsigned long end; - - start = (unsigned long)vaddr; - - /* Convert start address back down to unshadowed memory region */ -#ifdef CONFIG_XILINX_UNCACHED_SHADOW - start &= ~UNCACHED_SHADOW_MASK; -#endif - end = start + size; - - switch (direction) { - case PCI_DMA_NONE: - BUG(); - case PCI_DMA_FROMDEVICE: /* invalidate only */ - invalidate_dcache_range(start, end); - break; - case PCI_DMA_TODEVICE: /* writeback only */ - flush_dcache_range(start, end); - break; - case PCI_DMA_BIDIRECTIONAL: /* writeback and invalidate */ - flush_dcache_range(start, end); - break; - } -} -EXPORT_SYMBOL(consistent_sync); - -/* - * consistent_sync_page makes memory consistent. identical - * to consistent_sync, but takes a struct page instead of a - * virtual address - */ -void consistent_sync_page(struct page *page, unsigned long offset, - size_t size, int direction) -{ - unsigned long start = (unsigned long)page_address(page) + offset; - consistent_sync((void *)start, size, direction); -} -EXPORT_SYMBOL(consistent_sync_page); diff --git a/arch/microblaze/pci/pci-common.c b/arch/microblaze/pci/pci-common.c index f34346d56095..2ffd171af8b6 100644 --- a/arch/microblaze/pci/pci-common.c +++ b/arch/microblaze/pci/pci-common.c @@ -597,19 +597,6 @@ static void pcibios_fixup_resources(struct pci_dev *dev) } DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pcibios_fixup_resources); -/* - * We need to avoid collisions with `mirrored' VGA ports - * and other strange ISA hardware, so we always want the - * addresses to be allocated in the 0x000-0x0ff region - * modulo 0x400. - * - * Why? Because some silly external IO cards only decode - * the low 10 bits of the IO address. The 0x00-0xff region - * is reserved for motherboard devices that decode all 16 - * bits, so it's ok to allocate at, say, 0x2800-0x28ff, - * but we want to try to avoid allocating at 0x2900-0x2bff - * which might have be mirrored at 0x0100-0x03ff.. - */ int pcibios_add_device(struct pci_dev *dev) { dev->irq = of_irq_parse_and_map_pci(dev, 0, 0); diff --git a/arch/microblaze/pci/xilinx_pci.c b/arch/microblaze/pci/xilinx_pci.c index 14c7da5fd039..b800909ddccf 100644 --- a/arch/microblaze/pci/xilinx_pci.c +++ b/arch/microblaze/pci/xilinx_pci.c @@ -157,6 +157,7 @@ void __init xilinx_pci_init(void) /* Set the max bus number to 255, and bus/subbus no's to 0 */ pci_reg = of_iomap(pci_node, 0); + WARN_ON(!pci_reg); out_be32(pci_reg + XPLB_PCI_BUS, 0x000000ff); iounmap(pci_reg); diff --git a/arch/mips/kernel/uprobes.c b/arch/mips/kernel/uprobes.c index f7a0645ccb82..4aaff3b3175c 100644 --- a/arch/mips/kernel/uprobes.c +++ b/arch/mips/kernel/uprobes.c @@ -224,7 +224,7 @@ unsigned long arch_uretprobe_hijack_return_addr( int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr) { - return uprobe_write_opcode(mm, vaddr, UPROBE_SWBP_INSN); + return uprobe_write_opcode(auprobe, mm, vaddr, UPROBE_SWBP_INSN); } void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, diff --git a/arch/parisc/include/asm/elf.h b/arch/parisc/include/asm/elf.h index f019d3ec0c1c..d00973aab7f1 100644 --- a/arch/parisc/include/asm/elf.h +++ b/arch/parisc/include/asm/elf.h @@ -235,6 +235,7 @@ typedef unsigned long elf_greg_t; #define SET_PERSONALITY(ex) \ ({ \ set_personality((current->personality & ~PER_MASK) | PER_LINUX); \ + clear_thread_flag(TIF_32BIT); \ current->thread.map_base = DEFAULT_MAP_BASE; \ current->thread.task_size = DEFAULT_TASK_SIZE; \ }) @@ -243,9 +244,11 @@ typedef unsigned long elf_greg_t; #define COMPAT_SET_PERSONALITY(ex) \ ({ \ - set_thread_flag(TIF_32BIT); \ - current->thread.map_base = DEFAULT_MAP_BASE32; \ - current->thread.task_size = DEFAULT_TASK_SIZE32; \ + if ((ex).e_ident[EI_CLASS] == ELFCLASS32) { \ + set_thread_flag(TIF_32BIT); \ + current->thread.map_base = DEFAULT_MAP_BASE32; \ + current->thread.task_size = DEFAULT_TASK_SIZE32; \ + } else clear_thread_flag(TIF_32BIT); \ }) /* diff --git a/arch/parisc/include/asm/linkage.h b/arch/parisc/include/asm/linkage.h index 49f6f3d772cc..cd6fe4febead 100644 --- a/arch/parisc/include/asm/linkage.h +++ b/arch/parisc/include/asm/linkage.h @@ -22,15 +22,6 @@ name: ASM_NL\ .export name -#ifdef CONFIG_64BIT -#define ENDPROC(name) \ - END(name) -#else -#define ENDPROC(name) \ - .type name, @function !\ - END(name) -#endif - #define ENTRY_CFI(name, ...) \ ENTRY(name) ASM_NL\ .proc ASM_NL\ diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h index 2dbe5580a1a4..2bd5e695bdad 100644 --- a/arch/parisc/include/asm/processor.h +++ b/arch/parisc/include/asm/processor.h @@ -256,11 +256,7 @@ on downward growing arches, it looks like this: * it in here from the current->personality */ -#ifdef CONFIG_64BIT -#define USER_WIDE_MODE (!test_thread_flag(TIF_32BIT)) -#else -#define USER_WIDE_MODE 0 -#endif +#define USER_WIDE_MODE (!is_32bit_task()) #define start_thread(regs, new_pc, new_sp) do { \ elf_addr_t *sp = (elf_addr_t *)new_sp; \ diff --git a/arch/parisc/include/asm/traps.h b/arch/parisc/include/asm/traps.h index e00013248907..8ecc1f0c0483 100644 --- a/arch/parisc/include/asm/traps.h +++ b/arch/parisc/include/asm/traps.h @@ -2,7 +2,9 @@ #ifndef __ASM_TRAPS_H #define __ASM_TRAPS_H -#ifdef __KERNEL__ +#define PARISC_ITLB_TRAP 6 /* defined by architecture. Do not change. */ + +#if !defined(__ASSEMBLY__) struct pt_regs; /* traps.c */ diff --git a/arch/parisc/include/asm/unwind.h b/arch/parisc/include/asm/unwind.h index f133b7efbebb..9547e5261a8b 100644 --- a/arch/parisc/include/asm/unwind.h +++ b/arch/parisc/include/asm/unwind.h @@ -73,8 +73,10 @@ unwind_table_remove(struct unwind_table *table); void unwind_frame_init(struct unwind_frame_info *info, struct task_struct *t, struct pt_regs *regs); -void unwind_frame_init_from_blocked_task(struct unwind_frame_info *info, struct task_struct *t); -void unwind_frame_init_running(struct unwind_frame_info *info, struct pt_regs *regs); +void unwind_frame_init_from_blocked_task(struct unwind_frame_info *info, + struct task_struct *t); +void unwind_frame_init_task(struct unwind_frame_info *info, + struct task_struct *task, struct pt_regs *regs); int unwind_once(struct unwind_frame_info *info); int unwind_to_user(struct unwind_frame_info *info); diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index c7508f5717fb..242c5ab65611 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -36,6 +36,7 @@ #include <asm/signal.h> #include <asm/unistd.h> #include <asm/ldcw.h> +#include <asm/traps.h> #include <asm/thread_info.h> #include <linux/linkage.h> @@ -692,7 +693,7 @@ ENTRY(fault_vector_20) def 3 extint 4 def 5 - itlb_20 6 + itlb_20 PARISC_ITLB_TRAP def 7 def 8 def 9 @@ -735,7 +736,7 @@ ENTRY(fault_vector_11) def 3 extint 4 def 5 - itlb_11 6 + itlb_11 PARISC_ITLB_TRAP def 7 def 8 def 9 @@ -776,7 +777,7 @@ END(fault_vector_11) * copy_thread moved args into task save area. */ -ENTRY_CFI(ret_from_kernel_thread) +ENTRY(ret_from_kernel_thread) /* Call schedule_tail first though */ BL schedule_tail, %r2 nop @@ -791,7 +792,7 @@ ENTRY_CFI(ret_from_kernel_thread) copy %r31, %r2 b finish_child_return nop -ENDPROC_CFI(ret_from_kernel_thread) +END(ret_from_kernel_thread) /* @@ -815,9 +816,8 @@ ENTRY_CFI(_switch_to) LDREG TASK_THREAD_INFO(%r25), %r25 bv %r0(%r2) mtctl %r25,%cr30 -ENDPROC_CFI(_switch_to) -ENTRY_CFI(_switch_to_ret) +ENTRY(_switch_to_ret) mtctl %r0, %cr0 /* Needed for single stepping */ callee_rest callee_rest_float @@ -825,7 +825,7 @@ ENTRY_CFI(_switch_to_ret) LDREG -RP_OFFSET(%r30), %r2 bv %r0(%r2) copy %r26, %r28 -ENDPROC_CFI(_switch_to_ret) +ENDPROC_CFI(_switch_to) /* * Common rfi return path for interruptions, kernel execve, and @@ -886,14 +886,12 @@ ENTRY_CFI(syscall_exit_rfi) STREG %r19,PT_SR5(%r16) STREG %r19,PT_SR6(%r16) STREG %r19,PT_SR7(%r16) -ENDPROC_CFI(syscall_exit_rfi) -ENTRY_CFI(intr_return) +ENTRY(intr_return) /* check for reschedule */ mfctl %cr30,%r1 LDREG TI_FLAGS(%r1),%r19 /* sched.h: TIF_NEED_RESCHED */ bb,<,n %r19,31-TIF_NEED_RESCHED,intr_do_resched /* forward */ -ENDPROC_CFI(intr_return) .import do_notify_resume,code intr_check_sig: @@ -1049,6 +1047,7 @@ intr_extint: b do_cpu_irq_mask ldo R%intr_return(%r2), %r2 /* return to intr_return, not here */ +ENDPROC_CFI(syscall_exit_rfi) /* Generic interruptions (illegal insn, unaligned, page fault, etc) */ @@ -1068,21 +1067,12 @@ ENTRY_CFI(intr_save) /* for os_hpmc */ save_specials %r29 /* If this trap is a itlb miss, skip saving/adjusting isr/ior */ - - /* - * FIXME: 1) Use a #define for the hardwired "6" below (and in - * traps.c. - * 2) Once we start executing code above 4 Gb, we need - * to adjust iasq/iaoq here in the same way we - * adjust isr/ior below. - */ - - cmpib,COND(=),n 6,%r26,skip_save_ior + cmpib,COND(=),n PARISC_ITLB_TRAP,%r26,skip_save_ior - mfctl %cr20, %r16 /* isr */ + mfctl %isr, %r16 nop /* serialize mfctl on PA 2.0 to avoid 4 cycle penalty */ - mfctl %cr21, %r17 /* ior */ + mfctl %ior, %r17 #ifdef CONFIG_64BIT @@ -1094,22 +1084,34 @@ ENTRY_CFI(intr_save) /* for os_hpmc */ extrd,u,*<> %r8,PSW_W_BIT,1,%r0 depdi 0,1,2,%r17 - /* - * FIXME: This code has hardwired assumptions about the split - * between space bits and offset bits. This will change - * when we allow alternate page sizes. - */ - - /* adjust isr/ior. */ - extrd,u %r16,63,SPACEID_SHIFT,%r1 /* get high bits from isr for ior */ - depd %r1,31,SPACEID_SHIFT,%r17 /* deposit them into ior */ - depdi 0,63,SPACEID_SHIFT,%r16 /* clear them from isr */ + /* adjust isr/ior: get high bits from isr and deposit in ior */ + space_adjust %r16,%r17,%r1 #endif STREG %r16, PT_ISR(%r29) STREG %r17, PT_IOR(%r29) +#if 0 && defined(CONFIG_64BIT) + /* Revisit when we have 64-bit code above 4Gb */ + b,n intr_save2 skip_save_ior: + /* We have a itlb miss, and when executing code above 4 Gb on ILP64, we + * need to adjust iasq/iaoq here in the same way we adjusted isr/ior + * above. + */ + extrd,u,* %r8,PSW_W_BIT,1,%r1 + cmpib,COND(=),n 1,%r1,intr_save2 + LDREG PT_IASQ0(%r29), %r16 + LDREG PT_IAOQ0(%r29), %r17 + /* adjust iasq/iaoq */ + space_adjust %r16,%r17,%r1 + STREG %r16, PT_IASQ0(%r29) + STREG %r17, PT_IAOQ0(%r29) +#else +skip_save_ior: +#endif + +intr_save2: virt_map save_general %r29 @@ -1747,7 +1749,7 @@ fork_like fork fork_like vfork /* Set the return value for the child */ -ENTRY_CFI(child_return) +ENTRY(child_return) BL schedule_tail, %r2 nop finish_child_return: @@ -1759,7 +1761,7 @@ finish_child_return: reg_restore %r1 b syscall_exit copy %r0,%r28 -ENDPROC_CFI(child_return) +END(child_return) ENTRY_CFI(sys_rt_sigreturn_wrapper) LDREG TI_TASK-THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r26 @@ -1791,7 +1793,7 @@ ENTRY_CFI(sys_rt_sigreturn_wrapper) LDREG PT_GR28(%r1),%r28 /* reload original r28 for syscall_exit */ ENDPROC_CFI(sys_rt_sigreturn_wrapper) -ENTRY_CFI(syscall_exit) +ENTRY(syscall_exit) /* NOTE: Not all syscalls exit this way. rt_sigreturn will exit * via syscall_exit_rfi if the signal was received while the process * was running. @@ -1990,15 +1992,13 @@ syscall_do_resched: #else nop #endif -ENDPROC_CFI(syscall_exit) +END(syscall_exit) #ifdef CONFIG_FUNCTION_TRACER .import ftrace_function_trampoline,code .align L1_CACHE_BYTES - .globl mcount - .type mcount, @function ENTRY_CFI(mcount, caller) _mcount: .export _mcount,data @@ -2027,8 +2027,6 @@ ENDPROC_CFI(mcount) #ifdef CONFIG_FUNCTION_GRAPH_TRACER .align 8 - .globl return_to_handler - .type return_to_handler, @function ENTRY_CFI(return_to_handler, caller,frame=FRAME_SIZE) .export parisc_return_to_handler,data parisc_return_to_handler: @@ -2078,6 +2076,7 @@ ENDPROC_CFI(return_to_handler) /* void call_on_stack(unsigned long param1, void *func, unsigned long new_stack) */ ENTRY_CFI(call_on_stack, FRAME=2*FRAME_SIZE,CALLS,SAVE_RP,SAVE_SP) +ENTRY(_call_on_stack) copy %sp, %r1 /* Regarding the HPPA calling conventions for function pointers, diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c index 45cc65902fce..82bd0d0927ce 100644 --- a/arch/parisc/kernel/processor.c +++ b/arch/parisc/kernel/processor.c @@ -288,6 +288,8 @@ void __init collect_boot_cpu_data(void) printk(KERN_INFO "model %s\n", boot_cpu_data.pdc.sys_model_name); + dump_stack_set_arch_desc("%s", boot_cpu_data.pdc.sys_model_name); + boot_cpu_data.hversion = boot_cpu_data.pdc.model.hversion; boot_cpu_data.sversion = boot_cpu_data.pdc.model.sversion; diff --git a/arch/parisc/kernel/stacktrace.c b/arch/parisc/kernel/stacktrace.c index 2fe914c5f533..ec5835e83a7a 100644 --- a/arch/parisc/kernel/stacktrace.c +++ b/arch/parisc/kernel/stacktrace.c @@ -16,20 +16,7 @@ static void dump_trace(struct task_struct *task, struct stack_trace *trace) { struct unwind_frame_info info; - /* initialize unwind info */ - if (task == current) { - unsigned long sp; - struct pt_regs r; -HERE: - asm volatile ("copy %%r30, %0" : "=r"(sp)); - memset(&r, 0, sizeof(struct pt_regs)); - r.iaoq[0] = (unsigned long)&&HERE; - r.gr[2] = (unsigned long)__builtin_return_address(0); - r.gr[30] = sp; - unwind_frame_init(&info, task, &r); - } else { - unwind_frame_init_from_blocked_task(&info, task); - } + unwind_frame_init_task(&info, task, NULL); /* unwind stack and save entries in stack_trace struct */ trace->nr_entries = 0; diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c index 43b308cfdf53..376ea0d1b275 100644 --- a/arch/parisc/kernel/sys_parisc.c +++ b/arch/parisc/kernel/sys_parisc.c @@ -156,11 +156,6 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, int do_color_align, last_mmap; struct vm_unmapped_area_info info; -#ifdef CONFIG_64BIT - /* This should only ever run for 32-bit processes. */ - BUG_ON(!test_thread_flag(TIF_32BIT)); -#endif - /* requested length too big for entire address space */ if (len > TASK_SIZE) return -ENOMEM; diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index 5f7e57fcaeef..f453997a7b8f 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -108,12 +108,8 @@ linux_gateway_entry: mtsp %r0,%sr6 /* get kernel space into sr6 */ #ifdef CONFIG_64BIT - /* for now we can *always* set the W bit on entry to the syscall - * since we don't support wide userland processes. We could - * also save the current SM other than in r0 and restore it on - * exit from the syscall, and also use that value to know - * whether to do narrow or wide syscalls. -PB - */ + /* Store W bit on entry to the syscall in case it's a wide userland + * process. */ ssm PSW_SM_W, %r1 extrd,u %r1,PSW_W_BIT,1,%r1 /* sp must be aligned on 4, so deposit the W bit setting into @@ -227,8 +223,7 @@ linux_gateway_entry: or,= %r2,%r2,%r2 ldo R%sys_call_table64(%r1), %r19 #else - ldil L%sys_call_table, %r1 - ldo R%sys_call_table(%r1), %r19 + load32 sys_call_table, %r19 #endif comiclr,>> __NR_Linux_syscalls, %r20, %r0 b,n .Lsyscall_nosys @@ -331,8 +326,6 @@ tracesys_next: * task->thread.regs.gr[20] above. */ copy %ret0,%r20 - ldil L%sys_call_table,%r1 - ldo R%sys_call_table(%r1), %r19 ldo -THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r1 /* get task ptr */ LDREG TI_TASK(%r1), %r1 @@ -354,6 +347,23 @@ tracesys_next: comiclr,>> __NR_Linux_syscalls, %r20, %r0 b,n .Ltracesys_nosys + /* Note! We cannot use the syscall table that is mapped + nearby since the gateway page is mapped execute-only. */ + +#ifdef CONFIG_64BIT + LDREG TASK_PT_GR30(%r1), %r19 /* get users sp back */ + extrd,u %r19,63,1,%r2 /* W hidden in bottom bit */ + + ldil L%sys_call_table, %r1 + or,= %r2,%r2,%r2 + addil L%(sys_call_table64-sys_call_table), %r1 + ldo R%sys_call_table(%r1), %r19 + or,= %r2,%r2,%r2 + ldo R%sys_call_table64(%r1), %r19 +#else + load32 sys_call_table, %r19 +#endif + LDREGX %r20(%r19), %r19 /* If this is a sys_rt_sigreturn call, and the signal was received @@ -464,16 +474,13 @@ tracesys_sigexit: lws_start: #ifdef CONFIG_64BIT - /* FIXME: If we are a 64-bit kernel just - * turn this on unconditionally. - */ ssm PSW_SM_W, %r1 extrd,u %r1,PSW_W_BIT,1,%r1 /* sp must be aligned on 4, so deposit the W bit setting into * the bottom of sp temporarily */ or,ev %r1,%r30,%r30 - /* Clip LWS number to a 32-bit value always */ + /* Clip LWS number to a 32-bit value for 32-bit processes */ depdi 0, 31, 32, %r20 #endif diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index 318815212518..68f10f87073d 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -45,7 +45,7 @@ #include "../math-emu/math-emu.h" /* for handle_fpe() */ -static void parisc_show_stack(struct task_struct *task, unsigned long *sp, +static void parisc_show_stack(struct task_struct *task, struct pt_regs *regs); static int printbinary(char *buf, unsigned long x, int nbits) @@ -152,7 +152,7 @@ void show_regs(struct pt_regs *regs) printk("%s IAOQ[1]: %pS\n", level, (void *) regs->iaoq[1]); printk("%s RP(r2): %pS\n", level, (void *) regs->gr[2]); - parisc_show_stack(current, NULL, regs); + parisc_show_stack(current, regs); } } @@ -185,44 +185,19 @@ static void do_show_stack(struct unwind_frame_info *info) printk(KERN_CRIT "\n"); } -static void parisc_show_stack(struct task_struct *task, unsigned long *sp, +static void parisc_show_stack(struct task_struct *task, struct pt_regs *regs) { struct unwind_frame_info info; - struct task_struct *t; - t = task ? task : current; - if (regs) { - unwind_frame_init(&info, t, regs); - goto show_stack; - } - - if (t == current) { - unsigned long sp; - -HERE: - asm volatile ("copy %%r30, %0" : "=r"(sp)); - { - struct pt_regs r; - - memset(&r, 0, sizeof(struct pt_regs)); - r.iaoq[0] = (unsigned long)&&HERE; - r.gr[2] = (unsigned long)__builtin_return_address(0); - r.gr[30] = sp; - - unwind_frame_init(&info, current, &r); - } - } else { - unwind_frame_init_from_blocked_task(&info, t); - } + unwind_frame_init_task(&info, task, regs); -show_stack: do_show_stack(&info); } void show_stack(struct task_struct *t, unsigned long *sp) { - return parisc_show_stack(t, sp, NULL); + parisc_show_stack(t, NULL); } int is_valid_bugaddr(unsigned long iaoq) @@ -557,7 +532,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs) cpu_lpmc(5, regs); return; - case 6: + case PARISC_ITLB_TRAP: /* Instruction TLB miss fault/Instruction page fault */ fault_address = regs->iaoq[0]; fault_space = regs->iasq[0]; diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c index 5cdf13069dd9..f329b466e68f 100644 --- a/arch/parisc/kernel/unwind.c +++ b/arch/parisc/kernel/unwind.c @@ -209,6 +209,8 @@ static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int * We have to use void * instead of a function pointer, because * function pointers aren't a pointer to the function on 64-bit. * Make them const so the compiler knows they live in .text + * Note: We could use dereference_kernel_function_descriptor() + * instead but we want to keep it simple here. */ extern void * const handle_interruption; extern void * const ret_from_kernel_thread; @@ -216,7 +218,7 @@ static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int extern void * const intr_return; extern void * const _switch_to_ret; #ifdef CONFIG_IRQSTACKS - extern void * const call_on_stack; + extern void * const _call_on_stack; #endif /* CONFIG_IRQSTACKS */ if (pc == (unsigned long) &handle_interruption) { @@ -251,7 +253,7 @@ static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int } #ifdef CONFIG_IRQSTACKS - if (pc == (unsigned long) &call_on_stack) { + if (pc == (unsigned long) &_call_on_stack) { info->prev_sp = *(unsigned long *)(info->sp - FRAME_SIZE - REG_SZ); info->prev_ip = *(unsigned long *)(info->sp - FRAME_SIZE - RP_OFFSET); return 1; @@ -403,9 +405,31 @@ void unwind_frame_init_from_blocked_task(struct unwind_frame_info *info, struct kfree(r2); } -void unwind_frame_init_running(struct unwind_frame_info *info, struct pt_regs *regs) +#define get_parisc_stackpointer() ({ \ + unsigned long sp; \ + __asm__("copy %%r30, %0" : "=r"(sp)); \ + (sp); \ +}) + +void unwind_frame_init_task(struct unwind_frame_info *info, + struct task_struct *task, struct pt_regs *regs) { - unwind_frame_init(info, current, regs); + task = task ? task : current; + + if (task == current) { + struct pt_regs r; + + if (!regs) { + memset(&r, 0, sizeof(r)); + r.iaoq[0] = _THIS_IP_; + r.gr[2] = _RET_IP_; + r.gr[30] = get_parisc_stackpointer(); + regs = &r; + } + unwind_frame_init(info, task, &r); + } else { + unwind_frame_init_from_blocked_task(info, task); + } } int unwind_once(struct unwind_frame_info *next_frame) @@ -442,19 +466,12 @@ int unwind_to_user(struct unwind_frame_info *info) unsigned long return_address(unsigned int level) { struct unwind_frame_info info; - struct pt_regs r; - unsigned long sp; /* initialize unwind info */ - asm volatile ("copy %%r30, %0" : "=r"(sp)); - memset(&r, 0, sizeof(struct pt_regs)); - r.iaoq[0] = _THIS_IP_; - r.gr[2] = _RET_IP_; - r.gr[30] = sp; - unwind_frame_init(&info, current, &r); + unwind_frame_init_task(&info, current, NULL); /* unwind stack */ - ++level; + level += 2; do { if (unwind_once(&info) < 0 || info.ip == 0) return 0; diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index a80669209155..db0b6eebbfa5 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -177,6 +177,7 @@ config PPC select HAVE_ARCH_KGDB select HAVE_ARCH_MMAP_RND_BITS select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT + select HAVE_ARCH_PREL32_RELOCATIONS select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_CBPF_JIT if !PPC64 diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 1f345a0b6ba2..83a9aa3cf689 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -390,4 +390,51 @@ extern int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu); #define SPLIT_HACK_MASK 0xff000000 #define SPLIT_HACK_OFFS 0xfb000000 +/* + * This packs a VCPU ID from the [0..KVM_MAX_VCPU_ID) space down to the + * [0..KVM_MAX_VCPUS) space, using knowledge of the guest's core stride + * (but not its actual threading mode, which is not available) to avoid + * collisions. + * + * The implementation leaves VCPU IDs from the range [0..KVM_MAX_VCPUS) (block + * 0) unchanged: if the guest is filling each VCORE completely then it will be + * using consecutive IDs and it will fill the space without any packing. + * + * For higher VCPU IDs, the packed ID is based on the VCPU ID modulo + * KVM_MAX_VCPUS (effectively masking off the top bits) and then an offset is + * added to avoid collisions. + * + * VCPU IDs in the range [KVM_MAX_VCPUS..(KVM_MAX_VCPUS*2)) (block 1) are only + * possible if the guest is leaving at least 1/2 of each VCORE empty, so IDs + * can be safely packed into the second half of each VCORE by adding an offset + * of (stride / 2). + * + * Similarly, if VCPU IDs in the range [(KVM_MAX_VCPUS*2)..(KVM_MAX_VCPUS*4)) + * (blocks 2 and 3) are seen, the guest must be leaving at least 3/4 of each + * VCORE empty so packed IDs can be offset by (stride / 4) and (stride * 3 / 4). + * + * Finally, VCPU IDs from blocks 5..7 will only be seen if the guest is using a + * stride of 8 and 1 thread per core so the remaining offsets of 1, 5, 3 and 7 + * must be free to use. + * + * (The offsets for each block are stored in block_offsets[], indexed by the + * block number if the stride is 8. For cases where the guest's stride is less + * than 8, we can re-use the block_offsets array by multiplying the block + * number by (MAX_SMT_THREADS / stride) to reach the correct entry.) + */ +static inline u32 kvmppc_pack_vcpu_id(struct kvm *kvm, u32 id) +{ + const int block_offsets[MAX_SMT_THREADS] = {0, 4, 2, 6, 1, 5, 3, 7}; + int stride = kvm->arch.emul_smt_mode; + int block = (id / KVM_MAX_VCPUS) * (MAX_SMT_THREADS / stride); + u32 packed_id; + + if (WARN_ONCE(block >= MAX_SMT_THREADS, "VCPU ID too large to pack")) + return 0; + packed_id = (id % KVM_MAX_VCPUS) + block_offsets[block]; + if (WARN_ONCE(packed_id >= KVM_MAX_VCPUS, "VCPU ID packing failed")) + return 0; + return packed_id; +} + #endif /* __ASM_KVM_BOOK3S_H__ */ diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index fa4efa7e88f7..906bcbdfd2a1 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -42,7 +42,14 @@ #define KVM_USER_MEM_SLOTS 512 #include <asm/cputhreads.h> -#define KVM_MAX_VCPU_ID (threads_per_subcore * KVM_MAX_VCORES) + +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +#include <asm/kvm_book3s_asm.h> /* for MAX_SMT_THREADS */ +#define KVM_MAX_VCPU_ID (MAX_SMT_THREADS * KVM_MAX_VCORES) + +#else +#define KVM_MAX_VCPU_ID KVM_MAX_VCPUS +#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ #define __KVM_HAVE_ARCH_INTC_INITIALIZED @@ -672,7 +679,7 @@ struct kvm_vcpu_arch { gva_t vaddr_accessed; pgd_t *pgdir; - u8 io_gpr; /* GPR used as IO source/target */ + u16 io_gpr; /* GPR used as IO source/target */ u8 mmio_host_swabbed; u8 mmio_sign_extend; /* conversion between single and double precision */ @@ -688,7 +695,6 @@ struct kvm_vcpu_arch { */ u8 mmio_vsx_copy_nums; u8 mmio_vsx_offset; - u8 mmio_vsx_tx_sx_enabled; u8 mmio_vmx_copy_nums; u8 mmio_vmx_offset; u8 mmio_copy_type; @@ -801,14 +807,14 @@ struct kvm_vcpu_arch { #define KVMPPC_VCPU_BUSY_IN_HOST 2 /* Values for vcpu->arch.io_gpr */ -#define KVM_MMIO_REG_MASK 0x001f -#define KVM_MMIO_REG_EXT_MASK 0xffe0 +#define KVM_MMIO_REG_MASK 0x003f +#define KVM_MMIO_REG_EXT_MASK 0xffc0 #define KVM_MMIO_REG_GPR 0x0000 -#define KVM_MMIO_REG_FPR 0x0020 -#define KVM_MMIO_REG_QPR 0x0040 -#define KVM_MMIO_REG_FQPR 0x0060 -#define KVM_MMIO_REG_VSX 0x0080 -#define KVM_MMIO_REG_VMX 0x00c0 +#define KVM_MMIO_REG_FPR 0x0040 +#define KVM_MMIO_REG_QPR 0x0080 +#define KVM_MMIO_REG_FQPR 0x00c0 +#define KVM_MMIO_REG_VSX 0x0100 +#define KVM_MMIO_REG_VMX 0x0180 #define __KVM_HAVE_ARCH_WQP #define __KVM_HAVE_CREATE_DEVICE diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 486b7c83b8c5..e5b314ed054e 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -163,7 +163,7 @@ #define PSSCR_ESL 0x00200000 /* Enable State Loss */ #define PSSCR_SD 0x00400000 /* Status Disable */ #define PSSCR_PLS 0xf000000000000000 /* Power-saving Level Status */ -#define PSSCR_GUEST_VIS 0xf0000000000003ff /* Guest-visible PSSCR fields */ +#define PSSCR_GUEST_VIS 0xf0000000000003ffUL /* Guest-visible PSSCR fields */ #define PSSCR_FAKE_SUSPEND 0x00000400 /* Fake-suspend bit (P9 DD2.2) */ #define PSSCR_FAKE_SUSPEND_LG 10 /* Fake-suspend bit position */ diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index e8afdd4f4814..9a3f2646ecc7 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -179,7 +179,7 @@ extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, if ((tbltmp->it_page_shift <= stt->page_shift) && (tbltmp->it_offset << tbltmp->it_page_shift == stt->offset << stt->page_shift) && - (tbltmp->it_size << tbltmp->it_page_shift == + (tbltmp->it_size << tbltmp->it_page_shift >= stt->size << stt->page_shift)) { /* * Reference the table to avoid races with @@ -295,7 +295,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, { struct kvmppc_spapr_tce_table *stt = NULL; struct kvmppc_spapr_tce_table *siter; - unsigned long npages, size; + unsigned long npages, size = args->size; int ret = -ENOMEM; int i; @@ -303,7 +303,6 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, (args->offset + args->size > (ULLONG_MAX >> args->page_shift))) return -EINVAL; - size = _ALIGN_UP(args->size, PAGE_SIZE >> 3); npages = kvmppc_tce_pages(size); ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true); if (ret) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 96f1cc6c97b7..574fc1dcb2bf 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -127,14 +127,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); * and SPURR count and should be set according to the number of * online threads in the vcore being run. */ -#define RWMR_RPA_P8_1THREAD 0x164520C62609AECA -#define RWMR_RPA_P8_2THREAD 0x7FFF2908450D8DA9 -#define RWMR_RPA_P8_3THREAD 0x164520C62609AECA -#define RWMR_RPA_P8_4THREAD 0x199A421245058DA9 -#define RWMR_RPA_P8_5THREAD 0x164520C62609AECA -#define RWMR_RPA_P8_6THREAD 0x164520C62609AECA -#define RWMR_RPA_P8_7THREAD 0x164520C62609AECA -#define RWMR_RPA_P8_8THREAD 0x164520C62609AECA +#define RWMR_RPA_P8_1THREAD 0x164520C62609AECAUL +#define RWMR_RPA_P8_2THREAD 0x7FFF2908450D8DA9UL +#define RWMR_RPA_P8_3THREAD 0x164520C62609AECAUL +#define RWMR_RPA_P8_4THREAD 0x199A421245058DA9UL +#define RWMR_RPA_P8_5THREAD 0x164520C62609AECAUL +#define RWMR_RPA_P8_6THREAD 0x164520C62609AECAUL +#define RWMR_RPA_P8_7THREAD 0x164520C62609AECAUL +#define RWMR_RPA_P8_8THREAD 0x164520C62609AECAUL static unsigned long p8_rwmr_values[MAX_SMT_THREADS + 1] = { RWMR_RPA_P8_1THREAD, @@ -1807,7 +1807,7 @@ static int threads_per_vcore(struct kvm *kvm) return threads_per_subcore; } -static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core) +static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int id) { struct kvmppc_vcore *vcore; @@ -1821,7 +1821,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core) init_swait_queue_head(&vcore->wq); vcore->preempt_tb = TB_NIL; vcore->lpcr = kvm->arch.lpcr; - vcore->first_vcpuid = core * kvm->arch.smt_mode; + vcore->first_vcpuid = id; vcore->kvm = kvm; INIT_LIST_HEAD(&vcore->preempt_list); @@ -2037,12 +2037,26 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, mutex_lock(&kvm->lock); vcore = NULL; err = -EINVAL; - core = id / kvm->arch.smt_mode; + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + if (id >= (KVM_MAX_VCPUS * kvm->arch.emul_smt_mode)) { + pr_devel("KVM: VCPU ID too high\n"); + core = KVM_MAX_VCORES; + } else { + BUG_ON(kvm->arch.smt_mode != 1); + core = kvmppc_pack_vcpu_id(kvm, id); + } + } else { + core = id / kvm->arch.smt_mode; + } if (core < KVM_MAX_VCORES) { vcore = kvm->arch.vcores[core]; - if (!vcore) { + if (vcore && cpu_has_feature(CPU_FTR_ARCH_300)) { + pr_devel("KVM: collision on id %u", id); + vcore = NULL; + } else if (!vcore) { err = -ENOMEM; - vcore = kvmppc_vcore_create(kvm, core); + vcore = kvmppc_vcore_create(kvm, + id & ~(kvm->arch.smt_mode - 1)); kvm->arch.vcores[core] = vcore; kvm->arch.online_vcores++; } @@ -4550,6 +4564,8 @@ static int kvmppc_book3s_init_hv(void) pr_err("KVM-HV: Cannot determine method for accessing XICS\n"); return -ENODEV; } + /* presence of intc confirmed - node can be dropped again */ + of_node_put(np); } #endif diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index f9818d7d3381..126f02b3ffb8 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -317,6 +317,11 @@ static int xive_select_target(struct kvm *kvm, u32 *server, u8 prio) return -EBUSY; } +static u32 xive_vp(struct kvmppc_xive *xive, u32 server) +{ + return xive->vp_base + kvmppc_pack_vcpu_id(xive->kvm, server); +} + static u8 xive_lock_and_mask(struct kvmppc_xive *xive, struct kvmppc_xive_src_block *sb, struct kvmppc_xive_irq_state *state) @@ -362,7 +367,7 @@ static u8 xive_lock_and_mask(struct kvmppc_xive *xive, */ if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) { xive_native_configure_irq(hw_num, - xive->vp_base + state->act_server, + xive_vp(xive, state->act_server), MASKED, state->number); /* set old_p so we can track if an H_EOI was done */ state->old_p = true; @@ -418,7 +423,7 @@ static void xive_finish_unmask(struct kvmppc_xive *xive, */ if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) { xive_native_configure_irq(hw_num, - xive->vp_base + state->act_server, + xive_vp(xive, state->act_server), state->act_priority, state->number); /* If an EOI is needed, do it here */ if (!state->old_p) @@ -495,7 +500,7 @@ static int xive_target_interrupt(struct kvm *kvm, kvmppc_xive_select_irq(state, &hw_num, NULL); return xive_native_configure_irq(hw_num, - xive->vp_base + server, + xive_vp(xive, server), prio, state->number); } @@ -883,7 +888,7 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq, * which is fine for a never started interrupt. */ xive_native_configure_irq(hw_irq, - xive->vp_base + state->act_server, + xive_vp(xive, state->act_server), state->act_priority, state->number); /* @@ -959,7 +964,7 @@ int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq, /* Reconfigure the IPI */ xive_native_configure_irq(state->ipi_number, - xive->vp_base + state->act_server, + xive_vp(xive, state->act_server), state->act_priority, state->number); /* @@ -1084,7 +1089,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, pr_devel("Duplicate !\n"); return -EEXIST; } - if (cpu >= KVM_MAX_VCPUS) { + if (cpu >= (KVM_MAX_VCPUS * vcpu->kvm->arch.emul_smt_mode)) { pr_devel("Out of bounds !\n"); return -EINVAL; } @@ -1098,7 +1103,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, xc->xive = xive; xc->vcpu = vcpu; xc->server_num = cpu; - xc->vp_id = xive->vp_base + cpu; + xc->vp_id = xive_vp(xive, cpu); xc->mfrr = 0xff; xc->valid = true; diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c index afde788be141..75dce1ef3bc8 100644 --- a/arch/powerpc/kvm/emulate_loadstore.c +++ b/arch/powerpc/kvm/emulate_loadstore.c @@ -106,7 +106,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) * if mmio_vsx_tx_sx_enabled == 1, copy data between * VSR[32..63] and memory */ - vcpu->arch.mmio_vsx_tx_sx_enabled = get_tx_or_sx(inst); vcpu->arch.mmio_vsx_copy_nums = 0; vcpu->arch.mmio_vsx_offset = 0; vcpu->arch.mmio_copy_type = KVMPPC_VSX_COPY_NONE; @@ -242,8 +241,8 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) } emulated = kvmppc_handle_vsx_load(run, vcpu, - KVM_MMIO_REG_VSX | (op.reg & 0x1f), - io_size_each, 1, op.type & SIGNEXT); + KVM_MMIO_REG_VSX|op.reg, io_size_each, + 1, op.type & SIGNEXT); break; } #endif @@ -363,7 +362,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) } emulated = kvmppc_handle_vsx_store(run, vcpu, - op.reg & 0x1f, io_size_each, 1); + op.reg, io_size_each, 1); break; } #endif diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 3ccc386b380d..eba5756d5b41 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -879,10 +879,10 @@ static inline void kvmppc_set_vsr_dword(struct kvm_vcpu *vcpu, if (offset == -1) return; - if (vcpu->arch.mmio_vsx_tx_sx_enabled) { - val.vval = VCPU_VSX_VR(vcpu, index); + if (index >= 32) { + val.vval = VCPU_VSX_VR(vcpu, index - 32); val.vsxval[offset] = gpr; - VCPU_VSX_VR(vcpu, index) = val.vval; + VCPU_VSX_VR(vcpu, index - 32) = val.vval; } else { VCPU_VSX_FPR(vcpu, index, offset) = gpr; } @@ -894,11 +894,11 @@ static inline void kvmppc_set_vsr_dword_dump(struct kvm_vcpu *vcpu, union kvmppc_one_reg val; int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK; - if (vcpu->arch.mmio_vsx_tx_sx_enabled) { - val.vval = VCPU_VSX_VR(vcpu, index); + if (index >= 32) { + val.vval = VCPU_VSX_VR(vcpu, index - 32); val.vsxval[0] = gpr; val.vsxval[1] = gpr; - VCPU_VSX_VR(vcpu, index) = val.vval; + VCPU_VSX_VR(vcpu, index - 32) = val.vval; } else { VCPU_VSX_FPR(vcpu, index, 0) = gpr; VCPU_VSX_FPR(vcpu, index, 1) = gpr; @@ -911,12 +911,12 @@ static inline void kvmppc_set_vsr_word_dump(struct kvm_vcpu *vcpu, union kvmppc_one_reg val; int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK; - if (vcpu->arch.mmio_vsx_tx_sx_enabled) { + if (index >= 32) { val.vsx32val[0] = gpr; val.vsx32val[1] = gpr; val.vsx32val[2] = gpr; val.vsx32val[3] = gpr; - VCPU_VSX_VR(vcpu, index) = val.vval; + VCPU_VSX_VR(vcpu, index - 32) = val.vval; } else { val.vsx32val[0] = gpr; val.vsx32val[1] = gpr; @@ -936,10 +936,10 @@ static inline void kvmppc_set_vsr_word(struct kvm_vcpu *vcpu, if (offset == -1) return; - if (vcpu->arch.mmio_vsx_tx_sx_enabled) { - val.vval = VCPU_VSX_VR(vcpu, index); + if (index >= 32) { + val.vval = VCPU_VSX_VR(vcpu, index - 32); val.vsx32val[offset] = gpr32; - VCPU_VSX_VR(vcpu, index) = val.vval; + VCPU_VSX_VR(vcpu, index - 32) = val.vval; } else { dword_offset = offset / 2; word_offset = offset % 2; @@ -1360,10 +1360,10 @@ static inline int kvmppc_get_vsr_data(struct kvm_vcpu *vcpu, int rs, u64 *val) break; } - if (!vcpu->arch.mmio_vsx_tx_sx_enabled) { + if (rs < 32) { *val = VCPU_VSX_FPR(vcpu, rs, vsx_offset); } else { - reg.vval = VCPU_VSX_VR(vcpu, rs); + reg.vval = VCPU_VSX_VR(vcpu, rs - 32); *val = reg.vsxval[vsx_offset]; } break; @@ -1377,13 +1377,13 @@ static inline int kvmppc_get_vsr_data(struct kvm_vcpu *vcpu, int rs, u64 *val) break; } - if (!vcpu->arch.mmio_vsx_tx_sx_enabled) { + if (rs < 32) { dword_offset = vsx_offset / 2; word_offset = vsx_offset % 2; reg.vsxval[0] = VCPU_VSX_FPR(vcpu, rs, dword_offset); *val = reg.vsx32val[word_offset]; } else { - reg.vval = VCPU_VSX_VR(vcpu, rs); + reg.vval = VCPU_VSX_VR(vcpu, rs - 32); *val = reg.vsx32val[vsx_offset]; } break; diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index a6afa60074cb..054b29c9a533 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -135,6 +135,7 @@ config S390 select HAVE_DYNAMIC_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_EFFICIENT_UNALIGNED_ACCESS + select HAVE_FENTRY select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER @@ -157,6 +158,7 @@ config S390 select HAVE_MEMBLOCK_NODE_MAP select HAVE_MEMBLOCK_PHYS_MAP select HAVE_MOD_ARCH_SPECIFIC + select HAVE_NOP_MCOUNT select HAVE_OPROFILE select HAVE_PERF_EVENTS select HAVE_REGS_AND_STACK_ACCESS_API diff --git a/arch/s390/Makefile b/arch/s390/Makefile index eee6703093c3..ba6d122526fb 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -96,13 +96,15 @@ ifdef CONFIG_EXPOLINE endif ifdef CONFIG_FUNCTION_TRACER -# make use of hotpatch feature if the compiler supports it -cc_hotpatch := -mhotpatch=0,3 -ifeq ($(call cc-option-yn,$(cc_hotpatch)),y) -CC_FLAGS_FTRACE := $(cc_hotpatch) -KBUILD_AFLAGS += -DCC_USING_HOTPATCH -KBUILD_CFLAGS += -DCC_USING_HOTPATCH -endif + ifeq ($(call cc-option-yn,-mfentry -mnop-mcount),n) + # make use of hotpatch feature if the compiler supports it + cc_hotpatch := -mhotpatch=0,3 + ifeq ($(call cc-option-yn,$(cc_hotpatch)),y) + CC_FLAGS_FTRACE := $(cc_hotpatch) + KBUILD_AFLAGS += -DCC_USING_HOTPATCH + KBUILD_CFLAGS += -DCC_USING_HOTPATCH + endif + endif endif # Test CFI features of binutils diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index cfccc0edd00d..8ea270fdc7fb 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -4,7 +4,7 @@ #define ARCH_SUPPORTS_FTRACE_OPS 1 -#ifdef CC_USING_HOTPATCH +#if defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT) #define MCOUNT_INSN_SIZE 6 #else #define MCOUNT_INSN_SIZE 24 @@ -42,7 +42,7 @@ struct ftrace_insn { static inline void ftrace_generate_nop_insn(struct ftrace_insn *insn) { #ifdef CONFIG_FUNCTION_TRACER -#ifdef CC_USING_HOTPATCH +#if defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT) /* brcl 0,0 */ insn->opc = 0xc004; insn->disp = 0; @@ -57,7 +57,7 @@ static inline void ftrace_generate_nop_insn(struct ftrace_insn *insn) static inline int is_ftrace_nop(struct ftrace_insn *insn) { #ifdef CONFIG_FUNCTION_TRACER -#ifdef CC_USING_HOTPATCH +#if defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT) if (insn->disp == 0) return 1; #else diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index a2188e309bd6..29c940bf8506 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -269,6 +269,7 @@ struct kvm_s390_sie_block { __u8 reserved1c0[8]; /* 0x01c0 */ #define ECD_HOSTREGMGMT 0x20000000 #define ECD_MEF 0x08000000 +#define ECD_ETOKENF 0x02000000 __u32 ecd; /* 0x01c8 */ __u8 reserved1cc[18]; /* 0x01cc */ __u64 pp; /* 0x01de */ @@ -655,6 +656,7 @@ struct kvm_vcpu_arch { seqcount_t cputm_seqcount; __u64 cputm_start; bool gs_enabled; + bool skey_enabled; }; struct kvm_vm_stat { @@ -793,12 +795,6 @@ struct kvm_s390_vsie { struct page *pages[KVM_MAX_VCPUS]; }; -struct kvm_s390_migration_state { - unsigned long bitmap_size; /* in bits (number of guest pages) */ - atomic64_t dirty_pages; /* number of dirty pages */ - unsigned long *pgste_bitmap; -}; - struct kvm_arch{ void *sca; int use_esca; @@ -828,7 +824,8 @@ struct kvm_arch{ struct kvm_s390_vsie vsie; u8 epdx; u64 epoch; - struct kvm_s390_migration_state *migration_state; + int migration_mode; + atomic64_t cmma_dirty_pages; /* subset of available cpu features enabled by user space */ DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS); struct kvm_s390_gisa *gisa; diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index 4cdaa55fabfe..9a50f02b9894 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -4,7 +4,7 @@ /* * KVM s390 specific structures and definitions * - * Copyright IBM Corp. 2008 + * Copyright IBM Corp. 2008, 2018 * * Author(s): Carsten Otte <cotte@de.ibm.com> * Christian Borntraeger <borntraeger@de.ibm.com> @@ -225,6 +225,7 @@ struct kvm_guest_debug_arch { #define KVM_SYNC_FPRS (1UL << 8) #define KVM_SYNC_GSCB (1UL << 9) #define KVM_SYNC_BPBC (1UL << 10) +#define KVM_SYNC_ETOKEN (1UL << 11) /* length and alignment of the sdnx as a power of two */ #define SDNXC 8 #define SDNXL (1UL << SDNXC) @@ -258,6 +259,8 @@ struct kvm_sync_regs { struct { __u64 reserved1[2]; __u64 gscb[4]; + __u64 etoken; + __u64 etoken_extension; }; }; }; diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index dc76d813e420..84be7f02d0c2 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -61,7 +61,7 @@ unsigned long ftrace_plt; static inline void ftrace_generate_orig_insn(struct ftrace_insn *insn) { -#ifdef CC_USING_HOTPATCH +#if defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT) /* brcl 0,0 */ insn->opc = 0xc004; insn->disp = 0; diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 27110f3294ed..e93fbf02490c 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -35,7 +35,7 @@ ENTRY(ftrace_caller) .globl ftrace_regs_caller .set ftrace_regs_caller,ftrace_caller lgr %r1,%r15 -#ifndef CC_USING_HOTPATCH +#if !(defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT)) aghi %r0,MCOUNT_RETURN_FIXUP #endif aghi %r15,-STACK_FRAME_SIZE diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index cb198d4a6dca..5c53e977be62 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -665,7 +665,7 @@ static void cpumsf_output_event_pid(struct perf_event *event, goto out; /* Update the process ID (see also kernel/events/core.c) */ - data->tid_entry.pid = cpumsf_pid_type(event, pid, __PIDTYPE_TGID); + data->tid_entry.pid = cpumsf_pid_type(event, pid, PIDTYPE_TGID); data->tid_entry.tid = cpumsf_pid_type(event, pid, PIDTYPE_PID); perf_output_sample(&handle, &header, data, event); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index f9d90337e64a..91ad4a9425c0 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -906,54 +906,37 @@ static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req) */ static int kvm_s390_vm_start_migration(struct kvm *kvm) { - struct kvm_s390_migration_state *mgs; struct kvm_memory_slot *ms; - /* should be the only one */ struct kvm_memslots *slots; - unsigned long ram_pages; + unsigned long ram_pages = 0; int slotnr; /* migration mode already enabled */ - if (kvm->arch.migration_state) + if (kvm->arch.migration_mode) return 0; - slots = kvm_memslots(kvm); if (!slots || !slots->used_slots) return -EINVAL; - mgs = kzalloc(sizeof(*mgs), GFP_KERNEL); - if (!mgs) - return -ENOMEM; - kvm->arch.migration_state = mgs; - - if (kvm->arch.use_cmma) { + if (!kvm->arch.use_cmma) { + kvm->arch.migration_mode = 1; + return 0; + } + /* mark all the pages in active slots as dirty */ + for (slotnr = 0; slotnr < slots->used_slots; slotnr++) { + ms = slots->memslots + slotnr; /* - * Get the first slot. They are reverse sorted by base_gfn, so - * the first slot is also the one at the end of the address - * space. We have verified above that at least one slot is - * present. + * The second half of the bitmap is only used on x86, + * and would be wasted otherwise, so we put it to good + * use here to keep track of the state of the storage + * attributes. */ - ms = slots->memslots; - /* round up so we only use full longs */ - ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG); - /* allocate enough bytes to store all the bits */ - mgs->pgste_bitmap = vmalloc(ram_pages / 8); - if (!mgs->pgste_bitmap) { - kfree(mgs); - kvm->arch.migration_state = NULL; - return -ENOMEM; - } - - mgs->bitmap_size = ram_pages; - atomic64_set(&mgs->dirty_pages, ram_pages); - /* mark all the pages in active slots as dirty */ - for (slotnr = 0; slotnr < slots->used_slots; slotnr++) { - ms = slots->memslots + slotnr; - bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages); - } - - kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION); + memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms)); + ram_pages += ms->npages; } + atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages); + kvm->arch.migration_mode = 1; + kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION); return 0; } @@ -963,21 +946,12 @@ static int kvm_s390_vm_start_migration(struct kvm *kvm) */ static int kvm_s390_vm_stop_migration(struct kvm *kvm) { - struct kvm_s390_migration_state *mgs; - /* migration mode already disabled */ - if (!kvm->arch.migration_state) + if (!kvm->arch.migration_mode) return 0; - mgs = kvm->arch.migration_state; - kvm->arch.migration_state = NULL; - - if (kvm->arch.use_cmma) { + kvm->arch.migration_mode = 0; + if (kvm->arch.use_cmma) kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION); - /* We have to wait for the essa emulation to finish */ - synchronize_srcu(&kvm->srcu); - vfree(mgs->pgste_bitmap); - } - kfree(mgs); return 0; } @@ -1005,7 +979,7 @@ static int kvm_s390_vm_set_migration(struct kvm *kvm, static int kvm_s390_vm_get_migration(struct kvm *kvm, struct kvm_device_attr *attr) { - u64 mig = (kvm->arch.migration_state != NULL); + u64 mig = kvm->arch.migration_mode; if (attr->attr != KVM_S390_VM_MIGRATION_STATUS) return -ENXIO; @@ -1653,6 +1627,134 @@ out: #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX) /* + * Similar to gfn_to_memslot, but returns the index of a memslot also when the + * address falls in a hole. In that case the index of one of the memslots + * bordering the hole is returned. + */ +static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn) +{ + int start = 0, end = slots->used_slots; + int slot = atomic_read(&slots->lru_slot); + struct kvm_memory_slot *memslots = slots->memslots; + + if (gfn >= memslots[slot].base_gfn && + gfn < memslots[slot].base_gfn + memslots[slot].npages) + return slot; + + while (start < end) { + slot = start + (end - start) / 2; + + if (gfn >= memslots[slot].base_gfn) + end = slot; + else + start = slot + 1; + } + + if (gfn >= memslots[start].base_gfn && + gfn < memslots[start].base_gfn + memslots[start].npages) { + atomic_set(&slots->lru_slot, start); + } + + return start; +} + +static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args, + u8 *res, unsigned long bufsize) +{ + unsigned long pgstev, hva, cur_gfn = args->start_gfn; + + args->count = 0; + while (args->count < bufsize) { + hva = gfn_to_hva(kvm, cur_gfn); + /* + * We return an error if the first value was invalid, but we + * return successfully if at least one value was copied. + */ + if (kvm_is_error_hva(hva)) + return args->count ? 0 : -EFAULT; + if (get_pgste(kvm->mm, hva, &pgstev) < 0) + pgstev = 0; + res[args->count++] = (pgstev >> 24) & 0x43; + cur_gfn++; + } + + return 0; +} + +static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots, + unsigned long cur_gfn) +{ + int slotidx = gfn_to_memslot_approx(slots, cur_gfn); + struct kvm_memory_slot *ms = slots->memslots + slotidx; + unsigned long ofs = cur_gfn - ms->base_gfn; + + if (ms->base_gfn + ms->npages <= cur_gfn) { + slotidx--; + /* If we are above the highest slot, wrap around */ + if (slotidx < 0) + slotidx = slots->used_slots - 1; + + ms = slots->memslots + slotidx; + ofs = 0; + } + ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs); + while ((slotidx > 0) && (ofs >= ms->npages)) { + slotidx--; + ms = slots->memslots + slotidx; + ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0); + } + return ms->base_gfn + ofs; +} + +static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args, + u8 *res, unsigned long bufsize) +{ + unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev; + struct kvm_memslots *slots = kvm_memslots(kvm); + struct kvm_memory_slot *ms; + + cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn); + ms = gfn_to_memslot(kvm, cur_gfn); + args->count = 0; + args->start_gfn = cur_gfn; + if (!ms) + return 0; + next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1); + mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages; + + while (args->count < bufsize) { + hva = gfn_to_hva(kvm, cur_gfn); + if (kvm_is_error_hva(hva)) + return 0; + /* Decrement only if we actually flipped the bit to 0 */ + if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms))) + atomic64_dec(&kvm->arch.cmma_dirty_pages); + if (get_pgste(kvm->mm, hva, &pgstev) < 0) + pgstev = 0; + /* Save the value */ + res[args->count++] = (pgstev >> 24) & 0x43; + /* If the next bit is too far away, stop. */ + if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE) + return 0; + /* If we reached the previous "next", find the next one */ + if (cur_gfn == next_gfn) + next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1); + /* Reached the end of memory or of the buffer, stop */ + if ((next_gfn >= mem_end) || + (next_gfn - args->start_gfn >= bufsize)) + return 0; + cur_gfn++; + /* Reached the end of the current memslot, take the next one. */ + if (cur_gfn - ms->base_gfn >= ms->npages) { + ms = gfn_to_memslot(kvm, cur_gfn); + if (!ms) + return 0; + } + } + return 0; +} + +/* * This function searches for the next page with dirty CMMA attributes, and * saves the attributes in the buffer up to either the end of the buffer or * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found; @@ -1663,22 +1765,18 @@ out: static int kvm_s390_get_cmma_bits(struct kvm *kvm, struct kvm_s390_cmma_log *args) { - struct kvm_s390_migration_state *s = kvm->arch.migration_state; - unsigned long bufsize, hva, pgstev, i, next, cur; - int srcu_idx, peek, r = 0, rr; - u8 *res; - - cur = args->start_gfn; - i = next = pgstev = 0; + unsigned long bufsize; + int srcu_idx, peek, ret; + u8 *values; - if (unlikely(!kvm->arch.use_cmma)) + if (!kvm->arch.use_cmma) return -ENXIO; /* Invalid/unsupported flags were specified */ if (args->flags & ~KVM_S390_CMMA_PEEK) return -EINVAL; /* Migration mode query, and we are not doing a migration */ peek = !!(args->flags & KVM_S390_CMMA_PEEK); - if (!peek && !s) + if (!peek && !kvm->arch.migration_mode) return -EINVAL; /* CMMA is disabled or was not used, or the buffer has length zero */ bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX); @@ -1686,74 +1784,35 @@ static int kvm_s390_get_cmma_bits(struct kvm *kvm, memset(args, 0, sizeof(*args)); return 0; } - - if (!peek) { - /* We are not peeking, and there are no dirty pages */ - if (!atomic64_read(&s->dirty_pages)) { - memset(args, 0, sizeof(*args)); - return 0; - } - cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, - args->start_gfn); - if (cur >= s->bitmap_size) /* nothing found, loop back */ - cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0); - if (cur >= s->bitmap_size) { /* again! (very unlikely) */ - memset(args, 0, sizeof(*args)); - return 0; - } - next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1); + /* We are not peeking, and there are no dirty pages */ + if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) { + memset(args, 0, sizeof(*args)); + return 0; } - res = vmalloc(bufsize); - if (!res) + values = vmalloc(bufsize); + if (!values) return -ENOMEM; - args->start_gfn = cur; - down_read(&kvm->mm->mmap_sem); srcu_idx = srcu_read_lock(&kvm->srcu); - while (i < bufsize) { - hva = gfn_to_hva(kvm, cur); - if (kvm_is_error_hva(hva)) { - r = -EFAULT; - break; - } - /* decrement only if we actually flipped the bit to 0 */ - if (!peek && test_and_clear_bit(cur, s->pgste_bitmap)) - atomic64_dec(&s->dirty_pages); - r = get_pgste(kvm->mm, hva, &pgstev); - if (r < 0) - pgstev = 0; - /* save the value */ - res[i++] = (pgstev >> 24) & 0x43; - /* - * if the next bit is too far away, stop. - * if we reached the previous "next", find the next one - */ - if (!peek) { - if (next > cur + KVM_S390_MAX_BIT_DISTANCE) - break; - if (cur == next) - next = find_next_bit(s->pgste_bitmap, - s->bitmap_size, cur + 1); - /* reached the end of the bitmap or of the buffer, stop */ - if ((next >= s->bitmap_size) || - (next >= args->start_gfn + bufsize)) - break; - } - cur++; - } + if (peek) + ret = kvm_s390_peek_cmma(kvm, args, values, bufsize); + else + ret = kvm_s390_get_cmma(kvm, args, values, bufsize); srcu_read_unlock(&kvm->srcu, srcu_idx); up_read(&kvm->mm->mmap_sem); - args->count = i; - args->remaining = s ? atomic64_read(&s->dirty_pages) : 0; - rr = copy_to_user((void __user *)args->values, res, args->count); - if (rr) - r = -EFAULT; + if (kvm->arch.migration_mode) + args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages); + else + args->remaining = 0; - vfree(res); - return r; + if (copy_to_user((void __user *)args->values, values, args->count)) + ret = -EFAULT; + + vfree(values); + return ret; } /* @@ -2192,10 +2251,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_s390_destroy_adapters(kvm); kvm_s390_clear_float_irqs(kvm); kvm_s390_vsie_destroy(kvm); - if (kvm->arch.migration_state) { - vfree(kvm->arch.migration_state->pgste_bitmap); - kfree(kvm->arch.migration_state); - } KVM_EVENT(3, "vm 0x%pK destroyed", kvm); } @@ -2353,6 +2408,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC; if (test_kvm_facility(vcpu->kvm, 133)) vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB; + if (test_kvm_facility(vcpu->kvm, 156)) + vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN; /* fprs can be synchronized via vrs, even if the guest has no vx. With * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format. */ @@ -2602,7 +2659,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) } if (test_kvm_facility(vcpu->kvm, 139)) vcpu->arch.sie_block->ecd |= ECD_MEF; - + if (test_kvm_facility(vcpu->kvm, 156)) + vcpu->arch.sie_block->ecd |= ECD_ETOKENF; if (vcpu->arch.sie_block->gd) { vcpu->arch.sie_block->eca |= ECA_AIV; VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u", @@ -3520,6 +3578,7 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } preempt_enable(); } + /* SIE will load etoken directly from SDNX and therefore kvm_run */ kvm_run->kvm_dirty_regs = 0; } @@ -3559,7 +3618,7 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) __ctl_clear_bit(2, 4); vcpu->arch.host_gscb = NULL; } - + /* SIE will save etoken directly into SDNX and therefore kvm_run */ } int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index cfc5a62329f6..d68f10441a16 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -205,13 +205,10 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu) int kvm_s390_skey_check_enable(struct kvm_vcpu *vcpu) { int rc; - struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block; trace_kvm_s390_skey_related_inst(vcpu); /* Already enabled? */ - if (vcpu->kvm->arch.use_skf && - !(sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)) && - !kvm_s390_test_cpuflags(vcpu, CPUSTAT_KSS)) + if (vcpu->arch.skey_enabled) return 0; rc = s390_enable_skey(); @@ -222,9 +219,10 @@ int kvm_s390_skey_check_enable(struct kvm_vcpu *vcpu) if (kvm_s390_test_cpuflags(vcpu, CPUSTAT_KSS)) kvm_s390_clear_cpuflags(vcpu, CPUSTAT_KSS); if (!vcpu->kvm->arch.use_skf) - sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; + vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; else - sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE); + vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE); + vcpu->arch.skey_enabled = true; return 0; } @@ -987,7 +985,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) { - if (clear_user((void __user *)vmaddr, PAGE_SIZE)) + if (kvm_clear_guest(vcpu->kvm, start, PAGE_SIZE)) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); } @@ -1024,9 +1022,11 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) return 0; } -static inline int do_essa(struct kvm_vcpu *vcpu, const int orc) +/* + * Must be called with relevant read locks held (kvm->mm->mmap_sem, kvm->srcu) + */ +static inline int __do_essa(struct kvm_vcpu *vcpu, const int orc) { - struct kvm_s390_migration_state *ms = vcpu->kvm->arch.migration_state; int r1, r2, nappended, entries; unsigned long gfn, hva, res, pgstev, ptev; unsigned long *cbrlo; @@ -1076,10 +1076,12 @@ static inline int do_essa(struct kvm_vcpu *vcpu, const int orc) cbrlo[entries] = gfn << PAGE_SHIFT; } - if (orc && gfn < ms->bitmap_size) { - /* increment only if we are really flipping the bit to 1 */ - if (!test_and_set_bit(gfn, ms->pgste_bitmap)) - atomic64_inc(&ms->dirty_pages); + if (orc) { + struct kvm_memory_slot *ms = gfn_to_memslot(vcpu->kvm, gfn); + + /* Increment only if we are really flipping the bit */ + if (ms && !test_and_set_bit(gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms))) + atomic64_inc(&vcpu->kvm->arch.cmma_dirty_pages); } return nappended; @@ -1108,7 +1110,7 @@ static int handle_essa(struct kvm_vcpu *vcpu) : ESSA_SET_STABLE_IF_RESIDENT)) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - if (likely(!vcpu->kvm->arch.migration_state)) { + if (!vcpu->kvm->arch.migration_mode) { /* * CMMA is enabled in the KVM settings, but is disabled in * the SIE block and in the mm_context, and we are not doing @@ -1136,10 +1138,16 @@ static int handle_essa(struct kvm_vcpu *vcpu) /* Retry the ESSA instruction */ kvm_s390_retry_instr(vcpu); } else { - /* Account for the possible extra cbrl entry */ - i = do_essa(vcpu, orc); + int srcu_idx; + + down_read(&vcpu->kvm->mm->mmap_sem); + srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); + i = __do_essa(vcpu, orc); + srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); + up_read(&vcpu->kvm->mm->mmap_sem); if (i < 0) return i; + /* Account for the possible extra cbrl entry */ entries += i; } vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */ diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 84c89cb9636f..63844b95c22c 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -2,7 +2,7 @@ /* * kvm nested virtualization support for s390x * - * Copyright IBM Corp. 2016 + * Copyright IBM Corp. 2016, 2018 * * Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com> */ @@ -378,6 +378,10 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) if (test_kvm_facility(vcpu->kvm, 139)) scb_s->ecd |= scb_o->ecd & ECD_MEF; + /* etoken */ + if (test_kvm_facility(vcpu->kvm, 156)) + scb_s->ecd |= scb_o->ecd & ECD_ETOKENF; + prepare_ibc(vcpu, vsie_page); rc = shadow_crycb(vcpu, vsie_page); out: @@ -627,7 +631,8 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) vsie_page->riccbd_gpa = gpa; scb_s->riccbd = hpa; } - if ((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) { + if (((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) || + (scb_s->ecd & ECD_ETOKENF)) { unsigned long sdnxc; gpa = READ_ONCE(scb_o->sdnxo) & ~0xfUL; @@ -818,6 +823,8 @@ static int handle_stfle(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) * - < 0 if an error occurred */ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) + __releases(vcpu->kvm->srcu) + __acquires(vcpu->kvm->srcu) { struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; struct kvm_s390_sie_block *scb_o = vsie_page->scb_o; diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c index 90a8c9e84ca6..0c85aedcf9b3 100644 --- a/arch/s390/tools/gen_facilities.c +++ b/arch/s390/tools/gen_facilities.c @@ -4,7 +4,7 @@ * numbering scheme from the Princples of Operations: most significant bit * has bit number 0. * - * Copyright IBM Corp. 2015 + * Copyright IBM Corp. 2015, 2018 * */ @@ -106,6 +106,7 @@ static struct facility_def facility_defs[] = { .name = "FACILITIES_KVM_CPUMODEL", .bits = (int[]){ + 156, /* etoken facility */ -1 /* END */ } }, diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 2d58c26bff9a..e6f2a38d2e61 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -45,9 +45,13 @@ config SPARC select LOCKDEP_SMALL if LOCKDEP select NEED_DMA_MAP_STATE select NEED_SG_DMA_LENGTH + select HAVE_MEMBLOCK + select NO_BOOTMEM config SPARC32 def_bool !64BIT + select ARCH_HAS_SYNC_DMA_FOR_CPU + select DMA_NONCOHERENT_OPS select GENERIC_ATOMIC64 select CLZ_TAB select HAVE_UID16 @@ -60,7 +64,6 @@ config SPARC64 select HAVE_KRETPROBES select HAVE_KPROBES select HAVE_RCU_TABLE_FREE if SMP - select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_DYNAMIC_FTRACE @@ -79,7 +82,6 @@ config SPARC64 select IRQ_PREFLOW_FASTEOI select ARCH_HAVE_NMI_SAFE_CMPXCHG select HAVE_C_RECORDMCOUNT - select NO_BOOTMEM select HAVE_ARCH_AUDITSYSCALL select ARCH_SUPPORTS_ATOMIC_RMW select HAVE_NMI diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile index 966a13d2b127..e32ef20de567 100644 --- a/arch/sparc/Makefile +++ b/arch/sparc/Makefile @@ -9,10 +9,10 @@ # Copyright (C) 1998 Jakub Jelinek (jj@ultra.linux.cz) # We are not yet configured - so test on arch -ifeq ($(ARCH),sparc) - KBUILD_DEFCONFIG := sparc32_defconfig -else +ifeq ($(ARCH),sparc64) KBUILD_DEFCONFIG := sparc64_defconfig +else + KBUILD_DEFCONFIG := sparc32_defconfig endif ifeq ($(CONFIG_SPARC32),y) diff --git a/arch/sparc/include/asm/dma-mapping.h b/arch/sparc/include/asm/dma-mapping.h index 12ae33daf52f..e17566376934 100644 --- a/arch/sparc/include/asm/dma-mapping.h +++ b/arch/sparc/include/asm/dma-mapping.h @@ -7,7 +7,6 @@ #include <linux/dma-debug.h> extern const struct dma_map_ops *dma_ops; -extern const struct dma_map_ops pci32_dma_ops; extern struct bus_type pci_bus_type; @@ -15,11 +14,11 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { #ifdef CONFIG_SPARC_LEON if (sparc_cpu_model == sparc_leon) - return &pci32_dma_ops; + return &dma_noncoherent_ops; #endif #if defined(CONFIG_SPARC32) && defined(CONFIG_PCI) if (bus == &pci_bus_type) - return &pci32_dma_ops; + return &dma_noncoherent_ops; #endif return dma_ops; } diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c index cca9134cfa7d..6799c93c9f27 100644 --- a/arch/sparc/kernel/ioport.c +++ b/arch/sparc/kernel/ioport.c @@ -38,6 +38,7 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/scatterlist.h> +#include <linux/dma-noncoherent.h> #include <linux/of_device.h> #include <asm/io.h> @@ -434,42 +435,41 @@ arch_initcall(sparc_register_ioport); /* Allocate and map kernel buffer using consistent mode DMA for a device. * hwdev should be valid struct pci_dev pointer for PCI devices. */ -static void *pci32_alloc_coherent(struct device *dev, size_t len, - dma_addr_t *pba, gfp_t gfp, - unsigned long attrs) +void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t gfp, unsigned long attrs) { - unsigned long len_total = PAGE_ALIGN(len); + unsigned long len_total = PAGE_ALIGN(size); void *va; struct resource *res; int order; - if (len == 0) { + if (size == 0) { return NULL; } - if (len > 256*1024) { /* __get_free_pages() limit */ + if (size > 256*1024) { /* __get_free_pages() limit */ return NULL; } order = get_order(len_total); va = (void *) __get_free_pages(gfp, order); if (va == NULL) { - printk("pci_alloc_consistent: no %ld pages\n", len_total>>PAGE_SHIFT); + printk("%s: no %ld pages\n", __func__, len_total>>PAGE_SHIFT); goto err_nopages; } if ((res = kzalloc(sizeof(struct resource), GFP_KERNEL)) == NULL) { - printk("pci_alloc_consistent: no core\n"); + printk("%s: no core\n", __func__); goto err_nomem; } if (allocate_resource(&_sparc_dvma, res, len_total, _sparc_dvma.start, _sparc_dvma.end, PAGE_SIZE, NULL, NULL) != 0) { - printk("pci_alloc_consistent: cannot occupy 0x%lx", len_total); + printk("%s: cannot occupy 0x%lx", __func__, len_total); goto err_nova; } srmmu_mapiorange(0, virt_to_phys(va), res->start, len_total); - *pba = virt_to_phys(va); /* equals virt_to_bus (R.I.P.) for us. */ + *dma_handle = virt_to_phys(va); return (void *) res->start; err_nova: @@ -481,184 +481,53 @@ err_nopages: } /* Free and unmap a consistent DMA buffer. - * cpu_addr is what was returned from pci_alloc_consistent, - * size must be the same as what as passed into pci_alloc_consistent, - * and likewise dma_addr must be the same as what *dma_addrp was set to. + * cpu_addr is what was returned arch_dma_alloc, size must be the same as what + * was passed into arch_dma_alloc, and likewise dma_addr must be the same as + * what *dma_ndler was set to. * * References to the memory and mappings associated with cpu_addr/dma_addr * past this call are illegal. */ -static void pci32_free_coherent(struct device *dev, size_t n, void *p, - dma_addr_t ba, unsigned long attrs) +void arch_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t dma_addr, unsigned long attrs) { struct resource *res; if ((res = lookup_resource(&_sparc_dvma, - (unsigned long)p)) == NULL) { - printk("pci_free_consistent: cannot free %p\n", p); + (unsigned long)cpu_addr)) == NULL) { + printk("%s: cannot free %p\n", __func__, cpu_addr); return; } - if (((unsigned long)p & (PAGE_SIZE-1)) != 0) { - printk("pci_free_consistent: unaligned va %p\n", p); + if (((unsigned long)cpu_addr & (PAGE_SIZE-1)) != 0) { + printk("%s: unaligned va %p\n", __func__, cpu_addr); return; } - n = PAGE_ALIGN(n); - if (resource_size(res) != n) { - printk("pci_free_consistent: region 0x%lx asked 0x%lx\n", - (long)resource_size(res), (long)n); + size = PAGE_ALIGN(size); + if (resource_size(res) != size) { + printk("%s: region 0x%lx asked 0x%zx\n", __func__, + (long)resource_size(res), size); return; } - dma_make_coherent(ba, n); - srmmu_unmapiorange((unsigned long)p, n); + dma_make_coherent(dma_addr, size); + srmmu_unmapiorange((unsigned long)cpu_addr, size); release_resource(res); kfree(res); - free_pages((unsigned long)phys_to_virt(ba), get_order(n)); -} - -/* - * Same as pci_map_single, but with pages. - */ -static dma_addr_t pci32_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - unsigned long attrs) -{ - /* IIep is write-through, not flushing. */ - return page_to_phys(page) + offset; -} - -static void pci32_unmap_page(struct device *dev, dma_addr_t ba, size_t size, - enum dma_data_direction dir, unsigned long attrs) -{ - if (dir != PCI_DMA_TODEVICE && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - dma_make_coherent(ba, PAGE_ALIGN(size)); -} - -/* Map a set of buffers described by scatterlist in streaming - * mode for DMA. This is the scatter-gather version of the - * above pci_map_single interface. Here the scatter gather list - * elements are each tagged with the appropriate dma address - * and length. They are obtained via sg_dma_{address,length}(SG). - * - * NOTE: An implementation may be able to use a smaller number of - * DMA address/length pairs than there are SG table elements. - * (for example via virtual mapping capabilities) - * The routine returns the number of addr/length pairs actually - * used, at most nents. - * - * Device ownership issues as mentioned above for pci_map_single are - * the same here. - */ -static int pci32_map_sg(struct device *device, struct scatterlist *sgl, - int nents, enum dma_data_direction dir, - unsigned long attrs) -{ - struct scatterlist *sg; - int n; - - /* IIep is write-through, not flushing. */ - for_each_sg(sgl, sg, nents, n) { - sg->dma_address = sg_phys(sg); - sg->dma_length = sg->length; - } - return nents; -} - -/* Unmap a set of streaming mode DMA translations. - * Again, cpu read rules concerning calls here are the same as for - * pci_unmap_single() above. - */ -static void pci32_unmap_sg(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir, - unsigned long attrs) -{ - struct scatterlist *sg; - int n; - - if (dir != PCI_DMA_TODEVICE && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) { - for_each_sg(sgl, sg, nents, n) { - dma_make_coherent(sg_phys(sg), PAGE_ALIGN(sg->length)); - } - } -} - -/* Make physical memory consistent for a single - * streaming mode DMA translation before or after a transfer. - * - * If you perform a pci_map_single() but wish to interrogate the - * buffer using the cpu, yet do not wish to teardown the PCI dma - * mapping, you must call this function before doing so. At the - * next point you give the PCI dma address back to the card, you - * must first perform a pci_dma_sync_for_device, and then the - * device again owns the buffer. - */ -static void pci32_sync_single_for_cpu(struct device *dev, dma_addr_t ba, - size_t size, enum dma_data_direction dir) -{ - if (dir != PCI_DMA_TODEVICE) { - dma_make_coherent(ba, PAGE_ALIGN(size)); - } -} - -static void pci32_sync_single_for_device(struct device *dev, dma_addr_t ba, - size_t size, enum dma_data_direction dir) -{ - if (dir != PCI_DMA_TODEVICE) { - dma_make_coherent(ba, PAGE_ALIGN(size)); - } + free_pages((unsigned long)phys_to_virt(dma_addr), get_order(size)); } -/* Make physical memory consistent for a set of streaming - * mode DMA translations after a transfer. - * - * The same as pci_dma_sync_single_* but for a scatter-gather list, - * same rules and usage. - */ -static void pci32_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir) -{ - struct scatterlist *sg; - int n; - - if (dir != PCI_DMA_TODEVICE) { - for_each_sg(sgl, sg, nents, n) { - dma_make_coherent(sg_phys(sg), PAGE_ALIGN(sg->length)); - } - } -} +/* IIep is write-through, not flushing on cpu to device transfer. */ -static void pci32_sync_sg_for_device(struct device *device, struct scatterlist *sgl, - int nents, enum dma_data_direction dir) +void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, + size_t size, enum dma_data_direction dir) { - struct scatterlist *sg; - int n; - - if (dir != PCI_DMA_TODEVICE) { - for_each_sg(sgl, sg, nents, n) { - dma_make_coherent(sg_phys(sg), PAGE_ALIGN(sg->length)); - } - } + if (dir != PCI_DMA_TODEVICE) + dma_make_coherent(paddr, PAGE_ALIGN(size)); } -/* note: leon re-uses pci32_dma_ops */ -const struct dma_map_ops pci32_dma_ops = { - .alloc = pci32_alloc_coherent, - .free = pci32_free_coherent, - .map_page = pci32_map_page, - .unmap_page = pci32_unmap_page, - .map_sg = pci32_map_sg, - .unmap_sg = pci32_unmap_sg, - .sync_single_for_cpu = pci32_sync_single_for_cpu, - .sync_single_for_device = pci32_sync_single_for_device, - .sync_sg_for_cpu = pci32_sync_sg_for_cpu, - .sync_sg_for_device = pci32_sync_sg_for_device, -}; -EXPORT_SYMBOL(pci32_dma_ops); - const struct dma_map_ops *dma_ops = &sbus_dma_ops; EXPORT_SYMBOL(dma_ops); diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c index 95fe4f081ba3..92634d4e440c 100644 --- a/arch/sparc/mm/init_32.c +++ b/arch/sparc/mm/init_32.c @@ -23,6 +23,7 @@ #include <linux/init.h> #include <linux/highmem.h> #include <linux/bootmem.h> +#include <linux/memblock.h> #include <linux/pagemap.h> #include <linux/poison.h> #include <linux/gfp.h> @@ -101,13 +102,46 @@ static unsigned long calc_max_low_pfn(void) return tmp; } +static void __init find_ramdisk(unsigned long end_of_phys_memory) +{ +#ifdef CONFIG_BLK_DEV_INITRD + unsigned long size; + + /* Now have to check initial ramdisk, so that it won't pass + * the end of memory + */ + if (sparc_ramdisk_image) { + if (sparc_ramdisk_image >= (unsigned long)&_end - 2 * PAGE_SIZE) + sparc_ramdisk_image -= KERNBASE; + initrd_start = sparc_ramdisk_image + phys_base; + initrd_end = initrd_start + sparc_ramdisk_size; + if (initrd_end > end_of_phys_memory) { + printk(KERN_CRIT "initrd extends beyond end of memory " + "(0x%016lx > 0x%016lx)\ndisabling initrd\n", + initrd_end, end_of_phys_memory); + initrd_start = 0; + } else { + /* Reserve the initrd image area. */ + size = initrd_end - initrd_start; + memblock_reserve(initrd_start, size); + + initrd_start = (initrd_start - phys_base) + PAGE_OFFSET; + initrd_end = (initrd_end - phys_base) + PAGE_OFFSET; + } + } +#endif +} + unsigned long __init bootmem_init(unsigned long *pages_avail) { - unsigned long bootmap_size, start_pfn; - unsigned long end_of_phys_memory = 0UL; - unsigned long bootmap_pfn, bytes_avail, size; + unsigned long start_pfn, bytes_avail, size; + unsigned long end_of_phys_memory = 0; + unsigned long high_pages = 0; int i; + memblock_set_bottom_up(true); + memblock_allow_resize(); + bytes_avail = 0UL; for (i = 0; sp_banks[i].num_bytes != 0; i++) { end_of_phys_memory = sp_banks[i].base_addr + @@ -124,24 +158,25 @@ unsigned long __init bootmem_init(unsigned long *pages_avail) if (sp_banks[i].num_bytes == 0) { sp_banks[i].base_addr = 0xdeadbeef; } else { + memblock_add(sp_banks[i].base_addr, + sp_banks[i].num_bytes); sp_banks[i+1].num_bytes = 0; sp_banks[i+1].base_addr = 0xdeadbeef; } break; } } + memblock_add(sp_banks[i].base_addr, sp_banks[i].num_bytes); } /* Start with page aligned address of last symbol in kernel - * image. + * image. */ start_pfn = (unsigned long)__pa(PAGE_ALIGN((unsigned long) &_end)); /* Now shift down to get the real physical page frame number. */ start_pfn >>= PAGE_SHIFT; - bootmap_pfn = start_pfn; - max_pfn = end_of_phys_memory >> PAGE_SHIFT; max_low_pfn = max_pfn; @@ -150,85 +185,19 @@ unsigned long __init bootmem_init(unsigned long *pages_avail) if (max_low_pfn > pfn_base + (SRMMU_MAXMEM >> PAGE_SHIFT)) { highstart_pfn = pfn_base + (SRMMU_MAXMEM >> PAGE_SHIFT); max_low_pfn = calc_max_low_pfn(); + high_pages = calc_highpages(); printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", - calc_highpages() >> (20 - PAGE_SHIFT)); + high_pages >> (20 - PAGE_SHIFT)); } -#ifdef CONFIG_BLK_DEV_INITRD - /* Now have to check initial ramdisk, so that bootmap does not overwrite it */ - if (sparc_ramdisk_image) { - if (sparc_ramdisk_image >= (unsigned long)&_end - 2 * PAGE_SIZE) - sparc_ramdisk_image -= KERNBASE; - initrd_start = sparc_ramdisk_image + phys_base; - initrd_end = initrd_start + sparc_ramdisk_size; - if (initrd_end > end_of_phys_memory) { - printk(KERN_CRIT "initrd extends beyond end of memory " - "(0x%016lx > 0x%016lx)\ndisabling initrd\n", - initrd_end, end_of_phys_memory); - initrd_start = 0; - } - if (initrd_start) { - if (initrd_start >= (start_pfn << PAGE_SHIFT) && - initrd_start < (start_pfn << PAGE_SHIFT) + 2 * PAGE_SIZE) - bootmap_pfn = PAGE_ALIGN (initrd_end) >> PAGE_SHIFT; - } - } -#endif - /* Initialize the boot-time allocator. */ - bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap_pfn, pfn_base, - max_low_pfn); - - /* Now register the available physical memory with the - * allocator. - */ - *pages_avail = 0; - for (i = 0; sp_banks[i].num_bytes != 0; i++) { - unsigned long curr_pfn, last_pfn; + find_ramdisk(end_of_phys_memory); - curr_pfn = sp_banks[i].base_addr >> PAGE_SHIFT; - if (curr_pfn >= max_low_pfn) - break; - - last_pfn = (sp_banks[i].base_addr + sp_banks[i].num_bytes) >> PAGE_SHIFT; - if (last_pfn > max_low_pfn) - last_pfn = max_low_pfn; - - /* - * .. finally, did all the rounding and playing - * around just make the area go away? - */ - if (last_pfn <= curr_pfn) - continue; - - size = (last_pfn - curr_pfn) << PAGE_SHIFT; - *pages_avail += last_pfn - curr_pfn; - - free_bootmem(sp_banks[i].base_addr, size); - } - -#ifdef CONFIG_BLK_DEV_INITRD - if (initrd_start) { - /* Reserve the initrd image area. */ - size = initrd_end - initrd_start; - reserve_bootmem(initrd_start, size, BOOTMEM_DEFAULT); - *pages_avail -= PAGE_ALIGN(size) >> PAGE_SHIFT; - - initrd_start = (initrd_start - phys_base) + PAGE_OFFSET; - initrd_end = (initrd_end - phys_base) + PAGE_OFFSET; - } -#endif /* Reserve the kernel text/data/bss. */ size = (start_pfn << PAGE_SHIFT) - phys_base; - reserve_bootmem(phys_base, size, BOOTMEM_DEFAULT); - *pages_avail -= PAGE_ALIGN(size) >> PAGE_SHIFT; + memblock_reserve(phys_base, size); - /* Reserve the bootmem map. We do not account for it - * in pages_avail because we will release that memory - * in free_all_bootmem. - */ - size = bootmap_size; - reserve_bootmem((bootmap_pfn << PAGE_SHIFT), size, BOOTMEM_DEFAULT); - *pages_avail -= PAGE_ALIGN(size) >> PAGE_SHIFT; + size = memblock_phys_mem_size() - memblock_reserved_size(); + *pages_avail = (size >> PAGE_SHIFT) - high_pages; return max_pfn; } @@ -322,7 +291,7 @@ void __init mem_init(void) map_high_region(start_pfn, end_pfn); } - + mem_init_print_info(NULL); } diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b0312f8947ce..512003f16889 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -124,6 +124,7 @@ config X86 select HAVE_ARCH_MMAP_RND_BITS if MMU select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT select HAVE_ARCH_COMPAT_MMAP_BASES if MMU && COMPAT + select HAVE_ARCH_PREL32_RELOCATIONS select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_THREAD_STRUCT_WHITELIST select HAVE_ARCH_TRACEHOOK diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 302517929932..d1e19f358b6e 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -23,11 +23,8 @@ * _ctype[] in lib/ctype.c is needed by isspace() of linux/ctype.h. * While both lib/ctype.c and lib/cmdline.c will bring EXPORT_SYMBOL * which is meaningless and will cause compiling error in some cases. - * So do not include linux/export.h and define EXPORT_SYMBOL(sym) - * as empty. */ -#define _LINUX_EXPORT_H -#define EXPORT_SYMBOL(sym) +#define __DISABLE_EXPORTS #include "misc.h" #include "error.h" diff --git a/arch/x86/hyperv/Makefile b/arch/x86/hyperv/Makefile index b173d404e3df..b21ee65c4101 100644 --- a/arch/x86/hyperv/Makefile +++ b/arch/x86/hyperv/Makefile @@ -1,2 +1,2 @@ -obj-y := hv_init.o mmu.o +obj-y := hv_init.o mmu.o nested.o obj-$(CONFIG_X86_64) += hv_apic.o diff --git a/arch/x86/hyperv/nested.c b/arch/x86/hyperv/nested.c new file mode 100644 index 000000000000..b8e60cc50461 --- /dev/null +++ b/arch/x86/hyperv/nested.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Hyper-V nested virtualization code. + * + * Copyright (C) 2018, Microsoft, Inc. + * + * Author : Lan Tianyu <Tianyu.Lan@microsoft.com> + */ + + +#include <linux/types.h> +#include <asm/hyperv-tlfs.h> +#include <asm/mshyperv.h> +#include <asm/tlbflush.h> + +#include <asm/trace/hyperv.h> + +int hyperv_flush_guest_mapping(u64 as) +{ + struct hv_guest_mapping_flush **flush_pcpu; + struct hv_guest_mapping_flush *flush; + u64 status; + unsigned long flags; + int ret = -ENOTSUPP; + + if (!hv_hypercall_pg) + goto fault; + + local_irq_save(flags); + + flush_pcpu = (struct hv_guest_mapping_flush **) + this_cpu_ptr(hyperv_pcpu_input_arg); + + flush = *flush_pcpu; + + if (unlikely(!flush)) { + local_irq_restore(flags); + goto fault; + } + + flush->address_space = as; + flush->flags = 0; + + status = hv_do_hypercall(HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE, + flush, NULL); + local_irq_restore(flags); + + if (!(status & HV_HYPERCALL_RESULT_MASK)) + ret = 0; + +fault: + trace_hyperv_nested_flush_guest_mapping(as, ret); + return ret; +} +EXPORT_SYMBOL_GPL(hyperv_flush_guest_mapping); diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index de690c2d2e33..a0ab9ab61c75 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -8,5 +8,6 @@ generated-y += xen-hypercalls.h generic-y += dma-contiguous.h generic-y += early_ioremap.h +generic-y += export.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h diff --git a/arch/x86/include/asm/export.h b/arch/x86/include/asm/export.h deleted file mode 100644 index 2a51d66689c5..000000000000 --- a/arch/x86/include/asm/export.h +++ /dev/null @@ -1,5 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifdef CONFIG_64BIT -#define KSYM_ALIGN 16 -#endif -#include <asm-generic/export.h> diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 6ced78af48da..e977b6b3a538 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -310,6 +310,7 @@ struct ms_hyperv_tsc_page { #define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106 /* Nested features (CPUID 0x4000000A) EAX */ +#define HV_X64_NESTED_GUEST_MAPPING_FLUSH BIT(18) #define HV_X64_NESTED_MSR_BITMAP BIT(19) struct hv_reenlightenment_control { @@ -351,6 +352,7 @@ struct hv_tsc_emulation_status { #define HVCALL_SEND_IPI_EX 0x0015 #define HVCALL_POST_MESSAGE 0x005c #define HVCALL_SIGNAL_EVENT 0x005d +#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af #define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001 #define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12 @@ -742,6 +744,12 @@ struct ipi_arg_ex { struct hv_vpset vp_set; }; +/* HvFlushGuestPhysicalAddressSpace hypercalls */ +struct hv_guest_mapping_flush { + u64 address_space; + u64 flags; +}; + /* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */ struct hv_tlb_flush { u64 address_space; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index acebb808c4b5..00ddb0c9e612 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -55,6 +55,7 @@ #define KVM_REQ_TRIPLE_FAULT KVM_ARCH_REQ(2) #define KVM_REQ_MMU_SYNC KVM_ARCH_REQ(3) #define KVM_REQ_CLOCK_UPDATE KVM_ARCH_REQ(4) +#define KVM_REQ_LOAD_CR3 KVM_ARCH_REQ(5) #define KVM_REQ_EVENT KVM_ARCH_REQ(6) #define KVM_REQ_APF_HALT KVM_ARCH_REQ(7) #define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(8) @@ -76,13 +77,13 @@ #define KVM_REQ_HV_EXIT KVM_ARCH_REQ(21) #define KVM_REQ_HV_STIMER KVM_ARCH_REQ(22) #define KVM_REQ_LOAD_EOI_EXITMAP KVM_ARCH_REQ(23) +#define KVM_REQ_GET_VMCS12_PAGES KVM_ARCH_REQ(24) #define CR0_RESERVED_BITS \ (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) -#define CR3_PCID_INVD BIT_64(63) #define CR4_RESERVED_BITS \ (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ @@ -326,6 +327,16 @@ struct rsvd_bits_validate { u64 bad_mt_xwr; }; +struct kvm_mmu_root_info { + gpa_t cr3; + hpa_t hpa; +}; + +#define KVM_MMU_ROOT_INFO_INVALID \ + ((struct kvm_mmu_root_info) { .cr3 = INVALID_PAGE, .hpa = INVALID_PAGE }) + +#define KVM_MMU_NUM_PREV_ROOTS 3 + /* * x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit, * and 2-level 32-bit). The kvm_mmu structure abstracts the details of the @@ -345,7 +356,7 @@ struct kvm_mmu { struct x86_exception *exception); int (*sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp); - void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva); + void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa); void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, u64 *spte, const void *pte); hpa_t root_hpa; @@ -354,6 +365,7 @@ struct kvm_mmu { u8 shadow_root_level; u8 ept_ad; bool direct_map; + struct kvm_mmu_root_info prev_roots[KVM_MMU_NUM_PREV_ROOTS]; /* * Bitmap; bit set = permission fault @@ -978,6 +990,15 @@ struct kvm_x86_ops { void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); void (*tlb_flush)(struct kvm_vcpu *vcpu, bool invalidate_gpa); + int (*tlb_remote_flush)(struct kvm *kvm); + + /* + * Flush any TLB entries associated with the given GVA. + * Does not need to flush GPA->HPA mappings. + * Can potentially get non-canonical addresses through INVLPGs, which + * the implementation may choose to ignore if appropriate. + */ + void (*tlb_flush_gva)(struct kvm_vcpu *vcpu, gva_t addr); void (*run)(struct kvm_vcpu *vcpu); int (*handle_exit)(struct kvm_vcpu *vcpu); @@ -1090,6 +1111,14 @@ struct kvm_x86_ops { void (*setup_mce)(struct kvm_vcpu *vcpu); + int (*get_nested_state)(struct kvm_vcpu *vcpu, + struct kvm_nested_state __user *user_kvm_nested_state, + unsigned user_data_size); + int (*set_nested_state)(struct kvm_vcpu *vcpu, + struct kvm_nested_state __user *user_kvm_nested_state, + struct kvm_nested_state *kvm_state); + void (*get_vmcs12_pages)(struct kvm_vcpu *vcpu); + int (*smi_allowed)(struct kvm_vcpu *vcpu); int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate); int (*pre_leave_smm)(struct kvm_vcpu *vcpu, u64 smbase); @@ -1122,6 +1151,16 @@ static inline void kvm_arch_free_vm(struct kvm *kvm) return kvm_x86_ops->vm_free(kvm); } +#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB +static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm) +{ + if (kvm_x86_ops->tlb_remote_flush && + !kvm_x86_ops->tlb_remote_flush(kvm)) + return 0; + else + return -ENOTSUPP; +} + int kvm_mmu_module_init(void); void kvm_mmu_module_exit(void); @@ -1273,6 +1312,10 @@ static inline int __kvm_irq_line_state(unsigned long *irq_state, return !!(*irq_state); } +#define KVM_MMU_ROOT_CURRENT BIT(0) +#define KVM_MMU_ROOT_PREVIOUS(i) BIT(1+i) +#define KVM_MMU_ROOTS_ALL (~0UL) + int kvm_pic_set_irq(struct kvm_pic *pic, int irq, int irq_source_id, int level); void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id); @@ -1284,7 +1327,7 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); int kvm_mmu_load(struct kvm_vcpu *vcpu); void kvm_mmu_unload(struct kvm_vcpu *vcpu); void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); -void kvm_mmu_free_roots(struct kvm_vcpu *vcpu); +void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, ulong roots_to_free); gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, struct x86_exception *exception); gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, @@ -1303,7 +1346,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u64 error_code, void *insn, int insn_len); void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); -void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu); +void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid); +void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, bool skip_tlb_flush); void kvm_enable_tdp(void); void kvm_disable_tdp(void); @@ -1418,6 +1462,10 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v); void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event); void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu); +int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, + unsigned long ipi_bitmap_high, int min, + unsigned long icr, int op_64_bit); + u64 kvm_get_arch_capabilities(void); void kvm_define_shared_msr(unsigned index, u32 msr); int kvm_set_shared_msr(unsigned index, u64 val, u64 mask); diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index e1771a1987dd..f37704497d8f 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -347,6 +347,7 @@ void hyperv_reenlightenment_intr(struct pt_regs *regs); void set_hv_tscchange_cb(void (*cb)(void)); void clear_hv_tscchange_cb(void); void hyperv_stop_tsc_emulation(void); +int hyperv_flush_guest_mapping(u64 as); #ifdef CONFIG_X86_64 void hv_apic_init(void); @@ -366,6 +367,7 @@ static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu) { return NULL; } +static inline int hyperv_flush_guest_mapping(u64 as) { return -1; } #endif /* CONFIG_HYPERV */ #ifdef CONFIG_HYPERV_TSCPAGE diff --git a/arch/x86/include/asm/trace/hyperv.h b/arch/x86/include/asm/trace/hyperv.h index 9c0d4b588e3f..2e6245a023ef 100644 --- a/arch/x86/include/asm/trace/hyperv.h +++ b/arch/x86/include/asm/trace/hyperv.h @@ -28,6 +28,20 @@ TRACE_EVENT(hyperv_mmu_flush_tlb_others, __entry->addr, __entry->end) ); +TRACE_EVENT(hyperv_nested_flush_guest_mapping, + TP_PROTO(u64 as, int ret), + TP_ARGS(as, ret), + + TP_STRUCT__entry( + __field(u64, as) + __field(int, ret) + ), + TP_fast_assign(__entry->as = as; + __entry->ret = ret; + ), + TP_printk("address space %llx ret %d", __entry->as, __entry->ret) + ); + TRACE_EVENT(hyperv_send_ipi_mask, TP_PROTO(const struct cpumask *cpus, int vector), diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 95f9107449bf..9527ba5d62da 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -74,6 +74,7 @@ #define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 #define SECONDARY_EXEC_ENABLE_VMFUNC 0x00002000 #define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 +#define SECONDARY_EXEC_ENCLS_EXITING 0x00008000 #define SECONDARY_EXEC_RDSEED_EXITING 0x00010000 #define SECONDARY_EXEC_ENABLE_PML 0x00020000 #define SECONDARY_EXEC_XSAVES 0x00100000 @@ -213,6 +214,8 @@ enum vmcs_field { VMWRITE_BITMAP_HIGH = 0x00002029, XSS_EXIT_BITMAP = 0x0000202C, XSS_EXIT_BITMAP_HIGH = 0x0000202D, + ENCLS_EXITING_BITMAP = 0x0000202E, + ENCLS_EXITING_BITMAP_HIGH = 0x0000202F, TSC_MULTIPLIER = 0x00002032, TSC_MULTIPLIER_HIGH = 0x00002033, GUEST_PHYSICAL_ADDRESS = 0x00002400, diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index c535c2fdea13..86299efa804a 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -378,4 +378,41 @@ struct kvm_sync_regs { #define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0) #define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1) +#define KVM_STATE_NESTED_GUEST_MODE 0x00000001 +#define KVM_STATE_NESTED_RUN_PENDING 0x00000002 + +#define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001 +#define KVM_STATE_NESTED_SMM_VMXON 0x00000002 + +struct kvm_vmx_nested_state { + __u64 vmxon_pa; + __u64 vmcs_pa; + + struct { + __u16 flags; + } smm; +}; + +/* for KVM_CAP_NESTED_STATE */ +struct kvm_nested_state { + /* KVM_STATE_* flags */ + __u16 flags; + + /* 0 for VMX, 1 for SVM. */ + __u16 format; + + /* 128 for SVM, 128 + VMCS size for VMX. */ + __u32 size; + + union { + /* VMXON, VMCS */ + struct kvm_vmx_nested_state vmx; + + /* Pad the header to 128 bytes. */ + __u8 pad[120]; + }; + + __u8 data[0]; +}; + #endif /* _ASM_X86_KVM_H */ diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 0ede697c3961..19980ec1a316 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -28,6 +28,7 @@ #define KVM_FEATURE_PV_UNHALT 7 #define KVM_FEATURE_PV_TLB_FLUSH 9 #define KVM_FEATURE_ASYNC_PF_VMEXIT 10 +#define KVM_FEATURE_PV_SEND_IPI 11 #define KVM_HINTS_REALTIME 0 diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index dde437f5d14f..158ad1483c43 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -108,7 +108,7 @@ static long acpi_processor_ffh_cstate_probe_cpu(void *_cx) cx->type); } snprintf(cx->desc, - ACPI_CX_DESC_LEN, "ACPI FFH INTEL MWAIT 0x%x", + ACPI_CX_DESC_LEN, "ACPI FFH MWAIT 0x%x", cx->address); out: return retval; diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 09aaabb2bbf1..0f471bd93417 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -444,6 +444,98 @@ static void __init sev_map_percpu_data(void) } #ifdef CONFIG_SMP +#define KVM_IPI_CLUSTER_SIZE (2 * BITS_PER_LONG) + +static void __send_ipi_mask(const struct cpumask *mask, int vector) +{ + unsigned long flags; + int cpu, apic_id, icr; + int min = 0, max = 0; +#ifdef CONFIG_X86_64 + __uint128_t ipi_bitmap = 0; +#else + u64 ipi_bitmap = 0; +#endif + + if (cpumask_empty(mask)) + return; + + local_irq_save(flags); + + switch (vector) { + default: + icr = APIC_DM_FIXED | vector; + break; + case NMI_VECTOR: + icr = APIC_DM_NMI; + break; + } + + for_each_cpu(cpu, mask) { + apic_id = per_cpu(x86_cpu_to_apicid, cpu); + if (!ipi_bitmap) { + min = max = apic_id; + } else if (apic_id < min && max - apic_id < KVM_IPI_CLUSTER_SIZE) { + ipi_bitmap <<= min - apic_id; + min = apic_id; + } else if (apic_id < min + KVM_IPI_CLUSTER_SIZE) { + max = apic_id < max ? max : apic_id; + } else { + kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap, + (unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr); + min = max = apic_id; + ipi_bitmap = 0; + } + __set_bit(apic_id - min, (unsigned long *)&ipi_bitmap); + } + + if (ipi_bitmap) { + kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap, + (unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr); + } + + local_irq_restore(flags); +} + +static void kvm_send_ipi_mask(const struct cpumask *mask, int vector) +{ + __send_ipi_mask(mask, vector); +} + +static void kvm_send_ipi_mask_allbutself(const struct cpumask *mask, int vector) +{ + unsigned int this_cpu = smp_processor_id(); + struct cpumask new_mask; + const struct cpumask *local_mask; + + cpumask_copy(&new_mask, mask); + cpumask_clear_cpu(this_cpu, &new_mask); + local_mask = &new_mask; + __send_ipi_mask(local_mask, vector); +} + +static void kvm_send_ipi_allbutself(int vector) +{ + kvm_send_ipi_mask_allbutself(cpu_online_mask, vector); +} + +static void kvm_send_ipi_all(int vector) +{ + __send_ipi_mask(cpu_online_mask, vector); +} + +/* + * Set the IPI entry points + */ +static void kvm_setup_pv_ipi(void) +{ + apic->send_IPI_mask = kvm_send_ipi_mask; + apic->send_IPI_mask_allbutself = kvm_send_ipi_mask_allbutself; + apic->send_IPI_allbutself = kvm_send_ipi_allbutself; + apic->send_IPI_all = kvm_send_ipi_all; + pr_info("KVM setup pv IPIs\n"); +} + static void __init kvm_smp_prepare_cpus(unsigned int max_cpus) { native_smp_prepare_cpus(max_cpus); @@ -611,13 +703,27 @@ static uint32_t __init kvm_detect(void) return kvm_cpuid_base(); } +static void __init kvm_apic_init(void) +{ +#if defined(CONFIG_SMP) + if (kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI)) + kvm_setup_pv_ipi(); +#endif +} + +static void __init kvm_init_platform(void) +{ + kvmclock_init(); + x86_platform.apic_post_init = kvm_apic_init; +} + const __initconst struct hypervisor_x86 x86_hyper_kvm = { .name = "KVM", .detect = kvm_detect, .type = X86_HYPER_KVM, - .init.init_platform = kvmclock_init, .init.guest_late_init = kvm_guest_init, .init.x2apic_available = kvm_para_available, + .init.init_platform = kvm_init_platform, }; static __init int activate_jump_labels(void) @@ -736,6 +842,10 @@ void __init kvm_spinlock_init(void) if (kvm_para_has_hint(KVM_HINTS_REALTIME)) return; + /* Don't use the pvqspinlock code if there is only 1 vCPU. */ + if (num_possible_cpus() == 1) + return; + __pv_init_lock_hash(); pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath; pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock); diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 7e042e3d47fd..7bcfa61375c0 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -621,7 +621,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) | (1 << KVM_FEATURE_PV_UNHALT) | (1 << KVM_FEATURE_PV_TLB_FLUSH) | - (1 << KVM_FEATURE_ASYNC_PF_VMEXIT); + (1 << KVM_FEATURE_ASYNC_PF_VMEXIT) | + (1 << KVM_FEATURE_PV_SEND_IPI); if (sched_info_on()) entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 4c4f4263420c..106482da6388 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4191,7 +4191,7 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt) maxphyaddr = 36; rsvd = rsvd_bits(maxphyaddr, 63); if (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_PCIDE) - rsvd &= ~CR3_PCID_INVD; + rsvd &= ~X86_CR3_PCID_NOFLUSH; } if (new_val & rsvd) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index af8caf965baa..01d209ab5481 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -235,7 +235,7 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic, struct kvm_vcpu *vcpu = synic_to_vcpu(synic); int ret; - if (!synic->active) + if (!synic->active && !host) return 1; trace_kvm_hv_synic_set_msr(vcpu->vcpu_id, msr, data, host); @@ -295,11 +295,12 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic, return ret; } -static int synic_get_msr(struct kvm_vcpu_hv_synic *synic, u32 msr, u64 *pdata) +static int synic_get_msr(struct kvm_vcpu_hv_synic *synic, u32 msr, u64 *pdata, + bool host) { int ret; - if (!synic->active) + if (!synic->active && !host) return 1; ret = 0; @@ -1014,6 +1015,11 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data, case HV_X64_MSR_TSC_EMULATION_STATUS: hv->hv_tsc_emulation_status = data; break; + case HV_X64_MSR_TIME_REF_COUNT: + /* read-only, but still ignore it if host-initiated */ + if (!host) + return 1; + break; default: vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n", msr, data); @@ -1101,6 +1107,12 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) return stimer_set_count(vcpu_to_stimer(vcpu, timer_index), data, host); } + case HV_X64_MSR_TSC_FREQUENCY: + case HV_X64_MSR_APIC_FREQUENCY: + /* read-only, but still ignore it if host-initiated */ + if (!host) + return 1; + break; default: vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n", msr, data); @@ -1156,7 +1168,8 @@ static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) return 0; } -static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) +static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, + bool host) { u64 data = 0; struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv; @@ -1183,7 +1196,7 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case HV_X64_MSR_SIMP: case HV_X64_MSR_EOM: case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15: - return synic_get_msr(vcpu_to_synic(vcpu), msr, pdata); + return synic_get_msr(vcpu_to_synic(vcpu), msr, pdata, host); case HV_X64_MSR_STIMER0_CONFIG: case HV_X64_MSR_STIMER1_CONFIG: case HV_X64_MSR_STIMER2_CONFIG: @@ -1229,7 +1242,7 @@ int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) return kvm_hv_set_msr(vcpu, msr, data, host); } -int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) +int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host) { if (kvm_hv_msr_partition_wide(msr)) { int r; @@ -1239,7 +1252,7 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock); return r; } else - return kvm_hv_get_msr(vcpu, msr, pdata); + return kvm_hv_get_msr(vcpu, msr, pdata, host); } static __always_inline int get_sparse_bank_no(u64 valid_bank_mask, int bank_no) diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index 837465d69c6d..d6aa969e20f1 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -48,7 +48,7 @@ static inline struct kvm_vcpu *synic_to_vcpu(struct kvm_vcpu_hv_synic *synic) } int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host); -int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); +int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host); bool kvm_hv_hypercall_enabled(struct kvm *kvm); int kvm_hv_hypercall(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index d536d457517b..0cefba28c864 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -547,6 +547,46 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, irq->level, irq->trig_mode, dest_map); } +int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, + unsigned long ipi_bitmap_high, int min, + unsigned long icr, int op_64_bit) +{ + int i; + struct kvm_apic_map *map; + struct kvm_vcpu *vcpu; + struct kvm_lapic_irq irq = {0}; + int cluster_size = op_64_bit ? 64 : 32; + int count = 0; + + irq.vector = icr & APIC_VECTOR_MASK; + irq.delivery_mode = icr & APIC_MODE_MASK; + irq.level = (icr & APIC_INT_ASSERT) != 0; + irq.trig_mode = icr & APIC_INT_LEVELTRIG; + + if (icr & APIC_DEST_MASK) + return -KVM_EINVAL; + if (icr & APIC_SHORT_MASK) + return -KVM_EINVAL; + + rcu_read_lock(); + map = rcu_dereference(kvm->arch.apic_map); + + /* Bits above cluster_size are masked in the caller. */ + for_each_set_bit(i, &ipi_bitmap_low, BITS_PER_LONG) { + vcpu = map->phys_map[min + i]->vcpu; + count += kvm_apic_set_irq(vcpu, &irq, NULL); + } + + min += cluster_size; + for_each_set_bit(i, &ipi_bitmap_high, BITS_PER_LONG) { + vcpu = map->phys_map[min + i]->vcpu; + count += kvm_apic_set_irq(vcpu, &irq, NULL); + } + + rcu_read_unlock(); + return count; +} + static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val) { diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index a44e568363a4..a282321329b5 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -178,7 +178,24 @@ struct kvm_shadow_walk_iterator { unsigned index; }; -#define for_each_shadow_entry(_vcpu, _addr, _walker) \ +static const union kvm_mmu_page_role mmu_base_role_mask = { + .cr0_wp = 1, + .cr4_pae = 1, + .nxe = 1, + .smep_andnot_wp = 1, + .smap_andnot_wp = 1, + .smm = 1, + .guest_mode = 1, + .ad_disabled = 1, +}; + +#define for_each_shadow_entry_using_root(_vcpu, _root, _addr, _walker) \ + for (shadow_walk_init_using_root(&(_walker), (_vcpu), \ + (_root), (_addr)); \ + shadow_walk_okay(&(_walker)); \ + shadow_walk_next(&(_walker))) + +#define for_each_shadow_entry(_vcpu, _addr, _walker) \ for (shadow_walk_init(&(_walker), _vcpu, _addr); \ shadow_walk_okay(&(_walker)); \ shadow_walk_next(&(_walker))) @@ -221,7 +238,20 @@ static const u64 shadow_acc_track_saved_bits_mask = PT64_EPT_READABLE_MASK | PT64_EPT_EXECUTABLE_MASK; static const u64 shadow_acc_track_saved_bits_shift = PT64_SECOND_AVAIL_BITS_SHIFT; +/* + * This mask must be set on all non-zero Non-Present or Reserved SPTEs in order + * to guard against L1TF attacks. + */ +static u64 __read_mostly shadow_nonpresent_or_rsvd_mask; + +/* + * The number of high-order 1 bits to use in the mask above. + */ +static const u64 shadow_nonpresent_or_rsvd_mask_len = 5; + static void mmu_spte_set(u64 *sptep, u64 spte); +static union kvm_mmu_page_role +kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu); void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value) { @@ -308,9 +338,13 @@ static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn, { unsigned int gen = kvm_current_mmio_generation(vcpu); u64 mask = generation_mmio_spte_mask(gen); + u64 gpa = gfn << PAGE_SHIFT; access &= ACC_WRITE_MASK | ACC_USER_MASK; - mask |= shadow_mmio_value | access | gfn << PAGE_SHIFT; + mask |= shadow_mmio_value | access; + mask |= gpa | shadow_nonpresent_or_rsvd_mask; + mask |= (gpa & shadow_nonpresent_or_rsvd_mask) + << shadow_nonpresent_or_rsvd_mask_len; trace_mark_mmio_spte(sptep, gfn, access, gen); mmu_spte_set(sptep, mask); @@ -323,8 +357,14 @@ static bool is_mmio_spte(u64 spte) static gfn_t get_mmio_spte_gfn(u64 spte) { - u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask; - return (spte & ~mask) >> PAGE_SHIFT; + u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask | + shadow_nonpresent_or_rsvd_mask; + u64 gpa = spte & ~mask; + + gpa |= (spte >> shadow_nonpresent_or_rsvd_mask_len) + & shadow_nonpresent_or_rsvd_mask; + + return gpa >> PAGE_SHIFT; } static unsigned get_mmio_spte_access(u64 spte) @@ -381,7 +421,7 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, } EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); -static void kvm_mmu_clear_all_pte_masks(void) +static void kvm_mmu_reset_all_pte_masks(void) { shadow_user_mask = 0; shadow_accessed_mask = 0; @@ -391,6 +431,18 @@ static void kvm_mmu_clear_all_pte_masks(void) shadow_mmio_mask = 0; shadow_present_mask = 0; shadow_acc_track_mask = 0; + + /* + * If the CPU has 46 or less physical address bits, then set an + * appropriate mask to guard against L1TF attacks. Otherwise, it is + * assumed that the CPU is not vulnerable to L1TF. + */ + if (boot_cpu_data.x86_phys_bits < + 52 - shadow_nonpresent_or_rsvd_mask_len) + shadow_nonpresent_or_rsvd_mask = + rsvd_bits(boot_cpu_data.x86_phys_bits - + shadow_nonpresent_or_rsvd_mask_len, + boot_cpu_data.x86_phys_bits - 1); } static int is_cpuid_PSE36(void) @@ -1986,7 +2038,7 @@ static int nonpaging_sync_page(struct kvm_vcpu *vcpu, return 0; } -static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva) +static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root) { } @@ -2117,12 +2169,8 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm, static bool __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, struct list_head *invalid_list) { - if (sp->role.cr4_pae != !!is_pae(vcpu)) { - kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list); - return false; - } - - if (vcpu->arch.mmu.sync_page(vcpu, sp) == 0) { + if (sp->role.cr4_pae != !!is_pae(vcpu) + || vcpu->arch.mmu.sync_page(vcpu, sp) == 0) { kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list); return false; } @@ -2392,11 +2440,12 @@ out: return sp; } -static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator, - struct kvm_vcpu *vcpu, u64 addr) +static void shadow_walk_init_using_root(struct kvm_shadow_walk_iterator *iterator, + struct kvm_vcpu *vcpu, hpa_t root, + u64 addr) { iterator->addr = addr; - iterator->shadow_addr = vcpu->arch.mmu.root_hpa; + iterator->shadow_addr = root; iterator->level = vcpu->arch.mmu.shadow_root_level; if (iterator->level == PT64_ROOT_4LEVEL && @@ -2405,6 +2454,12 @@ static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator, --iterator->level; if (iterator->level == PT32E_ROOT_LEVEL) { + /* + * prev_root is currently only used for 64-bit hosts. So only + * the active root_hpa is valid here. + */ + BUG_ON(root != vcpu->arch.mmu.root_hpa); + iterator->shadow_addr = vcpu->arch.mmu.pae_root[(addr >> 30) & 3]; iterator->shadow_addr &= PT64_BASE_ADDR_MASK; @@ -2414,6 +2469,13 @@ static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator, } } +static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator, + struct kvm_vcpu *vcpu, u64 addr) +{ + shadow_walk_init_using_root(iterator, vcpu, vcpu->arch.mmu.root_hpa, + addr); +} + static bool shadow_walk_okay(struct kvm_shadow_walk_iterator *iterator) { if (iterator->level < PT_PAGE_TABLE_LEVEL) @@ -2702,6 +2764,45 @@ static bool mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_unsync_page(vcpu, sp); } + /* + * We need to ensure that the marking of unsync pages is visible + * before the SPTE is updated to allow writes because + * kvm_mmu_sync_roots() checks the unsync flags without holding + * the MMU lock and so can race with this. If the SPTE was updated + * before the page had been marked as unsync-ed, something like the + * following could happen: + * + * CPU 1 CPU 2 + * --------------------------------------------------------------------- + * 1.2 Host updates SPTE + * to be writable + * 2.1 Guest writes a GPTE for GVA X. + * (GPTE being in the guest page table shadowed + * by the SP from CPU 1.) + * This reads SPTE during the page table walk. + * Since SPTE.W is read as 1, there is no + * fault. + * + * 2.2 Guest issues TLB flush. + * That causes a VM Exit. + * + * 2.3 kvm_mmu_sync_pages() reads sp->unsync. + * Since it is false, so it just returns. + * + * 2.4 Guest accesses GVA X. + * Since the mapping in the SP was not updated, + * so the old mapping for GVA X incorrectly + * gets used. + * 1.1 Host marks SP + * as unsync + * (sp->unsync = true) + * + * The write barrier below ensures that 1.1 happens before 1.2 and thus + * the situation in 2.4 does not arise. The implicit barrier in 2.2 + * pairs with this write barrier. + */ + smp_wmb(); + return false; } @@ -2724,6 +2825,10 @@ static bool kvm_is_mmio_pfn(kvm_pfn_t pfn) return true; } +/* Bits which may be returned by set_spte() */ +#define SET_SPTE_WRITE_PROTECTED_PT BIT(0) +#define SET_SPTE_NEED_REMOTE_TLB_FLUSH BIT(1) + static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access, int level, gfn_t gfn, kvm_pfn_t pfn, bool speculative, @@ -2800,7 +2905,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, if (mmu_need_write_protect(vcpu, gfn, can_unsync)) { pgprintk("%s: found shadow page for %llx, marking ro\n", __func__, gfn); - ret = 1; + ret |= SET_SPTE_WRITE_PROTECTED_PT; pte_access &= ~ACC_WRITE_MASK; spte &= ~(PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE); } @@ -2816,7 +2921,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, set_pte: if (mmu_spte_update(sptep, spte)) - kvm_flush_remote_tlbs(vcpu->kvm); + ret |= SET_SPTE_NEED_REMOTE_TLB_FLUSH; done: return ret; } @@ -2827,7 +2932,9 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access, { int was_rmapped = 0; int rmap_count; + int set_spte_ret; int ret = RET_PF_RETRY; + bool flush = false; pgprintk("%s: spte %llx write_fault %d gfn %llx\n", __func__, *sptep, write_fault, gfn); @@ -2844,22 +2951,25 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access, child = page_header(pte & PT64_BASE_ADDR_MASK); drop_parent_pte(child, sptep); - kvm_flush_remote_tlbs(vcpu->kvm); + flush = true; } else if (pfn != spte_to_pfn(*sptep)) { pgprintk("hfn old %llx new %llx\n", spte_to_pfn(*sptep), pfn); drop_spte(vcpu->kvm, sptep); - kvm_flush_remote_tlbs(vcpu->kvm); + flush = true; } else was_rmapped = 1; } - if (set_spte(vcpu, sptep, pte_access, level, gfn, pfn, speculative, - true, host_writable)) { + set_spte_ret = set_spte(vcpu, sptep, pte_access, level, gfn, pfn, + speculative, true, host_writable); + if (set_spte_ret & SET_SPTE_WRITE_PROTECTED_PT) { if (write_fault) ret = RET_PF_EMULATE; kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); } + if (set_spte_ret & SET_SPTE_NEED_REMOTE_TLB_FLUSH || flush) + kvm_flush_remote_tlbs(vcpu->kvm); if (unlikely(is_mmio_spte(*sptep))) ret = RET_PF_EMULATE; @@ -3358,26 +3468,47 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa, *root_hpa = INVALID_PAGE; } -void kvm_mmu_free_roots(struct kvm_vcpu *vcpu) +/* roots_to_free must be some combination of the KVM_MMU_ROOT_* flags */ +void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, ulong roots_to_free) { int i; LIST_HEAD(invalid_list); struct kvm_mmu *mmu = &vcpu->arch.mmu; + bool free_active_root = roots_to_free & KVM_MMU_ROOT_CURRENT; - if (!VALID_PAGE(mmu->root_hpa)) - return; + BUILD_BUG_ON(KVM_MMU_NUM_PREV_ROOTS >= BITS_PER_LONG); + + /* Before acquiring the MMU lock, see if we need to do any real work. */ + if (!(free_active_root && VALID_PAGE(mmu->root_hpa))) { + for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) + if ((roots_to_free & KVM_MMU_ROOT_PREVIOUS(i)) && + VALID_PAGE(mmu->prev_roots[i].hpa)) + break; + + if (i == KVM_MMU_NUM_PREV_ROOTS) + return; + } spin_lock(&vcpu->kvm->mmu_lock); - if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL && - (mmu->root_level >= PT64_ROOT_4LEVEL || mmu->direct_map)) { - mmu_free_root_page(vcpu->kvm, &mmu->root_hpa, &invalid_list); - } else { - for (i = 0; i < 4; ++i) - if (mmu->pae_root[i] != 0) - mmu_free_root_page(vcpu->kvm, &mmu->pae_root[i], - &invalid_list); - mmu->root_hpa = INVALID_PAGE; + for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) + if (roots_to_free & KVM_MMU_ROOT_PREVIOUS(i)) + mmu_free_root_page(vcpu->kvm, &mmu->prev_roots[i].hpa, + &invalid_list); + + if (free_active_root) { + if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL && + (mmu->root_level >= PT64_ROOT_4LEVEL || mmu->direct_map)) { + mmu_free_root_page(vcpu->kvm, &mmu->root_hpa, + &invalid_list); + } else { + for (i = 0; i < 4; ++i) + if (mmu->pae_root[i] != 0) + mmu_free_root_page(vcpu->kvm, + &mmu->pae_root[i], + &invalid_list); + mmu->root_hpa = INVALID_PAGE; + } } kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); @@ -3546,7 +3677,7 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu) return mmu_alloc_shadow_roots(vcpu); } -static void mmu_sync_roots(struct kvm_vcpu *vcpu) +void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) { int i; struct kvm_mmu_page *sp; @@ -3558,14 +3689,39 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu) return; vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY); - kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC); + if (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL) { hpa_t root = vcpu->arch.mmu.root_hpa; + sp = page_header(root); + + /* + * Even if another CPU was marking the SP as unsync-ed + * simultaneously, any guest page table changes are not + * guaranteed to be visible anyway until this VCPU issues a TLB + * flush strictly after those changes are made. We only need to + * ensure that the other CPU sets these flags before any actual + * changes to the page tables are made. The comments in + * mmu_need_write_protect() describe what could go wrong if this + * requirement isn't satisfied. + */ + if (!smp_load_acquire(&sp->unsync) && + !smp_load_acquire(&sp->unsync_children)) + return; + + spin_lock(&vcpu->kvm->mmu_lock); + kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC); + mmu_sync_children(vcpu, sp); + kvm_mmu_audit(vcpu, AUDIT_POST_SYNC); + spin_unlock(&vcpu->kvm->mmu_lock); return; } + + spin_lock(&vcpu->kvm->mmu_lock); + kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC); + for (i = 0; i < 4; ++i) { hpa_t root = vcpu->arch.mmu.pae_root[i]; @@ -3575,13 +3731,8 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu) mmu_sync_children(vcpu, sp); } } - kvm_mmu_audit(vcpu, AUDIT_POST_SYNC); -} -void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) -{ - spin_lock(&vcpu->kvm->mmu_lock); - mmu_sync_roots(vcpu); + kvm_mmu_audit(vcpu, AUDIT_POST_SYNC); spin_unlock(&vcpu->kvm->mmu_lock); } EXPORT_SYMBOL_GPL(kvm_mmu_sync_roots); @@ -3948,16 +4099,107 @@ static void nonpaging_init_context(struct kvm_vcpu *vcpu, context->update_pte = nonpaging_update_pte; context->root_level = 0; context->shadow_root_level = PT32E_ROOT_LEVEL; - context->root_hpa = INVALID_PAGE; context->direct_map = true; context->nx = false; } -void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu) +/* + * Find out if a previously cached root matching the new CR3/role is available. + * The current root is also inserted into the cache. + * If a matching root was found, it is assigned to kvm_mmu->root_hpa and true is + * returned. + * Otherwise, the LRU root from the cache is assigned to kvm_mmu->root_hpa and + * false is returned. This root should now be freed by the caller. + */ +static bool cached_root_available(struct kvm_vcpu *vcpu, gpa_t new_cr3, + union kvm_mmu_page_role new_role) +{ + uint i; + struct kvm_mmu_root_info root; + struct kvm_mmu *mmu = &vcpu->arch.mmu; + + root.cr3 = mmu->get_cr3(vcpu); + root.hpa = mmu->root_hpa; + + for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) { + swap(root, mmu->prev_roots[i]); + + if (new_cr3 == root.cr3 && VALID_PAGE(root.hpa) && + page_header(root.hpa) != NULL && + new_role.word == page_header(root.hpa)->role.word) + break; + } + + mmu->root_hpa = root.hpa; + + return i < KVM_MMU_NUM_PREV_ROOTS; +} + +static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3, + union kvm_mmu_page_role new_role, + bool skip_tlb_flush) { - kvm_mmu_free_roots(vcpu); + struct kvm_mmu *mmu = &vcpu->arch.mmu; + + /* + * For now, limit the fast switch to 64-bit hosts+VMs in order to avoid + * having to deal with PDPTEs. We may add support for 32-bit hosts/VMs + * later if necessary. + */ + if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL && + mmu->root_level >= PT64_ROOT_4LEVEL) { + if (mmu_check_root(vcpu, new_cr3 >> PAGE_SHIFT)) + return false; + + if (cached_root_available(vcpu, new_cr3, new_role)) { + /* + * It is possible that the cached previous root page is + * obsolete because of a change in the MMU + * generation number. However, that is accompanied by + * KVM_REQ_MMU_RELOAD, which will free the root that we + * have set here and allocate a new one. + */ + + kvm_make_request(KVM_REQ_LOAD_CR3, vcpu); + if (!skip_tlb_flush) { + kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); + kvm_x86_ops->tlb_flush(vcpu, true); + } + + /* + * The last MMIO access's GVA and GPA are cached in the + * VCPU. When switching to a new CR3, that GVA->GPA + * mapping may no longer be valid. So clear any cached + * MMIO info even when we don't need to sync the shadow + * page tables. + */ + vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY); + + __clear_sp_write_flooding_count( + page_header(mmu->root_hpa)); + + return true; + } + } + + return false; } +static void __kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, + union kvm_mmu_page_role new_role, + bool skip_tlb_flush) +{ + if (!fast_cr3_switch(vcpu, new_cr3, new_role, skip_tlb_flush)) + kvm_mmu_free_roots(vcpu, KVM_MMU_ROOT_CURRENT); +} + +void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, bool skip_tlb_flush) +{ + __kvm_mmu_new_cr3(vcpu, new_cr3, kvm_mmu_calc_root_page_role(vcpu), + skip_tlb_flush); +} +EXPORT_SYMBOL_GPL(kvm_mmu_new_cr3); + static unsigned long get_cr3(struct kvm_vcpu *vcpu) { return kvm_read_cr3(vcpu); @@ -4432,7 +4674,6 @@ static void paging64_init_context_common(struct kvm_vcpu *vcpu, context->invlpg = paging64_invlpg; context->update_pte = paging64_update_pte; context->shadow_root_level = level; - context->root_hpa = INVALID_PAGE; context->direct_map = false; } @@ -4462,7 +4703,6 @@ static void paging32_init_context(struct kvm_vcpu *vcpu, context->invlpg = paging32_invlpg; context->update_pte = paging32_update_pte; context->shadow_root_level = PT32E_ROOT_LEVEL; - context->root_hpa = INVALID_PAGE; context->direct_map = false; } @@ -4472,20 +4712,32 @@ static void paging32E_init_context(struct kvm_vcpu *vcpu, paging64_init_context_common(vcpu, context, PT32E_ROOT_LEVEL); } +static union kvm_mmu_page_role +kvm_calc_tdp_mmu_root_page_role(struct kvm_vcpu *vcpu) +{ + union kvm_mmu_page_role role = {0}; + + role.guest_mode = is_guest_mode(vcpu); + role.smm = is_smm(vcpu); + role.ad_disabled = (shadow_accessed_mask == 0); + role.level = kvm_x86_ops->get_tdp_level(vcpu); + role.direct = true; + role.access = ACC_ALL; + + return role; +} + static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) { struct kvm_mmu *context = &vcpu->arch.mmu; - context->base_role.word = 0; - context->base_role.guest_mode = is_guest_mode(vcpu); - context->base_role.smm = is_smm(vcpu); - context->base_role.ad_disabled = (shadow_accessed_mask == 0); + context->base_role.word = mmu_base_role_mask.word & + kvm_calc_tdp_mmu_root_page_role(vcpu).word; context->page_fault = tdp_page_fault; context->sync_page = nonpaging_sync_page; context->invlpg = nonpaging_invlpg; context->update_pte = nonpaging_update_pte; context->shadow_root_level = kvm_x86_ops->get_tdp_level(vcpu); - context->root_hpa = INVALID_PAGE; context->direct_map = true; context->set_cr3 = kvm_x86_ops->set_tdp_cr3; context->get_cr3 = get_cr3; @@ -4520,13 +4772,36 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) reset_tdp_shadow_zero_bits_mask(vcpu, context); } -void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu) +static union kvm_mmu_page_role +kvm_calc_shadow_mmu_root_page_role(struct kvm_vcpu *vcpu) { + union kvm_mmu_page_role role = {0}; bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP); bool smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP); - struct kvm_mmu *context = &vcpu->arch.mmu; - MMU_WARN_ON(VALID_PAGE(context->root_hpa)); + role.nxe = is_nx(vcpu); + role.cr4_pae = !!is_pae(vcpu); + role.cr0_wp = is_write_protection(vcpu); + role.smep_andnot_wp = smep && !is_write_protection(vcpu); + role.smap_andnot_wp = smap && !is_write_protection(vcpu); + role.guest_mode = is_guest_mode(vcpu); + role.smm = is_smm(vcpu); + role.direct = !is_paging(vcpu); + role.access = ACC_ALL; + + if (!is_long_mode(vcpu)) + role.level = PT32E_ROOT_LEVEL; + else if (is_la57_mode(vcpu)) + role.level = PT64_ROOT_5LEVEL; + else + role.level = PT64_ROOT_4LEVEL; + + return role; +} + +void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu) +{ + struct kvm_mmu *context = &vcpu->arch.mmu; if (!is_paging(vcpu)) nonpaging_init_context(vcpu, context); @@ -4537,26 +4812,34 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu) else paging32_init_context(vcpu, context); - context->base_role.nxe = is_nx(vcpu); - context->base_role.cr4_pae = !!is_pae(vcpu); - context->base_role.cr0_wp = is_write_protection(vcpu); - context->base_role.smep_andnot_wp - = smep && !is_write_protection(vcpu); - context->base_role.smap_andnot_wp - = smap && !is_write_protection(vcpu); - context->base_role.guest_mode = is_guest_mode(vcpu); - context->base_role.smm = is_smm(vcpu); + context->base_role.word = mmu_base_role_mask.word & + kvm_calc_shadow_mmu_root_page_role(vcpu).word; reset_shadow_zero_bits_mask(vcpu, context); } EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu); +static union kvm_mmu_page_role +kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty) +{ + union kvm_mmu_page_role role = vcpu->arch.mmu.base_role; + + role.level = PT64_ROOT_4LEVEL; + role.direct = false; + role.ad_disabled = !accessed_dirty; + role.guest_mode = true; + role.access = ACC_ALL; + + return role; +} + void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, - bool accessed_dirty) + bool accessed_dirty, gpa_t new_eptp) { struct kvm_mmu *context = &vcpu->arch.mmu; + union kvm_mmu_page_role root_page_role = + kvm_calc_shadow_ept_root_page_role(vcpu, accessed_dirty); - MMU_WARN_ON(VALID_PAGE(context->root_hpa)); - + __kvm_mmu_new_cr3(vcpu, new_eptp, root_page_role, false); context->shadow_root_level = PT64_ROOT_4LEVEL; context->nx = true; @@ -4567,10 +4850,8 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, context->invlpg = ept_invlpg; context->update_pte = ept_update_pte; context->root_level = PT64_ROOT_4LEVEL; - context->root_hpa = INVALID_PAGE; context->direct_map = false; - context->base_role.ad_disabled = !accessed_dirty; - context->base_role.guest_mode = 1; + context->base_role.word = root_page_role.word & mmu_base_role_mask.word; update_permission_bitmask(vcpu, context, true); update_pkru_bitmask(vcpu, context, true); update_last_nonleaf_level(vcpu, context); @@ -4633,8 +4914,17 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu) update_last_nonleaf_level(vcpu, g_context); } -static void init_kvm_mmu(struct kvm_vcpu *vcpu) +void kvm_init_mmu(struct kvm_vcpu *vcpu, bool reset_roots) { + if (reset_roots) { + uint i; + + vcpu->arch.mmu.root_hpa = INVALID_PAGE; + + for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) + vcpu->arch.mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID; + } + if (mmu_is_nested(vcpu)) init_kvm_nested_mmu(vcpu); else if (tdp_enabled) @@ -4642,11 +4932,21 @@ static void init_kvm_mmu(struct kvm_vcpu *vcpu) else init_kvm_softmmu(vcpu); } +EXPORT_SYMBOL_GPL(kvm_init_mmu); + +static union kvm_mmu_page_role +kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu) +{ + if (tdp_enabled) + return kvm_calc_tdp_mmu_root_page_role(vcpu); + else + return kvm_calc_shadow_mmu_root_page_role(vcpu); +} void kvm_mmu_reset_context(struct kvm_vcpu *vcpu) { kvm_mmu_unload(vcpu); - init_kvm_mmu(vcpu); + kvm_init_mmu(vcpu, true); } EXPORT_SYMBOL_GPL(kvm_mmu_reset_context); @@ -4661,8 +4961,8 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu) kvm_mmu_sync_roots(vcpu); if (r) goto out; - /* set_cr3() should ensure TLB has been flushed */ - vcpu->arch.mmu.set_cr3(vcpu, vcpu->arch.mmu.root_hpa); + kvm_mmu_load_cr3(vcpu); + kvm_x86_ops->tlb_flush(vcpu, true); out: return r; } @@ -4670,7 +4970,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_load); void kvm_mmu_unload(struct kvm_vcpu *vcpu) { - kvm_mmu_free_roots(vcpu); + kvm_mmu_free_roots(vcpu, KVM_MMU_ROOTS_ALL); WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa)); } EXPORT_SYMBOL_GPL(kvm_mmu_unload); @@ -4823,16 +5123,6 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, u64 entry, gentry, *spte; int npte; bool remote_flush, local_flush; - union kvm_mmu_page_role mask = { }; - - mask.cr0_wp = 1; - mask.cr4_pae = 1; - mask.nxe = 1; - mask.smep_andnot_wp = 1; - mask.smap_andnot_wp = 1; - mask.smm = 1; - mask.guest_mode = 1; - mask.ad_disabled = 1; /* * If we don't have indirect shadow pages, it means no page is @@ -4876,7 +5166,7 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, mmu_page_zap_pte(vcpu->kvm, sp, spte); if (gentry && !((sp->role.word ^ vcpu->arch.mmu.base_role.word) - & mask.word) && rmap_can_add(vcpu)) + & mmu_base_role_mask.word) && rmap_can_add(vcpu)) mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); if (need_remote_flush(entry, *spte)) remote_flush = true; @@ -5001,12 +5291,67 @@ EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva) { - vcpu->arch.mmu.invlpg(vcpu, gva); - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); + struct kvm_mmu *mmu = &vcpu->arch.mmu; + int i; + + /* INVLPG on a * non-canonical address is a NOP according to the SDM. */ + if (is_noncanonical_address(gva, vcpu)) + return; + + mmu->invlpg(vcpu, gva, mmu->root_hpa); + + /* + * INVLPG is required to invalidate any global mappings for the VA, + * irrespective of PCID. Since it would take us roughly similar amount + * of work to determine whether any of the prev_root mappings of the VA + * is marked global, or to just sync it blindly, so we might as well + * just always sync it. + * + * Mappings not reachable via the current cr3 or the prev_roots will be + * synced when switching to that cr3, so nothing needs to be done here + * for them. + */ + for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) + if (VALID_PAGE(mmu->prev_roots[i].hpa)) + mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa); + + kvm_x86_ops->tlb_flush_gva(vcpu, gva); ++vcpu->stat.invlpg; } EXPORT_SYMBOL_GPL(kvm_mmu_invlpg); +void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid) +{ + struct kvm_mmu *mmu = &vcpu->arch.mmu; + bool tlb_flush = false; + uint i; + + if (pcid == kvm_get_active_pcid(vcpu)) { + mmu->invlpg(vcpu, gva, mmu->root_hpa); + tlb_flush = true; + } + + for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) { + if (VALID_PAGE(mmu->prev_roots[i].hpa) && + pcid == kvm_get_pcid(vcpu, mmu->prev_roots[i].cr3)) { + mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa); + tlb_flush = true; + } + } + + if (tlb_flush) + kvm_x86_ops->tlb_flush_gva(vcpu, gva); + + ++vcpu->stat.invlpg; + + /* + * Mappings not reachable via the current cr3 or the prev_roots will be + * synced when switching to that cr3, so nothing needs to be done here + * for them. + */ +} +EXPORT_SYMBOL_GPL(kvm_mmu_invpcid_gva); + void kvm_enable_tdp(void) { tdp_enabled = true; @@ -5030,6 +5375,9 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu) struct page *page; int i; + if (tdp_enabled) + return 0; + /* * When emulating 32-bit mode, cr3 is only 32 bits even on x86_64. * Therefore we need to allocate shadow page tables in the first @@ -5048,11 +5396,16 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu) int kvm_mmu_create(struct kvm_vcpu *vcpu) { + uint i; + vcpu->arch.walk_mmu = &vcpu->arch.mmu; vcpu->arch.mmu.root_hpa = INVALID_PAGE; vcpu->arch.mmu.translate_gpa = translate_gpa; vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa; + for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) + vcpu->arch.mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID; + return alloc_mmu_pages(vcpu); } @@ -5060,7 +5413,7 @@ void kvm_mmu_setup(struct kvm_vcpu *vcpu) { MMU_WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa)); - init_kvm_mmu(vcpu); + kvm_init_mmu(vcpu, true); } static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm, @@ -5500,7 +5853,7 @@ int kvm_mmu_module_init(void) { int ret = -ENOMEM; - kvm_mmu_clear_all_pte_masks(); + kvm_mmu_reset_all_pte_masks(); pte_list_desc_cache = kmem_cache_create("pte_list_desc", sizeof(struct pte_list_desc), diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 5b408c0ad612..1fab69c0b2f3 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -61,9 +61,10 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value); void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context); +void kvm_init_mmu(struct kvm_vcpu *vcpu, bool reset_roots); void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu); void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, - bool accessed_dirty); + bool accessed_dirty, gpa_t new_eptp); bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu); int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code, u64 fault_address, char *insn, int insn_len); @@ -85,6 +86,27 @@ static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu) return kvm_mmu_load(vcpu); } +static inline unsigned long kvm_get_pcid(struct kvm_vcpu *vcpu, gpa_t cr3) +{ + BUILD_BUG_ON((X86_CR3_PCID_MASK & PAGE_MASK) != 0); + + return kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE) + ? cr3 & X86_CR3_PCID_MASK + : 0; +} + +static inline unsigned long kvm_get_active_pcid(struct kvm_vcpu *vcpu) +{ + return kvm_get_pcid(vcpu, kvm_read_cr3(vcpu)); +} + +static inline void kvm_mmu_load_cr3(struct kvm_vcpu *vcpu) +{ + if (VALID_PAGE(vcpu->arch.mmu.root_hpa)) + vcpu->arch.mmu.set_cr3(vcpu, vcpu->arch.mmu.root_hpa | + kvm_get_active_pcid(vcpu)); +} + /* * Currently, we have two sorts of write-protection, a) the first one * write-protects guest page to sync the guest modification, b) another one is diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 6288e9d7068e..14ffd973df54 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -181,7 +181,7 @@ no_present: * set bit 0 if execute only is supported. Here, we repurpose ACC_USER_MASK * to signify readability since it isn't used in the EPT case */ -static inline unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, u64 gpte) +static inline unsigned FNAME(gpte_access)(u64 gpte) { unsigned access; #if PTTYPE == PTTYPE_EPT @@ -394,8 +394,8 @@ retry_walk: accessed_dirty = have_ad ? pte_access & PT_GUEST_ACCESSED_MASK : 0; /* Convert to ACC_*_MASK flags for struct guest_walker. */ - walker->pt_access = FNAME(gpte_access)(vcpu, pt_access ^ walk_nx_mask); - walker->pte_access = FNAME(gpte_access)(vcpu, pte_access ^ walk_nx_mask); + walker->pt_access = FNAME(gpte_access)(pt_access ^ walk_nx_mask); + walker->pte_access = FNAME(gpte_access)(pte_access ^ walk_nx_mask); errcode = permission_fault(vcpu, mmu, walker->pte_access, pte_pkey, access); if (unlikely(errcode)) goto error; @@ -508,7 +508,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); gfn = gpte_to_gfn(gpte); - pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); + pte_access = sp->role.access & FNAME(gpte_access)(gpte); FNAME(protect_clean_gpte)(&vcpu->arch.mmu, &pte_access, gpte); pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn, no_dirty_log && (pte_access & ACC_WRITE_MASK)); @@ -856,7 +856,7 @@ static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp) return gfn_to_gpa(sp->gfn) + offset * sizeof(pt_element_t); } -static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) +static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa) { struct kvm_shadow_walk_iterator iterator; struct kvm_mmu_page *sp; @@ -871,13 +871,13 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) */ mmu_topup_memory_caches(vcpu); - if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) { + if (!VALID_PAGE(root_hpa)) { WARN_ON(1); return; } spin_lock(&vcpu->kvm->mmu_lock); - for_each_shadow_entry(vcpu, gva, iterator) { + for_each_shadow_entry_using_root(vcpu, root_hpa, gva, iterator) { level = iterator.level; sptep = iterator.sptep; @@ -968,6 +968,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) int i, nr_present = 0; bool host_writable; gpa_t first_pte_gpa; + int set_spte_ret = 0; /* direct kvm_mmu_page can not be unsync. */ BUG_ON(sp->role.direct); @@ -1002,7 +1003,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) gfn = gpte_to_gfn(gpte); pte_access = sp->role.access; - pte_access &= FNAME(gpte_access)(vcpu, gpte); + pte_access &= FNAME(gpte_access)(gpte); FNAME(protect_clean_gpte)(&vcpu->arch.mmu, &pte_access, gpte); if (sync_mmio_spte(vcpu, &sp->spt[i], gfn, pte_access, @@ -1024,12 +1025,15 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) host_writable = sp->spt[i] & SPTE_HOST_WRITEABLE; - set_spte(vcpu, &sp->spt[i], pte_access, - PT_PAGE_TABLE_LEVEL, gfn, - spte_to_pfn(sp->spt[i]), true, false, - host_writable); + set_spte_ret |= set_spte(vcpu, &sp->spt[i], + pte_access, PT_PAGE_TABLE_LEVEL, + gfn, spte_to_pfn(sp->spt[i]), + true, false, host_writable); } + if (set_spte_ret & SET_SPTE_NEED_REMOTE_TLB_FLUSH) + kvm_flush_remote_tlbs(vcpu->kvm); + return nr_present; } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index f059a73f0fd0..6276140044d0 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2884,7 +2884,6 @@ static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu, svm->vmcb->control.nested_cr3 = __sme_set(root); mark_dirty(svm->vmcb, VMCB_NPT); - svm_flush_tlb(vcpu, true); } static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu, @@ -5435,6 +5434,13 @@ static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa) svm->asid_generation--; } +static void svm_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t gva) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + invlpga(gva, svm->vmcb->control.asid); +} + static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu) { } @@ -5580,8 +5586,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) clgi(); - local_irq_enable(); - /* * If this vCPU has touched SPEC_CTRL, restore the guest's value if * it's non-zero. Since vmentry is serialising on affected CPUs, there @@ -5590,6 +5594,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) */ x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl); + local_irq_enable(); + asm volatile ( "push %%" _ASM_BP "; \n\t" "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t" @@ -5712,12 +5718,12 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); - x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl); - reload_tss(vcpu); local_irq_disable(); + x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl); + vcpu->arch.cr2 = svm->vmcb->save.cr2; vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax; vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; @@ -5766,7 +5772,6 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) svm->vmcb->save.cr3 = __sme_set(root); mark_dirty(svm->vmcb, VMCB_CR); - svm_flush_tlb(vcpu, true); } static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root) @@ -5779,8 +5784,6 @@ static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root) /* Also sync guest cr3 here in case we live migrate */ svm->vmcb->save.cr3 = kvm_read_cr3(vcpu); mark_dirty(svm->vmcb, VMCB_CR); - - svm_flush_tlb(vcpu, true); } static int is_disabled(void) @@ -7090,6 +7093,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .set_rflags = svm_set_rflags, .tlb_flush = svm_flush_tlb, + .tlb_flush_gva = svm_flush_tlb_gva, .run = svm_vcpu_run, .handle_exit = handle_exit, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 46b428c0990e..8dae47e7267a 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -38,6 +38,7 @@ #include "kvm_cache_regs.h" #include "x86.h" +#include <asm/asm.h> #include <asm/cpu.h> #include <asm/io.h> #include <asm/desc.h> @@ -197,12 +198,14 @@ static enum vmx_l1d_flush_state __read_mostly vmentry_l1d_flush_param = VMENTER_ static const struct { const char *option; - enum vmx_l1d_flush_state cmd; + bool for_parse; } vmentry_l1d_param[] = { - {"auto", VMENTER_L1D_FLUSH_AUTO}, - {"never", VMENTER_L1D_FLUSH_NEVER}, - {"cond", VMENTER_L1D_FLUSH_COND}, - {"always", VMENTER_L1D_FLUSH_ALWAYS}, + [VMENTER_L1D_FLUSH_AUTO] = {"auto", true}, + [VMENTER_L1D_FLUSH_NEVER] = {"never", true}, + [VMENTER_L1D_FLUSH_COND] = {"cond", true}, + [VMENTER_L1D_FLUSH_ALWAYS] = {"always", true}, + [VMENTER_L1D_FLUSH_EPT_DISABLED] = {"EPT disabled", false}, + [VMENTER_L1D_FLUSH_NOT_REQUIRED] = {"not required", false}, }; #define L1D_CACHE_ORDER 4 @@ -218,15 +221,15 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf) return 0; } - if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) { - u64 msr; + if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) { + u64 msr; - rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr); - if (msr & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) { - l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED; - return 0; - } - } + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr); + if (msr & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) { + l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED; + return 0; + } + } /* If set to auto use the default l1tf mitigation method */ if (l1tf == VMENTER_L1D_FLUSH_AUTO) { @@ -286,8 +289,9 @@ static int vmentry_l1d_flush_parse(const char *s) if (s) { for (i = 0; i < ARRAY_SIZE(vmentry_l1d_param); i++) { - if (sysfs_streq(s, vmentry_l1d_param[i].option)) - return vmentry_l1d_param[i].cmd; + if (vmentry_l1d_param[i].for_parse && + sysfs_streq(s, vmentry_l1d_param[i].option)) + return i; } } return -EINVAL; @@ -297,13 +301,13 @@ static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp) { int l1tf, ret; - if (!boot_cpu_has(X86_BUG_L1TF)) - return 0; - l1tf = vmentry_l1d_flush_parse(s); if (l1tf < 0) return l1tf; + if (!boot_cpu_has(X86_BUG_L1TF)) + return 0; + /* * Has vmx_init() run already? If not then this is the pre init * parameter parsing. In that case just store the value and let @@ -323,6 +327,9 @@ static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp) static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp) { + if (WARN_ON_ONCE(l1tf_vmx_mitigation >= ARRAY_SIZE(vmentry_l1d_param))) + return sprintf(s, "???\n"); + return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option); } @@ -332,23 +339,54 @@ static const struct kernel_param_ops vmentry_l1d_flush_ops = { }; module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, NULL, 0644); +enum ept_pointers_status { + EPT_POINTERS_CHECK = 0, + EPT_POINTERS_MATCH = 1, + EPT_POINTERS_MISMATCH = 2 +}; + struct kvm_vmx { struct kvm kvm; unsigned int tss_addr; bool ept_identity_pagetable_done; gpa_t ept_identity_map_addr; + + enum ept_pointers_status ept_pointers_match; + spinlock_t ept_pointer_lock; }; #define NR_AUTOLOAD_MSRS 8 +struct vmcs_hdr { + u32 revision_id:31; + u32 shadow_vmcs:1; +}; + struct vmcs { - u32 revision_id; + struct vmcs_hdr hdr; u32 abort; char data[0]; }; /* + * vmcs_host_state tracks registers that are loaded from the VMCS on VMEXIT + * and whose values change infrequently, but are not constant. I.e. this is + * used as a write-through cache of the corresponding VMCS fields. + */ +struct vmcs_host_state { + unsigned long cr3; /* May not match real cr3 */ + unsigned long cr4; /* May not match real cr4 */ + unsigned long gs_base; + unsigned long fs_base; + + u16 fs_sel, gs_sel, ldt_sel; +#ifdef CONFIG_X86_64 + u16 ds_sel, es_sel; +#endif +}; + +/* * Track a VMCS that may be loaded on a certain CPU. If it is (cpu!=-1), also * remember whether it was VMLAUNCHed, and maintain a linked list of all VMCSs * loaded on this CPU (so we can clear them if the CPU goes down). @@ -359,14 +397,13 @@ struct loaded_vmcs { int cpu; bool launched; bool nmi_known_unmasked; - unsigned long vmcs_host_cr3; /* May not match real cr3 */ - unsigned long vmcs_host_cr4; /* May not match real cr4 */ /* Support for vnmi-less CPUs */ int soft_vnmi_blocked; ktime_t entry_time; s64 vnmi_blocked_time; unsigned long *msr_bitmap; struct list_head loaded_vmcss_on_cpu_link; + struct vmcs_host_state host_state; }; struct shared_msr_entry { @@ -397,7 +434,7 @@ struct __packed vmcs12 { /* According to the Intel spec, a VMCS region must start with the * following two fields. Then follow implementation-specific data. */ - u32 revision_id; + struct vmcs_hdr hdr; u32 abort; u32 launch_state; /* set to 0 by VMCLEAR, to 1 by VMLAUNCH */ @@ -565,7 +602,7 @@ struct __packed vmcs12 { "Offset of " #field " in struct vmcs12 has changed.") static inline void vmx_check_vmcs12_offsets(void) { - CHECK_OFFSET(revision_id, 0); + CHECK_OFFSET(hdr, 0); CHECK_OFFSET(abort, 4); CHECK_OFFSET(launch_state, 8); CHECK_OFFSET(io_bitmap_a, 40); @@ -784,6 +821,12 @@ struct nested_vmx { */ struct vmcs12 *cached_vmcs12; /* + * Cache of the guest's shadow VMCS, existing outside of guest + * memory. Loaded from guest memory during VM entry. Flushed + * to guest memory during VM exit. + */ + struct vmcs12 *cached_shadow_vmcs12; + /* * Indicates if the shadow vmcs must be updated with the * data hold by vmcs12 */ @@ -933,25 +976,20 @@ struct vcpu_vmx { /* * loaded_vmcs points to the VMCS currently used in this vcpu. For a * non-nested (L1) guest, it always points to vmcs01. For a nested - * guest (L2), it points to a different VMCS. + * guest (L2), it points to a different VMCS. loaded_cpu_state points + * to the VMCS whose state is loaded into the CPU registers that only + * need to be switched when transitioning to/from the kernel; a NULL + * value indicates that host state is loaded. */ struct loaded_vmcs vmcs01; struct loaded_vmcs *loaded_vmcs; + struct loaded_vmcs *loaded_cpu_state; bool __launched; /* temporary, used in vmx_vcpu_run */ struct msr_autoload { struct vmx_msrs guest; struct vmx_msrs host; } msr_autoload; - struct { - int loaded; - u16 fs_sel, gs_sel, ldt_sel; -#ifdef CONFIG_X86_64 - u16 ds_sel, es_sel; -#endif - int gs_ldt_reload_needed; - int fs_reload_needed; - u64 msr_host_bndcfgs; - } host_state; + struct { int vm86_active; ulong save_rflags; @@ -1001,6 +1039,7 @@ struct vcpu_vmx { */ u64 msr_ia32_feature_control; u64 msr_ia32_feature_control_valid_bits; + u64 ept_pointer; }; enum segment_cache_field { @@ -1220,6 +1259,11 @@ static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu) return to_vmx(vcpu)->nested.cached_vmcs12; } +static inline struct vmcs12 *get_shadow_vmcs12(struct kvm_vcpu *vcpu) +{ + return to_vmx(vcpu)->nested.cached_shadow_vmcs12; +} + static bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu); static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu); static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa); @@ -1490,6 +1534,48 @@ static void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) * GUEST_IA32_RTIT_CTL = 0x00002814, */ } + +/* check_ept_pointer() should be under protection of ept_pointer_lock. */ +static void check_ept_pointer_match(struct kvm *kvm) +{ + struct kvm_vcpu *vcpu; + u64 tmp_eptp = INVALID_PAGE; + int i; + + kvm_for_each_vcpu(i, vcpu, kvm) { + if (!VALID_PAGE(tmp_eptp)) { + tmp_eptp = to_vmx(vcpu)->ept_pointer; + } else if (tmp_eptp != to_vmx(vcpu)->ept_pointer) { + to_kvm_vmx(kvm)->ept_pointers_match + = EPT_POINTERS_MISMATCH; + return; + } + } + + to_kvm_vmx(kvm)->ept_pointers_match = EPT_POINTERS_MATCH; +} + +static int vmx_hv_remote_flush_tlb(struct kvm *kvm) +{ + int ret; + + spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock); + + if (to_kvm_vmx(kvm)->ept_pointers_match == EPT_POINTERS_CHECK) + check_ept_pointer_match(kvm); + + if (to_kvm_vmx(kvm)->ept_pointers_match != EPT_POINTERS_MATCH) { + ret = -ENOTSUPP; + goto out; + } + + ret = hyperv_flush_guest_mapping( + to_vmx(kvm_get_vcpu(kvm, 0))->ept_pointer); + +out: + spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock); + return ret; +} #else /* !IS_ENABLED(CONFIG_HYPERV) */ static inline void evmcs_write64(unsigned long field, u64 value) {} static inline void evmcs_write32(unsigned long field, u32 value) {} @@ -1604,6 +1690,12 @@ static inline bool cpu_has_vmx_virtual_intr_delivery(void) SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; } +static inline bool cpu_has_vmx_encls_vmexit(void) +{ + return vmcs_config.cpu_based_2nd_exec_ctrl & + SECONDARY_EXEC_ENCLS_EXITING; +} + /* * Comment's format: document - errata name - stepping - processor name. * Refer from @@ -1864,6 +1956,12 @@ static inline bool nested_cpu_supports_monitor_trap_flag(struct kvm_vcpu *vcpu) CPU_BASED_MONITOR_TRAP_FLAG; } +static inline bool nested_cpu_has_vmx_shadow_vmcs(struct kvm_vcpu *vcpu) +{ + return to_vmx(vcpu)->nested.msrs.secondary_ctls_high & + SECONDARY_EXEC_SHADOW_VMCS; +} + static inline bool nested_cpu_has(struct vmcs12 *vmcs12, u32 bit) { return vmcs12->cpu_based_vm_exec_control & bit; @@ -1944,6 +2042,11 @@ static inline bool nested_cpu_has_eptp_switching(struct vmcs12 *vmcs12) VMX_VMFUNC_EPTP_SWITCHING); } +static inline bool nested_cpu_has_shadow_vmcs(struct vmcs12 *vmcs12) +{ + return nested_cpu_has2(vmcs12, SECONDARY_EXEC_SHADOW_VMCS); +} + static inline bool is_nmi(u32 intr_info) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) @@ -1974,11 +2077,12 @@ static inline void __invvpid(int ext, u16 vpid, gva_t gva) u64 rsvd : 48; u64 gva; } operand = { vpid, 0, gva }; + bool error; - asm volatile (__ex(ASM_VMX_INVVPID) - /* CF==1 or ZF==1 --> rc = -1 */ - "; ja 1f ; ud2 ; 1:" - : : "a"(&operand), "c"(ext) : "cc", "memory"); + asm volatile (__ex(ASM_VMX_INVVPID) CC_SET(na) + : CC_OUT(na) (error) : "a"(&operand), "c"(ext) + : "memory"); + BUG_ON(error); } static inline void __invept(int ext, u64 eptp, gpa_t gpa) @@ -1986,11 +2090,12 @@ static inline void __invept(int ext, u64 eptp, gpa_t gpa) struct { u64 eptp, gpa; } operand = {eptp, gpa}; + bool error; - asm volatile (__ex(ASM_VMX_INVEPT) - /* CF==1 or ZF==1 --> rc = -1 */ - "; ja 1f ; ud2 ; 1:\n" - : : "a" (&operand), "c" (ext) : "cc", "memory"); + asm volatile (__ex(ASM_VMX_INVEPT) CC_SET(na) + : CC_OUT(na) (error) : "a" (&operand), "c" (ext) + : "memory"); + BUG_ON(error); } static struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr) @@ -2006,12 +2111,12 @@ static struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr) static void vmcs_clear(struct vmcs *vmcs) { u64 phys_addr = __pa(vmcs); - u8 error; + bool error; - asm volatile (__ex(ASM_VMX_VMCLEAR_RAX) "; setna %0" - : "=qm"(error) : "a"(&phys_addr), "m"(phys_addr) - : "cc", "memory"); - if (error) + asm volatile (__ex(ASM_VMX_VMCLEAR_RAX) CC_SET(na) + : CC_OUT(na) (error) : "a"(&phys_addr), "m"(phys_addr) + : "memory"); + if (unlikely(error)) printk(KERN_ERR "kvm: vmclear fail: %p/%llx\n", vmcs, phys_addr); } @@ -2028,15 +2133,15 @@ static inline void loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs) static void vmcs_load(struct vmcs *vmcs) { u64 phys_addr = __pa(vmcs); - u8 error; + bool error; if (static_branch_unlikely(&enable_evmcs)) return evmcs_load(phys_addr); - asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0" - : "=qm"(error) : "a"(&phys_addr), "m"(phys_addr) - : "cc", "memory"); - if (error) + asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) CC_SET(na) + : CC_OUT(na) (error) : "a"(&phys_addr), "m"(phys_addr) + : "memory"); + if (unlikely(error)) printk(KERN_ERR "kvm: vmptrld %p/%llx failed\n", vmcs, phys_addr); } @@ -2114,6 +2219,19 @@ static void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs) __loaded_vmcs_clear, loaded_vmcs, 1); } +static inline bool vpid_sync_vcpu_addr(int vpid, gva_t addr) +{ + if (vpid == 0) + return true; + + if (cpu_has_vmx_invvpid_individual_addr()) { + __invvpid(VMX_VPID_EXTENT_INDIVIDUAL_ADDR, vpid, addr); + return true; + } + + return false; +} + static inline void vpid_sync_vcpu_single(int vpid) { if (vpid == 0) @@ -2248,10 +2366,10 @@ static noinline void vmwrite_error(unsigned long field, unsigned long value) static __always_inline void __vmcs_writel(unsigned long field, unsigned long value) { - u8 error; + bool error; - asm volatile (__ex(ASM_VMX_VMWRITE_RAX_RDX) "; setna %0" - : "=q"(error) : "a"(value), "d"(field) : "cc"); + asm volatile (__ex(ASM_VMX_VMWRITE_RAX_RDX) CC_SET(na) + : CC_OUT(na) (error) : "a"(value), "d"(field)); if (unlikely(error)) vmwrite_error(field, value); } @@ -2735,121 +2853,150 @@ static unsigned long segment_base(u16 selector) } #endif -static void vmx_save_host_state(struct kvm_vcpu *vcpu) +static void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); + struct vmcs_host_state *host_state; #ifdef CONFIG_X86_64 int cpu = raw_smp_processor_id(); - unsigned long fs_base, kernel_gs_base; #endif + unsigned long fs_base, gs_base; + u16 fs_sel, gs_sel; int i; - if (vmx->host_state.loaded) + if (vmx->loaded_cpu_state) return; - vmx->host_state.loaded = 1; + vmx->loaded_cpu_state = vmx->loaded_vmcs; + host_state = &vmx->loaded_cpu_state->host_state; + /* * Set host fs and gs selectors. Unfortunately, 22.2.3 does not * allow segment selectors with cpl > 0 or ti == 1. */ - vmx->host_state.ldt_sel = kvm_read_ldt(); - vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel; + host_state->ldt_sel = kvm_read_ldt(); #ifdef CONFIG_X86_64 + savesegment(ds, host_state->ds_sel); + savesegment(es, host_state->es_sel); + + gs_base = cpu_kernelmode_gs_base(cpu); if (likely(is_64bit_mm(current->mm))) { save_fsgs_for_kvm(); - vmx->host_state.fs_sel = current->thread.fsindex; - vmx->host_state.gs_sel = current->thread.gsindex; + fs_sel = current->thread.fsindex; + gs_sel = current->thread.gsindex; fs_base = current->thread.fsbase; - kernel_gs_base = current->thread.gsbase; + vmx->msr_host_kernel_gs_base = current->thread.gsbase; } else { -#endif - savesegment(fs, vmx->host_state.fs_sel); - savesegment(gs, vmx->host_state.gs_sel); -#ifdef CONFIG_X86_64 + savesegment(fs, fs_sel); + savesegment(gs, gs_sel); fs_base = read_msr(MSR_FS_BASE); - kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE); - } -#endif - if (!(vmx->host_state.fs_sel & 7)) { - vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel); - vmx->host_state.fs_reload_needed = 0; - } else { - vmcs_write16(HOST_FS_SELECTOR, 0); - vmx->host_state.fs_reload_needed = 1; - } - if (!(vmx->host_state.gs_sel & 7)) - vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel); - else { - vmcs_write16(HOST_GS_SELECTOR, 0); - vmx->host_state.gs_ldt_reload_needed = 1; + vmx->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE); } -#ifdef CONFIG_X86_64 - savesegment(ds, vmx->host_state.ds_sel); - savesegment(es, vmx->host_state.es_sel); - - vmcs_writel(HOST_FS_BASE, fs_base); - vmcs_writel(HOST_GS_BASE, cpu_kernelmode_gs_base(cpu)); - - vmx->msr_host_kernel_gs_base = kernel_gs_base; if (is_long_mode(&vmx->vcpu)) wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); #else - vmcs_writel(HOST_FS_BASE, segment_base(vmx->host_state.fs_sel)); - vmcs_writel(HOST_GS_BASE, segment_base(vmx->host_state.gs_sel)); + savesegment(fs, fs_sel); + savesegment(gs, gs_sel); + fs_base = segment_base(fs_sel); + gs_base = segment_base(gs_sel); #endif - if (boot_cpu_has(X86_FEATURE_MPX)) - rdmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs); + + if (unlikely(fs_sel != host_state->fs_sel)) { + if (!(fs_sel & 7)) + vmcs_write16(HOST_FS_SELECTOR, fs_sel); + else + vmcs_write16(HOST_FS_SELECTOR, 0); + host_state->fs_sel = fs_sel; + } + if (unlikely(gs_sel != host_state->gs_sel)) { + if (!(gs_sel & 7)) + vmcs_write16(HOST_GS_SELECTOR, gs_sel); + else + vmcs_write16(HOST_GS_SELECTOR, 0); + host_state->gs_sel = gs_sel; + } + if (unlikely(fs_base != host_state->fs_base)) { + vmcs_writel(HOST_FS_BASE, fs_base); + host_state->fs_base = fs_base; + } + if (unlikely(gs_base != host_state->gs_base)) { + vmcs_writel(HOST_GS_BASE, gs_base); + host_state->gs_base = gs_base; + } + for (i = 0; i < vmx->save_nmsrs; ++i) kvm_set_shared_msr(vmx->guest_msrs[i].index, vmx->guest_msrs[i].data, vmx->guest_msrs[i].mask); } -static void __vmx_load_host_state(struct vcpu_vmx *vmx) +static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx) { - if (!vmx->host_state.loaded) + struct vmcs_host_state *host_state; + + if (!vmx->loaded_cpu_state) return; + WARN_ON_ONCE(vmx->loaded_cpu_state != vmx->loaded_vmcs); + host_state = &vmx->loaded_cpu_state->host_state; + ++vmx->vcpu.stat.host_state_reload; - vmx->host_state.loaded = 0; + vmx->loaded_cpu_state = NULL; + #ifdef CONFIG_X86_64 if (is_long_mode(&vmx->vcpu)) rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); #endif - if (vmx->host_state.gs_ldt_reload_needed) { - kvm_load_ldt(vmx->host_state.ldt_sel); + if (host_state->ldt_sel || (host_state->gs_sel & 7)) { + kvm_load_ldt(host_state->ldt_sel); #ifdef CONFIG_X86_64 - load_gs_index(vmx->host_state.gs_sel); + load_gs_index(host_state->gs_sel); #else - loadsegment(gs, vmx->host_state.gs_sel); + loadsegment(gs, host_state->gs_sel); #endif } - if (vmx->host_state.fs_reload_needed) - loadsegment(fs, vmx->host_state.fs_sel); + if (host_state->fs_sel & 7) + loadsegment(fs, host_state->fs_sel); #ifdef CONFIG_X86_64 - if (unlikely(vmx->host_state.ds_sel | vmx->host_state.es_sel)) { - loadsegment(ds, vmx->host_state.ds_sel); - loadsegment(es, vmx->host_state.es_sel); + if (unlikely(host_state->ds_sel | host_state->es_sel)) { + loadsegment(ds, host_state->ds_sel); + loadsegment(es, host_state->es_sel); } #endif invalidate_tss_limit(); #ifdef CONFIG_X86_64 wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); #endif - if (vmx->host_state.msr_host_bndcfgs) - wrmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs); load_fixmap_gdt(raw_smp_processor_id()); } -static void vmx_load_host_state(struct vcpu_vmx *vmx) +#ifdef CONFIG_X86_64 +static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx) { - preempt_disable(); - __vmx_load_host_state(vmx); - preempt_enable(); + if (is_long_mode(&vmx->vcpu)) { + preempt_disable(); + if (vmx->loaded_cpu_state) + rdmsrl(MSR_KERNEL_GS_BASE, + vmx->msr_guest_kernel_gs_base); + preempt_enable(); + } + return vmx->msr_guest_kernel_gs_base; } +static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data) +{ + if (is_long_mode(&vmx->vcpu)) { + preempt_disable(); + if (vmx->loaded_cpu_state) + wrmsrl(MSR_KERNEL_GS_BASE, data); + preempt_enable(); + } + vmx->msr_guest_kernel_gs_base = data; +} +#endif + static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu) { struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); @@ -2991,7 +3138,7 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu) { vmx_vcpu_pi_put(vcpu); - __vmx_load_host_state(to_vmx(vcpu)); + vmx_prepare_switch_to_host(to_vmx(vcpu)); } static bool emulation_required(struct kvm_vcpu *vcpu) @@ -3212,7 +3359,7 @@ static bool vmx_rdtscp_supported(void) static bool vmx_invpcid_supported(void) { - return cpu_has_vmx_invpcid() && enable_ept; + return cpu_has_vmx_invpcid(); } /* @@ -3455,6 +3602,12 @@ static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv) SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | SECONDARY_EXEC_WBINVD_EXITING; + /* + * We can emulate "VMCS shadowing," even if the hardware + * doesn't support it. + */ + msrs->secondary_ctls_high |= + SECONDARY_EXEC_SHADOW_VMCS; if (enable_ept) { /* nested EPT: emulate EPT also to L1 */ @@ -3922,8 +4075,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = vmcs_readl(GUEST_GS_BASE); break; case MSR_KERNEL_GS_BASE: - vmx_load_host_state(vmx); - msr_info->data = vmx->msr_guest_kernel_gs_base; + msr_info->data = vmx_read_guest_kernel_gs_base(vmx); break; #endif case MSR_EFER: @@ -4023,8 +4175,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) vmcs_writel(GUEST_GS_BASE, data); break; case MSR_KERNEL_GS_BASE: - vmx_load_host_state(vmx); - vmx->msr_guest_kernel_gs_base = data; + vmx_write_guest_kernel_gs_base(vmx, data); break; #endif case MSR_IA32_SYSENTER_CS: @@ -4412,7 +4563,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) SECONDARY_EXEC_RDRAND_EXITING | SECONDARY_EXEC_ENABLE_PML | SECONDARY_EXEC_TSC_SCALING | - SECONDARY_EXEC_ENABLE_VMFUNC; + SECONDARY_EXEC_ENABLE_VMFUNC | + SECONDARY_EXEC_ENCLS_EXITING; if (adjust_vmx_controls(min2, opt2, MSR_IA32_VMX_PROCBASED_CTLS2, &_cpu_based_2nd_exec_control) < 0) @@ -4559,7 +4711,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) return 0; } -static struct vmcs *alloc_vmcs_cpu(int cpu) +static struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu) { int node = cpu_to_node(cpu); struct page *pages; @@ -4573,10 +4725,12 @@ static struct vmcs *alloc_vmcs_cpu(int cpu) /* KVM supports Enlightened VMCS v1 only */ if (static_branch_unlikely(&enable_evmcs)) - vmcs->revision_id = KVM_EVMCS_VERSION; + vmcs->hdr.revision_id = KVM_EVMCS_VERSION; else - vmcs->revision_id = vmcs_config.revision_id; + vmcs->hdr.revision_id = vmcs_config.revision_id; + if (shadow) + vmcs->hdr.shadow_vmcs = 1; return vmcs; } @@ -4600,14 +4754,14 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) WARN_ON(loaded_vmcs->shadow_vmcs != NULL); } -static struct vmcs *alloc_vmcs(void) +static struct vmcs *alloc_vmcs(bool shadow) { - return alloc_vmcs_cpu(raw_smp_processor_id()); + return alloc_vmcs_cpu(shadow, raw_smp_processor_id()); } static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) { - loaded_vmcs->vmcs = alloc_vmcs(); + loaded_vmcs->vmcs = alloc_vmcs(false); if (!loaded_vmcs->vmcs) return -ENOMEM; @@ -4629,6 +4783,9 @@ static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) evmcs->hv_enlightenments_control.msr_bitmap = 1; } } + + memset(&loaded_vmcs->host_state, 0, sizeof(struct vmcs_host_state)); + return 0; out_vmcs: @@ -4738,7 +4895,7 @@ static __init int alloc_kvm_area(void) for_each_possible_cpu(cpu) { struct vmcs *vmcs; - vmcs = alloc_vmcs_cpu(cpu); + vmcs = alloc_vmcs_cpu(false, cpu); if (!vmcs) { free_kvm_area(); return -ENOMEM; @@ -4755,7 +4912,7 @@ static __init int alloc_kvm_area(void) * physical CPU. */ if (static_branch_unlikely(&enable_evmcs)) - vmcs->revision_id = vmcs_config.revision_id; + vmcs->hdr.revision_id = vmcs_config.revision_id; per_cpu(vmxarea, cpu) = vmcs; } @@ -4912,10 +5069,18 @@ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) return; /* - * Force kernel_gs_base reloading before EFER changes, as control - * of this msr depends on is_long_mode(). + * MSR_KERNEL_GS_BASE is not intercepted when the guest is in + * 64-bit mode as a 64-bit kernel may frequently access the + * MSR. This means we need to manually save/restore the MSR + * when switching between guest and host state, but only if + * the guest is in 64-bit mode. Sync our cached value if the + * guest is transitioning to 32-bit mode and the CPU contains + * guest state, i.e. the cache is stale. */ - vmx_load_host_state(to_vmx(vcpu)); +#ifdef CONFIG_X86_64 + if (!(efer & EFER_LMA)) + (void)vmx_read_guest_kernel_gs_base(vmx); +#endif vcpu->arch.efer = efer; if (efer & EFER_LMA) { vm_entry_controls_setbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE); @@ -4972,6 +5137,20 @@ static void vmx_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa) __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid, invalidate_gpa); } +static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr) +{ + int vpid = to_vmx(vcpu)->vpid; + + if (!vpid_sync_vcpu_addr(vpid, addr)) + vpid_sync_context(vpid); + + /* + * If VPIDs are not supported or enabled, then the above is a no-op. + * But we don't really need a TLB flush in that case anyway, because + * each VM entry/exit includes an implicit flush when VPID is 0. + */ +} + static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) { ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits; @@ -5153,6 +5332,7 @@ static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa) static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) { + struct kvm *kvm = vcpu->kvm; unsigned long guest_cr3; u64 eptp; @@ -5160,15 +5340,23 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) if (enable_ept) { eptp = construct_eptp(vcpu, cr3); vmcs_write64(EPT_POINTER, eptp); + + if (kvm_x86_ops->tlb_remote_flush) { + spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock); + to_vmx(vcpu)->ept_pointer = eptp; + to_kvm_vmx(kvm)->ept_pointers_match + = EPT_POINTERS_CHECK; + spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock); + } + if (enable_unrestricted_guest || is_paging(vcpu) || is_guest_mode(vcpu)) guest_cr3 = kvm_read_cr3(vcpu); else - guest_cr3 = to_kvm_vmx(vcpu->kvm)->ept_identity_map_addr; + guest_cr3 = to_kvm_vmx(kvm)->ept_identity_map_addr; ept_load_pdptrs(vcpu); } - vmx_flush_tlb(vcpu, true); vmcs_writel(GUEST_CR3, guest_cr3); } @@ -6104,19 +6292,19 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx) */ cr3 = __read_cr3(); vmcs_writel(HOST_CR3, cr3); /* 22.2.3 FIXME: shadow tables */ - vmx->loaded_vmcs->vmcs_host_cr3 = cr3; + vmx->loaded_vmcs->host_state.cr3 = cr3; /* Save the most likely value for this task's CR4 in the VMCS. */ cr4 = cr4_read_shadow(); vmcs_writel(HOST_CR4, cr4); /* 22.2.3, 22.2.5 */ - vmx->loaded_vmcs->vmcs_host_cr4 = cr4; + vmx->loaded_vmcs->host_state.cr4 = cr4; vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */ #ifdef CONFIG_X86_64 /* * Load null selectors, so we can avoid reloading them in - * __vmx_load_host_state(), in case userspace uses the null selectors - * too (the expected case). + * vmx_prepare_switch_to_host(), in case userspace uses + * the null selectors too (the expected case). */ vmcs_write16(HOST_DS_SELECTOR, 0); vmcs_write16(HOST_ES_SELECTOR, 0); @@ -6241,8 +6429,6 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) if (!enable_ept) { exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; enable_unrestricted_guest = 0; - /* Enable INVPCID for non-ept guests may cause performance regression. */ - exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID; } if (!enable_unrestricted_guest) exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; @@ -6371,9 +6557,6 @@ static void ept_set_mmio_spte_mask(void) */ static void vmx_vcpu_setup(struct vcpu_vmx *vmx) { -#ifdef CONFIG_X86_64 - unsigned long a; -#endif int i; if (enable_shadow_vmcs) { @@ -6428,15 +6611,8 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write16(HOST_FS_SELECTOR, 0); /* 22.2.4 */ vmcs_write16(HOST_GS_SELECTOR, 0); /* 22.2.4 */ vmx_set_constant_host_state(vmx); -#ifdef CONFIG_X86_64 - rdmsrl(MSR_FS_BASE, a); - vmcs_writel(HOST_FS_BASE, a); /* 22.2.4 */ - rdmsrl(MSR_GS_BASE, a); - vmcs_writel(HOST_GS_BASE, a); /* 22.2.4 */ -#else vmcs_writel(HOST_FS_BASE, 0); /* 22.2.4 */ vmcs_writel(HOST_GS_BASE, 0); /* 22.2.4 */ -#endif if (cpu_has_vmx_vmfunc()) vmcs_write64(VM_FUNCTION_CONTROL, 0); @@ -6485,6 +6661,9 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg)); vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1); } + + if (cpu_has_vmx_encls_vmexit()) + vmcs_write64(ENCLS_EXITING_BITMAP, -1ull); } static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) @@ -7670,6 +7849,7 @@ static void vmx_enable_tdp(void) static __init int hardware_setup(void) { + unsigned long host_bndcfgs; int r = -ENOMEM, i; rdmsrl_safe(MSR_EFER, &host_efer); @@ -7694,6 +7874,11 @@ static __init int hardware_setup(void) if (boot_cpu_has(X86_FEATURE_NX)) kvm_enable_efer_bits(EFER_NX); + if (boot_cpu_has(X86_FEATURE_MPX)) { + rdmsrl(MSR_IA32_BNDCFGS, host_bndcfgs); + WARN_ONCE(host_bndcfgs, "KVM: BNDCFGS in host will be lost"); + } + if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() || !(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global())) enable_vpid = 0; @@ -7730,6 +7915,12 @@ static __init int hardware_setup(void) if (enable_ept && !cpu_has_vmx_ept_2m_page()) kvm_disable_largepages(); +#if IS_ENABLED(CONFIG_HYPERV) + if (ms_hyperv.nested_features & HV_X64_NESTED_GUEST_MAPPING_FLUSH + && enable_ept) + kvm_x86_ops->tlb_remote_flush = vmx_hv_remote_flush_tlb; +#endif + if (!cpu_has_vmx_ple()) { ple_gap = 0; ple_window = 0; @@ -7756,6 +7947,11 @@ static __init int hardware_setup(void) else kvm_disable_tdp(); + if (!nested) { + kvm_x86_ops->get_nested_state = NULL; + kvm_x86_ops->set_nested_state = NULL; + } + /* * Only enable PML when hardware supports PML feature, and both EPT * and EPT A/D bit features are enabled -- PML depends on them to work. @@ -8032,10 +8228,35 @@ static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer) return 0; } +/* + * Allocate a shadow VMCS and associate it with the currently loaded + * VMCS, unless such a shadow VMCS already exists. The newly allocated + * VMCS is also VMCLEARed, so that it is ready for use. + */ +static struct vmcs *alloc_shadow_vmcs(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + struct loaded_vmcs *loaded_vmcs = vmx->loaded_vmcs; + + /* + * We should allocate a shadow vmcs for vmcs01 only when L1 + * executes VMXON and free it when L1 executes VMXOFF. + * As it is invalid to execute VMXON twice, we shouldn't reach + * here when vmcs01 already have an allocated shadow vmcs. + */ + WARN_ON(loaded_vmcs == &vmx->vmcs01 && loaded_vmcs->shadow_vmcs); + + if (!loaded_vmcs->shadow_vmcs) { + loaded_vmcs->shadow_vmcs = alloc_vmcs(true); + if (loaded_vmcs->shadow_vmcs) + vmcs_clear(loaded_vmcs->shadow_vmcs); + } + return loaded_vmcs->shadow_vmcs; +} + static int enter_vmx_operation(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - struct vmcs *shadow_vmcs; int r; r = alloc_loaded_vmcs(&vmx->nested.vmcs02); @@ -8046,16 +8267,12 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu) if (!vmx->nested.cached_vmcs12) goto out_cached_vmcs12; - if (enable_shadow_vmcs) { - shadow_vmcs = alloc_vmcs(); - if (!shadow_vmcs) - goto out_shadow_vmcs; - /* mark vmcs as shadow */ - shadow_vmcs->revision_id |= (1u << 31); - /* init shadow vmcs */ - vmcs_clear(shadow_vmcs); - vmx->vmcs01.shadow_vmcs = shadow_vmcs; - } + vmx->nested.cached_shadow_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL); + if (!vmx->nested.cached_shadow_vmcs12) + goto out_cached_shadow_vmcs12; + + if (enable_shadow_vmcs && !alloc_shadow_vmcs(vcpu)) + goto out_shadow_vmcs; hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); @@ -8067,6 +8284,9 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu) return 0; out_shadow_vmcs: + kfree(vmx->nested.cached_shadow_vmcs12); + +out_cached_shadow_vmcs12: kfree(vmx->nested.cached_vmcs12); out_cached_vmcs12: @@ -8109,7 +8329,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu) /* CPL=0 must be checked manually. */ if (vmx_get_cpl(vcpu)) { - kvm_queue_exception(vcpu, UD_VECTOR); + kvm_inject_gp(vcpu, 0); return 1; } @@ -8172,15 +8392,16 @@ static int handle_vmon(struct kvm_vcpu *vcpu) */ static int nested_vmx_check_permission(struct kvm_vcpu *vcpu) { - if (vmx_get_cpl(vcpu)) { + if (!to_vmx(vcpu)->nested.vmxon) { kvm_queue_exception(vcpu, UD_VECTOR); return 0; } - if (!to_vmx(vcpu)->nested.vmxon) { - kvm_queue_exception(vcpu, UD_VECTOR); + if (vmx_get_cpl(vcpu)) { + kvm_inject_gp(vcpu, 0); return 0; } + return 1; } @@ -8233,6 +8454,7 @@ static void free_nested(struct vcpu_vmx *vmx) vmx->vmcs01.shadow_vmcs = NULL; } kfree(vmx->nested.cached_vmcs12); + kfree(vmx->nested.cached_shadow_vmcs12); /* Unpin physical memory we referred to in the vmcs02 */ if (vmx->nested.apic_access_page) { kvm_release_page_dirty(vmx->nested.apic_access_page); @@ -8318,7 +8540,7 @@ static int handle_vmresume(struct kvm_vcpu *vcpu) * some of the bits we return here (e.g., on 32-bit guests, only 32 bits of * 64-bit fields are to be returned). */ -static inline int vmcs12_read_any(struct kvm_vcpu *vcpu, +static inline int vmcs12_read_any(struct vmcs12 *vmcs12, unsigned long field, u64 *ret) { short offset = vmcs_field_to_offset(field); @@ -8327,7 +8549,7 @@ static inline int vmcs12_read_any(struct kvm_vcpu *vcpu, if (offset < 0) return offset; - p = ((char *)(get_vmcs12(vcpu))) + offset; + p = (char *)vmcs12 + offset; switch (vmcs_field_width(field)) { case VMCS_FIELD_WIDTH_NATURAL_WIDTH: @@ -8349,10 +8571,10 @@ static inline int vmcs12_read_any(struct kvm_vcpu *vcpu, } -static inline int vmcs12_write_any(struct kvm_vcpu *vcpu, +static inline int vmcs12_write_any(struct vmcs12 *vmcs12, unsigned long field, u64 field_value){ short offset = vmcs_field_to_offset(field); - char *p = ((char *) get_vmcs12(vcpu)) + offset; + char *p = (char *)vmcs12 + offset; if (offset < 0) return offset; @@ -8405,7 +8627,7 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx) for (i = 0; i < max_fields[q]; i++) { field = fields[q][i]; field_value = __vmcs_readl(field); - vmcs12_write_any(&vmx->vcpu, field, field_value); + vmcs12_write_any(get_vmcs12(&vmx->vcpu), field, field_value); } /* * Skip the VM-exit information fields if they are read-only. @@ -8440,7 +8662,7 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx) for (q = 0; q < ARRAY_SIZE(fields); q++) { for (i = 0; i < max_fields[q]; i++) { field = fields[q][i]; - vmcs12_read_any(&vmx->vcpu, field, &field_value); + vmcs12_read_any(get_vmcs12(&vmx->vcpu), field, &field_value); __vmcs_writel(field, field_value); } } @@ -8470,6 +8692,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu) unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); gva_t gva = 0; + struct vmcs12 *vmcs12; if (!nested_vmx_check_permission(vcpu)) return 1; @@ -8477,10 +8700,24 @@ static int handle_vmread(struct kvm_vcpu *vcpu) if (!nested_vmx_check_vmcs12(vcpu)) return kvm_skip_emulated_instruction(vcpu); + if (!is_guest_mode(vcpu)) + vmcs12 = get_vmcs12(vcpu); + else { + /* + * When vmcs->vmcs_link_pointer is -1ull, any VMREAD + * to shadowed-field sets the ALU flags for VMfailInvalid. + */ + if (get_vmcs12(vcpu)->vmcs_link_pointer == -1ull) { + nested_vmx_failInvalid(vcpu); + return kvm_skip_emulated_instruction(vcpu); + } + vmcs12 = get_shadow_vmcs12(vcpu); + } + /* Decode instruction info and find the field to read */ field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); /* Read the field, zero-extended to a u64 field_value */ - if (vmcs12_read_any(vcpu, field, &field_value) < 0) { + if (vmcs12_read_any(vmcs12, field, &field_value) < 0) { nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); return kvm_skip_emulated_instruction(vcpu); } @@ -8522,6 +8759,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) */ u64 field_value = 0; struct x86_exception e; + struct vmcs12 *vmcs12; if (!nested_vmx_check_permission(vcpu)) return 1; @@ -8556,23 +8794,44 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) return kvm_skip_emulated_instruction(vcpu); } - if (vmcs12_write_any(vcpu, field, field_value) < 0) { + if (!is_guest_mode(vcpu)) + vmcs12 = get_vmcs12(vcpu); + else { + /* + * When vmcs->vmcs_link_pointer is -1ull, any VMWRITE + * to shadowed-field sets the ALU flags for VMfailInvalid. + */ + if (get_vmcs12(vcpu)->vmcs_link_pointer == -1ull) { + nested_vmx_failInvalid(vcpu); + return kvm_skip_emulated_instruction(vcpu); + } + vmcs12 = get_shadow_vmcs12(vcpu); + + } + + if (vmcs12_write_any(vmcs12, field, field_value) < 0) { nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); return kvm_skip_emulated_instruction(vcpu); } - switch (field) { + /* + * Do not track vmcs12 dirty-state if in guest-mode + * as we actually dirty shadow vmcs12 instead of vmcs12. + */ + if (!is_guest_mode(vcpu)) { + switch (field) { #define SHADOW_FIELD_RW(x) case x: #include "vmx_shadow_fields.h" - /* - * The fields that can be updated by L1 without a vmexit are - * always updated in the vmcs02, the others go down the slow - * path of prepare_vmcs02. - */ - break; - default: - vmx->nested.dirty_vmcs12 = true; - break; + /* + * The fields that can be updated by L1 without a vmexit are + * always updated in the vmcs02, the others go down the slow + * path of prepare_vmcs02. + */ + break; + default: + vmx->nested.dirty_vmcs12 = true; + break; + } } nested_vmx_succeed(vcpu); @@ -8623,7 +8882,9 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) return kvm_skip_emulated_instruction(vcpu); } new_vmcs12 = kmap(page); - if (new_vmcs12->revision_id != VMCS12_REVISION) { + if (new_vmcs12->hdr.revision_id != VMCS12_REVISION || + (new_vmcs12->hdr.shadow_vmcs && + !nested_cpu_has_vmx_shadow_vmcs(vcpu))) { kunmap(page); kvm_release_page_clean(page); nested_vmx_failValid(vcpu, @@ -8821,6 +9082,105 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) return kvm_skip_emulated_instruction(vcpu); } +static int handle_invpcid(struct kvm_vcpu *vcpu) +{ + u32 vmx_instruction_info; + unsigned long type; + bool pcid_enabled; + gva_t gva; + struct x86_exception e; + unsigned i; + unsigned long roots_to_free = 0; + struct { + u64 pcid; + u64 gla; + } operand; + + if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) { + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; + } + + vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); + type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf); + + if (type > 3) { + kvm_inject_gp(vcpu, 0); + return 1; + } + + /* According to the Intel instruction reference, the memory operand + * is read even if it isn't needed (e.g., for type==all) + */ + if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), + vmx_instruction_info, false, &gva)) + return 1; + + if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) { + kvm_inject_page_fault(vcpu, &e); + return 1; + } + + if (operand.pcid >> 12 != 0) { + kvm_inject_gp(vcpu, 0); + return 1; + } + + pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE); + + switch (type) { + case INVPCID_TYPE_INDIV_ADDR: + if ((!pcid_enabled && (operand.pcid != 0)) || + is_noncanonical_address(operand.gla, vcpu)) { + kvm_inject_gp(vcpu, 0); + return 1; + } + kvm_mmu_invpcid_gva(vcpu, operand.gla, operand.pcid); + return kvm_skip_emulated_instruction(vcpu); + + case INVPCID_TYPE_SINGLE_CTXT: + if (!pcid_enabled && (operand.pcid != 0)) { + kvm_inject_gp(vcpu, 0); + return 1; + } + + if (kvm_get_active_pcid(vcpu) == operand.pcid) { + kvm_mmu_sync_roots(vcpu); + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); + } + + for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) + if (kvm_get_pcid(vcpu, vcpu->arch.mmu.prev_roots[i].cr3) + == operand.pcid) + roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i); + + kvm_mmu_free_roots(vcpu, roots_to_free); + /* + * If neither the current cr3 nor any of the prev_roots use the + * given PCID, then nothing needs to be done here because a + * resync will happen anyway before switching to any other CR3. + */ + + return kvm_skip_emulated_instruction(vcpu); + + case INVPCID_TYPE_ALL_NON_GLOBAL: + /* + * Currently, KVM doesn't mark global entries in the shadow + * page tables, so a non-global flush just degenerates to a + * global flush. If needed, we could optimize this later by + * keeping track of global entries in shadow page tables. + */ + + /* fall-through */ + case INVPCID_TYPE_ALL_INCL_GLOBAL: + kvm_mmu_unload(vcpu); + return kvm_skip_emulated_instruction(vcpu); + + default: + BUG(); /* We have already checked above that type <= 3 */ + } +} + static int handle_pml_full(struct kvm_vcpu *vcpu) { unsigned long exit_qualification; @@ -8970,6 +9330,17 @@ fail: return 1; } +static int handle_encls(struct kvm_vcpu *vcpu) +{ + /* + * SGX virtualization is not yet supported. There is no software + * enable bit for SGX, so we have to trap ENCLS and inject a #UD + * to prevent the guest from executing ENCLS. + */ + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; +} + /* * The exit handlers return 1 if the exit was handled fully and guest execution * may resume. Otherwise they set the kvm_run parameter to indicate what needs @@ -9024,8 +9395,10 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_XSAVES] = handle_xsaves, [EXIT_REASON_XRSTORS] = handle_xrstors, [EXIT_REASON_PML_FULL] = handle_pml_full, + [EXIT_REASON_INVPCID] = handle_invpcid, [EXIT_REASON_VMFUNC] = handle_vmfunc, [EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer, + [EXIT_REASON_ENCLS] = handle_encls, }; static const int kvm_vmx_max_exit_handlers = @@ -9196,6 +9569,30 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, return false; } +static bool nested_vmx_exit_handled_vmcs_access(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12, gpa_t bitmap) +{ + u32 vmx_instruction_info; + unsigned long field; + u8 b; + + if (!nested_cpu_has_shadow_vmcs(vmcs12)) + return true; + + /* Decode instruction info and find the field to access */ + vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); + field = kvm_register_read(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); + + /* Out-of-range fields always cause a VM exit from L2 to L1 */ + if (field >> 15) + return true; + + if (kvm_vcpu_read_guest(vcpu, bitmap + field/8, &b, 1)) + return true; + + return 1 & (b >> (field & 7)); +} + /* * Return 1 if we should exit from L2 to L1 to handle an exit, or 0 if we * should handle it ourselves in L0 (and then continue L2). Only call this @@ -9280,10 +9677,15 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDSEED_EXITING); case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP: return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING); + case EXIT_REASON_VMREAD: + return nested_vmx_exit_handled_vmcs_access(vcpu, vmcs12, + vmcs12->vmread_bitmap); + case EXIT_REASON_VMWRITE: + return nested_vmx_exit_handled_vmcs_access(vcpu, vmcs12, + vmcs12->vmwrite_bitmap); case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR: case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD: - case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD: - case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE: + case EXIT_REASON_VMPTRST: case EXIT_REASON_VMRESUME: case EXIT_REASON_VMOFF: case EXIT_REASON_VMON: case EXIT_REASON_INVEPT: case EXIT_REASON_INVVPID: /* @@ -9367,6 +9769,9 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) case EXIT_REASON_VMFUNC: /* VM functions are emulated through L2->L0 vmexits. */ return false; + case EXIT_REASON_ENCLS: + /* SGX is never exposed to L1 */ + return false; default: return true; } @@ -10244,15 +10649,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); cr3 = __get_current_cr3_fast(); - if (unlikely(cr3 != vmx->loaded_vmcs->vmcs_host_cr3)) { + if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) { vmcs_writel(HOST_CR3, cr3); - vmx->loaded_vmcs->vmcs_host_cr3 = cr3; + vmx->loaded_vmcs->host_state.cr3 = cr3; } cr4 = cr4_read_shadow(); - if (unlikely(cr4 != vmx->loaded_vmcs->vmcs_host_cr4)) { + if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) { vmcs_writel(HOST_CR4, cr4); - vmx->loaded_vmcs->vmcs_host_cr4 = cr4; + vmx->loaded_vmcs->host_state.cr4 = cr4; } /* When single-stepping over STI and MOV SS, we must clear the @@ -10448,9 +10853,9 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) * The sysexit path does not restore ds/es, so we must set them to * a reasonable value ourselves. * - * We can't defer this to vmx_load_host_state() since that function - * may be executed in interrupt context, which saves and restore segments - * around it, nullifying its effect. + * We can't defer this to vmx_prepare_switch_to_host() since that + * function may be executed in interrupt context, which saves and + * restore segments around it, nullifying its effect. */ loadsegment(ds, __USER_DS); loadsegment(es, __USER_DS); @@ -10511,8 +10916,8 @@ static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs) return; cpu = get_cpu(); - vmx->loaded_vmcs = vmcs; vmx_vcpu_put(vcpu); + vmx->loaded_vmcs = vmcs; vmx_vcpu_load(vcpu, cpu); put_cpu(); } @@ -10652,6 +11057,8 @@ free_vcpu: static int vmx_vm_init(struct kvm *kvm) { + spin_lock_init(&to_kvm_vmx(kvm)->ept_pointer_lock); + if (!ple_gap) kvm->arch.pause_in_guest = true; @@ -10876,11 +11283,11 @@ static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu) if (!valid_ept_address(vcpu, nested_ept_get_cr3(vcpu))) return 1; - kvm_mmu_unload(vcpu); kvm_init_shadow_ept_mmu(vcpu, to_vmx(vcpu)->nested.msrs.ept_caps & VMX_EPT_EXECUTE_ONLY_BIT, - nested_ept_ad_enabled(vcpu)); + nested_ept_ad_enabled(vcpu), + nested_ept_get_cr3(vcpu)); vcpu->arch.mmu.set_cr3 = vmx_set_cr3; vcpu->arch.mmu.get_cr3 = nested_ept_get_cr3; vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault; @@ -10928,9 +11335,9 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu, static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12); -static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, - struct vmcs12 *vmcs12) +static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) { + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); struct vcpu_vmx *vmx = to_vmx(vcpu); struct page *page; u64 hpa; @@ -11171,6 +11578,38 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, return true; } +static void nested_cache_shadow_vmcs12(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) +{ + struct vmcs12 *shadow; + struct page *page; + + if (!nested_cpu_has_shadow_vmcs(vmcs12) || + vmcs12->vmcs_link_pointer == -1ull) + return; + + shadow = get_shadow_vmcs12(vcpu); + page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->vmcs_link_pointer); + + memcpy(shadow, kmap(page), VMCS12_SIZE); + + kunmap(page); + kvm_release_page_clean(page); +} + +static void nested_flush_cached_shadow_vmcs12(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (!nested_cpu_has_shadow_vmcs(vmcs12) || + vmcs12->vmcs_link_pointer == -1ull) + return; + + kvm_write_guest(vmx->vcpu.kvm, vmcs12->vmcs_link_pointer, + get_shadow_vmcs12(vcpu), VMCS12_SIZE); +} + static int nested_vmx_check_apic_access_controls(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { @@ -11228,11 +11667,12 @@ static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu, unsigned long count_field, unsigned long addr_field) { + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); int maxphyaddr; u64 count, addr; - if (vmcs12_read_any(vcpu, count_field, &count) || - vmcs12_read_any(vcpu, addr_field, &addr)) { + if (vmcs12_read_any(vmcs12, count_field, &count) || + vmcs12_read_any(vmcs12, addr_field, &addr)) { WARN_ON(1); return -EINVAL; } @@ -11282,6 +11722,19 @@ static int nested_vmx_check_pml_controls(struct kvm_vcpu *vcpu, return 0; } +static int nested_vmx_check_shadow_vmcs_controls(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) +{ + if (!nested_cpu_has_shadow_vmcs(vmcs12)) + return 0; + + if (!page_address_valid(vcpu, vmcs12->vmread_bitmap) || + !page_address_valid(vcpu, vmcs12->vmwrite_bitmap)) + return -EINVAL; + + return 0; +} + static int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu, struct vmx_msr_entry *e) { @@ -11431,12 +11884,16 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne return 1; } } - - vcpu->arch.cr3 = cr3; - __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); } - kvm_mmu_reset_context(vcpu); + if (!nested_ept) + kvm_mmu_new_cr3(vcpu, cr3, false); + + vcpu->arch.cr3 = cr3; + __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); + + kvm_init_mmu(vcpu, false); + return 0; } @@ -11523,7 +11980,8 @@ static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) * Set host-state according to L0's settings (vmcs12 is irrelevant here) * Some constant fields are set here by vmx_set_constant_host_state(). * Other fields are different per CPU, and will be set later when - * vmx_vcpu_load() is called, and when vmx_save_host_state() is called. + * vmx_vcpu_load() is called, and when vmx_prepare_switch_to_guest() + * is called. */ vmx_set_constant_host_state(vmx); @@ -11595,11 +12053,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base); vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base); - /* - * Not in vmcs02: GUEST_PML_INDEX, HOST_FS_SELECTOR, HOST_GS_SELECTOR, - * HOST_FS_BASE, HOST_GS_BASE. - */ - if (vmx->nested.nested_run_pending && (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) { kvm_set_dr(vcpu, 7, vmcs12->guest_dr7); @@ -11664,6 +12117,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, exec_control |= vmcs12_exec_ctrl; } + /* VMCS shadowing for L2 is emulated for now */ + exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS; + if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) vmcs_write16(GUEST_INTR_STATUS, vmcs12->guest_intr_status); @@ -11676,6 +12132,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, if (exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) vmcs_write64(APIC_ACCESS_ADDR, -1ull); + if (exec_control & SECONDARY_EXEC_ENCLS_EXITING) + vmcs_write64(ENCLS_EXITING_BITMAP, -1ull); + vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); } @@ -11883,6 +12342,9 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) if (nested_vmx_check_pml_controls(vcpu, vmcs12)) return VMXERR_ENTRY_INVALID_CONTROL_FIELD; + if (nested_vmx_check_shadow_vmcs_controls(vcpu, vmcs12)) + return VMXERR_ENTRY_INVALID_CONTROL_FIELD; + if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control, vmx->nested.msrs.procbased_ctls_low, vmx->nested.msrs.procbased_ctls_high) || @@ -11983,6 +12445,33 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) return 0; } +static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) +{ + int r; + struct page *page; + struct vmcs12 *shadow; + + if (vmcs12->vmcs_link_pointer == -1ull) + return 0; + + if (!page_address_valid(vcpu, vmcs12->vmcs_link_pointer)) + return -EINVAL; + + page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->vmcs_link_pointer); + if (is_error_page(page)) + return -EINVAL; + + r = 0; + shadow = kmap(page); + if (shadow->hdr.revision_id != VMCS12_REVISION || + shadow->hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12)) + r = -EINVAL; + kunmap(page); + kvm_release_page_clean(page); + return r; +} + static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, u32 *exit_qual) { @@ -11994,8 +12483,7 @@ static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, !nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4)) return 1; - if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_SHADOW_VMCS) && - vmcs12->vmcs_link_pointer != -1ull) { + if (nested_vmx_check_vmcs_link_ptr(vcpu, vmcs12)) { *exit_qual = ENTRY_FAIL_VMCS_LINK_PTR; return 1; } @@ -12042,12 +12530,17 @@ static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, return 0; } -static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu) +/* + * If exit_qual is NULL, this is being called from state restore (either RSM + * or KVM_SET_NESTED_STATE). Otherwise it's called from vmlaunch/vmresume. + */ +static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual) { struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - u32 exit_qual; - int r; + bool from_vmentry = !!exit_qual; + u32 dummy_exit_qual; + int r = 0; enter_guest_mode(vcpu); @@ -12061,17 +12554,28 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu) vcpu->arch.tsc_offset += vmcs12->tsc_offset; r = EXIT_REASON_INVALID_STATE; - if (prepare_vmcs02(vcpu, vmcs12, &exit_qual)) + if (prepare_vmcs02(vcpu, vmcs12, from_vmentry ? exit_qual : &dummy_exit_qual)) goto fail; - nested_get_vmcs12_pages(vcpu, vmcs12); + if (from_vmentry) { + nested_get_vmcs12_pages(vcpu); - r = EXIT_REASON_MSR_LOAD_FAIL; - exit_qual = nested_vmx_load_msr(vcpu, - vmcs12->vm_entry_msr_load_addr, - vmcs12->vm_entry_msr_load_count); - if (exit_qual) - goto fail; + r = EXIT_REASON_MSR_LOAD_FAIL; + *exit_qual = nested_vmx_load_msr(vcpu, + vmcs12->vm_entry_msr_load_addr, + vmcs12->vm_entry_msr_load_count); + if (*exit_qual) + goto fail; + } else { + /* + * The MMU is not initialized to point at the right entities yet and + * "get pages" would need to read data from the guest (i.e. we will + * need to perform gpa to hpa translation). Request a call + * to nested_get_vmcs12_pages before the next VM-entry. The MSRs + * have already been set at vmentry time and should not be reset. + */ + kvm_make_request(KVM_REQ_GET_VMCS12_PAGES, vcpu); + } /* * Note no nested_vmx_succeed or nested_vmx_fail here. At this point @@ -12086,8 +12590,7 @@ fail: vcpu->arch.tsc_offset -= vmcs12->tsc_offset; leave_guest_mode(vcpu); vmx_switch_vmcs(vcpu, &vmx->vmcs01); - nested_vmx_entry_failure(vcpu, vmcs12, r, exit_qual); - return 1; + return r; } /* @@ -12110,6 +12613,17 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) vmcs12 = get_vmcs12(vcpu); + /* + * Can't VMLAUNCH or VMRESUME a shadow VMCS. Despite the fact + * that there *is* a valid VMCS pointer, RFLAGS.CF is set + * rather than RFLAGS.ZF, and no error number is stored to the + * VM-instruction error field. + */ + if (vmcs12->hdr.shadow_vmcs) { + nested_vmx_failInvalid(vcpu); + goto out; + } + if (enable_shadow_vmcs) copy_shadow_to_vmcs12(vmx); @@ -12164,16 +12678,29 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) */ vmx->nested.nested_run_pending = 1; - ret = enter_vmx_non_root_mode(vcpu); + ret = enter_vmx_non_root_mode(vcpu, &exit_qual); if (ret) { + nested_vmx_entry_failure(vcpu, vmcs12, ret, exit_qual); vmx->nested.nested_run_pending = 0; - return ret; + return 1; } /* Hide L1D cache contents from the nested guest. */ vmx->vcpu.arch.l1tf_flush_l1d = true; /* + * Must happen outside of enter_vmx_non_root_mode() as it will + * also be used as part of restoring nVMX state for + * snapshot restore (migration). + * + * In this flow, it is assumed that vmcs12 cache was + * trasferred as part of captured nVMX state and should + * therefore not be read from guest memory (which may not + * exist on destination host yet). + */ + nested_cache_shadow_vmcs12(vcpu, vmcs12); + + /* * If we're entering a halted L2 vcpu and the L2 vcpu won't be woken * by event injection, halt vcpu. */ @@ -12682,6 +13209,17 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info, exit_qualification); + /* + * Must happen outside of sync_vmcs12() as it will + * also be used to capture vmcs12 cache as part of + * capturing nVMX state for snapshot (migration). + * + * Otherwise, this flush will dirty guest memory at a + * point it is already assumed by user-space to be + * immutable. + */ + nested_flush_cached_shadow_vmcs12(vcpu, vmcs12); + if (nested_vmx_store_msr(vcpu, vmcs12->vm_exit_msr_store_addr, vmcs12->vm_exit_msr_store_count)) nested_vmx_abort(vcpu, VMX_ABORT_SAVE_GUEST_MSR_FAIL); @@ -13256,7 +13794,7 @@ static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase) if (vmx->nested.smm.guest_mode) { vcpu->arch.hflags &= ~HF_SMM_MASK; - ret = enter_vmx_non_root_mode(vcpu); + ret = enter_vmx_non_root_mode(vcpu, NULL); vcpu->arch.hflags |= HF_SMM_MASK; if (ret) return ret; @@ -13271,6 +13809,199 @@ static int enable_smi_window(struct kvm_vcpu *vcpu) return 0; } +static int vmx_get_nested_state(struct kvm_vcpu *vcpu, + struct kvm_nested_state __user *user_kvm_nested_state, + u32 user_data_size) +{ + struct vcpu_vmx *vmx; + struct vmcs12 *vmcs12; + struct kvm_nested_state kvm_state = { + .flags = 0, + .format = 0, + .size = sizeof(kvm_state), + .vmx.vmxon_pa = -1ull, + .vmx.vmcs_pa = -1ull, + }; + + if (!vcpu) + return kvm_state.size + 2 * VMCS12_SIZE; + + vmx = to_vmx(vcpu); + vmcs12 = get_vmcs12(vcpu); + if (nested_vmx_allowed(vcpu) && + (vmx->nested.vmxon || vmx->nested.smm.vmxon)) { + kvm_state.vmx.vmxon_pa = vmx->nested.vmxon_ptr; + kvm_state.vmx.vmcs_pa = vmx->nested.current_vmptr; + + if (vmx->nested.current_vmptr != -1ull) { + kvm_state.size += VMCS12_SIZE; + + if (is_guest_mode(vcpu) && + nested_cpu_has_shadow_vmcs(vmcs12) && + vmcs12->vmcs_link_pointer != -1ull) + kvm_state.size += VMCS12_SIZE; + } + + if (vmx->nested.smm.vmxon) + kvm_state.vmx.smm.flags |= KVM_STATE_NESTED_SMM_VMXON; + + if (vmx->nested.smm.guest_mode) + kvm_state.vmx.smm.flags |= KVM_STATE_NESTED_SMM_GUEST_MODE; + + if (is_guest_mode(vcpu)) { + kvm_state.flags |= KVM_STATE_NESTED_GUEST_MODE; + + if (vmx->nested.nested_run_pending) + kvm_state.flags |= KVM_STATE_NESTED_RUN_PENDING; + } + } + + if (user_data_size < kvm_state.size) + goto out; + + if (copy_to_user(user_kvm_nested_state, &kvm_state, sizeof(kvm_state))) + return -EFAULT; + + if (vmx->nested.current_vmptr == -1ull) + goto out; + + /* + * When running L2, the authoritative vmcs12 state is in the + * vmcs02. When running L1, the authoritative vmcs12 state is + * in the shadow vmcs linked to vmcs01, unless + * sync_shadow_vmcs is set, in which case, the authoritative + * vmcs12 state is in the vmcs12 already. + */ + if (is_guest_mode(vcpu)) + sync_vmcs12(vcpu, vmcs12); + else if (enable_shadow_vmcs && !vmx->nested.sync_shadow_vmcs) + copy_shadow_to_vmcs12(vmx); + + if (copy_to_user(user_kvm_nested_state->data, vmcs12, sizeof(*vmcs12))) + return -EFAULT; + + if (nested_cpu_has_shadow_vmcs(vmcs12) && + vmcs12->vmcs_link_pointer != -1ull) { + if (copy_to_user(user_kvm_nested_state->data + VMCS12_SIZE, + get_shadow_vmcs12(vcpu), sizeof(*vmcs12))) + return -EFAULT; + } + +out: + return kvm_state.size; +} + +static int vmx_set_nested_state(struct kvm_vcpu *vcpu, + struct kvm_nested_state __user *user_kvm_nested_state, + struct kvm_nested_state *kvm_state) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + struct vmcs12 *vmcs12; + u32 exit_qual; + int ret; + + if (kvm_state->format != 0) + return -EINVAL; + + if (!nested_vmx_allowed(vcpu)) + return kvm_state->vmx.vmxon_pa == -1ull ? 0 : -EINVAL; + + if (kvm_state->vmx.vmxon_pa == -1ull) { + if (kvm_state->vmx.smm.flags) + return -EINVAL; + + if (kvm_state->vmx.vmcs_pa != -1ull) + return -EINVAL; + + vmx_leave_nested(vcpu); + return 0; + } + + if (!page_address_valid(vcpu, kvm_state->vmx.vmxon_pa)) + return -EINVAL; + + if (kvm_state->size < sizeof(kvm_state) + sizeof(*vmcs12)) + return -EINVAL; + + if (kvm_state->vmx.vmcs_pa == kvm_state->vmx.vmxon_pa || + !page_address_valid(vcpu, kvm_state->vmx.vmcs_pa)) + return -EINVAL; + + if ((kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) && + (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) + return -EINVAL; + + if (kvm_state->vmx.smm.flags & + ~(KVM_STATE_NESTED_SMM_GUEST_MODE | KVM_STATE_NESTED_SMM_VMXON)) + return -EINVAL; + + if ((kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) && + !(kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON)) + return -EINVAL; + + vmx_leave_nested(vcpu); + if (kvm_state->vmx.vmxon_pa == -1ull) + return 0; + + vmx->nested.vmxon_ptr = kvm_state->vmx.vmxon_pa; + ret = enter_vmx_operation(vcpu); + if (ret) + return ret; + + set_current_vmptr(vmx, kvm_state->vmx.vmcs_pa); + + if (kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON) { + vmx->nested.smm.vmxon = true; + vmx->nested.vmxon = false; + + if (kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) + vmx->nested.smm.guest_mode = true; + } + + vmcs12 = get_vmcs12(vcpu); + if (copy_from_user(vmcs12, user_kvm_nested_state->data, sizeof(*vmcs12))) + return -EFAULT; + + if (vmcs12->hdr.revision_id != VMCS12_REVISION) + return -EINVAL; + + if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) + return 0; + + vmx->nested.nested_run_pending = + !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING); + + if (nested_cpu_has_shadow_vmcs(vmcs12) && + vmcs12->vmcs_link_pointer != -1ull) { + struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu); + if (kvm_state->size < sizeof(kvm_state) + 2 * sizeof(*vmcs12)) + return -EINVAL; + + if (copy_from_user(shadow_vmcs12, + user_kvm_nested_state->data + VMCS12_SIZE, + sizeof(*vmcs12))) + return -EFAULT; + + if (shadow_vmcs12->hdr.revision_id != VMCS12_REVISION || + !shadow_vmcs12->hdr.shadow_vmcs) + return -EINVAL; + } + + if (check_vmentry_prereqs(vcpu, vmcs12) || + check_vmentry_postreqs(vcpu, vmcs12, &exit_qual)) + return -EINVAL; + + if (kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING) + vmx->nested.nested_run_pending = 1; + + vmx->nested.dirty_vmcs12 = true; + ret = enter_vmx_non_root_mode(vcpu, NULL); + if (ret) + return -EINVAL; + + return 0; +} + static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .cpu_has_kvm_support = cpu_has_kvm_support, .disabled_by_bios = vmx_disabled_by_bios, @@ -13290,7 +14021,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .vcpu_free = vmx_free_vcpu, .vcpu_reset = vmx_vcpu_reset, - .prepare_guest_switch = vmx_save_host_state, + .prepare_guest_switch = vmx_prepare_switch_to_guest, .vcpu_load = vmx_vcpu_load, .vcpu_put = vmx_vcpu_put, @@ -13323,6 +14054,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .set_rflags = vmx_set_rflags, .tlb_flush = vmx_flush_tlb, + .tlb_flush_gva = vmx_flush_tlb_gva, .run = vmx_vcpu_run, .handle_exit = vmx_handle_exit, @@ -13405,6 +14137,10 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .setup_mce = vmx_setup_mce, + .get_nested_state = vmx_get_nested_state, + .set_nested_state = vmx_set_nested_state, + .get_vmcs12_pages = nested_get_vmcs12_pages, + .smi_allowed = vmx_smi_allowed, .pre_enter_smm = vmx_pre_enter_smm, .pre_leave_smm = vmx_pre_leave_smm, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a5caa5e5480c..506bd2b4b8bb 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -848,16 +848,21 @@ EXPORT_SYMBOL_GPL(kvm_set_cr4); int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) { + bool skip_tlb_flush = false; #ifdef CONFIG_X86_64 bool pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE); - if (pcid_enabled) - cr3 &= ~CR3_PCID_INVD; + if (pcid_enabled) { + skip_tlb_flush = cr3 & X86_CR3_PCID_NOFLUSH; + cr3 &= ~X86_CR3_PCID_NOFLUSH; + } #endif if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) { - kvm_mmu_sync_roots(vcpu); - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); + if (!skip_tlb_flush) { + kvm_mmu_sync_roots(vcpu); + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); + } return 0; } @@ -868,9 +873,10 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) return 1; + kvm_mmu_new_cr3(vcpu, cr3, skip_tlb_flush); vcpu->arch.cr3 = cr3; __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); - kvm_mmu_new_cr3(vcpu); + return 0; } EXPORT_SYMBOL_GPL(kvm_set_cr3); @@ -2185,10 +2191,11 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info) vcpu->arch.mcg_status = data; break; case MSR_IA32_MCG_CTL: - if (!(mcg_cap & MCG_CTL_P)) + if (!(mcg_cap & MCG_CTL_P) && + (data || !msr_info->host_initiated)) return 1; if (data != 0 && data != ~(u64)0) - return -1; + return 1; vcpu->arch.mcg_ctl = data; break; default: @@ -2576,7 +2583,7 @@ int kvm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) } EXPORT_SYMBOL_GPL(kvm_get_msr); -static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) +static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host) { u64 data; u64 mcg_cap = vcpu->arch.mcg_cap; @@ -2591,7 +2598,7 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) data = vcpu->arch.mcg_cap; break; case MSR_IA32_MCG_CTL: - if (!(mcg_cap & MCG_CTL_P)) + if (!(mcg_cap & MCG_CTL_P) && !host) return 1; data = vcpu->arch.mcg_ctl; break; @@ -2724,7 +2731,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_MCG_CTL: case MSR_IA32_MCG_STATUS: case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1: - return get_msr_mce(vcpu, msr_info->index, &msr_info->data); + return get_msr_mce(vcpu, msr_info->index, &msr_info->data, + msr_info->host_initiated); case MSR_K7_CLK_CTL: /* * Provide expected ramp-up count for K7. All other @@ -2745,7 +2753,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case HV_X64_MSR_TSC_EMULATION_CONTROL: case HV_X64_MSR_TSC_EMULATION_STATUS: return kvm_hv_get_msr_common(vcpu, - msr_info->index, &msr_info->data); + msr_info->index, &msr_info->data, + msr_info->host_initiated); break; case MSR_IA32_BBL_CR_CTL3: /* This legacy MSR exists but isn't fully documented in current @@ -2969,6 +2978,10 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_X2APIC_API: r = KVM_X2APIC_API_VALID_FLAGS; break; + case KVM_CAP_NESTED_STATE: + r = kvm_x86_ops->get_nested_state ? + kvm_x86_ops->get_nested_state(NULL, 0, 0) : 0; + break; default: break; } @@ -3985,6 +3998,56 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); break; } + case KVM_GET_NESTED_STATE: { + struct kvm_nested_state __user *user_kvm_nested_state = argp; + u32 user_data_size; + + r = -EINVAL; + if (!kvm_x86_ops->get_nested_state) + break; + + BUILD_BUG_ON(sizeof(user_data_size) != sizeof(user_kvm_nested_state->size)); + if (get_user(user_data_size, &user_kvm_nested_state->size)) + return -EFAULT; + + r = kvm_x86_ops->get_nested_state(vcpu, user_kvm_nested_state, + user_data_size); + if (r < 0) + return r; + + if (r > user_data_size) { + if (put_user(r, &user_kvm_nested_state->size)) + return -EFAULT; + return -E2BIG; + } + r = 0; + break; + } + case KVM_SET_NESTED_STATE: { + struct kvm_nested_state __user *user_kvm_nested_state = argp; + struct kvm_nested_state kvm_state; + + r = -EINVAL; + if (!kvm_x86_ops->set_nested_state) + break; + + if (copy_from_user(&kvm_state, user_kvm_nested_state, sizeof(kvm_state))) + return -EFAULT; + + if (kvm_state.size < sizeof(kvm_state)) + return -EINVAL; + + if (kvm_state.flags & + ~(KVM_STATE_NESTED_RUN_PENDING | KVM_STATE_NESTED_GUEST_MODE)) + return -EINVAL; + + /* nested_run_pending implies guest_mode. */ + if (kvm_state.flags == KVM_STATE_NESTED_RUN_PENDING) + return -EINVAL; + + r = kvm_x86_ops->set_nested_state(vcpu, user_kvm_nested_state, &kvm_state); + break; + } default: r = -EINVAL; } @@ -6503,20 +6566,22 @@ static void kvm_set_mmio_spte_mask(void) * Set the reserved bits and the present bit of an paging-structure * entry to generate page fault with PFER.RSV = 1. */ - /* Mask the reserved physical address bits. */ - mask = rsvd_bits(maxphyaddr, 51); + + /* + * Mask the uppermost physical address bit, which would be reserved as + * long as the supported physical address width is less than 52. + */ + mask = 1ull << 51; /* Set the present bit. */ mask |= 1ull; -#ifdef CONFIG_X86_64 /* * If reserved bit is not supported, clear the present bit to disable * mmio page fault. */ - if (maxphyaddr == 52) + if (IS_ENABLED(CONFIG_X86_64) && maxphyaddr == 52) mask &= ~1ull; -#endif kvm_mmu_set_mmio_spte_mask(mask, mask); } @@ -6769,6 +6834,9 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) case KVM_HC_CLOCK_PAIRING: ret = kvm_pv_clock_pairing(vcpu, a0, a1); break; + case KVM_HC_SEND_IPI: + ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit); + break; #endif default: ret = -KVM_ENOSYS; @@ -7235,8 +7303,9 @@ static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu) kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap); } -void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, - unsigned long start, unsigned long end) +int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, + unsigned long start, unsigned long end, + bool blockable) { unsigned long apic_address; @@ -7247,6 +7316,8 @@ void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); if (start <= apic_address && apic_address < end) kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD); + + return 0; } void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) @@ -7287,6 +7358,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) bool req_immediate_exit = false; if (kvm_request_pending(vcpu)) { + if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu)) + kvm_x86_ops->get_vmcs12_pages(vcpu); if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) kvm_mmu_unload(vcpu); if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu)) @@ -7302,6 +7375,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) } if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu)) kvm_mmu_sync_roots(vcpu); + if (kvm_check_request(KVM_REQ_LOAD_CR3, vcpu)) + kvm_mmu_load_cr3(vcpu); if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) kvm_vcpu_flush_tlb(vcpu, true); if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) { @@ -8013,6 +8088,10 @@ EXPORT_SYMBOL_GPL(kvm_task_switch); static int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { + if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && + (sregs->cr4 & X86_CR4_OSXSAVE)) + return -EINVAL; + if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) { /* * When EFER.LME and CR0.PG are set, the processor is in @@ -8043,10 +8122,6 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) struct desc_ptr dt; int ret = -EINVAL; - if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && - (sregs->cr4 & X86_CR4_OSXSAVE)) - goto out; - if (kvm_valid_sregs(vcpu, sregs)) goto out; diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 801491e98890..04d038f3b6fa 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -4,12 +4,15 @@ config ZONE_DMA config XTENSA def_bool y + select ARCH_HAS_SYNC_DMA_FOR_CPU + select ARCH_HAS_SYNC_DMA_FOR_DEVICE select ARCH_NO_COHERENT_DMA_MMAP if !MMU select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS select COMMON_CLK + select DMA_NONCOHERENT_OPS select GENERIC_ATOMIC64 select GENERIC_CLOCKEVENTS select GENERIC_IRQ_SHOW @@ -72,9 +75,6 @@ config TRACE_IRQFLAGS_SUPPORT config MMU def_bool n -config VARIANT_IRQ_SWITCH - def_bool n - config HAVE_XTENSA_GPIO32 def_bool n @@ -244,6 +244,23 @@ config INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX If in doubt, say Y. +config MEMMAP_CACHEATTR + hex "Cache attributes for the memory address space" + depends on !MMU + default 0x22222222 + help + These cache attributes are set up for noMMU systems. Each hex digit + specifies cache attributes for the corresponding 512MB memory + region: bits 0..3 -- for addresses 0x00000000..0x1fffffff, + bits 4..7 -- for addresses 0x20000000..0x3fffffff, and so on. + + Cache attribute values are specific for the MMU type, so e.g. + for region protection MMUs: 2 is cache bypass, 4 is WB cached, + 1 is WT cached, f is illegal. For ful MMU: bit 0 makes it executable, + bit 1 makes it writable, bits 2..3 meaning is 0: cache bypass, + 1: WB cache, 2: WT cache, 3: special (c and e are illegal, f is + reserved). + config KSEG_PADDR hex "Physical address of the KSEG mapping" depends on INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX && MMU @@ -413,6 +430,10 @@ config XTENSA_PLATFORM_XTFPGA endchoice +config PLATFORM_NR_IRQS + int + default 3 if XTENSA_PLATFORM_XT2000 + default 0 config XTENSA_CPU_CLOCK int "CPU clock rate [MHz]" @@ -450,6 +471,15 @@ config BUILTIN_DTB string "DTB to build into the kernel image" depends on OF +config PARSE_BOOTPARAM + bool "Parse bootparam block" + default y + help + Parse parameters passed to the kernel from the bootloader. It may + be disabled if the kernel is known to run without the bootloader. + + If unsure, say Y. + config BLK_DEV_SIMDISK tristate "Host file-based simulated block device support" default n @@ -506,25 +536,13 @@ config PLATFORM_WANT_DEFAULT_MEM def_bool n config DEFAULT_MEM_START - hex "Physical address of the default memory area start" - depends on PLATFORM_WANT_DEFAULT_MEM - default 0x00000000 if MMU - default 0x60000000 if !MMU - help - This is the base address of the default memory area. - Default memory area has platform-specific meaning, it may be used - for e.g. early cache initialization. - - If unsure, leave the default value here. - -config DEFAULT_MEM_SIZE - hex "Maximal size of the default memory area" - depends on PLATFORM_WANT_DEFAULT_MEM - default 0x04000000 + hex + prompt "PAGE_OFFSET/PHYS_OFFSET" if !MMU && PLATFORM_WANT_DEFAULT_MEM + default 0x60000000 if PLATFORM_WANT_DEFAULT_MEM + default 0x00000000 help - This is the size of the default memory area. - Default memory area has platform-specific meaning, it may be used - for e.g. early cache initialization. + This is the base address used for both PAGE_OFFSET and PHYS_OFFSET + in noMMU configurations. If unsure, leave the default value here. diff --git a/arch/xtensa/boot/boot-elf/bootstrap.S b/arch/xtensa/boot/boot-elf/bootstrap.S index b6aa85328ac0..29c68426ab56 100644 --- a/arch/xtensa/boot/boot-elf/bootstrap.S +++ b/arch/xtensa/boot/boot-elf/bootstrap.S @@ -15,10 +15,6 @@ */ #include <asm/bootparam.h> -#include <asm/processor.h> -#include <asm/pgtable.h> -#include <asm/page.h> -#include <asm/cacheasm.h> #include <asm/initialize_mmu.h> #include <asm/vectors.h> #include <linux/linkage.h> @@ -33,16 +29,18 @@ _ResetVector: .begin no-absolute-literals .literal_position - .align 4 -RomInitAddr: #if defined(CONFIG_INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX) && \ XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY - .word CONFIG_KERNEL_LOAD_ADDRESS + .literal RomInitAddr, CONFIG_KERNEL_LOAD_ADDRESS #else - .word KERNELOFFSET + .literal RomInitAddr, KERNELOFFSET #endif -RomBootParam: - .word _bootparam +#ifndef CONFIG_PARSE_BOOTPARAM + .literal RomBootParam, 0 +#else + .literal RomBootParam, _bootparam + + .align 4 _bootparam: .short BP_TAG_FIRST .short 4 @@ -50,6 +48,7 @@ _bootparam: .short BP_TAG_LAST .short 0 .long 0 +#endif .align 4 _SetupMMU: diff --git a/arch/xtensa/configs/nommu_kc705_defconfig b/arch/xtensa/configs/nommu_kc705_defconfig index 624f9b3a3878..f3fc4f970ca8 100644 --- a/arch/xtensa/configs/nommu_kc705_defconfig +++ b/arch/xtensa/configs/nommu_kc705_defconfig @@ -33,13 +33,13 @@ CONFIG_XTENSA_VARIANT_CUSTOM_NAME="de212" # CONFIG_XTENSA_VARIANT_MMU is not set CONFIG_XTENSA_UNALIGNED_USER=y CONFIG_PREEMPT=y +CONFIG_MEMMAP_CACHEATTR=0xfff2442f # CONFIG_PCI is not set CONFIG_XTENSA_PLATFORM_XTFPGA=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="earlycon=uart8250,mmio32native,0x9d050020,115200n8 console=ttyS0,115200n8 ip=dhcp root=/dev/nfs rw debug memmap=256M@0x60000000" CONFIG_USE_OF=y CONFIG_BUILTIN_DTB="kc705_nommu" -CONFIG_DEFAULT_MEM_SIZE=0x10000000 CONFIG_BINFMT_FLAT=y CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index e5e1e61c538c..82c756431b49 100644 --- a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild @@ -3,6 +3,7 @@ generic-y += compat.h generic-y += device.h generic-y += div64.h generic-y += dma-contiguous.h +generic-y += dma-mapping.h generic-y += emergency-restart.h generic-y += exec.h generic-y += extable.h diff --git a/arch/xtensa/include/asm/cacheasm.h b/arch/xtensa/include/asm/cacheasm.h index 2041abb10a23..34545ecfdd6b 100644 --- a/arch/xtensa/include/asm/cacheasm.h +++ b/arch/xtensa/include/asm/cacheasm.h @@ -31,16 +31,32 @@ * */ - .macro __loop_cache_all ar at insn size line_width - movi \ar, 0 + .macro __loop_cache_unroll ar at insn size line_width max_immed + + .if (1 << (\line_width)) > (\max_immed) + .set _reps, 1 + .elseif (2 << (\line_width)) > (\max_immed) + .set _reps, 2 + .else + .set _reps, 4 + .endif + + __loopi \ar, \at, \size, (_reps << (\line_width)) + .set _index, 0 + .rep _reps + \insn \ar, _index << (\line_width) + .set _index, _index + 1 + .endr + __endla \ar, \at, _reps << (\line_width) + + .endm + - __loopi \ar, \at, \size, (4 << (\line_width)) - \insn \ar, 0 << (\line_width) - \insn \ar, 1 << (\line_width) - \insn \ar, 2 << (\line_width) - \insn \ar, 3 << (\line_width) - __endla \ar, \at, 4 << (\line_width) + .macro __loop_cache_all ar at insn size line_width max_immed + + movi \ar, 0 + __loop_cache_unroll \ar, \at, \insn, \size, \line_width, \max_immed .endm @@ -57,14 +73,9 @@ .endm - .macro __loop_cache_page ar at insn line_width + .macro __loop_cache_page ar at insn line_width max_immed - __loopi \ar, \at, PAGE_SIZE, 4 << (\line_width) - \insn \ar, 0 << (\line_width) - \insn \ar, 1 << (\line_width) - \insn \ar, 2 << (\line_width) - \insn \ar, 3 << (\line_width) - __endla \ar, \at, 4 << (\line_width) + __loop_cache_unroll \ar, \at, \insn, PAGE_SIZE, \line_width, \max_immed .endm @@ -72,7 +83,8 @@ .macro ___unlock_dcache_all ar at #if XCHAL_DCACHE_LINE_LOCKABLE && XCHAL_DCACHE_SIZE - __loop_cache_all \ar \at diu XCHAL_DCACHE_SIZE XCHAL_DCACHE_LINEWIDTH + __loop_cache_all \ar \at diu XCHAL_DCACHE_SIZE \ + XCHAL_DCACHE_LINEWIDTH 240 #endif .endm @@ -81,7 +93,8 @@ .macro ___unlock_icache_all ar at #if XCHAL_ICACHE_LINE_LOCKABLE && XCHAL_ICACHE_SIZE - __loop_cache_all \ar \at iiu XCHAL_ICACHE_SIZE XCHAL_ICACHE_LINEWIDTH + __loop_cache_all \ar \at iiu XCHAL_ICACHE_SIZE \ + XCHAL_ICACHE_LINEWIDTH 240 #endif .endm @@ -90,7 +103,8 @@ .macro ___flush_invalidate_dcache_all ar at #if XCHAL_DCACHE_SIZE - __loop_cache_all \ar \at diwbi XCHAL_DCACHE_SIZE XCHAL_DCACHE_LINEWIDTH + __loop_cache_all \ar \at diwbi XCHAL_DCACHE_SIZE \ + XCHAL_DCACHE_LINEWIDTH 240 #endif .endm @@ -99,7 +113,8 @@ .macro ___flush_dcache_all ar at #if XCHAL_DCACHE_SIZE - __loop_cache_all \ar \at diwb XCHAL_DCACHE_SIZE XCHAL_DCACHE_LINEWIDTH + __loop_cache_all \ar \at diwb XCHAL_DCACHE_SIZE \ + XCHAL_DCACHE_LINEWIDTH 240 #endif .endm @@ -108,8 +123,8 @@ .macro ___invalidate_dcache_all ar at #if XCHAL_DCACHE_SIZE - __loop_cache_all \ar \at dii __stringify(DCACHE_WAY_SIZE) \ - XCHAL_DCACHE_LINEWIDTH + __loop_cache_all \ar \at dii XCHAL_DCACHE_SIZE \ + XCHAL_DCACHE_LINEWIDTH 1020 #endif .endm @@ -118,8 +133,8 @@ .macro ___invalidate_icache_all ar at #if XCHAL_ICACHE_SIZE - __loop_cache_all \ar \at iii __stringify(ICACHE_WAY_SIZE) \ - XCHAL_ICACHE_LINEWIDTH + __loop_cache_all \ar \at iii XCHAL_ICACHE_SIZE \ + XCHAL_ICACHE_LINEWIDTH 1020 #endif .endm @@ -166,7 +181,7 @@ .macro ___flush_invalidate_dcache_page ar as #if XCHAL_DCACHE_SIZE - __loop_cache_page \ar \as dhwbi XCHAL_DCACHE_LINEWIDTH + __loop_cache_page \ar \as dhwbi XCHAL_DCACHE_LINEWIDTH 1020 #endif .endm @@ -175,7 +190,7 @@ .macro ___flush_dcache_page ar as #if XCHAL_DCACHE_SIZE - __loop_cache_page \ar \as dhwb XCHAL_DCACHE_LINEWIDTH + __loop_cache_page \ar \as dhwb XCHAL_DCACHE_LINEWIDTH 1020 #endif .endm @@ -184,7 +199,7 @@ .macro ___invalidate_dcache_page ar as #if XCHAL_DCACHE_SIZE - __loop_cache_page \ar \as dhi XCHAL_DCACHE_LINEWIDTH + __loop_cache_page \ar \as dhi XCHAL_DCACHE_LINEWIDTH 1020 #endif .endm @@ -193,7 +208,7 @@ .macro ___invalidate_icache_page ar as #if XCHAL_ICACHE_SIZE - __loop_cache_page \ar \as ihi XCHAL_ICACHE_LINEWIDTH + __loop_cache_page \ar \as ihi XCHAL_ICACHE_LINEWIDTH 1020 #endif .endm diff --git a/arch/xtensa/include/asm/dma-mapping.h b/arch/xtensa/include/asm/dma-mapping.h deleted file mode 100644 index 44098800dad7..000000000000 --- a/arch/xtensa/include/asm/dma-mapping.h +++ /dev/null @@ -1,26 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2003 - 2005 Tensilica Inc. - * Copyright (C) 2015 Cadence Design Systems Inc. - */ - -#ifndef _XTENSA_DMA_MAPPING_H -#define _XTENSA_DMA_MAPPING_H - -#include <asm/cache.h> -#include <asm/io.h> - -#include <linux/mm.h> -#include <linux/scatterlist.h> - -extern const struct dma_map_ops xtensa_dma_map_ops; - -static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) -{ - return &xtensa_dma_map_ops; -} - -#endif /* _XTENSA_DMA_MAPPING_H */ diff --git a/arch/xtensa/include/asm/initialize_mmu.h b/arch/xtensa/include/asm/initialize_mmu.h index 42410f253597..10e9852b2fb4 100644 --- a/arch/xtensa/include/asm/initialize_mmu.h +++ b/arch/xtensa/include/asm/initialize_mmu.h @@ -177,36 +177,36 @@ #endif /* defined(CONFIG_MMU) && XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY */ -#if !defined(CONFIG_MMU) && XCHAL_HAVE_TLBS && \ - (XCHAL_DCACHE_SIZE || XCHAL_ICACHE_SIZE) - /* Enable data and instruction cache in the DEFAULT_MEMORY region - * if the processor has DTLB and ITLB. - */ + .endm + + .macro initialize_cacheattr - movi a5, PLATFORM_DEFAULT_MEM_START | XCHAL_SPANNING_WAY +#if !defined(CONFIG_MMU) && XCHAL_HAVE_TLBS +#if CONFIG_MEMMAP_CACHEATTR == 0x22222222 && XCHAL_HAVE_PTP_MMU +#error Default MEMMAP_CACHEATTR of 0x22222222 does not work with full MMU. +#endif + + movi a5, XCHAL_SPANNING_WAY movi a6, ~_PAGE_ATTRIB_MASK - movi a7, CA_WRITEBACK + movi a4, CONFIG_MEMMAP_CACHEATTR movi a8, 0x20000000 - movi a9, PLATFORM_DEFAULT_MEM_SIZE - j 2f 1: - sub a9, a9, a8 -2: -#if XCHAL_DCACHE_SIZE rdtlb1 a3, a5 + xor a3, a3, a4 and a3, a3, a6 - or a3, a3, a7 + xor a3, a3, a4 wdtlb a3, a5 -#endif -#if XCHAL_ICACHE_SIZE - ritlb1 a4, a5 - and a4, a4, a6 - or a4, a4, a7 - witlb a4, a5 -#endif + ritlb1 a3, a5 + xor a3, a3, a4 + and a3, a3, a6 + xor a3, a3, a4 + witlb a3, a5 + add a5, a5, a8 - bltu a8, a9, 1b + srli a4, a4, 4 + bgeu a5, a8, 1b + isync #endif .endm diff --git a/arch/xtensa/include/asm/irq.h b/arch/xtensa/include/asm/irq.h index 19707db966f1..6c6ed23e0c79 100644 --- a/arch/xtensa/include/asm/irq.h +++ b/arch/xtensa/include/asm/irq.h @@ -12,32 +12,17 @@ #define _XTENSA_IRQ_H #include <linux/init.h> -#include <platform/hardware.h> #include <variant/core.h> -#ifdef CONFIG_VARIANT_IRQ_SWITCH -#include <variant/irq.h> +#ifdef CONFIG_PLATFORM_NR_IRQS +# define PLATFORM_NR_IRQS CONFIG_PLATFORM_NR_IRQS #else -static inline void variant_irq_enable(unsigned int irq) { } -static inline void variant_irq_disable(unsigned int irq) { } -#endif - -#ifndef VARIANT_NR_IRQS -# define VARIANT_NR_IRQS 0 -#endif -#ifndef PLATFORM_NR_IRQS # define PLATFORM_NR_IRQS 0 #endif #define XTENSA_NR_IRQS XCHAL_NUM_INTERRUPTS -#define NR_IRQS (XTENSA_NR_IRQS + VARIANT_NR_IRQS + PLATFORM_NR_IRQS + 1) +#define NR_IRQS (XTENSA_NR_IRQS + PLATFORM_NR_IRQS + 1) #define XTENSA_PIC_LINUX_IRQ(hwirq) ((hwirq) + 1) -#if VARIANT_NR_IRQS == 0 -static inline void variant_init_irq(void) { } -#else -void variant_init_irq(void) __init; -#endif - static __inline__ int irq_canonicalize(int irq) { return (irq); diff --git a/arch/xtensa/include/asm/kmem_layout.h b/arch/xtensa/include/asm/kmem_layout.h index 2317c835a4db..9c12babc016c 100644 --- a/arch/xtensa/include/asm/kmem_layout.h +++ b/arch/xtensa/include/asm/kmem_layout.h @@ -63,12 +63,6 @@ #error XCHAL_KSEG_PADDR is not properly aligned to XCHAL_KSEG_ALIGNMENT #endif -#else - -#define XCHAL_KSEG_CACHED_VADDR __XTENSA_UL_CONST(0xd0000000) -#define XCHAL_KSEG_BYPASS_VADDR __XTENSA_UL_CONST(0xd8000000) -#define XCHAL_KSEG_SIZE __XTENSA_UL_CONST(0x08000000) - #endif #ifndef CONFIG_KASAN diff --git a/arch/xtensa/include/asm/page.h b/arch/xtensa/include/asm/page.h index 5d69c11c01b8..09c56cba442e 100644 --- a/arch/xtensa/include/asm/page.h +++ b/arch/xtensa/include/asm/page.h @@ -14,7 +14,6 @@ #include <asm/processor.h> #include <asm/types.h> #include <asm/cache.h> -#include <platform/hardware.h> #include <asm/kmem_layout.h> /* @@ -31,8 +30,8 @@ #define MAX_LOW_PFN (PHYS_PFN(XCHAL_KSEG_PADDR) + \ PHYS_PFN(XCHAL_KSEG_SIZE)) #else -#define PAGE_OFFSET PLATFORM_DEFAULT_MEM_START -#define PHYS_OFFSET PLATFORM_DEFAULT_MEM_START +#define PAGE_OFFSET _AC(CONFIG_DEFAULT_MEM_START, UL) +#define PHYS_OFFSET _AC(CONFIG_DEFAULT_MEM_START, UL) #define MAX_LOW_PFN PHYS_PFN(0xfffffffful) #endif diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h index 38802259978f..29cfe421cf41 100644 --- a/arch/xtensa/include/asm/pgtable.h +++ b/arch/xtensa/include/asm/pgtable.h @@ -66,6 +66,7 @@ #define FIRST_USER_ADDRESS 0UL #define FIRST_USER_PGD_NR (FIRST_USER_ADDRESS >> PGDIR_SHIFT) +#ifdef CONFIG_MMU /* * Virtual memory area. We keep a distance to other memory regions to be * on the safe side. We also use this area for cache aliasing. @@ -80,6 +81,13 @@ #define TLBTEMP_SIZE ICACHE_WAY_SIZE #endif +#else + +#define VMALLOC_START __XTENSA_UL_CONST(0) +#define VMALLOC_END __XTENSA_UL_CONST(0xffffffff) + +#endif + /* * For the Xtensa architecture, the PTE layout is as follows: * diff --git a/arch/xtensa/include/asm/platform.h b/arch/xtensa/include/asm/platform.h index f8fbef67bc5f..560483356a06 100644 --- a/arch/xtensa/include/asm/platform.h +++ b/arch/xtensa/include/asm/platform.h @@ -75,4 +75,31 @@ extern void platform_calibrate_ccount (void); */ void cpu_reset(void) __attribute__((noreturn)); +/* + * Memory caching is platform-dependent in noMMU xtensa configurations. + * The following set of functions should be implemented in platform code + * in order to enable coherent DMA memory operations when CONFIG_MMU is not + * enabled. Default implementations do nothing and issue a warning. + */ + +/* + * Check whether p points to a cached memory. + */ +bool platform_vaddr_cached(const void *p); + +/* + * Check whether p points to an uncached memory. + */ +bool platform_vaddr_uncached(const void *p); + +/* + * Return pointer to an uncached view of the cached sddress p. + */ +void *platform_vaddr_to_uncached(void *p); + +/* + * Return pointer to a cached view of the uncached sddress p. + */ +void *platform_vaddr_to_cached(void *p); + #endif /* _XTENSA_PLATFORM_H */ diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h index 5b0027d4ecc0..e4ccb88b7996 100644 --- a/arch/xtensa/include/asm/processor.h +++ b/arch/xtensa/include/asm/processor.h @@ -11,7 +11,6 @@ #define _XTENSA_PROCESSOR_H #include <variant/core.h> -#include <platform/hardware.h> #include <linux/compiler.h> #include <asm/ptrace.h> diff --git a/arch/xtensa/include/asm/vectors.h b/arch/xtensa/include/asm/vectors.h index 65d3da9db19b..7111280c8842 100644 --- a/arch/xtensa/include/asm/vectors.h +++ b/arch/xtensa/include/asm/vectors.h @@ -19,7 +19,6 @@ #define _XTENSA_VECTORS_H #include <variant/core.h> -#include <platform/hardware.h> #include <asm/kmem_layout.h> #if XCHAL_HAVE_PTP_MMU diff --git a/arch/xtensa/kernel/head.S b/arch/xtensa/kernel/head.S index 9c4e9433e536..2f76118ecf62 100644 --- a/arch/xtensa/kernel/head.S +++ b/arch/xtensa/kernel/head.S @@ -181,6 +181,8 @@ ENTRY(_startup) isync + initialize_cacheattr + #ifdef CONFIG_HAVE_SMP movi a2, CCON # MX External Register to Configure Cache movi a3, 1 diff --git a/arch/xtensa/kernel/irq.c b/arch/xtensa/kernel/irq.c index 18e4ef34ac45..a48bf2d10ac2 100644 --- a/arch/xtensa/kernel/irq.c +++ b/arch/xtensa/kernel/irq.c @@ -158,7 +158,6 @@ void __init init_IRQ(void) #ifdef CONFIG_SMP ipi_init(); #endif - variant_init_irq(); } #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c index a02dc563d290..1fc138b6bc0a 100644 --- a/arch/xtensa/kernel/pci-dma.c +++ b/arch/xtensa/kernel/pci-dma.c @@ -16,26 +16,25 @@ */ #include <linux/dma-contiguous.h> +#include <linux/dma-noncoherent.h> #include <linux/dma-direct.h> #include <linux/gfp.h> #include <linux/highmem.h> #include <linux/mm.h> -#include <linux/module.h> -#include <linux/pci.h> -#include <linux/string.h> #include <linux/types.h> #include <asm/cacheflush.h> #include <asm/io.h> +#include <asm/platform.h> -static void do_cache_op(dma_addr_t dma_handle, size_t size, +static void do_cache_op(phys_addr_t paddr, size_t size, void (*fn)(unsigned long, unsigned long)) { - unsigned long off = dma_handle & (PAGE_SIZE - 1); - unsigned long pfn = PFN_DOWN(dma_handle); + unsigned long off = paddr & (PAGE_SIZE - 1); + unsigned long pfn = PFN_DOWN(paddr); struct page *page = pfn_to_page(pfn); if (!PageHighMem(page)) - fn((unsigned long)bus_to_virt(dma_handle), size); + fn((unsigned long)phys_to_virt(paddr), size); else while (size > 0) { size_t sz = min_t(size_t, size, PAGE_SIZE - off); @@ -49,14 +48,13 @@ static void do_cache_op(dma_addr_t dma_handle, size_t size, } } -static void xtensa_sync_single_for_cpu(struct device *dev, - dma_addr_t dma_handle, size_t size, - enum dma_data_direction dir) +void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, + size_t size, enum dma_data_direction dir) { switch (dir) { case DMA_BIDIRECTIONAL: case DMA_FROM_DEVICE: - do_cache_op(dma_handle, size, __invalidate_dcache_range); + do_cache_op(paddr, size, __invalidate_dcache_range); break; case DMA_NONE: @@ -68,15 +66,14 @@ static void xtensa_sync_single_for_cpu(struct device *dev, } } -static void xtensa_sync_single_for_device(struct device *dev, - dma_addr_t dma_handle, size_t size, - enum dma_data_direction dir) +void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr, + size_t size, enum dma_data_direction dir) { switch (dir) { case DMA_BIDIRECTIONAL: case DMA_TO_DEVICE: if (XCHAL_DCACHE_IS_WRITEBACK) - do_cache_op(dma_handle, size, __flush_dcache_range); + do_cache_op(paddr, size, __flush_dcache_range); break; case DMA_NONE: @@ -88,43 +85,66 @@ static void xtensa_sync_single_for_device(struct device *dev, } } -static void xtensa_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sg, int nents, - enum dma_data_direction dir) +#ifdef CONFIG_MMU +bool platform_vaddr_cached(const void *p) { - struct scatterlist *s; - int i; + unsigned long addr = (unsigned long)p; - for_each_sg(sg, s, nents, i) { - xtensa_sync_single_for_cpu(dev, sg_dma_address(s), - sg_dma_len(s), dir); - } + return addr >= XCHAL_KSEG_CACHED_VADDR && + addr - XCHAL_KSEG_CACHED_VADDR < XCHAL_KSEG_SIZE; } -static void xtensa_sync_sg_for_device(struct device *dev, - struct scatterlist *sg, int nents, - enum dma_data_direction dir) +bool platform_vaddr_uncached(const void *p) { - struct scatterlist *s; - int i; + unsigned long addr = (unsigned long)p; - for_each_sg(sg, s, nents, i) { - xtensa_sync_single_for_device(dev, sg_dma_address(s), - sg_dma_len(s), dir); - } + return addr >= XCHAL_KSEG_BYPASS_VADDR && + addr - XCHAL_KSEG_BYPASS_VADDR < XCHAL_KSEG_SIZE; +} + +void *platform_vaddr_to_uncached(void *p) +{ + return p + XCHAL_KSEG_BYPASS_VADDR - XCHAL_KSEG_CACHED_VADDR; +} + +void *platform_vaddr_to_cached(void *p) +{ + return p + XCHAL_KSEG_CACHED_VADDR - XCHAL_KSEG_BYPASS_VADDR; +} +#else +bool __attribute__((weak)) platform_vaddr_cached(const void *p) +{ + WARN_ONCE(1, "Default %s implementation is used\n", __func__); + return true; +} + +bool __attribute__((weak)) platform_vaddr_uncached(const void *p) +{ + WARN_ONCE(1, "Default %s implementation is used\n", __func__); + return false; +} + +void __attribute__((weak)) *platform_vaddr_to_uncached(void *p) +{ + WARN_ONCE(1, "Default %s implementation is used\n", __func__); + return p; +} + +void __attribute__((weak)) *platform_vaddr_to_cached(void *p) +{ + WARN_ONCE(1, "Default %s implementation is used\n", __func__); + return p; } +#endif /* * Note: We assume that the full memory space is always mapped to 'kseg' * Otherwise we have to use page attributes (not implemented). */ -static void *xtensa_dma_alloc(struct device *dev, size_t size, - dma_addr_t *handle, gfp_t flag, - unsigned long attrs) +void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, + gfp_t flag, unsigned long attrs) { - unsigned long ret; - unsigned long uncached; unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; struct page *page = NULL; @@ -147,6 +167,10 @@ static void *xtensa_dma_alloc(struct device *dev, size_t size, *handle = phys_to_dma(dev, page_to_phys(page)); + if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) { + return page; + } + #ifdef CONFIG_MMU if (PageHighMem(page)) { void *p; @@ -161,27 +185,21 @@ static void *xtensa_dma_alloc(struct device *dev, size_t size, return p; } #endif - ret = (unsigned long)page_address(page); - BUG_ON(ret < XCHAL_KSEG_CACHED_VADDR || - ret > XCHAL_KSEG_CACHED_VADDR + XCHAL_KSEG_SIZE - 1); - - uncached = ret + XCHAL_KSEG_BYPASS_VADDR - XCHAL_KSEG_CACHED_VADDR; - __invalidate_dcache_range(ret, size); - - return (void *)uncached; + BUG_ON(!platform_vaddr_cached(page_address(page))); + __invalidate_dcache_range((unsigned long)page_address(page), size); + return platform_vaddr_to_uncached(page_address(page)); } -static void xtensa_dma_free(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle, unsigned long attrs) +void arch_dma_free(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_handle, unsigned long attrs) { unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; - unsigned long addr = (unsigned long)vaddr; struct page *page; - if (addr >= XCHAL_KSEG_BYPASS_VADDR && - addr - XCHAL_KSEG_BYPASS_VADDR < XCHAL_KSEG_SIZE) { - addr += XCHAL_KSEG_CACHED_VADDR - XCHAL_KSEG_BYPASS_VADDR; - page = virt_to_page(addr); + if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) { + page = vaddr; + } else if (platform_vaddr_uncached(vaddr)) { + page = virt_to_page(platform_vaddr_to_cached(vaddr)); } else { #ifdef CONFIG_MMU dma_common_free_remap(vaddr, size, VM_MAP); @@ -192,72 +210,3 @@ static void xtensa_dma_free(struct device *dev, size_t size, void *vaddr, if (!dma_release_from_contiguous(dev, page, count)) __free_pages(page, get_order(size)); } - -static dma_addr_t xtensa_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - unsigned long attrs) -{ - dma_addr_t dma_handle = page_to_phys(page) + offset; - - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - xtensa_sync_single_for_device(dev, dma_handle, size, dir); - - return dma_handle; -} - -static void xtensa_unmap_page(struct device *dev, dma_addr_t dma_handle, - size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - xtensa_sync_single_for_cpu(dev, dma_handle, size, dir); -} - -static int xtensa_map_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, - unsigned long attrs) -{ - struct scatterlist *s; - int i; - - for_each_sg(sg, s, nents, i) { - s->dma_address = xtensa_map_page(dev, sg_page(s), s->offset, - s->length, dir, attrs); - } - return nents; -} - -static void xtensa_unmap_sg(struct device *dev, - struct scatterlist *sg, int nents, - enum dma_data_direction dir, - unsigned long attrs) -{ - struct scatterlist *s; - int i; - - for_each_sg(sg, s, nents, i) { - xtensa_unmap_page(dev, sg_dma_address(s), - sg_dma_len(s), dir, attrs); - } -} - -int xtensa_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return 0; -} - -const struct dma_map_ops xtensa_dma_map_ops = { - .alloc = xtensa_dma_alloc, - .free = xtensa_dma_free, - .map_page = xtensa_map_page, - .unmap_page = xtensa_unmap_page, - .map_sg = xtensa_map_sg, - .unmap_sg = xtensa_unmap_sg, - .sync_single_for_cpu = xtensa_sync_single_for_cpu, - .sync_single_for_device = xtensa_sync_single_for_device, - .sync_sg_for_cpu = xtensa_sync_sg_for_cpu, - .sync_sg_for_device = xtensa_sync_sg_for_device, - .mapping_error = xtensa_dma_mapping_error, -}; -EXPORT_SYMBOL(xtensa_dma_map_ops); diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index 686a27444bba..351283b60df6 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -47,8 +47,6 @@ #include <asm/smp.h> #include <asm/sysmem.h> -#include <platform/hardware.h> - #if defined(CONFIG_VGA_CONSOLE) || defined(CONFIG_DUMMY_CONSOLE) struct screen_info screen_info = { .orig_x = 0, @@ -81,6 +79,7 @@ static char __initdata command_line[COMMAND_LINE_SIZE]; static char default_command_line[COMMAND_LINE_SIZE] __initdata = CONFIG_CMDLINE; #endif +#ifdef CONFIG_PARSE_BOOTPARAM /* * Boot parameter parsing. * @@ -178,6 +177,13 @@ static int __init parse_bootparam(const bp_tag_t* tag) return 0; } +#else +static int __init parse_bootparam(const bp_tag_t *tag) +{ + pr_info("Ignoring boot parameters at %p\n", tag); + return 0; +} +#endif #ifdef CONFIG_OF diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S index 70b731edc7b8..a1c3edb8ad56 100644 --- a/arch/xtensa/kernel/vmlinux.lds.S +++ b/arch/xtensa/kernel/vmlinux.lds.S @@ -20,7 +20,7 @@ #include <asm/vectors.h> #include <variant/core.h> -#include <platform/hardware.h> + OUTPUT_ARCH(xtensa) ENTRY(_start) diff --git a/arch/xtensa/platforms/iss/include/platform/hardware.h b/arch/xtensa/platforms/iss/include/platform/hardware.h deleted file mode 100644 index 6930c12adc16..000000000000 --- a/arch/xtensa/platforms/iss/include/platform/hardware.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * include/asm-xtensa/platform-iss/hardware.h - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2001 Tensilica Inc. - */ - -/* - * This file contains the default configuration of ISS. - */ - -#ifndef _XTENSA_PLATFORM_ISS_HARDWARE_H -#define _XTENSA_PLATFORM_ISS_HARDWARE_H - -/* - * Memory configuration. - */ - -#define PLATFORM_DEFAULT_MEM_START 0x00000000 -#define PLATFORM_DEFAULT_MEM_SIZE 0x08000000 - -/* - * Interrupt configuration. - */ - -#endif /* _XTENSA_PLATFORM_ISS_HARDWARE_H */ diff --git a/arch/xtensa/platforms/xt2000/include/platform/hardware.h b/arch/xtensa/platforms/xt2000/include/platform/hardware.h index 886ef156ded3..8e5e0d6a81ec 100644 --- a/arch/xtensa/platforms/xt2000/include/platform/hardware.h +++ b/arch/xtensa/platforms/xt2000/include/platform/hardware.h @@ -17,17 +17,6 @@ #include <variant/core.h> -/* - * Memory configuration. - */ - -#define PLATFORM_DEFAULT_MEM_START 0x00000000 -#define PLATFORM_DEFAULT_MEM_SIZE 0x08000000 - -/* - * Number of platform IRQs - */ -#define PLATFORM_NR_IRQS 3 /* * On-board components. */ diff --git a/arch/xtensa/platforms/xtfpga/include/platform/hardware.h b/arch/xtensa/platforms/xtfpga/include/platform/hardware.h index 1fda7e20dfcb..30d9cb6cf168 100644 --- a/arch/xtensa/platforms/xtfpga/include/platform/hardware.h +++ b/arch/xtensa/platforms/xtfpga/include/platform/hardware.h @@ -17,15 +17,6 @@ #ifndef __XTENSA_XTAVNET_HARDWARE_H #define __XTENSA_XTAVNET_HARDWARE_H -/* Memory configuration. */ - -#define PLATFORM_DEFAULT_MEM_START __XTENSA_UL(CONFIG_DEFAULT_MEM_START) -#define PLATFORM_DEFAULT_MEM_SIZE __XTENSA_UL(CONFIG_DEFAULT_MEM_SIZE) - -/* Interrupt configuration. */ - -#define PLATFORM_NR_IRQS 0 - /* Default assignment of LX60 devices to external interrupts. */ #ifdef CONFIG_XTENSA_MX diff --git a/arch/xtensa/variants/test_kc705_be/include/variant/core.h b/arch/xtensa/variants/test_kc705_be/include/variant/core.h new file mode 100644 index 000000000000..a4ebdf7197d7 --- /dev/null +++ b/arch/xtensa/variants/test_kc705_be/include/variant/core.h @@ -0,0 +1,575 @@ +/* + * xtensa/config/core-isa.h -- HAL definitions that are dependent on Xtensa + * processor CORE configuration + * + * See <xtensa/config/core.h>, which includes this file, for more details. + */ + +/* Xtensa processor core configuration information. + + Copyright (c) 1999-2015 Tensilica Inc. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef _XTENSA_CORE_CONFIGURATION_H +#define _XTENSA_CORE_CONFIGURATION_H + + +/**************************************************************************** + Parameters Useful for Any Code, USER or PRIVILEGED + ****************************************************************************/ + +/* + * Note: Macros of the form XCHAL_HAVE_*** have a value of 1 if the option is + * configured, and a value of 0 otherwise. These macros are always defined. + */ + + +/*---------------------------------------------------------------------- + ISA + ----------------------------------------------------------------------*/ + +#define XCHAL_HAVE_BE 1 /* big-endian byte ordering */ +#define XCHAL_HAVE_WINDOWED 1 /* windowed registers option */ +#define XCHAL_NUM_AREGS 32 /* num of physical addr regs */ +#define XCHAL_NUM_AREGS_LOG2 5 /* log2(XCHAL_NUM_AREGS) */ +#define XCHAL_MAX_INSTRUCTION_SIZE 8 /* max instr bytes (3..8) */ +#define XCHAL_HAVE_DEBUG 1 /* debug option */ +#define XCHAL_HAVE_DENSITY 1 /* 16-bit instructions */ +#define XCHAL_HAVE_LOOPS 1 /* zero-overhead loops */ +#define XCHAL_LOOP_BUFFER_SIZE 0 /* zero-ov. loop instr buffer size */ +#define XCHAL_HAVE_NSA 1 /* NSA/NSAU instructions */ +#define XCHAL_HAVE_MINMAX 1 /* MIN/MAX instructions */ +#define XCHAL_HAVE_SEXT 1 /* SEXT instruction */ +#define XCHAL_HAVE_DEPBITS 0 /* DEPBITS instruction */ +#define XCHAL_HAVE_CLAMPS 1 /* CLAMPS instruction */ +#define XCHAL_HAVE_MUL16 1 /* MUL16S/MUL16U instructions */ +#define XCHAL_HAVE_MUL32 1 /* MULL instruction */ +#define XCHAL_HAVE_MUL32_HIGH 1 /* MULUH/MULSH instructions */ +#define XCHAL_HAVE_DIV32 1 /* QUOS/QUOU/REMS/REMU instructions */ +#define XCHAL_HAVE_L32R 1 /* L32R instruction */ +#define XCHAL_HAVE_ABSOLUTE_LITERALS 0 /* non-PC-rel (extended) L32R */ +#define XCHAL_HAVE_CONST16 0 /* CONST16 instruction */ +#define XCHAL_HAVE_ADDX 1 /* ADDX#/SUBX# instructions */ +#define XCHAL_HAVE_WIDE_BRANCHES 0 /* B*.W18 or B*.W15 instr's */ +#define XCHAL_HAVE_PREDICTED_BRANCHES 0 /* B[EQ/EQZ/NE/NEZ]T instr's */ +#define XCHAL_HAVE_CALL4AND12 1 /* (obsolete option) */ +#define XCHAL_HAVE_ABS 1 /* ABS instruction */ +/*#define XCHAL_HAVE_POPC 0*/ /* POPC instruction */ +/*#define XCHAL_HAVE_CRC 0*/ /* CRC instruction */ +#define XCHAL_HAVE_RELEASE_SYNC 1 /* L32AI/S32RI instructions */ +#define XCHAL_HAVE_S32C1I 1 /* S32C1I instruction */ +#define XCHAL_HAVE_SPECULATION 0 /* speculation */ +#define XCHAL_HAVE_FULL_RESET 1 /* all regs/state reset */ +#define XCHAL_NUM_CONTEXTS 1 /* */ +#define XCHAL_NUM_MISC_REGS 2 /* num of scratch regs (0..4) */ +#define XCHAL_HAVE_TAP_MASTER 0 /* JTAG TAP control instr's */ +#define XCHAL_HAVE_PRID 1 /* processor ID register */ +#define XCHAL_HAVE_EXTERN_REGS 1 /* WER/RER instructions */ +#define XCHAL_HAVE_MX 0 /* MX core (Tensilica internal) */ +#define XCHAL_HAVE_MP_INTERRUPTS 0 /* interrupt distributor port */ +#define XCHAL_HAVE_MP_RUNSTALL 0 /* core RunStall control port */ +#define XCHAL_HAVE_PSO 0 /* Power Shut-Off */ +#define XCHAL_HAVE_PSO_CDM 0 /* core/debug/mem pwr domains */ +#define XCHAL_HAVE_PSO_FULL_RETENTION 0 /* all regs preserved on PSO */ +#define XCHAL_HAVE_THREADPTR 1 /* THREADPTR register */ +#define XCHAL_HAVE_BOOLEANS 1 /* boolean registers */ +#define XCHAL_HAVE_CP 1 /* CPENABLE reg (coprocessor) */ +#define XCHAL_CP_MAXCFG 8 /* max allowed cp id plus one */ +#define XCHAL_HAVE_MAC16 1 /* MAC16 package */ + +#define XCHAL_HAVE_FUSION 0 /* Fusion*/ +#define XCHAL_HAVE_FUSION_FP 0 /* Fusion FP option */ +#define XCHAL_HAVE_FUSION_LOW_POWER 0 /* Fusion Low Power option */ +#define XCHAL_HAVE_FUSION_AES 0 /* Fusion BLE/Wifi AES-128 CCM option */ +#define XCHAL_HAVE_FUSION_CONVENC 0 /* Fusion Conv Encode option */ +#define XCHAL_HAVE_FUSION_LFSR_CRC 0 /* Fusion LFSR-CRC option */ +#define XCHAL_HAVE_FUSION_BITOPS 0 /* Fusion Bit Operations Support option */ +#define XCHAL_HAVE_FUSION_AVS 0 /* Fusion AVS option */ +#define XCHAL_HAVE_FUSION_16BIT_BASEBAND 0 /* Fusion 16-bit Baseband option */ +#define XCHAL_HAVE_HIFIPRO 0 /* HiFiPro Audio Engine pkg */ +#define XCHAL_HAVE_HIFI4 0 /* HiFi4 Audio Engine pkg */ +#define XCHAL_HAVE_HIFI4_VFPU 0 /* HiFi4 Audio Engine VFPU option */ +#define XCHAL_HAVE_HIFI3 0 /* HiFi3 Audio Engine pkg */ +#define XCHAL_HAVE_HIFI3_VFPU 0 /* HiFi3 Audio Engine VFPU option */ +#define XCHAL_HAVE_HIFI2 1 /* HiFi2 Audio Engine pkg */ +#define XCHAL_HAVE_HIFI2_MUL32X24 1 /* HiFi2 and 32x24 MACs */ +#define XCHAL_HAVE_HIFI2EP 1 /* HiFi2EP */ +#define XCHAL_HAVE_HIFI_MINI 0 + + +#define XCHAL_HAVE_VECTORFPU2005 0 /* vector or user floating-point pkg */ +#define XCHAL_HAVE_USER_DPFPU 0 /* user DP floating-point pkg */ +#define XCHAL_HAVE_USER_SPFPU 0 /* user DP floating-point pkg */ +#define XCHAL_HAVE_FP 0 /* single prec floating point */ +#define XCHAL_HAVE_FP_DIV 0 /* FP with DIV instructions */ +#define XCHAL_HAVE_FP_RECIP 0 /* FP with RECIP instructions */ +#define XCHAL_HAVE_FP_SQRT 0 /* FP with SQRT instructions */ +#define XCHAL_HAVE_FP_RSQRT 0 /* FP with RSQRT instructions */ +#define XCHAL_HAVE_DFP 0 /* double precision FP pkg */ +#define XCHAL_HAVE_DFP_DIV 0 /* DFP with DIV instructions */ +#define XCHAL_HAVE_DFP_RECIP 0 /* DFP with RECIP instructions*/ +#define XCHAL_HAVE_DFP_SQRT 0 /* DFP with SQRT instructions */ +#define XCHAL_HAVE_DFP_RSQRT 0 /* DFP with RSQRT instructions*/ +#define XCHAL_HAVE_DFP_ACCEL 0 /* double precision FP acceleration pkg */ +#define XCHAL_HAVE_DFP_accel XCHAL_HAVE_DFP_ACCEL /* for backward compatibility */ + +#define XCHAL_HAVE_DFPU_SINGLE_ONLY 0 /* DFPU Coprocessor, single precision only */ +#define XCHAL_HAVE_DFPU_SINGLE_DOUBLE 0 /* DFPU Coprocessor, single and double precision */ +#define XCHAL_HAVE_VECTRA1 0 /* Vectra I pkg */ +#define XCHAL_HAVE_VECTRALX 0 /* Vectra LX pkg */ +#define XCHAL_HAVE_PDX4 0 /* PDX4 */ +#define XCHAL_HAVE_CONNXD2 0 /* ConnX D2 pkg */ +#define XCHAL_HAVE_CONNXD2_DUALLSFLIX 0 /* ConnX D2 & Dual LoadStore Flix */ +#define XCHAL_HAVE_BBE16 0 /* ConnX BBE16 pkg */ +#define XCHAL_HAVE_BBE16_RSQRT 0 /* BBE16 & vector recip sqrt */ +#define XCHAL_HAVE_BBE16_VECDIV 0 /* BBE16 & vector divide */ +#define XCHAL_HAVE_BBE16_DESPREAD 0 /* BBE16 & despread */ +#define XCHAL_HAVE_BBENEP 0 /* ConnX BBENEP pkgs */ +#define XCHAL_HAVE_BSP3 0 /* ConnX BSP3 pkg */ +#define XCHAL_HAVE_BSP3_TRANSPOSE 0 /* BSP3 & transpose32x32 */ +#define XCHAL_HAVE_SSP16 0 /* ConnX SSP16 pkg */ +#define XCHAL_HAVE_SSP16_VITERBI 0 /* SSP16 & viterbi */ +#define XCHAL_HAVE_TURBO16 0 /* ConnX Turbo16 pkg */ +#define XCHAL_HAVE_BBP16 0 /* ConnX BBP16 pkg */ +#define XCHAL_HAVE_FLIX3 0 /* basic 3-way FLIX option */ +#define XCHAL_HAVE_GRIVPEP 0 /* GRIVPEP is General Release of IVPEP */ +#define XCHAL_HAVE_GRIVPEP_HISTOGRAM 0 /* Histogram option on GRIVPEP */ + + +/*---------------------------------------------------------------------- + MISC + ----------------------------------------------------------------------*/ + +#define XCHAL_NUM_LOADSTORE_UNITS 1 /* load/store units */ +#define XCHAL_NUM_WRITEBUFFER_ENTRIES 8 /* size of write buffer */ +#define XCHAL_INST_FETCH_WIDTH 8 /* instr-fetch width in bytes */ +#define XCHAL_DATA_WIDTH 8 /* data width in bytes */ +#define XCHAL_DATA_PIPE_DELAY 1 /* d-side pipeline delay + (1 = 5-stage, 2 = 7-stage) */ +#define XCHAL_CLOCK_GATING_GLOBAL 0 /* global clock gating */ +#define XCHAL_CLOCK_GATING_FUNCUNIT 0 /* funct. unit clock gating */ +/* In T1050, applies to selected core load and store instructions (see ISA): */ +#define XCHAL_UNALIGNED_LOAD_EXCEPTION 1 /* unaligned loads cause exc. */ +#define XCHAL_UNALIGNED_STORE_EXCEPTION 1 /* unaligned stores cause exc.*/ +#define XCHAL_UNALIGNED_LOAD_HW 0 /* unaligned loads work in hw */ +#define XCHAL_UNALIGNED_STORE_HW 0 /* unaligned stores work in hw*/ + +#define XCHAL_SW_VERSION 1100002 /* sw version of this header */ + +#define XCHAL_CORE_ID "test_kc705_be" /* alphanum core name + (CoreID) set in the Xtensa + Processor Generator */ + +#define XCHAL_BUILD_UNIQUE_ID 0x00058D8C /* 22-bit sw build ID */ + +/* + * These definitions describe the hardware targeted by this software. + */ +#define XCHAL_HW_CONFIGID0 0xC1B3FFFF /* ConfigID hi 32 bits*/ +#define XCHAL_HW_CONFIGID1 0x1C858D8C /* ConfigID lo 32 bits*/ +#define XCHAL_HW_VERSION_NAME "LX6.0.2" /* full version name */ +#define XCHAL_HW_VERSION_MAJOR 2600 /* major ver# of targeted hw */ +#define XCHAL_HW_VERSION_MINOR 2 /* minor ver# of targeted hw */ +#define XCHAL_HW_VERSION 260002 /* major*100+minor */ +#define XCHAL_HW_REL_LX6 1 +#define XCHAL_HW_REL_LX6_0 1 +#define XCHAL_HW_REL_LX6_0_2 1 +#define XCHAL_HW_CONFIGID_RELIABLE 1 +/* If software targets a *range* of hardware versions, these are the bounds: */ +#define XCHAL_HW_MIN_VERSION_MAJOR 2600 /* major v of earliest tgt hw */ +#define XCHAL_HW_MIN_VERSION_MINOR 2 /* minor v of earliest tgt hw */ +#define XCHAL_HW_MIN_VERSION 260002 /* earliest targeted hw */ +#define XCHAL_HW_MAX_VERSION_MAJOR 2600 /* major v of latest tgt hw */ +#define XCHAL_HW_MAX_VERSION_MINOR 2 /* minor v of latest tgt hw */ +#define XCHAL_HW_MAX_VERSION 260002 /* latest targeted hw */ + + +/*---------------------------------------------------------------------- + CACHE + ----------------------------------------------------------------------*/ + +#define XCHAL_ICACHE_LINESIZE 32 /* I-cache line size in bytes */ +#define XCHAL_DCACHE_LINESIZE 32 /* D-cache line size in bytes */ +#define XCHAL_ICACHE_LINEWIDTH 5 /* log2(I line size in bytes) */ +#define XCHAL_DCACHE_LINEWIDTH 5 /* log2(D line size in bytes) */ + +#define XCHAL_ICACHE_SIZE 16384 /* I-cache size in bytes or 0 */ +#define XCHAL_DCACHE_SIZE 16384 /* D-cache size in bytes or 0 */ + +#define XCHAL_DCACHE_IS_WRITEBACK 1 /* writeback feature */ +#define XCHAL_DCACHE_IS_COHERENT 0 /* MP coherence feature */ + +#define XCHAL_HAVE_PREFETCH 1 /* PREFCTL register */ +#define XCHAL_HAVE_PREFETCH_L1 0 /* prefetch to L1 dcache */ +#define XCHAL_PREFETCH_CASTOUT_LINES 1 /* dcache pref. castout bufsz */ +#define XCHAL_PREFETCH_ENTRIES 8 /* cache prefetch entries */ +#define XCHAL_PREFETCH_BLOCK_ENTRIES 0 /* prefetch block streams */ +#define XCHAL_HAVE_CACHE_BLOCKOPS 0 /* block prefetch for caches */ +#define XCHAL_HAVE_ICACHE_TEST 1 /* Icache test instructions */ +#define XCHAL_HAVE_DCACHE_TEST 1 /* Dcache test instructions */ +#define XCHAL_HAVE_ICACHE_DYN_WAYS 0 /* Icache dynamic way support */ +#define XCHAL_HAVE_DCACHE_DYN_WAYS 0 /* Dcache dynamic way support */ + + + + +/**************************************************************************** + Parameters Useful for PRIVILEGED (Supervisory or Non-Virtualized) Code + ****************************************************************************/ + + +#ifndef XTENSA_HAL_NON_PRIVILEGED_ONLY + +/*---------------------------------------------------------------------- + CACHE + ----------------------------------------------------------------------*/ + +#define XCHAL_HAVE_PIF 1 /* any outbound PIF present */ + +/* If present, cache size in bytes == (ways * 2^(linewidth + setwidth)). */ + +/* Number of cache sets in log2(lines per way): */ +#define XCHAL_ICACHE_SETWIDTH 7 +#define XCHAL_DCACHE_SETWIDTH 7 + +/* Cache set associativity (number of ways): */ +#define XCHAL_ICACHE_WAYS 4 +#define XCHAL_DCACHE_WAYS 4 + +/* Cache features: */ +#define XCHAL_ICACHE_LINE_LOCKABLE 1 +#define XCHAL_DCACHE_LINE_LOCKABLE 1 +#define XCHAL_ICACHE_ECC_PARITY 0 +#define XCHAL_DCACHE_ECC_PARITY 0 + +/* Cache access size in bytes (affects operation of SICW instruction): */ +#define XCHAL_ICACHE_ACCESS_SIZE 8 +#define XCHAL_DCACHE_ACCESS_SIZE 8 + +#define XCHAL_DCACHE_BANKS 1 /* number of banks */ + +/* Number of encoded cache attr bits (see <xtensa/hal.h> for decoded bits): */ +#define XCHAL_CA_BITS 4 + +/* Whether MEMCTL register has anything useful */ +#define XCHAL_USE_MEMCTL (((XCHAL_LOOP_BUFFER_SIZE > 0) || \ + XCHAL_DCACHE_IS_COHERENT || \ + XCHAL_HAVE_ICACHE_DYN_WAYS || \ + XCHAL_HAVE_DCACHE_DYN_WAYS) && \ + (XCHAL_HW_MIN_VERSION >= XTENSA_HWVERSION_RE_2012_0)) + + +/*---------------------------------------------------------------------- + INTERNAL I/D RAM/ROMs and XLMI + ----------------------------------------------------------------------*/ + +#define XCHAL_NUM_INSTROM 0 /* number of core instr. ROMs */ +#define XCHAL_NUM_INSTRAM 0 /* number of core instr. RAMs */ +#define XCHAL_NUM_DATAROM 0 /* number of core data ROMs */ +#define XCHAL_NUM_DATARAM 0 /* number of core data RAMs */ +#define XCHAL_NUM_URAM 0 /* number of core unified RAMs*/ +#define XCHAL_NUM_XLMI 0 /* number of core XLMI ports */ + +#define XCHAL_HAVE_IMEM_LOADSTORE 1 /* can load/store to IROM/IRAM*/ + + +/*---------------------------------------------------------------------- + INTERRUPTS and TIMERS + ----------------------------------------------------------------------*/ + +#define XCHAL_HAVE_INTERRUPTS 1 /* interrupt option */ +#define XCHAL_HAVE_HIGHPRI_INTERRUPTS 1 /* med/high-pri. interrupts */ +#define XCHAL_HAVE_NMI 1 /* non-maskable interrupt */ +#define XCHAL_HAVE_CCOUNT 1 /* CCOUNT reg. (timer option) */ +#define XCHAL_NUM_TIMERS 3 /* number of CCOMPAREn regs */ +#define XCHAL_NUM_INTERRUPTS 22 /* number of interrupts */ +#define XCHAL_NUM_INTERRUPTS_LOG2 5 /* ceil(log2(NUM_INTERRUPTS)) */ +#define XCHAL_NUM_EXTINTERRUPTS 16 /* num of external interrupts */ +#define XCHAL_NUM_INTLEVELS 6 /* number of interrupt levels + (not including level zero) */ +#define XCHAL_EXCM_LEVEL 4 /* level masked by PS.EXCM */ + /* (always 1 in XEA1; levels 2 .. EXCM_LEVEL are "medium priority") */ + +/* Masks of interrupts at each interrupt level: */ +#define XCHAL_INTLEVEL1_MASK 0x001F00BF +#define XCHAL_INTLEVEL2_MASK 0x00000140 +#define XCHAL_INTLEVEL3_MASK 0x00200E00 +#define XCHAL_INTLEVEL4_MASK 0x00008000 +#define XCHAL_INTLEVEL5_MASK 0x00003000 +#define XCHAL_INTLEVEL6_MASK 0x00000000 +#define XCHAL_INTLEVEL7_MASK 0x00004000 + +/* Masks of interrupts at each range 1..n of interrupt levels: */ +#define XCHAL_INTLEVEL1_ANDBELOW_MASK 0x001F00BF +#define XCHAL_INTLEVEL2_ANDBELOW_MASK 0x001F01FF +#define XCHAL_INTLEVEL3_ANDBELOW_MASK 0x003F0FFF +#define XCHAL_INTLEVEL4_ANDBELOW_MASK 0x003F8FFF +#define XCHAL_INTLEVEL5_ANDBELOW_MASK 0x003FBFFF +#define XCHAL_INTLEVEL6_ANDBELOW_MASK 0x003FBFFF +#define XCHAL_INTLEVEL7_ANDBELOW_MASK 0x003FFFFF + +/* Level of each interrupt: */ +#define XCHAL_INT0_LEVEL 1 +#define XCHAL_INT1_LEVEL 1 +#define XCHAL_INT2_LEVEL 1 +#define XCHAL_INT3_LEVEL 1 +#define XCHAL_INT4_LEVEL 1 +#define XCHAL_INT5_LEVEL 1 +#define XCHAL_INT6_LEVEL 2 +#define XCHAL_INT7_LEVEL 1 +#define XCHAL_INT8_LEVEL 2 +#define XCHAL_INT9_LEVEL 3 +#define XCHAL_INT10_LEVEL 3 +#define XCHAL_INT11_LEVEL 3 +#define XCHAL_INT12_LEVEL 5 +#define XCHAL_INT13_LEVEL 5 +#define XCHAL_INT14_LEVEL 7 +#define XCHAL_INT15_LEVEL 4 +#define XCHAL_INT16_LEVEL 1 +#define XCHAL_INT17_LEVEL 1 +#define XCHAL_INT18_LEVEL 1 +#define XCHAL_INT19_LEVEL 1 +#define XCHAL_INT20_LEVEL 1 +#define XCHAL_INT21_LEVEL 3 +#define XCHAL_DEBUGLEVEL 6 /* debug interrupt level */ +#define XCHAL_HAVE_DEBUG_EXTERN_INT 1 /* OCD external db interrupt */ +#define XCHAL_NMILEVEL 7 /* NMI "level" (for use with + EXCSAVE/EPS/EPC_n, RFI n) */ + +/* Type of each interrupt: */ +#define XCHAL_INT0_TYPE XTHAL_INTTYPE_EXTERN_LEVEL +#define XCHAL_INT1_TYPE XTHAL_INTTYPE_EXTERN_LEVEL +#define XCHAL_INT2_TYPE XTHAL_INTTYPE_EXTERN_LEVEL +#define XCHAL_INT3_TYPE XTHAL_INTTYPE_EXTERN_LEVEL +#define XCHAL_INT4_TYPE XTHAL_INTTYPE_EXTERN_LEVEL +#define XCHAL_INT5_TYPE XTHAL_INTTYPE_EXTERN_LEVEL +#define XCHAL_INT6_TYPE XTHAL_INTTYPE_TIMER +#define XCHAL_INT7_TYPE XTHAL_INTTYPE_SOFTWARE +#define XCHAL_INT8_TYPE XTHAL_INTTYPE_EXTERN_LEVEL +#define XCHAL_INT9_TYPE XTHAL_INTTYPE_EXTERN_LEVEL +#define XCHAL_INT10_TYPE XTHAL_INTTYPE_TIMER +#define XCHAL_INT11_TYPE XTHAL_INTTYPE_SOFTWARE +#define XCHAL_INT12_TYPE XTHAL_INTTYPE_EXTERN_LEVEL +#define XCHAL_INT13_TYPE XTHAL_INTTYPE_TIMER +#define XCHAL_INT14_TYPE XTHAL_INTTYPE_NMI +#define XCHAL_INT15_TYPE XTHAL_INTTYPE_PROFILING +#define XCHAL_INT16_TYPE XTHAL_INTTYPE_EXTERN_EDGE +#define XCHAL_INT17_TYPE XTHAL_INTTYPE_EXTERN_EDGE +#define XCHAL_INT18_TYPE XTHAL_INTTYPE_EXTERN_EDGE +#define XCHAL_INT19_TYPE XTHAL_INTTYPE_EXTERN_EDGE +#define XCHAL_INT20_TYPE XTHAL_INTTYPE_EXTERN_EDGE +#define XCHAL_INT21_TYPE XTHAL_INTTYPE_EXTERN_EDGE + +/* Masks of interrupts for each type of interrupt: */ +#define XCHAL_INTTYPE_MASK_UNCONFIGURED 0xFFC00000 +#define XCHAL_INTTYPE_MASK_SOFTWARE 0x00000880 +#define XCHAL_INTTYPE_MASK_EXTERN_EDGE 0x003F0000 +#define XCHAL_INTTYPE_MASK_EXTERN_LEVEL 0x0000133F +#define XCHAL_INTTYPE_MASK_TIMER 0x00002440 +#define XCHAL_INTTYPE_MASK_NMI 0x00004000 +#define XCHAL_INTTYPE_MASK_WRITE_ERROR 0x00000000 +#define XCHAL_INTTYPE_MASK_PROFILING 0x00008000 + +/* Interrupt numbers assigned to specific interrupt sources: */ +#define XCHAL_TIMER0_INTERRUPT 6 /* CCOMPARE0 */ +#define XCHAL_TIMER1_INTERRUPT 10 /* CCOMPARE1 */ +#define XCHAL_TIMER2_INTERRUPT 13 /* CCOMPARE2 */ +#define XCHAL_TIMER3_INTERRUPT XTHAL_TIMER_UNCONFIGURED +#define XCHAL_NMI_INTERRUPT 14 /* non-maskable interrupt */ +#define XCHAL_PROFILING_INTERRUPT 15 /* profiling interrupt */ + +/* Interrupt numbers for levels at which only one interrupt is configured: */ +#define XCHAL_INTLEVEL4_NUM 15 +#define XCHAL_INTLEVEL7_NUM 14 +/* (There are many interrupts each at level(s) 1, 2, 3, 5.) */ + + +/* + * External interrupt mapping. + * These macros describe how Xtensa processor interrupt numbers + * (as numbered internally, eg. in INTERRUPT and INTENABLE registers) + * map to external BInterrupt<n> pins, for those interrupts + * configured as external (level-triggered, edge-triggered, or NMI). + * See the Xtensa processor databook for more details. + */ + +/* Core interrupt numbers mapped to each EXTERNAL BInterrupt pin number: */ +#define XCHAL_EXTINT0_NUM 0 /* (intlevel 1) */ +#define XCHAL_EXTINT1_NUM 1 /* (intlevel 1) */ +#define XCHAL_EXTINT2_NUM 2 /* (intlevel 1) */ +#define XCHAL_EXTINT3_NUM 3 /* (intlevel 1) */ +#define XCHAL_EXTINT4_NUM 4 /* (intlevel 1) */ +#define XCHAL_EXTINT5_NUM 5 /* (intlevel 1) */ +#define XCHAL_EXTINT6_NUM 8 /* (intlevel 2) */ +#define XCHAL_EXTINT7_NUM 9 /* (intlevel 3) */ +#define XCHAL_EXTINT8_NUM 12 /* (intlevel 5) */ +#define XCHAL_EXTINT9_NUM 14 /* (intlevel 7) */ +#define XCHAL_EXTINT10_NUM 16 /* (intlevel 1) */ +#define XCHAL_EXTINT11_NUM 17 /* (intlevel 1) */ +#define XCHAL_EXTINT12_NUM 18 /* (intlevel 1) */ +#define XCHAL_EXTINT13_NUM 19 /* (intlevel 1) */ +#define XCHAL_EXTINT14_NUM 20 /* (intlevel 1) */ +#define XCHAL_EXTINT15_NUM 21 /* (intlevel 3) */ +/* EXTERNAL BInterrupt pin numbers mapped to each core interrupt number: */ +#define XCHAL_INT0_EXTNUM 0 /* (intlevel 1) */ +#define XCHAL_INT1_EXTNUM 1 /* (intlevel 1) */ +#define XCHAL_INT2_EXTNUM 2 /* (intlevel 1) */ +#define XCHAL_INT3_EXTNUM 3 /* (intlevel 1) */ +#define XCHAL_INT4_EXTNUM 4 /* (intlevel 1) */ +#define XCHAL_INT5_EXTNUM 5 /* (intlevel 1) */ +#define XCHAL_INT8_EXTNUM 6 /* (intlevel 2) */ +#define XCHAL_INT9_EXTNUM 7 /* (intlevel 3) */ +#define XCHAL_INT12_EXTNUM 8 /* (intlevel 5) */ +#define XCHAL_INT14_EXTNUM 9 /* (intlevel 7) */ +#define XCHAL_INT16_EXTNUM 10 /* (intlevel 1) */ +#define XCHAL_INT17_EXTNUM 11 /* (intlevel 1) */ +#define XCHAL_INT18_EXTNUM 12 /* (intlevel 1) */ +#define XCHAL_INT19_EXTNUM 13 /* (intlevel 1) */ +#define XCHAL_INT20_EXTNUM 14 /* (intlevel 1) */ +#define XCHAL_INT21_EXTNUM 15 /* (intlevel 3) */ + + +/*---------------------------------------------------------------------- + EXCEPTIONS and VECTORS + ----------------------------------------------------------------------*/ + +#define XCHAL_XEA_VERSION 2 /* Xtensa Exception Architecture + number: 1 == XEA1 (old) + 2 == XEA2 (new) + 0 == XEAX (extern) or TX */ +#define XCHAL_HAVE_XEA1 0 /* Exception Architecture 1 */ +#define XCHAL_HAVE_XEA2 1 /* Exception Architecture 2 */ +#define XCHAL_HAVE_XEAX 0 /* External Exception Arch. */ +#define XCHAL_HAVE_EXCEPTIONS 1 /* exception option */ +#define XCHAL_HAVE_HALT 0 /* halt architecture option */ +#define XCHAL_HAVE_BOOTLOADER 0 /* boot loader (for TX) */ +#define XCHAL_HAVE_MEM_ECC_PARITY 0 /* local memory ECC/parity */ +#define XCHAL_HAVE_VECTOR_SELECT 1 /* relocatable vectors */ +#define XCHAL_HAVE_VECBASE 1 /* relocatable vectors */ +#define XCHAL_VECBASE_RESET_VADDR 0x00002000 /* VECBASE reset value */ +#define XCHAL_VECBASE_RESET_PADDR 0x00002000 +#define XCHAL_RESET_VECBASE_OVERLAP 0 + +#define XCHAL_RESET_VECTOR0_VADDR 0xFE000000 +#define XCHAL_RESET_VECTOR0_PADDR 0xFE000000 +#define XCHAL_RESET_VECTOR1_VADDR 0x00001000 +#define XCHAL_RESET_VECTOR1_PADDR 0x00001000 +#define XCHAL_RESET_VECTOR_VADDR 0xFE000000 +#define XCHAL_RESET_VECTOR_PADDR 0xFE000000 +#define XCHAL_USER_VECOFS 0x00000340 +#define XCHAL_USER_VECTOR_VADDR 0x00002340 +#define XCHAL_USER_VECTOR_PADDR 0x00002340 +#define XCHAL_KERNEL_VECOFS 0x00000300 +#define XCHAL_KERNEL_VECTOR_VADDR 0x00002300 +#define XCHAL_KERNEL_VECTOR_PADDR 0x00002300 +#define XCHAL_DOUBLEEXC_VECOFS 0x000003C0 +#define XCHAL_DOUBLEEXC_VECTOR_VADDR 0x000023C0 +#define XCHAL_DOUBLEEXC_VECTOR_PADDR 0x000023C0 +#define XCHAL_WINDOW_OF4_VECOFS 0x00000000 +#define XCHAL_WINDOW_UF4_VECOFS 0x00000040 +#define XCHAL_WINDOW_OF8_VECOFS 0x00000080 +#define XCHAL_WINDOW_UF8_VECOFS 0x000000C0 +#define XCHAL_WINDOW_OF12_VECOFS 0x00000100 +#define XCHAL_WINDOW_UF12_VECOFS 0x00000140 +#define XCHAL_WINDOW_VECTORS_VADDR 0x00002000 +#define XCHAL_WINDOW_VECTORS_PADDR 0x00002000 +#define XCHAL_INTLEVEL2_VECOFS 0x00000180 +#define XCHAL_INTLEVEL2_VECTOR_VADDR 0x00002180 +#define XCHAL_INTLEVEL2_VECTOR_PADDR 0x00002180 +#define XCHAL_INTLEVEL3_VECOFS 0x000001C0 +#define XCHAL_INTLEVEL3_VECTOR_VADDR 0x000021C0 +#define XCHAL_INTLEVEL3_VECTOR_PADDR 0x000021C0 +#define XCHAL_INTLEVEL4_VECOFS 0x00000200 +#define XCHAL_INTLEVEL4_VECTOR_VADDR 0x00002200 +#define XCHAL_INTLEVEL4_VECTOR_PADDR 0x00002200 +#define XCHAL_INTLEVEL5_VECOFS 0x00000240 +#define XCHAL_INTLEVEL5_VECTOR_VADDR 0x00002240 +#define XCHAL_INTLEVEL5_VECTOR_PADDR 0x00002240 +#define XCHAL_INTLEVEL6_VECOFS 0x00000280 +#define XCHAL_INTLEVEL6_VECTOR_VADDR 0x00002280 +#define XCHAL_INTLEVEL6_VECTOR_PADDR 0x00002280 +#define XCHAL_DEBUG_VECOFS XCHAL_INTLEVEL6_VECOFS +#define XCHAL_DEBUG_VECTOR_VADDR XCHAL_INTLEVEL6_VECTOR_VADDR +#define XCHAL_DEBUG_VECTOR_PADDR XCHAL_INTLEVEL6_VECTOR_PADDR +#define XCHAL_NMI_VECOFS 0x000002C0 +#define XCHAL_NMI_VECTOR_VADDR 0x000022C0 +#define XCHAL_NMI_VECTOR_PADDR 0x000022C0 +#define XCHAL_INTLEVEL7_VECOFS XCHAL_NMI_VECOFS +#define XCHAL_INTLEVEL7_VECTOR_VADDR XCHAL_NMI_VECTOR_VADDR +#define XCHAL_INTLEVEL7_VECTOR_PADDR XCHAL_NMI_VECTOR_PADDR + + +/*---------------------------------------------------------------------- + DEBUG MODULE + ----------------------------------------------------------------------*/ + +/* Misc */ +#define XCHAL_HAVE_DEBUG_ERI 1 /* ERI to debug module */ +#define XCHAL_HAVE_DEBUG_APB 0 /* APB to debug module */ +#define XCHAL_HAVE_DEBUG_JTAG 1 /* JTAG to debug module */ + +/* On-Chip Debug (OCD) */ +#define XCHAL_HAVE_OCD 1 /* OnChipDebug option */ +#define XCHAL_NUM_IBREAK 2 /* number of IBREAKn regs */ +#define XCHAL_NUM_DBREAK 2 /* number of DBREAKn regs */ +#define XCHAL_HAVE_OCD_DIR_ARRAY 0 /* faster OCD option (to LX4) */ +#define XCHAL_HAVE_OCD_LS32DDR 1 /* L32DDR/S32DDR (faster OCD) */ + +/* TRAX (in core) */ +#define XCHAL_HAVE_TRAX 1 /* TRAX in debug module */ +#define XCHAL_TRAX_MEM_SIZE 262144 /* TRAX memory size in bytes */ +#define XCHAL_TRAX_MEM_SHAREABLE 1 /* start/end regs; ready sig. */ +#define XCHAL_TRAX_ATB_WIDTH 0 /* ATB width (bits), 0=no ATB */ +#define XCHAL_TRAX_TIME_WIDTH 0 /* timestamp bitwidth, 0=none */ + +/* Perf counters */ +#define XCHAL_NUM_PERF_COUNTERS 8 /* performance counters */ + + +/*---------------------------------------------------------------------- + MMU + ----------------------------------------------------------------------*/ + +/* See core-matmap.h header file for more details. */ + +#define XCHAL_HAVE_TLBS 1 /* inverse of HAVE_CACHEATTR */ +#define XCHAL_HAVE_SPANNING_WAY 1 /* one way maps I+D 4GB vaddr */ +#define XCHAL_SPANNING_WAY 6 /* TLB spanning way number */ +#define XCHAL_HAVE_IDENTITY_MAP 0 /* vaddr == paddr always */ +#define XCHAL_HAVE_CACHEATTR 0 /* CACHEATTR register present */ +#define XCHAL_HAVE_MIMIC_CACHEATTR 0 /* region protection */ +#define XCHAL_HAVE_XLT_CACHEATTR 0 /* region prot. w/translation */ +#define XCHAL_HAVE_PTP_MMU 1 /* full MMU (with page table + [autorefill] and protection) + usable for an MMU-based OS */ +/* If none of the above last 4 are set, it's a custom TLB configuration. */ +#define XCHAL_ITLB_ARF_ENTRIES_LOG2 2 /* log2(autorefill way size) */ +#define XCHAL_DTLB_ARF_ENTRIES_LOG2 2 /* log2(autorefill way size) */ + +#define XCHAL_MMU_ASID_BITS 8 /* number of bits in ASIDs */ +#define XCHAL_MMU_RINGS 4 /* number of rings (1..4) */ +#define XCHAL_MMU_RING_BITS 2 /* num of bits in RING field */ + +#endif /* !XTENSA_HAL_NON_PRIVILEGED_ONLY */ + + +#endif /* _XTENSA_CORE_CONFIGURATION_H */ + diff --git a/arch/xtensa/variants/test_kc705_be/include/variant/tie-asm.h b/arch/xtensa/variants/test_kc705_be/include/variant/tie-asm.h new file mode 100644 index 000000000000..b87fe1a5177b --- /dev/null +++ b/arch/xtensa/variants/test_kc705_be/include/variant/tie-asm.h @@ -0,0 +1,308 @@ +/* + * tie-asm.h -- compile-time HAL assembler definitions dependent on CORE & TIE + * + * NOTE: This header file is not meant to be included directly. + */ + +/* This header file contains assembly-language definitions (assembly + macros, etc.) for this specific Xtensa processor's TIE extensions + and options. It is customized to this Xtensa processor configuration. + + Copyright (c) 1999-2015 Cadence Design Systems Inc. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef _XTENSA_CORE_TIE_ASM_H +#define _XTENSA_CORE_TIE_ASM_H + +/* Selection parameter values for save-area save/restore macros: */ +/* Option vs. TIE: */ +#define XTHAL_SAS_TIE 0x0001 /* custom extension or coprocessor */ +#define XTHAL_SAS_OPT 0x0002 /* optional (and not a coprocessor) */ +#define XTHAL_SAS_ANYOT 0x0003 /* both of the above */ +/* Whether used automatically by compiler: */ +#define XTHAL_SAS_NOCC 0x0004 /* not used by compiler w/o special opts/code */ +#define XTHAL_SAS_CC 0x0008 /* used by compiler without special opts/code */ +#define XTHAL_SAS_ANYCC 0x000C /* both of the above */ +/* ABI handling across function calls: */ +#define XTHAL_SAS_CALR 0x0010 /* caller-saved */ +#define XTHAL_SAS_CALE 0x0020 /* callee-saved */ +#define XTHAL_SAS_GLOB 0x0040 /* global across function calls (in thread) */ +#define XTHAL_SAS_ANYABI 0x0070 /* all of the above three */ +/* Misc */ +#define XTHAL_SAS_ALL 0xFFFF /* include all default NCP contents */ +#define XTHAL_SAS3(optie,ccuse,abi) ( ((optie) & XTHAL_SAS_ANYOT) \ + | ((ccuse) & XTHAL_SAS_ANYCC) \ + | ((abi) & XTHAL_SAS_ANYABI) ) + + + /* + * Macro to store all non-coprocessor (extra) custom TIE and optional state + * (not including zero-overhead loop registers). + * Required parameters: + * ptr Save area pointer address register (clobbered) + * (register must contain a 4 byte aligned address). + * at1..at4 Four temporary address registers (first XCHAL_NCP_NUM_ATMPS + * registers are clobbered, the remaining are unused). + * Optional parameters: + * continue If macro invoked as part of a larger store sequence, set to 1 + * if this is not the first in the sequence. Defaults to 0. + * ofs Offset from start of larger sequence (from value of first ptr + * in sequence) at which to store. Defaults to next available space + * (or 0 if <continue> is 0). + * select Select what category(ies) of registers to store, as a bitmask + * (see XTHAL_SAS_xxx constants). Defaults to all registers. + * alloc Select what category(ies) of registers to allocate; if any + * category is selected here that is not in <select>, space for + * the corresponding registers is skipped without doing any store. + */ + .macro xchal_ncp_store ptr at1 at2 at3 at4 continue=0 ofs=-1 select=XTHAL_SAS_ALL alloc=0 + xchal_sa_start \continue, \ofs + // Optional global registers used by default by the compiler: + .ifeq (XTHAL_SAS_OPT | XTHAL_SAS_CC | XTHAL_SAS_GLOB) & ~(\select) + xchal_sa_align \ptr, 0, 1020, 4, 4 + rur.THREADPTR \at1 // threadptr option + s32i \at1, \ptr, .Lxchal_ofs_+0 + .set .Lxchal_ofs_, .Lxchal_ofs_ + 4 + .elseif ((XTHAL_SAS_OPT | XTHAL_SAS_CC | XTHAL_SAS_GLOB) & ~(\alloc)) == 0 + xchal_sa_align \ptr, 0, 1020, 4, 4 + .set .Lxchal_ofs_, .Lxchal_ofs_ + 4 + .endif + // Optional caller-saved registers used by default by the compiler: + .ifeq (XTHAL_SAS_OPT | XTHAL_SAS_CC | XTHAL_SAS_CALR) & ~(\select) + xchal_sa_align \ptr, 0, 1016, 4, 4 + rsr.ACCLO \at1 // MAC16 option + s32i \at1, \ptr, .Lxchal_ofs_+0 + rsr.ACCHI \at1 // MAC16 option + s32i \at1, \ptr, .Lxchal_ofs_+4 + .set .Lxchal_ofs_, .Lxchal_ofs_ + 8 + .elseif ((XTHAL_SAS_OPT | XTHAL_SAS_CC | XTHAL_SAS_CALR) & ~(\alloc)) == 0 + xchal_sa_align \ptr, 0, 1016, 4, 4 + .set .Lxchal_ofs_, .Lxchal_ofs_ + 8 + .endif + // Optional caller-saved registers not used by default by the compiler: + .ifeq (XTHAL_SAS_OPT | XTHAL_SAS_NOCC | XTHAL_SAS_CALR) & ~(\select) + xchal_sa_align \ptr, 0, 1000, 4, 4 + rsr.BR \at1 // boolean option + s32i \at1, \ptr, .Lxchal_ofs_+0 + rsr.SCOMPARE1 \at1 // conditional store option + s32i \at1, \ptr, .Lxchal_ofs_+4 + rsr.M0 \at1 // MAC16 option + s32i \at1, \ptr, .Lxchal_ofs_+8 + rsr.M1 \at1 // MAC16 option + s32i \at1, \ptr, .Lxchal_ofs_+12 + rsr.M2 \at1 // MAC16 option + s32i \at1, \ptr, .Lxchal_ofs_+16 + rsr.M3 \at1 // MAC16 option + s32i \at1, \ptr, .Lxchal_ofs_+20 + .set .Lxchal_ofs_, .Lxchal_ofs_ + 24 + .elseif ((XTHAL_SAS_OPT | XTHAL_SAS_NOCC | XTHAL_SAS_CALR) & ~(\alloc)) == 0 + xchal_sa_align \ptr, 0, 1000, 4, 4 + .set .Lxchal_ofs_, .Lxchal_ofs_ + 24 + .endif + .endm // xchal_ncp_store + + /* + * Macro to load all non-coprocessor (extra) custom TIE and optional state + * (not including zero-overhead loop registers). + * Required parameters: + * ptr Save area pointer address register (clobbered) + * (register must contain a 4 byte aligned address). + * at1..at4 Four temporary address registers (first XCHAL_NCP_NUM_ATMPS + * registers are clobbered, the remaining are unused). + * Optional parameters: + * continue If macro invoked as part of a larger load sequence, set to 1 + * if this is not the first in the sequence. Defaults to 0. + * ofs Offset from start of larger sequence (from value of first ptr + * in sequence) at which to load. Defaults to next available space + * (or 0 if <continue> is 0). + * select Select what category(ies) of registers to load, as a bitmask + * (see XTHAL_SAS_xxx constants). Defaults to all registers. + * alloc Select what category(ies) of registers to allocate; if any + * category is selected here that is not in <select>, space for + * the corresponding registers is skipped without doing any load. + */ + .macro xchal_ncp_load ptr at1 at2 at3 at4 continue=0 ofs=-1 select=XTHAL_SAS_ALL alloc=0 + xchal_sa_start \continue, \ofs + // Optional global registers used by default by the compiler: + .ifeq (XTHAL_SAS_OPT | XTHAL_SAS_CC | XTHAL_SAS_GLOB) & ~(\select) + xchal_sa_align \ptr, 0, 1020, 4, 4 + l32i \at1, \ptr, .Lxchal_ofs_+0 + wur.THREADPTR \at1 // threadptr option + .set .Lxchal_ofs_, .Lxchal_ofs_ + 4 + .elseif ((XTHAL_SAS_OPT | XTHAL_SAS_CC | XTHAL_SAS_GLOB) & ~(\alloc)) == 0 + xchal_sa_align \ptr, 0, 1020, 4, 4 + .set .Lxchal_ofs_, .Lxchal_ofs_ + 4 + .endif + // Optional caller-saved registers used by default by the compiler: + .ifeq (XTHAL_SAS_OPT | XTHAL_SAS_CC | XTHAL_SAS_CALR) & ~(\select) + xchal_sa_align \ptr, 0, 1016, 4, 4 + l32i \at1, \ptr, .Lxchal_ofs_+0 + wsr.ACCLO \at1 // MAC16 option + l32i \at1, \ptr, .Lxchal_ofs_+4 + wsr.ACCHI \at1 // MAC16 option + .set .Lxchal_ofs_, .Lxchal_ofs_ + 8 + .elseif ((XTHAL_SAS_OPT | XTHAL_SAS_CC | XTHAL_SAS_CALR) & ~(\alloc)) == 0 + xchal_sa_align \ptr, 0, 1016, 4, 4 + .set .Lxchal_ofs_, .Lxchal_ofs_ + 8 + .endif + // Optional caller-saved registers not used by default by the compiler: + .ifeq (XTHAL_SAS_OPT | XTHAL_SAS_NOCC | XTHAL_SAS_CALR) & ~(\select) + xchal_sa_align \ptr, 0, 1000, 4, 4 + l32i \at1, \ptr, .Lxchal_ofs_+0 + wsr.BR \at1 // boolean option + l32i \at1, \ptr, .Lxchal_ofs_+4 + wsr.SCOMPARE1 \at1 // conditional store option + l32i \at1, \ptr, .Lxchal_ofs_+8 + wsr.M0 \at1 // MAC16 option + l32i \at1, \ptr, .Lxchal_ofs_+12 + wsr.M1 \at1 // MAC16 option + l32i \at1, \ptr, .Lxchal_ofs_+16 + wsr.M2 \at1 // MAC16 option + l32i \at1, \ptr, .Lxchal_ofs_+20 + wsr.M3 \at1 // MAC16 option + .set .Lxchal_ofs_, .Lxchal_ofs_ + 24 + .elseif ((XTHAL_SAS_OPT | XTHAL_SAS_NOCC | XTHAL_SAS_CALR) & ~(\alloc)) == 0 + xchal_sa_align \ptr, 0, 1000, 4, 4 + .set .Lxchal_ofs_, .Lxchal_ofs_ + 24 + .endif + .endm // xchal_ncp_load + + +#define XCHAL_NCP_NUM_ATMPS 1 + + /* + * Macro to store the state of TIE coprocessor AudioEngineLX. + * Required parameters: + * ptr Save area pointer address register (clobbered) + * (register must contain a 8 byte aligned address). + * at1..at4 Four temporary address registers (first XCHAL_CP1_NUM_ATMPS + * registers are clobbered, the remaining are unused). + * Optional parameters are the same as for xchal_ncp_store. + */ +#define xchal_cp_AudioEngineLX_store xchal_cp1_store + .macro xchal_cp1_store ptr at1 at2 at3 at4 continue=0 ofs=-1 select=XTHAL_SAS_ALL alloc=0 + xchal_sa_start \continue, \ofs + // Custom caller-saved registers not used by default by the compiler: + .ifeq (XTHAL_SAS_TIE | XTHAL_SAS_NOCC | XTHAL_SAS_CALR) & ~(\select) + xchal_sa_align \ptr, 0, 0, 8, 8 + rur.AE_OVF_SAR \at1 // ureg 240 + s32i \at1, \ptr, .Lxchal_ofs_+0 + rur.AE_BITHEAD \at1 // ureg 241 + s32i \at1, \ptr, .Lxchal_ofs_+4 + rur.AE_TS_FTS_BU_BP \at1 // ureg 242 + s32i \at1, \ptr, .Lxchal_ofs_+8 + rur.AE_SD_NO \at1 // ureg 243 + s32i \at1, \ptr, .Lxchal_ofs_+12 + rur.AE_CBEGIN0 \at1 // ureg 246 + s32i \at1, \ptr, .Lxchal_ofs_+16 + rur.AE_CEND0 \at1 // ureg 247 + s32i \at1, \ptr, .Lxchal_ofs_+20 + ae_sp24x2s.i aep0, \ptr, .Lxchal_ofs_+24 + ae_sp24x2s.i aep1, \ptr, .Lxchal_ofs_+32 + ae_sp24x2s.i aep2, \ptr, .Lxchal_ofs_+40 + ae_sp24x2s.i aep3, \ptr, .Lxchal_ofs_+48 + ae_sp24x2s.i aep4, \ptr, .Lxchal_ofs_+56 + addi \ptr, \ptr, 64 + ae_sp24x2s.i aep5, \ptr, .Lxchal_ofs_+0 + ae_sp24x2s.i aep6, \ptr, .Lxchal_ofs_+8 + ae_sp24x2s.i aep7, \ptr, .Lxchal_ofs_+16 + ae_sq56s.i aeq0, \ptr, .Lxchal_ofs_+24 + ae_sq56s.i aeq1, \ptr, .Lxchal_ofs_+32 + ae_sq56s.i aeq2, \ptr, .Lxchal_ofs_+40 + ae_sq56s.i aeq3, \ptr, .Lxchal_ofs_+48 + .set .Lxchal_pofs_, .Lxchal_pofs_ + 64 + .set .Lxchal_ofs_, .Lxchal_ofs_ + 56 + .elseif ((XTHAL_SAS_TIE | XTHAL_SAS_NOCC | XTHAL_SAS_CALR) & ~(\alloc)) == 0 + xchal_sa_align \ptr, 0, 0, 8, 8 + .set .Lxchal_ofs_, .Lxchal_ofs_ + 120 + .endif + .endm // xchal_cp1_store + + /* + * Macro to load the state of TIE coprocessor AudioEngineLX. + * Required parameters: + * ptr Save area pointer address register (clobbered) + * (register must contain a 8 byte aligned address). + * at1..at4 Four temporary address registers (first XCHAL_CP1_NUM_ATMPS + * registers are clobbered, the remaining are unused). + * Optional parameters are the same as for xchal_ncp_load. + */ +#define xchal_cp_AudioEngineLX_load xchal_cp1_load + .macro xchal_cp1_load ptr at1 at2 at3 at4 continue=0 ofs=-1 select=XTHAL_SAS_ALL alloc=0 + xchal_sa_start \continue, \ofs + // Custom caller-saved registers not used by default by the compiler: + .ifeq (XTHAL_SAS_TIE | XTHAL_SAS_NOCC | XTHAL_SAS_CALR) & ~(\select) + xchal_sa_align \ptr, 0, 0, 8, 8 + l32i \at1, \ptr, .Lxchal_ofs_+0 + wur.AE_OVF_SAR \at1 // ureg 240 + l32i \at1, \ptr, .Lxchal_ofs_+4 + wur.AE_BITHEAD \at1 // ureg 241 + l32i \at1, \ptr, .Lxchal_ofs_+8 + wur.AE_TS_FTS_BU_BP \at1 // ureg 242 + l32i \at1, \ptr, .Lxchal_ofs_+12 + wur.AE_SD_NO \at1 // ureg 243 + l32i \at1, \ptr, .Lxchal_ofs_+16 + wur.AE_CBEGIN0 \at1 // ureg 246 + l32i \at1, \ptr, .Lxchal_ofs_+20 + wur.AE_CEND0 \at1 // ureg 247 + ae_lp24x2.i aep0, \ptr, .Lxchal_ofs_+24 + ae_lp24x2.i aep1, \ptr, .Lxchal_ofs_+32 + ae_lp24x2.i aep2, \ptr, .Lxchal_ofs_+40 + ae_lp24x2.i aep3, \ptr, .Lxchal_ofs_+48 + ae_lp24x2.i aep4, \ptr, .Lxchal_ofs_+56 + addi \ptr, \ptr, 64 + ae_lp24x2.i aep5, \ptr, .Lxchal_ofs_+0 + ae_lp24x2.i aep6, \ptr, .Lxchal_ofs_+8 + ae_lp24x2.i aep7, \ptr, .Lxchal_ofs_+16 + addi \ptr, \ptr, 24 + ae_lq56.i aeq0, \ptr, .Lxchal_ofs_+0 + ae_lq56.i aeq1, \ptr, .Lxchal_ofs_+8 + ae_lq56.i aeq2, \ptr, .Lxchal_ofs_+16 + ae_lq56.i aeq3, \ptr, .Lxchal_ofs_+24 + .set .Lxchal_pofs_, .Lxchal_pofs_ + 88 + .set .Lxchal_ofs_, .Lxchal_ofs_ + 32 + .elseif ((XTHAL_SAS_TIE | XTHAL_SAS_NOCC | XTHAL_SAS_CALR) & ~(\alloc)) == 0 + xchal_sa_align \ptr, 0, 0, 8, 8 + .set .Lxchal_ofs_, .Lxchal_ofs_ + 120 + .endif + .endm // xchal_cp1_load + +#define XCHAL_CP1_NUM_ATMPS 1 +#define XCHAL_SA_NUM_ATMPS 1 + + /* Empty macros for unconfigured coprocessors: */ + .macro xchal_cp0_store p a b c d continue=0 ofs=-1 select=-1 ; .endm + .macro xchal_cp0_load p a b c d continue=0 ofs=-1 select=-1 ; .endm + .macro xchal_cp2_store p a b c d continue=0 ofs=-1 select=-1 ; .endm + .macro xchal_cp2_load p a b c d continue=0 ofs=-1 select=-1 ; .endm + .macro xchal_cp3_store p a b c d continue=0 ofs=-1 select=-1 ; .endm + .macro xchal_cp3_load p a b c d continue=0 ofs=-1 select=-1 ; .endm + .macro xchal_cp4_store p a b c d continue=0 ofs=-1 select=-1 ; .endm + .macro xchal_cp4_load p a b c d continue=0 ofs=-1 select=-1 ; .endm + .macro xchal_cp5_store p a b c d continue=0 ofs=-1 select=-1 ; .endm + .macro xchal_cp5_load p a b c d continue=0 ofs=-1 select=-1 ; .endm + .macro xchal_cp6_store p a b c d continue=0 ofs=-1 select=-1 ; .endm + .macro xchal_cp6_load p a b c d continue=0 ofs=-1 select=-1 ; .endm + .macro xchal_cp7_store p a b c d continue=0 ofs=-1 select=-1 ; .endm + .macro xchal_cp7_load p a b c d continue=0 ofs=-1 select=-1 ; .endm + +#endif /*_XTENSA_CORE_TIE_ASM_H*/ + diff --git a/arch/xtensa/variants/test_kc705_be/include/variant/tie.h b/arch/xtensa/variants/test_kc705_be/include/variant/tie.h new file mode 100644 index 000000000000..4e1b4baac8e2 --- /dev/null +++ b/arch/xtensa/variants/test_kc705_be/include/variant/tie.h @@ -0,0 +1,182 @@ +/* + * tie.h -- compile-time HAL definitions dependent on CORE & TIE configuration + * + * NOTE: This header file is not meant to be included directly. + */ + +/* This header file describes this specific Xtensa processor's TIE extensions + that extend basic Xtensa core functionality. It is customized to this + Xtensa processor configuration. + + Copyright (c) 1999-2015 Cadence Design Systems Inc. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef _XTENSA_CORE_TIE_H +#define _XTENSA_CORE_TIE_H + +#define XCHAL_CP_NUM 2 /* number of coprocessors */ +#define XCHAL_CP_MAX 8 /* max CP ID + 1 (0 if none) */ +#define XCHAL_CP_MASK 0x82 /* bitmask of all CPs by ID */ +#define XCHAL_CP_PORT_MASK 0x80 /* bitmask of only port CPs */ + +/* Basic parameters of each coprocessor: */ +#define XCHAL_CP1_NAME "AudioEngineLX" +#define XCHAL_CP1_IDENT AudioEngineLX +#define XCHAL_CP1_SA_SIZE 120 /* size of state save area */ +#define XCHAL_CP1_SA_ALIGN 8 /* min alignment of save area */ +#define XCHAL_CP_ID_AUDIOENGINELX 1 /* coprocessor ID (0..7) */ +#define XCHAL_CP7_NAME "XTIOP" +#define XCHAL_CP7_IDENT XTIOP +#define XCHAL_CP7_SA_SIZE 0 /* size of state save area */ +#define XCHAL_CP7_SA_ALIGN 1 /* min alignment of save area */ +#define XCHAL_CP_ID_XTIOP 7 /* coprocessor ID (0..7) */ + +/* Filler info for unassigned coprocessors, to simplify arrays etc: */ +#define XCHAL_CP0_SA_SIZE 0 +#define XCHAL_CP0_SA_ALIGN 1 +#define XCHAL_CP2_SA_SIZE 0 +#define XCHAL_CP2_SA_ALIGN 1 +#define XCHAL_CP3_SA_SIZE 0 +#define XCHAL_CP3_SA_ALIGN 1 +#define XCHAL_CP4_SA_SIZE 0 +#define XCHAL_CP4_SA_ALIGN 1 +#define XCHAL_CP5_SA_SIZE 0 +#define XCHAL_CP5_SA_ALIGN 1 +#define XCHAL_CP6_SA_SIZE 0 +#define XCHAL_CP6_SA_ALIGN 1 + +/* Save area for non-coprocessor optional and custom (TIE) state: */ +#define XCHAL_NCP_SA_SIZE 36 +#define XCHAL_NCP_SA_ALIGN 4 + +/* Total save area for optional and custom state (NCP + CPn): */ +#define XCHAL_TOTAL_SA_SIZE 160 /* with 16-byte align padding */ +#define XCHAL_TOTAL_SA_ALIGN 8 /* actual minimum alignment */ + +/* + * Detailed contents of save areas. + * NOTE: caller must define the XCHAL_SA_REG macro (not defined here) + * before expanding the XCHAL_xxx_SA_LIST() macros. + * + * XCHAL_SA_REG(s,ccused,abikind,kind,opt,name,galign,align,asize, + * dbnum,base,regnum,bitsz,gapsz,reset,x...) + * + * s = passed from XCHAL_*_LIST(s), eg. to select how to expand + * ccused = set if used by compiler without special options or code + * abikind = 0 (caller-saved), 1 (callee-saved), or 2 (thread-global) + * kind = 0 (special reg), 1 (TIE user reg), or 2 (TIE regfile reg) + * opt = 0 (custom TIE extension or coprocessor), or 1 (optional reg) + * name = lowercase reg name (no quotes) + * galign = group byte alignment (power of 2) (galign >= align) + * align = register byte alignment (power of 2) + * asize = allocated size in bytes (asize*8 == bitsz + gapsz + padsz) + * (not including any pad bytes required to galign this or next reg) + * dbnum = unique target number f/debug (see <xtensa-libdb-macros.h>) + * base = reg shortname w/o index (or sr=special, ur=TIE user reg) + * regnum = reg index in regfile, or special/TIE-user reg number + * bitsz = number of significant bits (regfile width, or ur/sr mask bits) + * gapsz = intervening bits, if bitsz bits not stored contiguously + * (padsz = pad bits at end [TIE regfile] or at msbits [ur,sr] of asize) + * reset = register reset value (or 0 if undefined at reset) + * x = reserved for future use (0 until then) + * + * To filter out certain registers, e.g. to expand only the non-global + * registers used by the compiler, you can do something like this: + * + * #define XCHAL_SA_REG(s,ccused,p...) SELCC##ccused(p) + * #define SELCC0(p...) + * #define SELCC1(abikind,p...) SELAK##abikind(p) + * #define SELAK0(p...) REG(p) + * #define SELAK1(p...) REG(p) + * #define SELAK2(p...) + * #define REG(kind,tie,name,galn,aln,asz,csz,dbnum,base,rnum,bsz,rst,x...) \ + * ...what you want to expand... + */ + +#define XCHAL_NCP_SA_NUM 9 +#define XCHAL_NCP_SA_LIST(s) \ + XCHAL_SA_REG(s,1,2,1,1, threadptr, 4, 4, 4,0x03E7, ur,231, 32,0,0,0) \ + XCHAL_SA_REG(s,1,0,0,1, acclo, 4, 4, 4,0x0210, sr,16 , 32,0,0,0) \ + XCHAL_SA_REG(s,1,0,0,1, acchi, 4, 4, 4,0x0211, sr,17 , 8,0,0,0) \ + XCHAL_SA_REG(s,0,0,0,1, br, 4, 4, 4,0x0204, sr,4 , 16,0,0,0) \ + XCHAL_SA_REG(s,0,0,0,1, scompare1, 4, 4, 4,0x020C, sr,12 , 32,0,0,0) \ + XCHAL_SA_REG(s,0,0,0,1, m0, 4, 4, 4,0x0220, sr,32 , 32,0,0,0) \ + XCHAL_SA_REG(s,0,0,0,1, m1, 4, 4, 4,0x0221, sr,33 , 32,0,0,0) \ + XCHAL_SA_REG(s,0,0,0,1, m2, 4, 4, 4,0x0222, sr,34 , 32,0,0,0) \ + XCHAL_SA_REG(s,0,0,0,1, m3, 4, 4, 4,0x0223, sr,35 , 32,0,0,0) + +#define XCHAL_CP0_SA_NUM 0 +#define XCHAL_CP0_SA_LIST(s) /* empty */ + +#define XCHAL_CP1_SA_NUM 18 +#define XCHAL_CP1_SA_LIST(s) \ + XCHAL_SA_REG(s,0,0,1,0, ae_ovf_sar, 8, 4, 4,0x03F0, ur,240, 7,0,0,0) \ + XCHAL_SA_REG(s,0,0,1,0, ae_bithead, 4, 4, 4,0x03F1, ur,241, 32,0,0,0) \ + XCHAL_SA_REG(s,0,0,1,0,ae_ts_fts_bu_bp, 4, 4, 4,0x03F2, ur,242, 16,0,0,0) \ + XCHAL_SA_REG(s,0,0,1,0, ae_sd_no, 4, 4, 4,0x03F3, ur,243, 28,0,0,0) \ + XCHAL_SA_REG(s,0,0,1,0, ae_cbegin0, 4, 4, 4,0x03F6, ur,246, 32,0,0,0) \ + XCHAL_SA_REG(s,0,0,1,0, ae_cend0, 4, 4, 4,0x03F7, ur,247, 32,0,0,0) \ + XCHAL_SA_REG(s,0,0,2,0, aep0, 8, 8, 8,0x0060, aep,0 , 48,0,0,0) \ + XCHAL_SA_REG(s,0,0,2,0, aep1, 8, 8, 8,0x0061, aep,1 , 48,0,0,0) \ + XCHAL_SA_REG(s,0,0,2,0, aep2, 8, 8, 8,0x0062, aep,2 , 48,0,0,0) \ + XCHAL_SA_REG(s,0,0,2,0, aep3, 8, 8, 8,0x0063, aep,3 , 48,0,0,0) \ + XCHAL_SA_REG(s,0,0,2,0, aep4, 8, 8, 8,0x0064, aep,4 , 48,0,0,0) \ + XCHAL_SA_REG(s,0,0,2,0, aep5, 8, 8, 8,0x0065, aep,5 , 48,0,0,0) \ + XCHAL_SA_REG(s,0,0,2,0, aep6, 8, 8, 8,0x0066, aep,6 , 48,0,0,0) \ + XCHAL_SA_REG(s,0,0,2,0, aep7, 8, 8, 8,0x0067, aep,7 , 48,0,0,0) \ + XCHAL_SA_REG(s,0,0,2,0, aeq0, 8, 8, 8,0x0068, aeq,0 , 56,0,0,0) \ + XCHAL_SA_REG(s,0,0,2,0, aeq1, 8, 8, 8,0x0069, aeq,1 , 56,0,0,0) \ + XCHAL_SA_REG(s,0,0,2,0, aeq2, 8, 8, 8,0x006A, aeq,2 , 56,0,0,0) \ + XCHAL_SA_REG(s,0,0,2,0, aeq3, 8, 8, 8,0x006B, aeq,3 , 56,0,0,0) + +#define XCHAL_CP2_SA_NUM 0 +#define XCHAL_CP2_SA_LIST(s) /* empty */ + +#define XCHAL_CP3_SA_NUM 0 +#define XCHAL_CP3_SA_LIST(s) /* empty */ + +#define XCHAL_CP4_SA_NUM 0 +#define XCHAL_CP4_SA_LIST(s) /* empty */ + +#define XCHAL_CP5_SA_NUM 0 +#define XCHAL_CP5_SA_LIST(s) /* empty */ + +#define XCHAL_CP6_SA_NUM 0 +#define XCHAL_CP6_SA_LIST(s) /* empty */ + +#define XCHAL_CP7_SA_NUM 0 +#define XCHAL_CP7_SA_LIST(s) /* empty */ + +/* Byte length of instruction from its first nibble (op0 field), per FLIX. */ +#define XCHAL_OP0_FORMAT_LENGTHS 3,3,3,3,3,3,3,3,2,2,2,2,2,2,3,8 +/* Byte length of instruction from its first byte, per FLIX. */ +#define XCHAL_BYTE0_FORMAT_LENGTHS \ + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,\ + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,\ + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,\ + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,\ + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,\ + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,\ + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,\ + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8 + +#endif /*_XTENSA_CORE_TIE_H*/ + |