Diffstat (limited to 'arch'): 122 files changed, 1427 insertions, 588 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 8a7f7e1f2ca7..86ae4c4edd6f 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -272,6 +272,13 @@ config HAVE_REGS_AND_STACK_ACCESS_API declared in asm/ptrace.h For example the kprobes-based event tracer needs this API. +config HAVE_RSEQ + bool + depends on HAVE_REGS_AND_STACK_ACCESS_API + help + This symbol should be selected by an architecture if it + supports an implementation of restartable sequences. + config HAVE_CLK bool help diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 89d47eac18b2..e81bcd271be7 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -48,6 +48,7 @@ config ARC select HAVE_GENERIC_DMA_COHERENT select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZMA + select ARCH_HAS_PTE_SPECIAL config MIGHT_HAVE_PCI bool diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 08fe33830d4b..8ec5599a0957 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -320,8 +320,6 @@ PTE_BIT_FUNC(mkexec, |= (_PAGE_EXECUTE)); PTE_BIT_FUNC(mkspecial, |= (_PAGE_SPECIAL)); PTE_BIT_FUNC(mkhuge, |= (_PAGE_HW_SZ)); -#define __HAVE_ARCH_PTE_SPECIAL - static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot)); diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 8f460bdd4be1..94d222545920 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -8,6 +8,7 @@ config ARM select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FORTIFY_SOURCE + select ARCH_HAS_PTE_SPECIAL if ARM_LPAE select ARCH_HAS_SET_MEMORY select ARCH_HAS_PHYS_TO_DMA select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL @@ -90,6 +91,7 @@ config ARM select HAVE_PERF_USER_STACK_DUMP select HAVE_RCU_TABLE_FREE if (SMP && ARM_LPAE) select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_RSEQ select HAVE_SYSCALL_TRACEPOINTS select HAVE_UID16 select HAVE_VIRT_CPU_ACCOUNTING_GEN diff --git a/arch/arm/boot/dts/imx7d.dtsi b/arch/arm/boot/dts/imx7d.dtsi index 200714e3feea..d74dd7f19507 100644 --- a/arch/arm/boot/dts/imx7d.dtsi +++ b/arch/arm/boot/dts/imx7d.dtsi @@ -120,7 +120,7 @@ <GIC_SPI 100 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 101 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 103 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&clks IMX7D_ENET_AXI_ROOT_CLK>, + clocks = <&clks IMX7D_ENET2_IPG_ROOT_CLK>, <&clks IMX7D_ENET_AXI_ROOT_CLK>, <&clks IMX7D_ENET2_TIME_ROOT_CLK>, <&clks IMX7D_PLL_ENET_MAIN_125M_CLK>, diff --git a/arch/arm/boot/dts/imx7s.dtsi b/arch/arm/boot/dts/imx7s.dtsi index ce85b3ca1a55..69436b9a404c 100644 --- a/arch/arm/boot/dts/imx7s.dtsi +++ b/arch/arm/boot/dts/imx7s.dtsi @@ -1092,7 +1092,7 @@ <GIC_SPI 118 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 119 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&clks IMX7D_ENET_AXI_ROOT_CLK>, + clocks = <&clks IMX7D_ENET1_IPG_ROOT_CLK>, <&clks IMX7D_ENET_AXI_ROOT_CLK>, <&clks IMX7D_ENET1_TIME_ROOT_CLK>, <&clks IMX7D_PLL_ENET_MAIN_125M_CLK>, diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c index 2b913f17d50f..ad574d20415c 100644 --- a/arch/arm/common/mcpm_entry.c +++ b/arch/arm/common/mcpm_entry.c @@ -9,6 +9,7 @@ * published by the Free Software Foundation. */ +#include <linux/export.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/irqflags.h> @@ -174,6 +175,7 @@ bool mcpm_is_available(void) { return (platform_ops) ? true : false; } +EXPORT_SYMBOL_GPL(mcpm_is_available); /* * We can't use regular spinlocks. 
In the switcher case, it is possible diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 343fc9e6f78d..2d75e77bf7bb 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -325,6 +325,18 @@ static inline bool kvm_arm_harden_branch_predictor(void) } } +#define KVM_SSBD_UNKNOWN -1 +#define KVM_SSBD_FORCE_DISABLE 0 +#define KVM_SSBD_KERNEL 1 +#define KVM_SSBD_FORCE_ENABLE 2 +#define KVM_SSBD_MITIGATED 3 + +static inline int kvm_arm_have_ssbd(void) +{ + /* No way to detect it yet, pretend it is not there. */ + return KVM_SSBD_UNKNOWN; +} + static inline void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu) {} static inline void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) {} diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index c94d291fd1a8..8553d68b7c8a 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -356,6 +356,11 @@ static inline int kvm_map_vectors(void) return 0; } +static inline int hyp_map_aux_data(void) +{ + return 0; +} + #define kvm_phys_to_vttbr(addr) (addr) #endif /* !__ASSEMBLY__ */ diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index 2a4836087358..6d50a11d7793 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -219,7 +219,6 @@ static inline pte_t pte_mkspecial(pte_t pte) pte_val(pte) |= L_PTE_SPECIAL; return pte; } -#define __HAVE_ARCH_PTE_SPECIAL #define pmd_write(pmd) (pmd_isclear((pmd), L_PMD_SECT_RDONLY)) #define pmd_dirty(pmd) (pmd_isset((pmd), L_PMD_SECT_DIRTY)) diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 20df608bf343..106a1466518d 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -39,12 +39,13 @@ saved_pc .req lr .section .entry.text,"ax",%progbits .align 5 -#if !(IS_ENABLED(CONFIG_TRACE_IRQFLAGS) || IS_ENABLED(CONFIG_CONTEXT_TRACKING)) +#if !(IS_ENABLED(CONFIG_TRACE_IRQFLAGS) || IS_ENABLED(CONFIG_CONTEXT_TRACKING) || \ + IS_ENABLED(CONFIG_DEBUG_RSEQ)) /* * This is the fast syscall return path. We do as little as possible here, * such as avoiding writing r0 to the stack. We only use this path if we - * have tracing and context tracking disabled - the overheads from those - * features make this path too inefficient. + * have tracing, context tracking and rseq debug disabled - the overheads + * from those features make this path too inefficient. */ ret_fast_syscall: UNWIND(.fnstart ) @@ -71,14 +72,20 @@ fast_work_pending: /* fall through to work_pending */ #else /* - * The "replacement" ret_fast_syscall for when tracing or context tracking - * is enabled. As we will need to call out to some C functions, we save - * r0 first to avoid needing to save registers around each C function call. + * The "replacement" ret_fast_syscall for when tracing, context tracking, + * or rseq debug is enabled. As we will need to call out to some C functions, + * we save r0 first to avoid needing to save registers around each C function + * call. */ ret_fast_syscall: UNWIND(.fnstart ) UNWIND(.cantunwind ) str r0, [sp, #S_R0 + S_OFF]! @ save returned r0 +#if IS_ENABLED(CONFIG_DEBUG_RSEQ) + /* do_rseq_syscall needs interrupts enabled. 
*/ + mov r0, sp @ 'regs' + bl do_rseq_syscall +#endif disable_irq_notrace @ disable interrupts ldr r2, [tsk, #TI_ADDR_LIMIT] cmp r2, #TASK_SIZE @@ -113,6 +120,12 @@ ENDPROC(ret_fast_syscall) */ ENTRY(ret_to_user) ret_slow_syscall: +#if IS_ENABLED(CONFIG_DEBUG_RSEQ) + /* do_rseq_syscall needs interrupts enabled. */ + enable_irq_notrace @ enable interrupts + mov r0, sp @ 'regs' + bl do_rseq_syscall +#endif disable_irq_notrace @ disable interrupts ENTRY(ret_to_user_from_irq) ldr r2, [tsk, #TI_ADDR_LIMIT] diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c index 1d7061a38922..be42c4f66a40 100644 --- a/arch/arm/kernel/perf_event_v6.c +++ b/arch/arm/kernel/perf_event_v6.c @@ -303,12 +303,10 @@ static void armv6pmu_enable_event(struct perf_event *event) } static irqreturn_t -armv6pmu_handle_irq(int irq_num, - void *dev) +armv6pmu_handle_irq(struct arm_pmu *cpu_pmu) { unsigned long pmcr = armv6_pmcr_read(); struct perf_sample_data data; - struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev; struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); struct pt_regs *regs; int idx; diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index 870b66c1e4ef..57f01e059f39 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -946,11 +946,10 @@ static void armv7pmu_disable_event(struct perf_event *event) raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) +static irqreturn_t armv7pmu_handle_irq(struct arm_pmu *cpu_pmu) { u32 pmnc; struct perf_sample_data data; - struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev; struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); struct pt_regs *regs; int idx; diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c index fcf218da660e..88d1a76f5367 100644 --- a/arch/arm/kernel/perf_event_xscale.c +++ b/arch/arm/kernel/perf_event_xscale.c @@ -142,11 +142,10 @@ xscale1_pmnc_counter_has_overflowed(unsigned long pmnc, } static irqreturn_t -xscale1pmu_handle_irq(int irq_num, void *dev) +xscale1pmu_handle_irq(struct arm_pmu *cpu_pmu) { unsigned long pmnc; struct perf_sample_data data; - struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev; struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); struct pt_regs *regs; int idx; @@ -489,11 +488,10 @@ xscale2_pmnc_counter_has_overflowed(unsigned long of_flags, } static irqreturn_t -xscale2pmu_handle_irq(int irq_num, void *dev) +xscale2pmu_handle_irq(struct arm_pmu *cpu_pmu) { unsigned long pmnc, of_flags; struct perf_sample_data data; - struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev; struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); struct pt_regs *regs; int idx; diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index bd8810d4acb3..f09e9d66d605 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -541,6 +541,12 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) int ret; /* + * Increment event counter and perform fixup for the pre-signal + * frame. 
+ */ + rseq_signal_deliver(regs); + + /* * Set up the stack frame */ if (ksig->ka.sa.sa_flags & SA_SIGINFO) @@ -660,6 +666,7 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall) } else { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); + rseq_handle_notify_resume(regs); } } local_irq_disable(); @@ -703,3 +710,10 @@ asmlinkage void addr_limit_check_failed(void) { addr_limit_user_check(); } + +#ifdef CONFIG_DEBUG_RSEQ +asmlinkage void do_rseq_syscall(struct pt_regs *regs) +{ + rseq_syscall(regs); +} +#endif diff --git a/arch/arm/mach-s3c64xx/mach-crag6410-module.c b/arch/arm/mach-s3c64xx/mach-crag6410-module.c index f00988705408..5aa472892465 100644 --- a/arch/arm/mach-s3c64xx/mach-crag6410-module.c +++ b/arch/arm/mach-s3c64xx/mach-crag6410-module.c @@ -9,6 +9,7 @@ #include <linux/interrupt.h> #include <linux/i2c.h> #include <linux/spi/spi.h> +#include <linux/gpio/machine.h> #include <linux/mfd/wm831x/irq.h> #include <linux/mfd/wm831x/gpio.h> @@ -206,9 +207,6 @@ static const struct i2c_board_info wm1277_devs[] = { }; static struct arizona_pdata wm5102_reva_pdata = { - .ldo1 = { - .ldoena = S3C64XX_GPN(7), - }, .gpio_base = CODEC_GPIO_BASE, .irq_flags = IRQF_TRIGGER_HIGH, .micd_pol_gpio = CODEC_GPIO_BASE + 4, @@ -237,10 +235,16 @@ static struct spi_board_info wm5102_reva_spi_devs[] = { }, }; -static struct arizona_pdata wm5102_pdata = { - .ldo1 = { - .ldoena = S3C64XX_GPN(7), +static struct gpiod_lookup_table wm5102_reva_gpiod_table = { + .dev_id = "spi0.1", /* SPI device name */ + .table = { + GPIO_LOOKUP("GPION", 7, + "wlf,ldoena", GPIO_ACTIVE_HIGH), + { }, }, +}; + +static struct arizona_pdata wm5102_pdata = { .gpio_base = CODEC_GPIO_BASE, .irq_flags = IRQF_TRIGGER_HIGH, .micd_pol_gpio = CODEC_GPIO_BASE + 2, @@ -264,6 +268,15 @@ static struct spi_board_info wm5102_spi_devs[] = { }, }; +static struct gpiod_lookup_table wm5102_gpiod_table = { + .dev_id = "spi0.1", /* SPI device name */ + .table = { + GPIO_LOOKUP("GPION", 7, + "wlf,ldo1ena", GPIO_ACTIVE_HIGH), + { }, + }, +}; + static struct spi_board_info wm5110_spi_devs[] = { [0] = { .modalias = "wm5110", @@ -366,6 +379,9 @@ static int wlf_gf_module_probe(struct i2c_client *i2c, rev == gf_mods[i].rev)) break; + gpiod_add_lookup_table(&wm5102_reva_gpiod_table); + gpiod_add_lookup_table(&wm5102_gpiod_table); + if (i < ARRAY_SIZE(gf_mods)) { dev_info(&i2c->dev, "%s revision %d\n", gf_mods[i].name, rev + 1); diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index 0bb0e9c6376c..fbc74b5fa3ed 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -412,3 +412,4 @@ 395 common pkey_alloc sys_pkey_alloc 396 common pkey_free sys_pkey_free 397 common statx sys_statx +398 common rseq sys_rseq diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index ba7f4c8f5c3e..8073625371f5 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -89,6 +89,17 @@ int xen_unmap_domain_gfn_range(struct vm_area_struct *vma, } EXPORT_SYMBOL_GPL(xen_unmap_domain_gfn_range); +/* Not used by XENFEAT_auto_translated guests. 
*/ +int xen_remap_domain_mfn_array(struct vm_area_struct *vma, + unsigned long addr, + xen_pfn_t *mfn, int nr, + int *err_ptr, pgprot_t prot, + unsigned int domid, struct page **pages) +{ + return -ENOSYS; +} +EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_array); + static void xen_read_wallclock(struct timespec64 *ts) { u32 version; diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index b25ed7834f6c..9795b59aa28a 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -7,16 +7,19 @@ config ARM64 select ACPI_REDUCED_HARDWARE_ONLY if ACPI select ACPI_MCFG if ACPI select ACPI_SPCR_TABLE if ACPI + select ACPI_PPTT if ACPI select ARCH_CLOCKSOURCE_DATA select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_ELF_RANDOMIZE + select ARCH_HAS_FAST_MULTIPLIER select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA select ARCH_HAS_KCOV select ARCH_HAS_MEMBARRIER_SYNC_CORE + select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SET_MEMORY select ARCH_HAS_SG_CHAIN select ARCH_HAS_STRICT_KERNEL_RWX @@ -922,6 +925,15 @@ config HARDEN_EL2_VECTORS If unsure, say Y. +config ARM64_SSBD + bool "Speculative Store Bypass Disable" if EXPERT + default y + help + This enables mitigation of the bypassing of previous stores + by speculative loads. + + If unsure, say Y. + menuconfig ARMV8_DEPRECATED bool "Emulate deprecated/obsolete ARMv8 instructions" depends on COMPAT @@ -1033,6 +1045,7 @@ config ARM64_PAN config ARM64_LSE_ATOMICS bool "Atomic instructions" + default y help As part of the Large System Extensions, ARMv8.1 introduces new atomic instructions that are designed specifically to scale in @@ -1041,7 +1054,8 @@ config ARM64_LSE_ATOMICS Say Y here to make use of these instructions for the in-kernel atomic routines. This incurs a small overhead on CPUs that do not support these instructions and requires the kernel to be - built with binutils >= 2.25. + built with binutils >= 2.25 in order for the new instructions + to be used. 
config ARM64_VHE bool "Enable support for Virtualization Host Extensions (VHE)" diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index ecf613761e78..17ea72b1b389 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -78,7 +78,8 @@ CONFIG_PCIE_ARMADA_8K=y CONFIG_PCI_AARDVARK=y CONFIG_PCI_TEGRA=y CONFIG_PCIE_RCAR=y -CONFIG_PCIE_ROCKCHIP=m +CONFIG_PCIE_ROCKCHIP=y +CONFIG_PCIE_ROCKCHIP_HOST=m CONFIG_PCI_HOST_GENERIC=y CONFIG_PCI_XGENE=y CONFIG_PCI_HOST_THUNDER_PEM=y diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index 32f465a80e4e..0db62a4cbce2 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -86,6 +86,10 @@ static inline bool acpi_has_cpu_in_madt(void) } struct acpi_madt_generic_interrupt *acpi_cpu_get_madt_gicc(int cpu); +static inline u32 get_acpi_id_for_cpu(unsigned int cpu) +{ + return acpi_cpu_get_madt_gicc(cpu)->uid; +} static inline void arch_fix_phys_package_id(int num, u32 slot) { } void __init acpi_init_cpus(void); diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index 9bbffc7a301f..5df5cfe1c143 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -33,7 +33,7 @@ #define ICACHE_POLICY_VIPT 2 #define ICACHE_POLICY_PIPT 3 -#define L1_CACHE_SHIFT 7 +#define L1_CACHE_SHIFT (6) #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) /* @@ -43,7 +43,7 @@ * cache before the transfer is done, causing old data to be seen by * the CPU. */ -#define ARCH_DMA_MINALIGN L1_CACHE_BYTES +#define ARCH_DMA_MINALIGN (128) #ifndef __ASSEMBLY__ @@ -77,7 +77,7 @@ static inline u32 cache_type_cwg(void) static inline int cache_line_size(void) { u32 cwg = cache_type_cwg(); - return cwg ? 4 << cwg : L1_CACHE_BYTES; + return cwg ? 
4 << cwg : ARCH_DMA_MINALIGN; } #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index 4f5fd2a36e6e..3b0938281541 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -204,7 +204,9 @@ static inline void __cmpwait_case_##name(volatile void *ptr, \ unsigned long tmp; \ \ asm volatile( \ - " ldxr" #sz "\t%" #w "[tmp], %[v]\n" \ + " sevl\n" \ + " wfe\n" \ + " ldxr" #sz "\t%" #w "[tmp], %[v]\n" \ " eor %" #w "[tmp], %" #w "[tmp], %" #w "[val]\n" \ " cbnz %" #w "[tmp], 1f\n" \ " wfe\n" \ diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index bc51b72fafd4..8a699c708fc9 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -48,7 +48,8 @@ #define ARM64_HAS_CACHE_IDC 27 #define ARM64_HAS_CACHE_DIC 28 #define ARM64_HW_DBM 29 +#define ARM64_SSBD 30 -#define ARM64_NCAPS 30 +#define ARM64_NCAPS 31 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 09b0f2a80c8f..55bc1f073bfb 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -537,6 +537,28 @@ static inline u64 read_zcr_features(void) return zcr; } +#define ARM64_SSBD_UNKNOWN -1 +#define ARM64_SSBD_FORCE_DISABLE 0 +#define ARM64_SSBD_KERNEL 1 +#define ARM64_SSBD_FORCE_ENABLE 2 +#define ARM64_SSBD_MITIGATED 3 + +static inline int arm64_get_ssbd_state(void) +{ +#ifdef CONFIG_ARM64_SSBD + extern int ssbd_state; + return ssbd_state; +#else + return ARM64_SSBD_UNKNOWN; +#endif +} + +#ifdef CONFIG_ARM64_SSBD +void arm64_set_ssbd_mitigation(bool state); +#else +static inline void arm64_set_ssbd_mitigation(bool state) {} +#endif + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index fac1c4de7898..433b9554c6a1 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -121,6 +121,9 @@ #ifndef __ASSEMBLY__ +#include <linux/bug.h> +#include <asm/processor.h> /* for signal_minsigstksz, used by ARCH_DLINFO */ + typedef unsigned long elf_greg_t; #define ELF_NGREG (sizeof(struct user_pt_regs) / sizeof(elf_greg_t)) @@ -148,6 +151,16 @@ typedef struct user_fpsimd_state elf_fpregset_t; do { \ NEW_AUX_ENT(AT_SYSINFO_EHDR, \ (elf_addr_t)current->mm->context.vdso); \ + \ + /* \ + * Should always be nonzero unless there's a kernel bug. 
\ + * If we haven't determined a sensible value to give to \ + * userspace, omit the entry: \ + */ \ + if (likely(signal_minsigstksz)) \ + NEW_AUX_ENT(AT_MINSIGSTKSZ, signal_minsigstksz); \ + else \ + NEW_AUX_ENT(AT_IGNORE, 0); \ } while (0) #define ARCH_HAS_SETUP_ADDITIONAL_PAGES diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h index e050d765ca9e..46843515d77b 100644 --- a/arch/arm64/include/asm/fpsimdmacros.h +++ b/arch/arm64/include/asm/fpsimdmacros.h @@ -207,12 +207,14 @@ str w\nxtmp, [\xpfpsr, #4] .endm -.macro sve_load nxbase, xpfpsr, xvqminus1, nxtmp +.macro sve_load nxbase, xpfpsr, xvqminus1, nxtmp, xtmp2 mrs_s x\nxtmp, SYS_ZCR_EL1 - bic x\nxtmp, x\nxtmp, ZCR_ELx_LEN_MASK - orr x\nxtmp, x\nxtmp, \xvqminus1 - msr_s SYS_ZCR_EL1, x\nxtmp // self-synchronising - + bic \xtmp2, x\nxtmp, ZCR_ELx_LEN_MASK + orr \xtmp2, \xtmp2, \xvqminus1 + cmp \xtmp2, x\nxtmp + b.eq 921f + msr_s SYS_ZCR_EL1, \xtmp2 // self-synchronising +921: _for n, 0, 31, _sve_ldr_v \n, \nxbase, \n - 34 _sve_ldr_p 0, \nxbase _sve_wrffr 0 diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index f6648a3e4152..951b2076a5e2 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -20,6 +20,9 @@ #include <asm/virt.h> +#define VCPU_WORKAROUND_2_FLAG_SHIFT 0 +#define VCPU_WORKAROUND_2_FLAG (_AC(1, UL) << VCPU_WORKAROUND_2_FLAG_SHIFT) + #define ARM_EXIT_WITH_SERROR_BIT 31 #define ARM_EXCEPTION_CODE(x) ((x) & ~(1U << ARM_EXIT_WITH_SERROR_BIT)) #define ARM_SERROR_PENDING(x) !!((x) & (1U << ARM_EXIT_WITH_SERROR_BIT)) @@ -71,14 +74,37 @@ extern u32 __kvm_get_mdcr_el2(void); extern u32 __init_stage2_translation(void); +/* Home-grown __this_cpu_{ptr,read} variants that always work at HYP */ +#define __hyp_this_cpu_ptr(sym) \ + ({ \ + void *__ptr = hyp_symbol_addr(sym); \ + __ptr += read_sysreg(tpidr_el2); \ + (typeof(&sym))__ptr; \ + }) + +#define __hyp_this_cpu_read(sym) \ + ({ \ + *__hyp_this_cpu_ptr(sym); \ + }) + #else /* __ASSEMBLY__ */ -.macro get_host_ctxt reg, tmp - adr_l \reg, kvm_host_cpu_state +.macro hyp_adr_this_cpu reg, sym, tmp + adr_l \reg, \sym mrs \tmp, tpidr_el2 add \reg, \reg, \tmp .endm +.macro hyp_ldr_this_cpu reg, sym, tmp + adr_l \reg, \sym + mrs \tmp, tpidr_el2 + ldr \reg, [\reg, \tmp] +.endm + +.macro get_host_ctxt reg, tmp + hyp_adr_this_cpu \reg, kvm_host_cpu_state, \tmp +.endm + .macro get_vcpu_ptr vcpu, ctxt get_host_ctxt \ctxt, \vcpu ldr \vcpu, [\ctxt, #HOST_CONTEXT_VCPU] diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 469de8acd06f..95d8a0e15b5f 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -216,6 +216,9 @@ struct kvm_vcpu_arch { /* Exception Information */ struct kvm_vcpu_fault_info fault; + /* State of various workarounds, see kvm_asm.h for bit assignment */ + u64 workaround_flags; + /* Guest debug state */ u64 debug_flags; @@ -452,6 +455,29 @@ static inline bool kvm_arm_harden_branch_predictor(void) return cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR); } +#define KVM_SSBD_UNKNOWN -1 +#define KVM_SSBD_FORCE_DISABLE 0 +#define KVM_SSBD_KERNEL 1 +#define KVM_SSBD_FORCE_ENABLE 2 +#define KVM_SSBD_MITIGATED 3 + +static inline int kvm_arm_have_ssbd(void) +{ + switch (arm64_get_ssbd_state()) { + case ARM64_SSBD_FORCE_DISABLE: + return KVM_SSBD_FORCE_DISABLE; + case ARM64_SSBD_KERNEL: + return KVM_SSBD_KERNEL; + case ARM64_SSBD_FORCE_ENABLE: + return KVM_SSBD_FORCE_ENABLE; + case ARM64_SSBD_MITIGATED: + return 
KVM_SSBD_MITIGATED; + case ARM64_SSBD_UNKNOWN: + default: + return KVM_SSBD_UNKNOWN; + } +} + void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu); void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 6128992c2ded..fb9a7127bb75 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -72,7 +72,6 @@ #ifdef __ASSEMBLY__ #include <asm/alternative.h> -#include <asm/cpufeature.h> /* * Convert a kernel VA into a HYP VA. @@ -473,6 +472,30 @@ static inline int kvm_map_vectors(void) } #endif +#ifdef CONFIG_ARM64_SSBD +DECLARE_PER_CPU_READ_MOSTLY(u64, arm64_ssbd_callback_required); + +static inline int hyp_map_aux_data(void) +{ + int cpu, err; + + for_each_possible_cpu(cpu) { + u64 *ptr; + + ptr = per_cpu_ptr(&arm64_ssbd_callback_required, cpu); + err = create_hyp_mappings(ptr, ptr + 1, PAGE_HYP); + if (err) + return err; + } + return 0; +} +#else +static inline int hyp_map_aux_data(void) +{ + return 0; +} +#endif + #define kvm_phys_to_vttbr(addr) phys_to_ttbr(addr) #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 7c4c8f318ba9..9f82d6b53851 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -306,8 +306,6 @@ static inline int pte_same(pte_t pte_a, pte_t pte_b) #define HPAGE_MASK (~(HPAGE_SIZE - 1)) #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) -#define __HAVE_ARCH_PTE_SPECIAL - static inline pte_t pgd_pte(pgd_t pgd) { return __pte(pgd_val(pgd)); diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 767598932549..65ab83e8926e 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -35,6 +35,8 @@ #ifdef __KERNEL__ #include <linux/build_bug.h> +#include <linux/cache.h> +#include <linux/init.h> #include <linux/stddef.h> #include <linux/string.h> @@ -244,6 +246,9 @@ void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused); void cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused); void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused); +extern unsigned long __ro_after_init signal_minsigstksz; /* sigframe size */ +extern void __init minsigstksz_setup(void); + /* Userspace interface for PR_SVE_{SET,GET}_VL prctl()s: */ #define SVE_SET_VL(arg) sve_set_current_vl(arg) #define SVE_GET_VL() sve_get_current_vl() diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 740aa03c5f0d..cbcf11b5e637 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -94,6 +94,7 @@ void arch_release_task_struct(struct task_struct *tsk); #define TIF_32BIT 22 /* 32bit process */ #define TIF_SVE 23 /* Scalable Vector Extension in use */ #define TIF_SVE_VL_INHERIT 24 /* Inherit sve_vl_onexec across exec */ +#define TIF_SSBD 25 /* Wants SSB mitigation */ #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h index c4f2d50491eb..df48212f767b 100644 --- a/arch/arm64/include/asm/topology.h +++ b/arch/arm64/include/asm/topology.h @@ -7,14 +7,16 @@ struct cpu_topology { int thread_id; int core_id; - int cluster_id; + int package_id; + int llc_id; cpumask_t thread_sibling; cpumask_t core_sibling; + cpumask_t llc_siblings; }; extern struct cpu_topology cpu_topology[NR_CPUS]; -#define 
topology_physical_package_id(cpu) (cpu_topology[cpu].cluster_id) +#define topology_physical_package_id(cpu) (cpu_topology[cpu].package_id) #define topology_core_id(cpu) (cpu_topology[cpu].core_id) #define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_sibling) #define topology_sibling_cpumask(cpu) (&cpu_topology[cpu].thread_sibling) diff --git a/arch/arm64/include/uapi/asm/auxvec.h b/arch/arm64/include/uapi/asm/auxvec.h index ec0a86d484e1..743c0b84fd30 100644 --- a/arch/arm64/include/uapi/asm/auxvec.h +++ b/arch/arm64/include/uapi/asm/auxvec.h @@ -19,7 +19,8 @@ /* vDSO location */ #define AT_SYSINFO_EHDR 33 +#define AT_MINSIGSTKSZ 51 /* stack needed for signal delivery */ -#define AT_VECTOR_SIZE_ARCH 1 /* entries in ARCH_DLINFO */ +#define AT_VECTOR_SIZE_ARCH 2 /* entries in ARCH_DLINFO */ #endif diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index bf825f38d206..0025f8691046 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -54,6 +54,7 @@ arm64-obj-$(CONFIG_ARM64_RELOC_TEST) += arm64-reloc-test.o arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o arm64-obj-$(CONFIG_CRASH_DUMP) += crash_dump.o arm64-obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o +arm64-obj-$(CONFIG_ARM64_SSBD) += ssbd.o obj-y += $(arm64-obj-y) vdso/ probes/ obj-m += $(arm64-obj-m) diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index 6e47fc3ab549..97d45d5151d4 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -13,6 +13,7 @@ #include <linux/sched.h> #include <linux/slab.h> #include <linux/sysctl.h> +#include <linux/uaccess.h> #include <asm/cpufeature.h> #include <asm/insn.h> @@ -20,8 +21,6 @@ #include <asm/system_misc.h> #include <asm/traps.h> #include <asm/kprobes.h> -#include <linux/uaccess.h> -#include <asm/cpufeature.h> #define CREATE_TRACE_POINTS #include "trace-events-emulation.h" diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 5bdda651bd05..323aeb5f2fe6 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -136,6 +136,7 @@ int main(void) #ifdef CONFIG_KVM_ARM_HOST DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt)); DEFINE(VCPU_FAULT_DISR, offsetof(struct kvm_vcpu, arch.fault.disr_el1)); + DEFINE(VCPU_WORKAROUND_FLAGS, offsetof(struct kvm_vcpu, arch.workaround_flags)); DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs)); DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs)); DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs)); diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c index 380f2e2fbed5..0bf0a835122f 100644 --- a/arch/arm64/kernel/cacheinfo.c +++ b/arch/arm64/kernel/cacheinfo.c @@ -17,6 +17,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/acpi.h> #include <linux/cacheinfo.h> #include <linux/of.h> @@ -46,7 +47,7 @@ static void ci_leaf_init(struct cacheinfo *this_leaf, static int __init_cache_level(unsigned int cpu) { - unsigned int ctype, level, leaves, of_level; + unsigned int ctype, level, leaves, fw_level; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); for (level = 1, leaves = 0; level <= MAX_CACHE_LEVEL; level++) { @@ -59,15 +60,19 @@ static int __init_cache_level(unsigned int cpu) leaves += (ctype == CACHE_TYPE_SEPARATE) ? 
2 : 1; } - of_level = of_find_last_cache_level(cpu); - if (level < of_level) { + if (acpi_disabled) + fw_level = of_find_last_cache_level(cpu); + else + fw_level = acpi_find_last_cache_level(cpu); + + if (level < fw_level) { /* * some external caches not specified in CLIDR_EL1 * the information may be available in the device tree * only unified external caches are considered here */ - leaves += (of_level - level); - level = of_level; + leaves += (fw_level - level); + level = fw_level; } this_cpu_ci->num_levels = level; diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index e4a1182deff7..1d2b6d768efe 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -16,6 +16,8 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/arm-smccc.h> +#include <linux/psci.h> #include <linux/types.h> #include <asm/cpu.h> #include <asm/cputype.h> @@ -232,6 +234,178 @@ enable_smccc_arch_workaround_1(const struct arm64_cpu_capabilities *entry) } #endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */ +#ifdef CONFIG_ARM64_SSBD +DEFINE_PER_CPU_READ_MOSTLY(u64, arm64_ssbd_callback_required); + +int ssbd_state __read_mostly = ARM64_SSBD_KERNEL; + +static const struct ssbd_options { + const char *str; + int state; +} ssbd_options[] = { + { "force-on", ARM64_SSBD_FORCE_ENABLE, }, + { "force-off", ARM64_SSBD_FORCE_DISABLE, }, + { "kernel", ARM64_SSBD_KERNEL, }, +}; + +static int __init ssbd_cfg(char *buf) +{ + int i; + + if (!buf || !buf[0]) + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(ssbd_options); i++) { + int len = strlen(ssbd_options[i].str); + + if (strncmp(buf, ssbd_options[i].str, len)) + continue; + + ssbd_state = ssbd_options[i].state; + return 0; + } + + return -EINVAL; +} +early_param("ssbd", ssbd_cfg); + +void __init arm64_update_smccc_conduit(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, + int nr_inst) +{ + u32 insn; + + BUG_ON(nr_inst != 1); + + switch (psci_ops.conduit) { + case PSCI_CONDUIT_HVC: + insn = aarch64_insn_get_hvc_value(); + break; + case PSCI_CONDUIT_SMC: + insn = aarch64_insn_get_smc_value(); + break; + default: + return; + } + + *updptr = cpu_to_le32(insn); +} + +void __init arm64_enable_wa2_handling(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, + int nr_inst) +{ + BUG_ON(nr_inst != 1); + /* + * Only allow mitigation on EL1 entry/exit and guest + * ARCH_WORKAROUND_2 handling if the SSBD state allows it to + * be flipped. 
+ */ + if (arm64_get_ssbd_state() == ARM64_SSBD_KERNEL) + *updptr = cpu_to_le32(aarch64_insn_gen_nop()); +} + +void arm64_set_ssbd_mitigation(bool state) +{ + switch (psci_ops.conduit) { + case PSCI_CONDUIT_HVC: + arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL); + break; + + case PSCI_CONDUIT_SMC: + arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL); + break; + + default: + WARN_ON_ONCE(1); + break; + } +} + +static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, + int scope) +{ + struct arm_smccc_res res; + bool required = true; + s32 val; + + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + + if (psci_ops.smccc_version == SMCCC_VERSION_1_0) { + ssbd_state = ARM64_SSBD_UNKNOWN; + return false; + } + + switch (psci_ops.conduit) { + case PSCI_CONDUIT_HVC: + arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_2, &res); + break; + + case PSCI_CONDUIT_SMC: + arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_2, &res); + break; + + default: + ssbd_state = ARM64_SSBD_UNKNOWN; + return false; + } + + val = (s32)res.a0; + + switch (val) { + case SMCCC_RET_NOT_SUPPORTED: + ssbd_state = ARM64_SSBD_UNKNOWN; + return false; + + case SMCCC_RET_NOT_REQUIRED: + pr_info_once("%s mitigation not required\n", entry->desc); + ssbd_state = ARM64_SSBD_MITIGATED; + return false; + + case SMCCC_RET_SUCCESS: + required = true; + break; + + case 1: /* Mitigation not required on this CPU */ + required = false; + break; + + default: + WARN_ON(1); + return false; + } + + switch (ssbd_state) { + case ARM64_SSBD_FORCE_DISABLE: + pr_info_once("%s disabled from command-line\n", entry->desc); + arm64_set_ssbd_mitigation(false); + required = false; + break; + + case ARM64_SSBD_KERNEL: + if (required) { + __this_cpu_write(arm64_ssbd_callback_required, 1); + arm64_set_ssbd_mitigation(true); + } + break; + + case ARM64_SSBD_FORCE_ENABLE: + pr_info_once("%s forced from command-line\n", entry->desc); + arm64_set_ssbd_mitigation(true); + required = true; + break; + + default: + WARN_ON(1); + break; + } + + return required; +} +#endif /* CONFIG_ARM64_SSBD */ + #define CAP_MIDR_RANGE(model, v_min, r_min, v_max, r_max) \ .matches = is_affected_midr_range, \ .midr_range = MIDR_RANGE(model, v_min, r_min, v_max, r_max) @@ -488,6 +662,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = { ERRATA_MIDR_RANGE_LIST(arm64_harden_el2_vectors), }, #endif +#ifdef CONFIG_ARM64_SSBD + { + .desc = "Speculative Store Bypass Disable", + .capability = ARM64_SSBD, + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + .matches = has_ssbd_mitigation, + }, +#endif { } }; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 9d1b06d67c53..d2856b129097 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1606,7 +1606,6 @@ static void __init setup_system_capabilities(void) void __init setup_cpu_features(void) { u32 cwg; - int cls; setup_system_capabilities(); mark_const_caps_ready(); @@ -1619,6 +1618,7 @@ void __init setup_cpu_features(void) pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n"); sve_setup(); + minsigstksz_setup(); /* Advertise that we have computed the system capabilities */ set_sys_caps_initialised(); @@ -1627,13 +1627,9 @@ void __init setup_cpu_features(void) * Check for sane CTR_EL0.CWG value. 
*/ cwg = cache_type_cwg(); - cls = cache_line_size(); if (!cwg) - pr_warn("No Cache Writeback Granule information, assuming cache line size %d\n", - cls); - if (L1_CACHE_BYTES < cls) - pr_warn("L1_CACHE_BYTES smaller than the Cache Writeback Granule (%d < %d)\n", - L1_CACHE_BYTES, cls); + pr_warn("No Cache Writeback Granule information, assuming %d\n", + ARCH_DMA_MINALIGN); } static bool __maybe_unused diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index 73f17bffcd23..12d4958e6429 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -49,7 +49,7 @@ ENTRY(sve_save_state) ENDPROC(sve_save_state) ENTRY(sve_load_state) - sve_load 0, x1, x2, 3 + sve_load 0, x1, x2, 3, x4 ret ENDPROC(sve_load_state) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index ec2ee720e33e..28ad8799406f 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -18,6 +18,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/arm-smccc.h> #include <linux/init.h> #include <linux/linkage.h> @@ -137,6 +138,25 @@ alternative_else_nop_endif add \dst, \dst, #(\sym - .entry.tramp.text) .endm + // This macro corrupts x0-x3. It is the caller's duty + // to save/restore them if required. + .macro apply_ssbd, state, targ, tmp1, tmp2 +#ifdef CONFIG_ARM64_SSBD +alternative_cb arm64_enable_wa2_handling + b \targ +alternative_cb_end + ldr_this_cpu \tmp2, arm64_ssbd_callback_required, \tmp1 + cbz \tmp2, \targ + ldr \tmp2, [tsk, #TSK_TI_FLAGS] + tbnz \tmp2, #TIF_SSBD, \targ + mov w0, #ARM_SMCCC_ARCH_WORKAROUND_2 + mov w1, #\state +alternative_cb arm64_update_smccc_conduit + nop // Patched to SMC/HVC #0 +alternative_cb_end +#endif + .endm + .macro kernel_entry, el, regsize = 64 .if \regsize == 32 mov w0, w0 // zero upper 32 bits of x0 @@ -163,6 +183,14 @@ alternative_else_nop_endif ldr x19, [tsk, #TSK_TI_FLAGS] // since we can unmask debug disable_step_tsk x19, x20 // exceptions when scheduling. 
+ apply_ssbd 1, 1f, x22, x23 + +#ifdef CONFIG_ARM64_SSBD + ldp x0, x1, [sp, #16 * 0] + ldp x2, x3, [sp, #16 * 1] +#endif +1: + mov x29, xzr // fp pointed to user-space .else add x21, sp, #S_FRAME_SIZE @@ -303,6 +331,8 @@ alternative_if ARM64_WORKAROUND_845719 alternative_else_nop_endif #endif 3: + apply_ssbd 0, 5f, x0, x1 +5: .endif msr elr_el1, x21 // set up the return data diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 4bcdd0318729..3b527ae46e49 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -31,7 +31,6 @@ #include <linux/percpu.h> #include <linux/prctl.h> #include <linux/preempt.h> -#include <linux/prctl.h> #include <linux/ptrace.h> #include <linux/sched/signal.h> #include <linux/sched/task_stack.h> @@ -129,7 +128,7 @@ static int sve_default_vl = -1; #ifdef CONFIG_ARM64_SVE /* Maximum supported vector length across all CPUs (initially poisoned) */ -int __ro_after_init sve_max_vl = -1; +int __ro_after_init sve_max_vl = SVE_VL_MIN; /* Set of available vector lengths, as vq_to_bit(vq): */ static __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX); static void __percpu *efi_sve_state; @@ -360,22 +359,13 @@ static int sve_proc_do_default_vl(struct ctl_table *table, int write, return ret; /* Writing -1 has the special meaning "set to max": */ - if (vl == -1) { - /* Fail safe if sve_max_vl wasn't initialised */ - if (WARN_ON(!sve_vl_valid(sve_max_vl))) - vl = SVE_VL_MIN; - else - vl = sve_max_vl; - - goto chosen; - } + if (vl == -1) + vl = sve_max_vl; if (!sve_vl_valid(vl)) return -EINVAL; - vl = find_supported_vector_length(vl); -chosen: - sve_default_vl = vl; + sve_default_vl = find_supported_vector_length(vl); return 0; } diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 1ec5f28c39fc..6b2686d54411 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -313,6 +313,17 @@ int swsusp_arch_suspend(void) sleep_cpu = -EINVAL; __cpu_suspend_exit(); + + /* + * Just in case the boot kernel did turn the SSBD + * mitigation off behind our back, let's set the state + * to what we expect it to be. 
+ */ + switch (arm64_get_ssbd_state()) { + case ARM64_SSBD_FORCE_ENABLE: + case ARM64_SSBD_KERNEL: + arm64_set_ssbd_mitigation(true); + } } local_daif_restore(flags); diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 85a251b6dfa8..33147aacdafd 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -670,11 +670,10 @@ static void armv8pmu_disable_event(struct perf_event *event) raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev) +static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) { u32 pmovsr; struct perf_sample_data data; - struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev; struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); struct pt_regs *regs; int idx; diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 7ff81fed46e1..bd732644c2f6 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -766,9 +766,6 @@ static void sve_init_header_from_task(struct user_sve_header *header, vq = sve_vq_from_vl(header->vl); header->max_vl = sve_max_vl; - if (WARN_ON(!sve_vl_valid(sve_max_vl))) - header->max_vl = header->vl; - header->size = SVE_PT_SIZE(vq, header->flags); header->max_size = SVE_PT_SIZE(sve_vq_from_vl(header->max_vl), SVE_PT_REGS_SVE); @@ -1046,8 +1043,6 @@ static const struct user_regset_view user_aarch64_view = { }; #ifdef CONFIG_COMPAT -#include <linux/compat.h> - enum compat_regset { REGSET_COMPAT_GPR, REGSET_COMPAT_VFP, diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 154b7d30145d..511af13e8d8f 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -17,6 +17,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/cache.h> #include <linux/compat.h> #include <linux/errno.h> #include <linux/kernel.h> @@ -570,8 +571,15 @@ badframe: return 0; } -/* Determine the layout of optional records in the signal frame */ -static int setup_sigframe_layout(struct rt_sigframe_user_layout *user) +/* + * Determine the layout of optional records in the signal frame + * + * add_all: if true, lays out the biggest possible signal frame for + * this task; otherwise, generates a layout for the current state + * of the task. 
+ */ +static int setup_sigframe_layout(struct rt_sigframe_user_layout *user, + bool add_all) { int err; @@ -581,7 +589,7 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user) return err; /* fault information, if valid */ - if (current->thread.fault_code) { + if (add_all || current->thread.fault_code) { err = sigframe_alloc(user, &user->esr_offset, sizeof(struct esr_context)); if (err) @@ -591,8 +599,14 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user) if (system_supports_sve()) { unsigned int vq = 0; - if (test_thread_flag(TIF_SVE)) - vq = sve_vq_from_vl(current->thread.sve_vl); + if (add_all || test_thread_flag(TIF_SVE)) { + int vl = sve_max_vl; + + if (!add_all) + vl = current->thread.sve_vl; + + vq = sve_vq_from_vl(vl); + } err = sigframe_alloc(user, &user->sve_offset, SVE_SIG_CONTEXT_SIZE(vq)); @@ -603,7 +617,6 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user) return sigframe_alloc_end(user); } - static int setup_sigframe(struct rt_sigframe_user_layout *user, struct pt_regs *regs, sigset_t *set) { @@ -701,7 +714,7 @@ static int get_sigframe(struct rt_sigframe_user_layout *user, int err; init_user_layout(user); - err = setup_sigframe_layout(user); + err = setup_sigframe_layout(user, false); if (err) return err; @@ -830,11 +843,12 @@ static void do_signal(struct pt_regs *regs) unsigned long continue_addr = 0, restart_addr = 0; int retval = 0; struct ksignal ksig; + bool syscall = in_syscall(regs); /* * If we were from a system call, check for system call restarting... */ - if (in_syscall(regs)) { + if (syscall) { continue_addr = regs->pc; restart_addr = continue_addr - (compat_thumb_mode(regs) ? 2 : 4); retval = regs->regs[0]; @@ -886,7 +900,7 @@ static void do_signal(struct pt_regs *regs) * Handle restarting a different system call. As above, if a debugger * has chosen to restart at a different PC, ignore the restart. */ - if (in_syscall(regs) && regs->pc == restart_addr) { + if (syscall && regs->pc == restart_addr) { if (retval == -ERESTART_RESTARTBLOCK) setup_restart_syscall(regs); user_rewind_single_step(current); @@ -936,3 +950,28 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, thread_flags = READ_ONCE(current_thread_info()->flags); } while (thread_flags & _TIF_WORK_MASK); } + +unsigned long __ro_after_init signal_minsigstksz; + +/* + * Determine the stack space required for guaranteed signal devliery. + * This function is used to populate AT_MINSIGSTKSZ at process startup. + * cpufeatures setup is assumed to be complete. + */ +void __init minsigstksz_setup(void) +{ + struct rt_sigframe_user_layout user; + + init_user_layout(&user); + + /* + * If this fails, SIGFRAME_MAXSZ needs to be enlarged. It won't + * be big enough, but it's our best guess: + */ + if (WARN_ON(setup_sigframe_layout(&user, true))) + return; + + signal_minsigstksz = sigframe_size(&user) + + round_up(sizeof(struct frame_record), 16) + + 16; /* max alignment padding */ +} diff --git a/arch/arm64/kernel/ssbd.c b/arch/arm64/kernel/ssbd.c new file mode 100644 index 000000000000..3432e5ef9f41 --- /dev/null +++ b/arch/arm64/kernel/ssbd.c @@ -0,0 +1,110 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2018 ARM Ltd, All Rights Reserved. + */ + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/thread_info.h> + +#include <asm/cpufeature.h> + +/* + * prctl interface for SSBD + * FIXME: Drop the below ifdefery once merged in 4.18. 
+ */ +#ifdef PR_SPEC_STORE_BYPASS +static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl) +{ + int state = arm64_get_ssbd_state(); + + /* Unsupported */ + if (state == ARM64_SSBD_UNKNOWN) + return -EINVAL; + + /* Treat the unaffected/mitigated state separately */ + if (state == ARM64_SSBD_MITIGATED) { + switch (ctrl) { + case PR_SPEC_ENABLE: + return -EPERM; + case PR_SPEC_DISABLE: + case PR_SPEC_FORCE_DISABLE: + return 0; + } + } + + /* + * Things are a bit backward here: the arm64 internal API + * *enables the mitigation* when the userspace API *disables + * speculation*. So much fun. + */ + switch (ctrl) { + case PR_SPEC_ENABLE: + /* If speculation is force disabled, enable is not allowed */ + if (state == ARM64_SSBD_FORCE_ENABLE || + task_spec_ssb_force_disable(task)) + return -EPERM; + task_clear_spec_ssb_disable(task); + clear_tsk_thread_flag(task, TIF_SSBD); + break; + case PR_SPEC_DISABLE: + if (state == ARM64_SSBD_FORCE_DISABLE) + return -EPERM; + task_set_spec_ssb_disable(task); + set_tsk_thread_flag(task, TIF_SSBD); + break; + case PR_SPEC_FORCE_DISABLE: + if (state == ARM64_SSBD_FORCE_DISABLE) + return -EPERM; + task_set_spec_ssb_disable(task); + task_set_spec_ssb_force_disable(task); + set_tsk_thread_flag(task, TIF_SSBD); + break; + default: + return -ERANGE; + } + + return 0; +} + +int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, + unsigned long ctrl) +{ + switch (which) { + case PR_SPEC_STORE_BYPASS: + return ssbd_prctl_set(task, ctrl); + default: + return -ENODEV; + } +} + +static int ssbd_prctl_get(struct task_struct *task) +{ + switch (arm64_get_ssbd_state()) { + case ARM64_SSBD_UNKNOWN: + return -EINVAL; + case ARM64_SSBD_FORCE_ENABLE: + return PR_SPEC_DISABLE; + case ARM64_SSBD_KERNEL: + if (task_spec_ssb_force_disable(task)) + return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; + if (task_spec_ssb_disable(task)) + return PR_SPEC_PRCTL | PR_SPEC_DISABLE; + return PR_SPEC_PRCTL | PR_SPEC_ENABLE; + case ARM64_SSBD_FORCE_DISABLE: + return PR_SPEC_ENABLE; + default: + return PR_SPEC_NOT_AFFECTED; + } +} + +int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) +{ + switch (which) { + case PR_SPEC_STORE_BYPASS: + return ssbd_prctl_get(task); + default: + return -ENODEV; + } +} +#endif /* PR_SPEC_STORE_BYPASS */ diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index a307b9e13392..70c283368b64 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -62,6 +62,14 @@ void notrace __cpu_suspend_exit(void) */ if (hw_breakpoint_restore) hw_breakpoint_restore(cpu); + + /* + * On resume, firmware implementing dynamic mitigation will + * have turned the mitigation on. If the user has forcefully + * disabled it, make sure their wishes are obeyed. + */ + if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) + arm64_set_ssbd_mitigation(false); } /* diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 21868530018e..f845a8617812 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -11,7 +11,9 @@ * for more details. 
*/ +#include <linux/acpi.h> #include <linux/arch_topology.h> +#include <linux/cacheinfo.h> #include <linux/cpu.h> #include <linux/cpumask.h> #include <linux/init.h> @@ -22,6 +24,7 @@ #include <linux/sched.h> #include <linux/sched/topology.h> #include <linux/slab.h> +#include <linux/smp.h> #include <linux/string.h> #include <asm/cpu.h> @@ -47,7 +50,7 @@ static int __init get_cpu_for_node(struct device_node *node) return cpu; } -static int __init parse_core(struct device_node *core, int cluster_id, +static int __init parse_core(struct device_node *core, int package_id, int core_id) { char name[10]; @@ -63,7 +66,7 @@ static int __init parse_core(struct device_node *core, int cluster_id, leaf = false; cpu = get_cpu_for_node(t); if (cpu >= 0) { - cpu_topology[cpu].cluster_id = cluster_id; + cpu_topology[cpu].package_id = package_id; cpu_topology[cpu].core_id = core_id; cpu_topology[cpu].thread_id = i; } else { @@ -85,7 +88,7 @@ static int __init parse_core(struct device_node *core, int cluster_id, return -EINVAL; } - cpu_topology[cpu].cluster_id = cluster_id; + cpu_topology[cpu].package_id = package_id; cpu_topology[cpu].core_id = core_id; } else if (leaf) { pr_err("%pOF: Can't get CPU for leaf core\n", core); @@ -101,7 +104,7 @@ static int __init parse_cluster(struct device_node *cluster, int depth) bool leaf = true; bool has_cores = false; struct device_node *c; - static int cluster_id __initdata; + static int package_id __initdata; int core_id = 0; int i, ret; @@ -140,7 +143,7 @@ static int __init parse_cluster(struct device_node *cluster, int depth) } if (leaf) { - ret = parse_core(c, cluster_id, core_id++); + ret = parse_core(c, package_id, core_id++); } else { pr_err("%pOF: Non-leaf cluster with core %s\n", cluster, name); @@ -158,7 +161,7 @@ static int __init parse_cluster(struct device_node *cluster, int depth) pr_warn("%pOF: empty cluster\n", cluster); if (leaf) - cluster_id++; + package_id++; return 0; } @@ -194,7 +197,7 @@ static int __init parse_dt_topology(void) * only mark cores described in the DT as possible. 
*/ for_each_possible_cpu(cpu) - if (cpu_topology[cpu].cluster_id == -1) + if (cpu_topology[cpu].package_id == -1) ret = -EINVAL; out_map: @@ -212,7 +215,14 @@ EXPORT_SYMBOL_GPL(cpu_topology); const struct cpumask *cpu_coregroup_mask(int cpu) { - return &cpu_topology[cpu].core_sibling; + const cpumask_t *core_mask = &cpu_topology[cpu].core_sibling; + + if (cpu_topology[cpu].llc_id != -1) { + if (cpumask_subset(&cpu_topology[cpu].llc_siblings, core_mask)) + core_mask = &cpu_topology[cpu].llc_siblings; + } + + return core_mask; } static void update_siblings_masks(unsigned int cpuid) @@ -224,7 +234,12 @@ static void update_siblings_masks(unsigned int cpuid) for_each_possible_cpu(cpu) { cpu_topo = &cpu_topology[cpu]; - if (cpuid_topo->cluster_id != cpu_topo->cluster_id) + if (cpuid_topo->llc_id == cpu_topo->llc_id) { + cpumask_set_cpu(cpu, &cpuid_topo->llc_siblings); + cpumask_set_cpu(cpuid, &cpu_topo->llc_siblings); + } + + if (cpuid_topo->package_id != cpu_topo->package_id) continue; cpumask_set_cpu(cpuid, &cpu_topo->core_sibling); @@ -245,7 +260,7 @@ void store_cpu_topology(unsigned int cpuid) struct cpu_topology *cpuid_topo = &cpu_topology[cpuid]; u64 mpidr; - if (cpuid_topo->cluster_id != -1) + if (cpuid_topo->package_id != -1) goto topology_populated; mpidr = read_cpuid_mpidr(); @@ -259,19 +274,19 @@ void store_cpu_topology(unsigned int cpuid) /* Multiprocessor system : Multi-threads per core */ cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 1); - cpuid_topo->cluster_id = MPIDR_AFFINITY_LEVEL(mpidr, 2) | + cpuid_topo->package_id = MPIDR_AFFINITY_LEVEL(mpidr, 2) | MPIDR_AFFINITY_LEVEL(mpidr, 3) << 8; } else { /* Multiprocessor system : Single-thread per core */ cpuid_topo->thread_id = -1; cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); - cpuid_topo->cluster_id = MPIDR_AFFINITY_LEVEL(mpidr, 1) | + cpuid_topo->package_id = MPIDR_AFFINITY_LEVEL(mpidr, 1) | MPIDR_AFFINITY_LEVEL(mpidr, 2) << 8 | MPIDR_AFFINITY_LEVEL(mpidr, 3) << 16; } pr_debug("CPU%u: cluster %d core %d thread %d mpidr %#016llx\n", - cpuid, cpuid_topo->cluster_id, cpuid_topo->core_id, + cpuid, cpuid_topo->package_id, cpuid_topo->core_id, cpuid_topo->thread_id, mpidr); topology_populated: @@ -287,7 +302,11 @@ static void __init reset_cpu_topology(void) cpu_topo->thread_id = -1; cpu_topo->core_id = 0; - cpu_topo->cluster_id = -1; + cpu_topo->package_id = -1; + + cpu_topo->llc_id = -1; + cpumask_clear(&cpu_topo->llc_siblings); + cpumask_set_cpu(cpu, &cpu_topo->llc_siblings); cpumask_clear(&cpu_topo->core_sibling); cpumask_set_cpu(cpu, &cpu_topo->core_sibling); @@ -296,6 +315,59 @@ static void __init reset_cpu_topology(void) } } +#ifdef CONFIG_ACPI +/* + * Propagate the topology information of the processor_topology_node tree to the + * cpu_topology array. 
+ */ +static int __init parse_acpi_topology(void) +{ + bool is_threaded; + int cpu, topology_id; + + is_threaded = read_cpuid_mpidr() & MPIDR_MT_BITMASK; + + for_each_possible_cpu(cpu) { + int i, cache_id; + + topology_id = find_acpi_cpu_topology(cpu, 0); + if (topology_id < 0) + return topology_id; + + if (is_threaded) { + cpu_topology[cpu].thread_id = topology_id; + topology_id = find_acpi_cpu_topology(cpu, 1); + cpu_topology[cpu].core_id = topology_id; + } else { + cpu_topology[cpu].thread_id = -1; + cpu_topology[cpu].core_id = topology_id; + } + topology_id = find_acpi_cpu_topology_package(cpu); + cpu_topology[cpu].package_id = topology_id; + + i = acpi_find_last_cache_level(cpu); + + if (i > 0) { + /* + * this is the only part of cpu_topology that has + * a direct relationship with the cache topology + */ + cache_id = find_acpi_cpu_cache_topology(cpu, i); + if (cache_id > 0) + cpu_topology[cpu].llc_id = cache_id; + } + } + + return 0; +} + +#else +static inline int __init parse_acpi_topology(void) +{ + return -EINVAL; +} +#endif + void __init init_cpu_topology(void) { reset_cpu_topology(); @@ -304,6 +376,8 @@ void __init init_cpu_topology(void) * Discard anything that was parsed if we hit an error so we * don't use partial information. */ - if (of_have_populated_dt() && parse_dt_topology()) + if (!acpi_disabled && parse_acpi_topology()) + reset_cpu_topology(); + else if (of_have_populated_dt() && parse_dt_topology()) reset_cpu_topology(); } diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 0221aca6493d..605d1b60469c 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -34,25 +34,25 @@ jiffies = jiffies_64; * 4 KB (see related ASSERT() below) \ */ \ . = ALIGN(SZ_4K); \ - VMLINUX_SYMBOL(__hyp_idmap_text_start) = .; \ + __hyp_idmap_text_start = .; \ *(.hyp.idmap.text) \ - VMLINUX_SYMBOL(__hyp_idmap_text_end) = .; \ - VMLINUX_SYMBOL(__hyp_text_start) = .; \ + __hyp_idmap_text_end = .; \ + __hyp_text_start = .; \ *(.hyp.text) \ - VMLINUX_SYMBOL(__hyp_text_end) = .; + __hyp_text_end = .; #define IDMAP_TEXT \ . = ALIGN(SZ_4K); \ - VMLINUX_SYMBOL(__idmap_text_start) = .; \ + __idmap_text_start = .; \ *(.idmap.text) \ - VMLINUX_SYMBOL(__idmap_text_end) = .; + __idmap_text_end = .; #ifdef CONFIG_HIBERNATION #define HIBERNATE_TEXT \ . = ALIGN(SZ_4K); \ - VMLINUX_SYMBOL(__hibernate_exit_text_start) = .;\ + __hibernate_exit_text_start = .; \ *(.hibernate_exit.text) \ - VMLINUX_SYMBOL(__hibernate_exit_text_end) = .; + __hibernate_exit_text_end = .; #else #define HIBERNATE_TEXT #endif @@ -60,10 +60,10 @@ jiffies = jiffies_64; #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 #define TRAMP_TEXT \ . = ALIGN(PAGE_SIZE); \ - VMLINUX_SYMBOL(__entry_tramp_text_start) = .; \ + __entry_tramp_text_start = .; \ *(.entry.tramp.text) \ . 
= ALIGN(PAGE_SIZE); \ - VMLINUX_SYMBOL(__entry_tramp_text_end) = .; + __entry_tramp_text_end = .; #else #define TRAMP_TEXT #endif diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index bffece27b5c1..05d836979032 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -106,8 +106,44 @@ el1_hvc_guest: */ ldr x1, [sp] // Guest's x0 eor w1, w1, #ARM_SMCCC_ARCH_WORKAROUND_1 + cbz w1, wa_epilogue + + /* ARM_SMCCC_ARCH_WORKAROUND_2 handling */ + eor w1, w1, #(ARM_SMCCC_ARCH_WORKAROUND_1 ^ \ + ARM_SMCCC_ARCH_WORKAROUND_2) cbnz w1, el1_trap - mov x0, x1 + +#ifdef CONFIG_ARM64_SSBD +alternative_cb arm64_enable_wa2_handling + b wa2_end +alternative_cb_end + get_vcpu_ptr x2, x0 + ldr x0, [x2, #VCPU_WORKAROUND_FLAGS] + + // Sanitize the argument and update the guest flags + ldr x1, [sp, #8] // Guest's x1 + clz w1, w1 // Murphy's device: + lsr w1, w1, #5 // w1 = !!w1 without using + eor w1, w1, #1 // the flags... + bfi x0, x1, #VCPU_WORKAROUND_2_FLAG_SHIFT, #1 + str x0, [x2, #VCPU_WORKAROUND_FLAGS] + + /* Check that we actually need to perform the call */ + hyp_ldr_this_cpu x0, arm64_ssbd_callback_required, x2 + cbz x0, wa2_end + + mov w0, #ARM_SMCCC_ARCH_WORKAROUND_2 + smc #0 + + /* Don't leak data from the SMC call */ + mov x3, xzr +wa2_end: + mov x2, xzr + mov x1, xzr +#endif + +wa_epilogue: + mov x0, xzr add sp, sp, #16 eret diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index d9645236e474..c50cedc447f1 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -15,6 +15,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/arm-smccc.h> #include <linux/types.h> #include <linux/jump_label.h> #include <uapi/linux/psci.h> @@ -389,6 +390,39 @@ static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) return false; } +static inline bool __hyp_text __needs_ssbd_off(struct kvm_vcpu *vcpu) +{ + if (!cpus_have_const_cap(ARM64_SSBD)) + return false; + + return !(vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG); +} + +static void __hyp_text __set_guest_arch_workaround_state(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_ARM64_SSBD + /* + * The host runs with the workaround always present. If the + * guest wants it disabled, so be it... + */ + if (__needs_ssbd_off(vcpu) && + __hyp_this_cpu_read(arm64_ssbd_callback_required)) + arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 0, NULL); +#endif +} + +static void __hyp_text __set_host_arch_workaround_state(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_ARM64_SSBD + /* + * If the guest has disabled the workaround, bring it back on. + */ + if (__needs_ssbd_off(vcpu) && + __hyp_this_cpu_read(arm64_ssbd_callback_required)) + arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 1, NULL); +#endif +} + /* Switch to the guest for VHE systems running in EL2 */ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) { @@ -409,6 +443,8 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) sysreg_restore_guest_state_vhe(guest_ctxt); __debug_switch_to_guest(vcpu); + __set_guest_arch_workaround_state(vcpu); + do { /* Jump in the fire! */ exit_code = __guest_enter(vcpu, host_ctxt); @@ -416,6 +452,8 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) /* And we're baaack! 
*/ } while (fixup_guest_exit(vcpu, &exit_code)); + __set_host_arch_workaround_state(vcpu); + fp_enabled = fpsimd_enabled_vhe(); sysreg_save_guest_state_vhe(guest_ctxt); @@ -465,6 +503,8 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) __sysreg_restore_state_nvhe(guest_ctxt); __debug_switch_to_guest(vcpu); + __set_guest_arch_workaround_state(vcpu); + do { /* Jump in the fire! */ exit_code = __guest_enter(vcpu, host_ctxt); @@ -472,6 +512,8 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) /* And we're baaack! */ } while (fixup_guest_exit(vcpu, &exit_code)); + __set_host_arch_workaround_state(vcpu); + fp_enabled = __fpsimd_enabled_nvhe(); __sysreg_save_state_nvhe(guest_ctxt); diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 3256b9228e75..a74311beda35 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -122,6 +122,10 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) /* Reset PMU */ kvm_pmu_vcpu_reset(vcpu); + /* Default workaround setup is enabled (if supported) */ + if (kvm_arm_have_ssbd() == KVM_SSBD_KERNEL) + vcpu->arch.workaround_flags |= VCPU_WORKAROUND_2_FLAG; + /* Reset timer */ return kvm_timer_vcpu_reset(vcpu); } diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index db01f2709842..49e217ac7e1e 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -504,6 +504,11 @@ static int __init arm64_dma_init(void) max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT)) swiotlb = 1; + WARN_TAINT(ARCH_DMA_MINALIGN < cache_line_size(), + TAINT_CPU_OUT_OF_SPEC, + "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)", + ARCH_DMA_MINALIGN, cache_line_size()); + return atomic_pool_init(); } arch_initcall(arm64_dma_init); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 576f15153080..b8eecc7b9531 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -235,8 +235,9 @@ static bool is_el1_instruction_abort(unsigned int esr) return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_CUR; } -static inline bool is_permission_fault(unsigned int esr, struct pt_regs *regs, - unsigned long addr) +static inline bool is_el1_permission_fault(unsigned int esr, + struct pt_regs *regs, + unsigned long addr) { unsigned int ec = ESR_ELx_EC(esr); unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE; @@ -254,6 +255,22 @@ static inline bool is_permission_fault(unsigned int esr, struct pt_regs *regs, return false; } +static void die_kernel_fault(const char *msg, unsigned long addr, + unsigned int esr, struct pt_regs *regs) +{ + bust_spinlocks(1); + + pr_alert("Unable to handle kernel %s at virtual address %016lx\n", msg, + addr); + + mem_abort_decode(esr); + + show_pte(addr); + die("Oops", regs, esr); + bust_spinlocks(0); + do_exit(SIGKILL); +} + static void __do_kernel_fault(unsigned long addr, unsigned int esr, struct pt_regs *regs) { @@ -266,9 +283,7 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr, if (!is_el1_instruction_abort(esr) && fixup_exception(regs)) return; - bust_spinlocks(1); - - if (is_permission_fault(esr, regs, addr)) { + if (is_el1_permission_fault(esr, regs, addr)) { if (esr & ESR_ELx_WNR) msg = "write to read-only memory"; else @@ -279,15 +294,7 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr, msg = "paging request"; } - pr_alert("Unable to handle kernel %s at virtual address %08lx\n", msg, - addr); - - mem_abort_decode(esr); - - show_pte(addr); - die("Oops", regs, esr); - bust_spinlocks(0); - do_exit(SIGKILL); + die_kernel_fault(msg, addr, esr, regs); } static void 
__do_user_fault(struct siginfo *info, unsigned int esr) @@ -447,16 +454,19 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, mm_flags |= FAULT_FLAG_WRITE; } - if (addr < TASK_SIZE && is_permission_fault(esr, regs, addr)) { + if (addr < TASK_SIZE && is_el1_permission_fault(esr, regs, addr)) { /* regs->orig_addr_limit may be 0 if we entered from EL0 */ if (regs->orig_addr_limit == KERNEL_DS) - die("Accessing user space memory with fs=KERNEL_DS", regs, esr); + die_kernel_fault("access to user memory with fs=KERNEL_DS", + addr, esr, regs); if (is_el1_instruction_abort(esr)) - die("Attempting to execute userspace memory", regs, esr); + die_kernel_fault("execution of user memory", + addr, esr, regs); if (!search_exception_tables(regs->pc)) - die("Accessing user space memory outside uaccess.h routines", regs, esr); + die_kernel_fault("access to user memory outside uaccess routines", + addr, esr, regs); } perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); diff --git a/arch/microblaze/include/asm/pci.h b/arch/microblaze/include/asm/pci.h index 66cf3a5a2f83..859c19828dd4 100644 --- a/arch/microblaze/include/asm/pci.h +++ b/arch/microblaze/include/asm/pci.h @@ -61,10 +61,6 @@ extern int pci_mmap_legacy_page_range(struct pci_bus *bus, #define HAVE_PCI_LEGACY 1 -extern void pcibios_claim_one_bus(struct pci_bus *b); - -extern void pcibios_finish_adding_to_bus(struct pci_bus *bus); - extern void pcibios_resource_survey(void); struct file; diff --git a/arch/microblaze/pci/pci-common.c b/arch/microblaze/pci/pci-common.c index 161f9758c631..f34346d56095 100644 --- a/arch/microblaze/pci/pci-common.c +++ b/arch/microblaze/pci/pci-common.c @@ -915,67 +915,6 @@ void __init pcibios_resource_survey(void) pci_assign_unassigned_resources(); } -/* This is used by the PCI hotplug driver to allocate resource - * of newly plugged busses. We can try to consolidate with the - * rest of the code later, for now, keep it as-is as our main - * resource allocation function doesn't deal with sub-trees yet. - */ -void pcibios_claim_one_bus(struct pci_bus *bus) -{ - struct pci_dev *dev; - struct pci_bus *child_bus; - - list_for_each_entry(dev, &bus->devices, bus_list) { - int i; - - for (i = 0; i < PCI_NUM_RESOURCES; i++) { - struct resource *r = &dev->resource[i]; - - if (r->parent || !r->start || !r->flags) - continue; - - pr_debug("PCI: Claiming %s: ", pci_name(dev)); - pr_debug("Resource %d: %016llx..%016llx [%x]\n", - i, (unsigned long long)r->start, - (unsigned long long)r->end, - (unsigned int)r->flags); - - if (pci_claim_resource(dev, i) == 0) - continue; - - pci_claim_bridge_resource(dev, i); - } - } - - list_for_each_entry(child_bus, &bus->children, node) - pcibios_claim_one_bus(child_bus); -} -EXPORT_SYMBOL_GPL(pcibios_claim_one_bus); - - -/* pcibios_finish_adding_to_bus - * - * This is to be called by the hotplug code after devices have been - * added to a bus, this include calling it for a PHB that is just - * being added - */ -void pcibios_finish_adding_to_bus(struct pci_bus *bus) -{ - pr_debug("PCI: Finishing adding to hotplug bus %04x:%02x\n", - pci_domain_nr(bus), bus->number); - - /* Allocate bus and devices resources */ - pcibios_allocate_bus_resources(bus); - pcibios_claim_one_bus(bus); - - /* Add new devices to global lists. Register in proc, sysfs. 
*/ - pci_bus_add_devices(bus); - - /* Fixup EEH */ - /* eeh_add_device_tree_late(bus); */ -} -EXPORT_SYMBOL_GPL(pcibios_finish_adding_to_bus); - static void pcibios_setup_phb_resources(struct pci_controller *hose, struct list_head *resources) { diff --git a/arch/mips/pci/pci-legacy.c b/arch/mips/pci/pci-legacy.c index 0c65c38e05d6..f1e92bf743c2 100644 --- a/arch/mips/pci/pci-legacy.c +++ b/arch/mips/pci/pci-legacy.c @@ -263,9 +263,8 @@ static int pcibios_enable_resources(struct pci_dev *dev, int mask) (!(r->flags & IORESOURCE_ROM_ENABLE))) continue; if (!r->start && r->end) { - printk(KERN_ERR "PCI: Device %s not available " - "because of resource collisions\n", - pci_name(dev)); + pci_err(dev, + "can't enable device: resource collisions\n"); return -EINVAL; } if (r->flags & IORESOURCE_IO) @@ -274,8 +273,7 @@ static int pcibios_enable_resources(struct pci_dev *dev, int mask) cmd |= PCI_COMMAND_MEMORY; } if (cmd != old_cmd) { - printk("PCI: Enabling device %s (%04x -> %04x)\n", - pci_name(dev), old_cmd, cmd); + pci_info(dev, "enabling device (%04x -> %04x)\n", old_cmd, cmd); pci_write_config_word(dev, PCI_COMMAND, cmd); } return 0; diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 076fe3094856..eaba5920234d 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -135,6 +135,7 @@ config PPC select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_PHYS_TO_DMA select ARCH_HAS_PMEM_API if PPC64 + select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_MEMBARRIER_CALLBACKS select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE select ARCH_HAS_SG_CHAIN @@ -219,6 +220,7 @@ config PPC select HAVE_SYSCALL_TRACEPOINTS select HAVE_VIRT_CPU_ACCOUNTING select HAVE_IRQ_TIME_ACCOUNTING + select HAVE_RSEQ select IOMMU_HELPER if PPC64 select IRQ_DOMAIN select IRQ_FORCED_THREADING diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 42fe7c2ff2df..63cee159022b 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -335,9 +335,6 @@ extern unsigned long pci_io_base; /* Advertise special mapping type for AGP */ #define HAVE_PAGE_AGP -/* Advertise support for _PAGE_SPECIAL */ -#define __HAVE_ARCH_PTE_SPECIAL - #ifndef __ASSEMBLY__ /* diff --git a/arch/powerpc/include/asm/pte-common.h b/arch/powerpc/include/asm/pte-common.h index 050b0d775324..bef56141a549 100644 --- a/arch/powerpc/include/asm/pte-common.h +++ b/arch/powerpc/include/asm/pte-common.h @@ -208,9 +208,6 @@ static inline bool pte_user(pte_t pte) #define PAGE_AGP (PAGE_KERNEL_NC) #define HAVE_PAGE_AGP -/* Advertise support for _PAGE_SPECIAL */ -#define __HAVE_ARCH_PTE_SPECIAL - #ifndef _PAGE_READ /* if not defined, we should not find _PAGE_WRITE too */ #define _PAGE_READ 0 diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 79a3b47e4839..cfcf6a874cfa 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -392,3 +392,4 @@ SYSCALL(statx) SYSCALL(pkey_alloc) SYSCALL(pkey_free) SYSCALL(pkey_mprotect) +SYSCALL(rseq) diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index daf1ba97a00c..1e9708632dce 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -12,7 +12,7 @@ #include <uapi/asm/unistd.h> -#define NR_syscalls 387 +#define NR_syscalls 388 #define __NR__exit __NR_exit diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h index 389c36fd8299..ac5ba55066dd 
100644 --- a/arch/powerpc/include/uapi/asm/unistd.h +++ b/arch/powerpc/include/uapi/asm/unistd.h @@ -398,5 +398,6 @@ #define __NR_pkey_alloc 384 #define __NR_pkey_free 385 #define __NR_pkey_mprotect 386 +#define __NR_rseq 387 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */ diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index eb8d01bae8c6..973577f2141c 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -365,6 +365,13 @@ syscall_dotrace_cont: blrl /* Call handler */ .globl ret_from_syscall ret_from_syscall: +#ifdef CONFIG_DEBUG_RSEQ + /* Check whether the syscall is issued inside a restartable sequence */ + stw r3,GPR3(r1) + addi r3,r1,STACK_FRAME_OVERHEAD + bl rseq_syscall + lwz r3,GPR3(r1) +#endif mr r6,r3 CURRENT_THREAD_INFO(r12, r1) /* disable interrupts so current_thread_info()->flags can't change */ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index b10e01021214..729e9ef4d3bb 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -194,6 +194,14 @@ system_call: /* label this so stack traces look sane */ .Lsyscall_exit: std r3,RESULT(r1) + +#ifdef CONFIG_DEBUG_RSEQ + /* Check whether the syscall is issued inside a restartable sequence */ + addi r3,r1,STACK_FRAME_OVERHEAD + bl rseq_syscall + ld r3,RESULT(r1) +#endif + CURRENT_THREAD_INFO(r12, r1) ld r8,_MSR(r1) diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index fb932f1202c7..17fe4339ba59 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -134,6 +134,8 @@ static void do_signal(struct task_struct *tsk) /* Re-enable the breakpoints for the signal stack */ thread_change_pc(tsk, tsk->thread.regs); + rseq_signal_deliver(tsk->thread.regs); + if (is32) { if (ksig.ka.sa.sa_flags & SA_SIGINFO) ret = handle_rt_signal32(&ksig, oldset, tsk); @@ -168,6 +170,7 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags) if (thread_info_flags & _TIF_NOTIFY_RESUME) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); + rseq_handle_notify_resume(regs); } user_enter(); diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 274bc064c41f..17f19e67993b 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -42,6 +42,7 @@ config RISCV select THREAD_INFO_IN_TASK select RISCV_TIMER select GENERIC_IRQ_MULTI_HANDLER + select ARCH_HAS_PTE_SPECIAL config MMU def_bool y diff --git a/arch/riscv/include/asm/pgtable-bits.h b/arch/riscv/include/asm/pgtable-bits.h index 997ddbb1d370..2fa2942be221 100644 --- a/arch/riscv/include/asm/pgtable-bits.h +++ b/arch/riscv/include/asm/pgtable-bits.h @@ -42,7 +42,4 @@ _PAGE_WRITE | _PAGE_EXEC | \ _PAGE_USER | _PAGE_GLOBAL)) -/* Advertise support for _PAGE_SPECIAL */ -#define __HAVE_ARCH_PTE_SPECIAL - #endif /* _ASM_RISCV_PGTABLE_BITS_H */ diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c index 10ed2749e246..0bc86e5f8f3f 100644 --- a/arch/riscv/kernel/cacheinfo.c +++ b/arch/riscv/kernel/cacheinfo.c @@ -20,7 +20,6 @@ static void ci_leaf_init(struct cacheinfo *this_leaf, struct device_node *node, enum cache_type type, unsigned int level) { - this_leaf->of_node = node; this_leaf->level = level; this_leaf->type = type; /* not a sector cache */ diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index b7deee7e738f..baed39772c84 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -65,6 +65,7 @@ config S390 select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && 
COMPACTION) || CMA select ARCH_HAS_KCOV + select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SET_MEMORY select ARCH_HAS_SG_CHAIN select ARCH_HAS_STRICT_KERNEL_RWX diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 2d24d33bf188..9809694e1389 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -171,7 +171,6 @@ static inline int is_module_addr(void *addr) #define _PAGE_WRITE 0x020 /* SW pte write bit */ #define _PAGE_SPECIAL 0x040 /* SW associated with special page */ #define _PAGE_UNUSED 0x080 /* SW bit for pgste usage state */ -#define __HAVE_ARCH_PTE_SPECIAL #ifdef CONFIG_MEM_SOFT_DIRTY #define _PAGE_SOFT_DIRTY 0x002 /* SW pte soft dirty bit */ diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 562f72955956..84bd6329a88d 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -190,14 +190,15 @@ unsigned long *page_table_alloc(struct mm_struct *mm) if (!list_empty(&mm->context.pgtable_list)) { page = list_first_entry(&mm->context.pgtable_list, struct page, lru); - mask = atomic_read(&page->_mapcount); + mask = atomic_read(&page->_refcount) >> 24; mask = (mask | (mask >> 4)) & 3; if (mask != 3) { table = (unsigned long *) page_to_phys(page); bit = mask & 1; /* =1 -> second 2K */ if (bit) table += PTRS_PER_PTE; - atomic_xor_bits(&page->_mapcount, 1U << bit); + atomic_xor_bits(&page->_refcount, + 1U << (bit + 24)); list_del(&page->lru); } } @@ -218,12 +219,12 @@ unsigned long *page_table_alloc(struct mm_struct *mm) table = (unsigned long *) page_to_phys(page); if (mm_alloc_pgste(mm)) { /* Return 4K page table with PGSTEs */ - atomic_set(&page->_mapcount, 3); + atomic_xor_bits(&page->_refcount, 3 << 24); memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE); memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE); } else { /* Return the first 2K fragment of the page */ - atomic_set(&page->_mapcount, 1); + atomic_xor_bits(&page->_refcount, 1 << 24); memset64((u64 *)table, _PAGE_INVALID, 2 * PTRS_PER_PTE); spin_lock_bh(&mm->context.lock); list_add(&page->lru, &mm->context.pgtable_list); @@ -242,7 +243,8 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) /* Free 2K page table fragment of a 4K page */ bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)); spin_lock_bh(&mm->context.lock); - mask = atomic_xor_bits(&page->_mapcount, 1U << bit); + mask = atomic_xor_bits(&page->_refcount, 1U << (bit + 24)); + mask >>= 24; if (mask & 3) list_add(&page->lru, &mm->context.pgtable_list); else @@ -253,7 +255,6 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) } pgtable_page_dtor(page); - atomic_set(&page->_mapcount, -1); __free_page(page); } @@ -274,7 +275,8 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table, } bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)); spin_lock_bh(&mm->context.lock); - mask = atomic_xor_bits(&page->_mapcount, 0x11U << bit); + mask = atomic_xor_bits(&page->_refcount, 0x11U << (bit + 24)); + mask >>= 24; if (mask & 3) list_add_tail(&page->lru, &mm->context.pgtable_list); else @@ -296,12 +298,13 @@ static void __tlb_remove_table(void *_table) break; case 1: /* lower 2K of a 4K page table */ case 2: /* higher 2K of a 4K page table */ - if (atomic_xor_bits(&page->_mapcount, mask << 4) != 0) + mask = atomic_xor_bits(&page->_refcount, mask << (4 + 24)); + mask >>= 24; + if (mask != 0) break; /* fallthrough */ case 3: /* 4K page table with pgstes */ pgtable_page_dtor(page); - atomic_set(&page->_mapcount, -1); __free_page(page); 
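The s390 pgalloc hunk above stops tracking the 2K page-table fragment state in page->_mapcount and instead packs it into the top byte of page->_refcount (the "<< 24" shifts), leaving the low bits free for ordinary refcounting. The standalone C sketch below is only a simplified model of that bit-packing idea, not the kernel's own code; the helper is a plain stand-in for the atomic primitive.

#include <stdio.h>
#include <stdint.h>

/* Simplified model: bits 24/25 say which 2K half of the 4K page holds a
 * live page table; the real code also uses bits 28/29 ("mask << (4 + 24)")
 * for halves pending RCU free.  Only the allocation bits are modelled. */
static uint32_t refcount;	/* stand-in for page->_refcount */

static uint32_t xor_bits(uint32_t *v, uint32_t bits)
{
	*v ^= bits;		/* the kernel does this atomically */
	return *v;
}

int main(void)
{
	uint32_t mask;

	xor_bits(&refcount, 1U << 24);	/* allocate first 2K half  */
	xor_bits(&refcount, 1U << 25);	/* allocate second 2K half */

	/* free the first half, then inspect the state byte */
	mask = xor_bits(&refcount, 1U << 24) >> 24;
	printf("halves still in use: %#x\n", (unsigned int)(mask & 3));

	/* the 4K page may be released only once both bits are clear */
	mask = xor_bits(&refcount, 1U << 25) >> 24;
	printf("page can be freed: %s\n", (mask & 3) == 0 ? "yes" : "no");
	return 0;
}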
break; } diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index ae619d54018c..4d61a085982b 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 config SUPERH def_bool y + select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_NO_COHERENT_DMA_MMAP if !MMU diff --git a/arch/sh/include/asm/pgtable.h b/arch/sh/include/asm/pgtable.h index 89c513a982fc..f6abfe2bca93 100644 --- a/arch/sh/include/asm/pgtable.h +++ b/arch/sh/include/asm/pgtable.h @@ -156,8 +156,6 @@ extern void page_table_range_init(unsigned long start, unsigned long end, #define HAVE_ARCH_UNMAPPED_AREA #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN -#define __HAVE_ARCH_PTE_SPECIAL - #include <asm-generic/pgtable.h> #endif /* __ASM_SH_PGTABLE_H */ diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index b42ba888217d..9a2b8877f174 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -88,6 +88,7 @@ config SPARC64 select ARCH_USE_QUEUED_SPINLOCKS select GENERIC_TIME_VSYSCALL select ARCH_CLOCKSOURCE_DATA + select ARCH_HAS_PTE_SPECIAL config ARCH_DEFCONFIG string diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 44d6ac47e035..1393a8ac596b 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -117,9 +117,6 @@ bool kern_addr_valid(unsigned long addr); #define _PAGE_PMD_HUGE _AC(0x0100000000000000,UL) /* Huge page */ #define _PAGE_PUD_HUGE _PAGE_PMD_HUGE -/* Advertise support for _PAGE_SPECIAL */ -#define __HAVE_ARCH_PTE_SPECIAL - /* SUN4U pte bits... */ #define _PAGE_SZ4MB_4U _AC(0x6000000000000000,UL) /* 4MB Page */ #define _PAGE_SZ512K_4U _AC(0x4000000000000000,UL) /* 512K Page */ diff --git a/arch/sparc/kernel/leon_pci.c b/arch/sparc/kernel/leon_pci.c index 15b59169c535..e5e5ff6b9a5c 100644 --- a/arch/sparc/kernel/leon_pci.c +++ b/arch/sparc/kernel/leon_pci.c @@ -60,50 +60,30 @@ void leon_pci_init(struct platform_device *ofdev, struct leon_pci_info *info) pci_bus_add_devices(root_bus); } -void pcibios_fixup_bus(struct pci_bus *pbus) +int pcibios_enable_device(struct pci_dev *dev, int mask) { - struct pci_dev *dev; - int i, has_io, has_mem; - u16 cmd; + u16 cmd, oldcmd; + int i; - list_for_each_entry(dev, &pbus->devices, bus_list) { - /* - * We can not rely on that the bootloader has enabled I/O - * or memory access to PCI devices. Instead we enable it here - * if the device has BARs of respective type. 
- */ - has_io = has_mem = 0; - for (i = 0; i < PCI_ROM_RESOURCE; i++) { - unsigned long f = dev->resource[i].flags; - if (f & IORESOURCE_IO) - has_io = 1; - else if (f & IORESOURCE_MEM) - has_mem = 1; - } - /* ROM BARs are mapped into 32-bit memory space */ - if (dev->resource[PCI_ROM_RESOURCE].end != 0) { - dev->resource[PCI_ROM_RESOURCE].flags |= - IORESOURCE_ROM_ENABLE; - has_mem = 1; - } - pci_bus_read_config_word(pbus, dev->devfn, PCI_COMMAND, &cmd); - if (has_io && !(cmd & PCI_COMMAND_IO)) { -#ifdef CONFIG_PCI_DEBUG - printk(KERN_INFO "LEONPCI: Enabling I/O for dev %s\n", - pci_name(dev)); -#endif + pci_read_config_word(dev, PCI_COMMAND, &cmd); + oldcmd = cmd; + + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + struct resource *res = &dev->resource[i]; + + /* Only set up the requested stuff */ + if (!(mask & (1<<i))) + continue; + + if (res->flags & IORESOURCE_IO) cmd |= PCI_COMMAND_IO; - pci_bus_write_config_word(pbus, dev->devfn, PCI_COMMAND, - cmd); - } - if (has_mem && !(cmd & PCI_COMMAND_MEMORY)) { -#ifdef CONFIG_PCI_DEBUG - printk(KERN_INFO "LEONPCI: Enabling MEMORY for dev" - "%s\n", pci_name(dev)); -#endif + if (res->flags & IORESOURCE_MEM) cmd |= PCI_COMMAND_MEMORY; - pci_bus_write_config_word(pbus, dev->devfn, PCI_COMMAND, - cmd); - } } + + if (cmd != oldcmd) { + pci_info(dev, "enabling device (%04x -> %04x)\n", oldcmd, cmd); + pci_write_config_word(dev, PCI_COMMAND, cmd); + } + return 0; } diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index 41b20edb427d..17ea16a1337c 100644 --- a/arch/sparc/kernel/pci.c +++ b/arch/sparc/kernel/pci.c @@ -214,8 +214,8 @@ static void pci_parse_of_addrs(struct platform_device *op, if (!addrs) return; if (ofpci_verbose) - printk(" parse addresses (%d bytes) @ %p\n", - proplen, addrs); + pci_info(dev, " parse addresses (%d bytes) @ %p\n", + proplen, addrs); op_res = &op->resource[0]; for (; proplen >= 20; proplen -= 20, addrs += 5, op_res++) { struct resource *res; @@ -227,8 +227,8 @@ static void pci_parse_of_addrs(struct platform_device *op, continue; i = addrs[0] & 0xff; if (ofpci_verbose) - printk(" start: %llx, end: %llx, i: %x\n", - op_res->start, op_res->end, i); + pci_info(dev, " start: %llx, end: %llx, i: %x\n", + op_res->start, op_res->end, i); if (PCI_BASE_ADDRESS_0 <= i && i <= PCI_BASE_ADDRESS_5) { res = &dev->resource[(i - PCI_BASE_ADDRESS_0) >> 2]; @@ -236,13 +236,15 @@ static void pci_parse_of_addrs(struct platform_device *op, res = &dev->resource[PCI_ROM_RESOURCE]; flags |= IORESOURCE_READONLY | IORESOURCE_SIZEALIGN; } else { - printk(KERN_ERR "PCI: bad cfg reg num 0x%x\n", i); + pci_err(dev, "bad cfg reg num 0x%x\n", i); continue; } res->start = op_res->start; res->end = op_res->end; res->flags = flags; res->name = pci_name(dev); + + pci_info(dev, "reg 0x%x: %pR\n", i, res); } } @@ -289,8 +291,8 @@ static struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm, type = ""; if (ofpci_verbose) - printk(" create device, devfn: %x, type: %s\n", - devfn, type); + pci_info(bus," create device, devfn: %x, type: %s\n", + devfn, type); dev->sysdata = node; dev->dev.parent = bus->bridge; @@ -323,10 +325,6 @@ static struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm, dev_set_name(&dev->dev, "%04x:%02x:%02x.%d", pci_domain_nr(bus), dev->bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn)); - if (ofpci_verbose) - printk(" class: 0x%x device name: %s\n", - dev->class, pci_name(dev)); - /* I have seen IDE devices which will not respond to * the bmdma simplex check reads if bus mastering is * disabled. 
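The LEON hunk above replaces pcibios_fixup_bus()'s unconditional command-register poking with a pcibios_enable_device() that only touches the resources selected by the caller's mask. The following userspace sketch models that flow under stated assumptions: the constants mirror Linux/PCI values, while the resource table, mask, and resource count are invented for the demo.

#include <stdio.h>
#include <stdint.h>

/* Model of the new pcibios_enable_device() loop: only resources selected
 * by the caller's mask contribute I/O or memory decode bits to PCI_COMMAND. */
#define PCI_COMMAND_IO		0x1
#define PCI_COMMAND_MEMORY	0x2
#define IORESOURCE_IO		0x00000100
#define IORESOURCE_MEM		0x00000200
#define NUM_RESOURCES		7	/* 6 BARs + expansion ROM, for the demo */

int main(void)
{
	uint32_t res_flags[NUM_RESOURCES] = {
		IORESOURCE_MEM,		/* BAR0 */
		IORESOURCE_IO,		/* BAR1; remaining entries default to 0 */
	};
	int mask = (1 << 0) | (1 << 1);	/* caller asks for BAR0 and BAR1 only */
	uint16_t cmd = 0, oldcmd = 0;
	int i;

	for (i = 0; i < NUM_RESOURCES; i++) {
		if (!(mask & (1 << i)))
			continue;	/* skip resources the caller didn't request */
		if (res_flags[i] & IORESOURCE_IO)
			cmd |= PCI_COMMAND_IO;
		if (res_flags[i] & IORESOURCE_MEM)
			cmd |= PCI_COMMAND_MEMORY;
	}

	if (cmd != oldcmd)
		printf("enabling device (%04x -> %04x)\n", oldcmd, cmd);
	return 0;
}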
@@ -353,10 +351,13 @@ static struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm, dev->irq = PCI_IRQ_NONE; } + pci_info(dev, "[%04x:%04x] type %02x class %#08x\n", + dev->vendor, dev->device, dev->hdr_type, dev->class); + pci_parse_of_addrs(sd->op, node, dev); if (ofpci_verbose) - printk(" adding to system ...\n"); + pci_info(dev, " adding to system ...\n"); pci_device_add(dev, bus); @@ -430,19 +431,19 @@ static void of_scan_pci_bridge(struct pci_pbm_info *pbm, u64 size; if (ofpci_verbose) - printk("of_scan_pci_bridge(%s)\n", node->full_name); + pci_info(dev, "of_scan_pci_bridge(%s)\n", node->full_name); /* parse bus-range property */ busrange = of_get_property(node, "bus-range", &len); if (busrange == NULL || len != 8) { - printk(KERN_DEBUG "Can't get bus-range for PCI-PCI bridge %s\n", + pci_info(dev, "Can't get bus-range for PCI-PCI bridge %s\n", node->full_name); return; } if (ofpci_verbose) - printk(" Bridge bus range [%u --> %u]\n", - busrange[0], busrange[1]); + pci_info(dev, " Bridge bus range [%u --> %u]\n", + busrange[0], busrange[1]); ranges = of_get_property(node, "ranges", &len); simba = 0; @@ -454,8 +455,8 @@ static void of_scan_pci_bridge(struct pci_pbm_info *pbm, bus = pci_add_new_bus(dev->bus, dev, busrange[0]); if (!bus) { - printk(KERN_ERR "Failed to create pci bus for %s\n", - node->full_name); + pci_err(dev, "Failed to create pci bus for %s\n", + node->full_name); return; } @@ -464,8 +465,8 @@ static void of_scan_pci_bridge(struct pci_pbm_info *pbm, bus->bridge_ctl = 0; if (ofpci_verbose) - printk(" Bridge ranges[%p] simba[%d]\n", - ranges, simba); + pci_info(dev, " Bridge ranges[%p] simba[%d]\n", + ranges, simba); /* parse ranges property, or cook one up by hand for Simba */ /* PCI #address-cells == 3 and #size-cells == 2 always */ @@ -487,10 +488,10 @@ static void of_scan_pci_bridge(struct pci_pbm_info *pbm, u64 start; if (ofpci_verbose) - printk(" RAW Range[%08x:%08x:%08x:%08x:%08x:%08x:" - "%08x:%08x]\n", - ranges[0], ranges[1], ranges[2], ranges[3], - ranges[4], ranges[5], ranges[6], ranges[7]); + pci_info(dev, " RAW Range[%08x:%08x:%08x:%08x:%08x:%08x:" + "%08x:%08x]\n", + ranges[0], ranges[1], ranges[2], ranges[3], + ranges[4], ranges[5], ranges[6], ranges[7]); flags = pci_parse_of_flags(ranges[0]); size = GET_64BIT(ranges, 6); @@ -510,14 +511,14 @@ static void of_scan_pci_bridge(struct pci_pbm_info *pbm, if (flags & IORESOURCE_IO) { res = bus->resource[0]; if (res->flags) { - printk(KERN_ERR "PCI: ignoring extra I/O range" - " for bridge %s\n", node->full_name); + pci_err(dev, "ignoring extra I/O range" + " for bridge %s\n", node->full_name); continue; } } else { if (i >= PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES) { - printk(KERN_ERR "PCI: too many memory ranges" - " for bridge %s\n", node->full_name); + pci_err(dev, "too many memory ranges" + " for bridge %s\n", node->full_name); continue; } res = bus->resource[i]; @@ -529,8 +530,8 @@ static void of_scan_pci_bridge(struct pci_pbm_info *pbm, region.end = region.start + size - 1; if (ofpci_verbose) - printk(" Using flags[%08x] start[%016llx] size[%016llx]\n", - flags, start, size); + pci_info(dev, " Using flags[%08x] start[%016llx] size[%016llx]\n", + flags, start, size); pcibios_bus_to_resource(dev->bus, res, ®ion); } @@ -538,7 +539,7 @@ after_ranges: sprintf(bus->name, "PCI Bus %04x:%02x", pci_domain_nr(bus), bus->number); if (ofpci_verbose) - printk(" bus name: %s\n", bus->name); + pci_info(dev, " bus name: %s\n", bus->name); pci_of_scan_bus(pbm, node, bus); } @@ -553,14 +554,14 @@ static void 
pci_of_scan_bus(struct pci_pbm_info *pbm, struct pci_dev *dev; if (ofpci_verbose) - printk("PCI: scan_bus[%s] bus no %d\n", - node->full_name, bus->number); + pci_info(bus, "scan_bus[%s] bus no %d\n", + node->full_name, bus->number); child = NULL; prev_devfn = -1; while ((child = of_get_next_child(node, child)) != NULL) { if (ofpci_verbose) - printk(" * %s\n", child->full_name); + pci_info(bus, " * %s\n", child->full_name); reg = of_get_property(child, "reg", ®len); if (reg == NULL || reglen < 20) continue; @@ -581,8 +582,7 @@ static void pci_of_scan_bus(struct pci_pbm_info *pbm, if (!dev) continue; if (ofpci_verbose) - printk("PCI: dev header type: %x\n", - dev->hdr_type); + pci_info(dev, "dev header type: %x\n", dev->hdr_type); if (pci_is_bridge(dev)) of_scan_pci_bridge(pbm, child, dev); @@ -624,6 +624,45 @@ static void pci_bus_register_of_sysfs(struct pci_bus *bus) pci_bus_register_of_sysfs(child_bus); } +static void pci_claim_legacy_resources(struct pci_dev *dev) +{ + struct pci_bus_region region; + struct resource *p, *root, *conflict; + + if ((dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) + return; + + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (!p) + return; + + p->name = "Video RAM area"; + p->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + + region.start = 0xa0000UL; + region.end = region.start + 0x1ffffUL; + pcibios_bus_to_resource(dev->bus, p, ®ion); + + root = pci_find_parent_resource(dev, p); + if (!root) { + pci_info(dev, "can't claim VGA legacy %pR: no compatible bridge window\n", p); + goto err; + } + + conflict = request_resource_conflict(root, p); + if (conflict) { + pci_info(dev, "can't claim VGA legacy %pR: address conflict with %s %pR\n", + p, conflict->name, conflict); + goto err; + } + + pci_info(dev, "VGA legacy framebuffer %pR\n", p); + return; + +err: + kfree(p); +} + static void pci_claim_bus_resources(struct pci_bus *bus) { struct pci_bus *child_bus; @@ -639,15 +678,13 @@ static void pci_claim_bus_resources(struct pci_bus *bus) continue; if (ofpci_verbose) - printk("PCI: Claiming %s: " - "Resource %d: %016llx..%016llx [%x]\n", - pci_name(dev), i, - (unsigned long long)r->start, - (unsigned long long)r->end, - (unsigned int)r->flags); + pci_info(dev, "Claiming Resource %d: %pR\n", + i, r); pci_claim_resource(dev, i); } + + pci_claim_legacy_resources(dev); } list_for_each_entry(child_bus, &bus->children, node) @@ -687,6 +724,7 @@ struct pci_bus *pci_scan_one_pbm(struct pci_pbm_info *pbm, pci_bus_register_of_sysfs(bus); pci_claim_bus_resources(bus); + pci_bus_add_devices(bus); return bus; } @@ -713,9 +751,7 @@ int pcibios_enable_device(struct pci_dev *dev, int mask) } if (cmd != oldcmd) { - printk(KERN_DEBUG "PCI: Enabling device: (%s), cmd %x\n", - pci_name(dev), cmd); - /* Enable the appropriate bits in the PCI command register. 
*/ + pci_info(dev, "enabling device (%04x -> %04x)\n", oldcmd, cmd); pci_write_config_word(dev, PCI_COMMAND, cmd); } return 0; @@ -1075,8 +1111,8 @@ static void pci_bus_slot_names(struct device_node *node, struct pci_bus *bus) sp = prop->names; if (ofpci_verbose) - printk("PCI: Making slots for [%s] mask[0x%02x]\n", - node->full_name, mask); + pci_info(bus, "Making slots for [%s] mask[0x%02x]\n", + node->full_name, mask); i = 0; while (mask) { @@ -1089,12 +1125,12 @@ static void pci_bus_slot_names(struct device_node *node, struct pci_bus *bus) } if (ofpci_verbose) - printk("PCI: Making slot [%s]\n", sp); + pci_info(bus, "Making slot [%s]\n", sp); pci_slot = pci_create_slot(bus, i, sp, NULL); if (IS_ERR(pci_slot)) - printk(KERN_ERR "PCI: pci_create_slot returned %ld\n", - PTR_ERR(pci_slot)); + pci_err(bus, "pci_create_slot returned %ld\n", + PTR_ERR(pci_slot)); sp += strlen(sp) + 1; mask &= ~this_bit; diff --git a/arch/sparc/kernel/pci_common.c b/arch/sparc/kernel/pci_common.c index 38d46bcc8634..4759ccd542fe 100644 --- a/arch/sparc/kernel/pci_common.c +++ b/arch/sparc/kernel/pci_common.c @@ -329,23 +329,6 @@ void pci_get_pbm_props(struct pci_pbm_info *pbm) } } -static void pci_register_legacy_regions(struct resource *io_res, - struct resource *mem_res) -{ - struct resource *p; - - /* VGA Video RAM. */ - p = kzalloc(sizeof(*p), GFP_KERNEL); - if (!p) - return; - - p->name = "Video RAM area"; - p->start = mem_res->start + 0xa0000UL; - p->end = p->start + 0x1ffffUL; - p->flags = IORESOURCE_BUSY; - request_resource(mem_res, p); -} - static void pci_register_iommu_region(struct pci_pbm_info *pbm) { const u32 *vdma = of_get_property(pbm->op->dev.of_node, "virtual-dma", @@ -487,8 +470,6 @@ void pci_determine_mem_io_space(struct pci_pbm_info *pbm) if (pbm->mem64_space.flags) request_resource(&iomem_resource, &pbm->mem64_space); - pci_register_legacy_regions(&pbm->io_space, - &pbm->mem_space); pci_register_iommu_region(pbm); } @@ -508,8 +489,8 @@ void pci_scan_for_target_abort(struct pci_pbm_info *pbm, PCI_STATUS_REC_TARGET_ABORT)); if (error_bits) { pci_write_config_word(pdev, PCI_STATUS, error_bits); - printk("%s: Device %s saw Target Abort [%016x]\n", - pbm->name, pci_name(pdev), status); + pci_info(pdev, "%s: Device saw Target Abort [%016x]\n", + pbm->name, status); } } @@ -531,8 +512,8 @@ void pci_scan_for_master_abort(struct pci_pbm_info *pbm, (status & (PCI_STATUS_REC_MASTER_ABORT)); if (error_bits) { pci_write_config_word(pdev, PCI_STATUS, error_bits); - printk("%s: Device %s received Master Abort [%016x]\n", - pbm->name, pci_name(pdev), status); + pci_info(pdev, "%s: Device received Master Abort " + "[%016x]\n", pbm->name, status); } } @@ -555,8 +536,8 @@ void pci_scan_for_parity_error(struct pci_pbm_info *pbm, PCI_STATUS_DETECTED_PARITY)); if (error_bits) { pci_write_config_word(pdev, PCI_STATUS, error_bits); - printk("%s: Device %s saw Parity Error [%016x]\n", - pbm->name, pci_name(pdev), status); + pci_info(pdev, "%s: Device saw Parity Error [%016x]\n", + pbm->name, status); } } diff --git a/arch/sparc/kernel/pci_msi.c b/arch/sparc/kernel/pci_msi.c index 1994d7638406..fb5899cbfa51 100644 --- a/arch/sparc/kernel/pci_msi.c +++ b/arch/sparc/kernel/pci_msi.c @@ -191,8 +191,8 @@ static void sparc64_teardown_msi_irq(unsigned int irq, break; } if (i >= pbm->msi_num) { - printk(KERN_ERR "%s: teardown: No MSI for irq %u\n", - pbm->name, irq); + pci_err(pdev, "%s: teardown: No MSI for irq %u\n", pbm->name, + irq); return; } @@ -201,9 +201,9 @@ static void sparc64_teardown_msi_irq(unsigned int 
irq, err = ops->msi_teardown(pbm, msi_num); if (err) { - printk(KERN_ERR "%s: teardown: ops->teardown() on MSI %u, " - "irq %u, gives error %d\n", - pbm->name, msi_num, irq, err); + pci_err(pdev, "%s: teardown: ops->teardown() on MSI %u, " + "irq %u, gives error %d\n", pbm->name, msi_num, irq, + err); return; } diff --git a/arch/sparc/kernel/pcic.c b/arch/sparc/kernel/pcic.c index 22f8774977d5..ee4c9a9a171c 100644 --- a/arch/sparc/kernel/pcic.c +++ b/arch/sparc/kernel/pcic.c @@ -518,10 +518,10 @@ static void pcic_map_pci_device(struct linux_pcic *pcic, * board in a PCI slot. We must remap it * under 64K but it is not done yet. XXX */ - printk("PCIC: Skipping I/O space at 0x%lx, " - "this will Oops if a driver attaches " - "device '%s' at %02x:%02x)\n", address, - namebuf, dev->bus->number, dev->devfn); + pci_info(dev, "PCIC: Skipping I/O space at " + "0x%lx, this will Oops if a driver " + "attaches device '%s'\n", address, + namebuf); } } } @@ -551,8 +551,8 @@ pcic_fill_irq(struct linux_pcic *pcic, struct pci_dev *dev, int node) p++; } if (i >= pcic->pcic_imdim) { - printk("PCIC: device %s devfn %02x:%02x not found in %d\n", - namebuf, dev->bus->number, dev->devfn, pcic->pcic_imdim); + pci_info(dev, "PCIC: device %s not found in %d\n", namebuf, + pcic->pcic_imdim); dev->irq = 0; return; } @@ -565,7 +565,7 @@ pcic_fill_irq(struct linux_pcic *pcic, struct pci_dev *dev, int node) ivec = readw(pcic->pcic_regs+PCI_INT_SELECT_HI); real_irq = ivec >> ((i-4) << 2) & 0xF; } else { /* Corrupted map */ - printk("PCIC: BAD PIN %d\n", i); for (;;) {} + pci_info(dev, "PCIC: BAD PIN %d\n", i); for (;;) {} } /* P3 */ /* printk("PCIC: device %s pin %d ivec 0x%x irq %x\n", namebuf, i, ivec, dev->irq); */ @@ -574,10 +574,10 @@ pcic_fill_irq(struct linux_pcic *pcic, struct pci_dev *dev, int node) */ if (real_irq == 0 || p->force) { if (p->irq == 0 || p->irq >= 15) { /* Corrupted map */ - printk("PCIC: BAD IRQ %d\n", p->irq); for (;;) {} + pci_info(dev, "PCIC: BAD IRQ %d\n", p->irq); for (;;) {} } - printk("PCIC: setting irq %d at pin %d for device %02x:%02x\n", - p->irq, p->pin, dev->bus->number, dev->devfn); + pci_info(dev, "PCIC: setting irq %d at pin %d\n", p->irq, + p->pin); real_irq = p->irq; i = p->pin; @@ -602,15 +602,13 @@ pcic_fill_irq(struct linux_pcic *pcic, struct pci_dev *dev, int node) void pcibios_fixup_bus(struct pci_bus *bus) { struct pci_dev *dev; - int i, has_io, has_mem; - unsigned int cmd = 0; struct linux_pcic *pcic; /* struct linux_pbm_info* pbm = &pcic->pbm; */ int node; struct pcidev_cookie *pcp; if (!pcic0_up) { - printk("pcibios_fixup_bus: no PCIC\n"); + pci_info(bus, "pcibios_fixup_bus: no PCIC\n"); return; } pcic = &pcic0; @@ -619,44 +617,12 @@ void pcibios_fixup_bus(struct pci_bus *bus) * Next crud is an equivalent of pbm = pcic_bus_to_pbm(bus); */ if (bus->number != 0) { - printk("pcibios_fixup_bus: nonzero bus 0x%x\n", bus->number); + pci_info(bus, "pcibios_fixup_bus: nonzero bus 0x%x\n", + bus->number); return; } list_for_each_entry(dev, &bus->devices, bus_list) { - - /* - * Comment from i386 branch: - * There are buggy BIOSes that forget to enable I/O and memory - * access to PCI devices. We try to fix this, but we need to - * be sure that the BIOS didn't forget to assign an address - * to the device. 
[mj] - * OBP is a case of such BIOS :-) - */ - has_io = has_mem = 0; - for(i=0; i<6; i++) { - unsigned long f = dev->resource[i].flags; - if (f & IORESOURCE_IO) { - has_io = 1; - } else if (f & IORESOURCE_MEM) - has_mem = 1; - } - pcic_read_config(dev->bus, dev->devfn, PCI_COMMAND, 2, &cmd); - if (has_io && !(cmd & PCI_COMMAND_IO)) { - printk("PCIC: Enabling I/O for device %02x:%02x\n", - dev->bus->number, dev->devfn); - cmd |= PCI_COMMAND_IO; - pcic_write_config(dev->bus, dev->devfn, - PCI_COMMAND, 2, cmd); - } - if (has_mem && !(cmd & PCI_COMMAND_MEMORY)) { - printk("PCIC: Enabling memory for device %02x:%02x\n", - dev->bus->number, dev->devfn); - cmd |= PCI_COMMAND_MEMORY; - pcic_write_config(dev->bus, dev->devfn, - PCI_COMMAND, 2, cmd); - } - node = pdev_to_pnode(&pcic->pbm, dev); if(node == 0) node = -1; @@ -675,6 +641,34 @@ void pcibios_fixup_bus(struct pci_bus *bus) } } +int pcibios_enable_device(struct pci_dev *dev, int mask) +{ + u16 cmd, oldcmd; + int i; + + pci_read_config_word(dev, PCI_COMMAND, &cmd); + oldcmd = cmd; + + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + struct resource *res = &dev->resource[i]; + + /* Only set up the requested stuff */ + if (!(mask & (1<<i))) + continue; + + if (res->flags & IORESOURCE_IO) + cmd |= PCI_COMMAND_IO; + if (res->flags & IORESOURCE_MEM) + cmd |= PCI_COMMAND_MEMORY; + } + + if (cmd != oldcmd) { + pci_info(dev, "enabling device (%04x -> %04x)\n", oldcmd, cmd); + pci_write_config_word(dev, PCI_COMMAND, cmd); + } + return 0; +} + /* Makes compiler happy */ static volatile int pcic_timer_dummy; @@ -747,17 +741,11 @@ static void watchdog_reset() { } #endif -int pcibios_enable_device(struct pci_dev *pdev, int mask) -{ - return 0; -} - /* * NMI */ void pcic_nmi(unsigned int pend, struct pt_regs *regs) { - pend = swab32(pend); if (!pcic_speculative || (pend & PCI_SYS_INT_PENDING_PIO) == 0) { diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index cb6e3a219294..297789aef9fa 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -60,6 +60,7 @@ config X86 select ARCH_HAS_KCOV if X86_64 select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_PMEM_API if X86_64 + select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_REFCOUNT select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 select ARCH_HAS_UACCESS_MCSAFE if X86_64 @@ -182,6 +183,7 @@ config X86 select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE if X86_64 && UNWINDER_FRAME_POINTER && STACK_VALIDATION select HAVE_STACK_VALIDATION if X86_64 + select HAVE_RSEQ select HAVE_SYSCALL_TRACEPOINTS select HAVE_UNSTABLE_SCHED_CLOCK select HAVE_USER_RETURN_NOTIFIER @@ -333,6 +335,9 @@ config ARCH_SUPPORTS_UPROBES config FIX_EARLYCON_MEM def_bool y +config DYNAMIC_PHYSICAL_MASK + bool + config PGTABLE_LEVELS int default 5 if X86_5LEVEL @@ -1485,6 +1490,7 @@ config ARCH_HAS_MEM_ENCRYPT config AMD_MEM_ENCRYPT bool "AMD Secure Memory Encryption (SME) support" depends on X86_64 && CPU_SUP_AMD + select DYNAMIC_PHYSICAL_MASK ---help--- Say yes to enable support for the encryption of system memory. This requires an AMD processor that supports Secure Memory diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 192e4d2f9efc..c6dd1d980081 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -72,6 +72,9 @@ config EARLY_PRINTK_USB_XDBC You should normally say N here, unless you want to debug early crashes or need a very simple printk logging facility. 
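The new DYNAMIC_PHYSICAL_MASK option selected by AMD_MEM_ENCRYPT above lets __PHYSICAL_MASK become a runtime variable, so later hunks can strip the SME/SEV encryption bit out of it instead of open-coding __sme_clr(). The short userspace model below illustrates the idea only; the bit positions and values are invented for the demo.

#include <stdio.h>
#include <stdint.h>

/* Once the encryption bit (the "C-bit") is known at boot it is removed
 * from the mask, so pte-to-physical-address conversions stop treating it
 * as part of the address.  Bit positions are made up for this example. */
#define PHYSICAL_MASK_SHIFT	46
static uint64_t physical_mask = (1ULL << PHYSICAL_MASK_SHIFT) - 1;

int main(void)
{
	uint64_t sme_me_mask = 1ULL << 43;	/* assumed C-bit for the demo */
	uint64_t pte = 0x123456000ULL | sme_me_mask | 0x63; /* addr | C-bit | flags */

	physical_mask &= ~sme_me_mask;	/* what the boot code now does */

	/* pte_pfn()-style extraction no longer sees the encryption bit */
	printf("physical address: %#llx\n",
	       (unsigned long long)(pte & physical_mask & ~0xfffULL));
	return 0;
}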
+config MCSAFE_TEST + def_bool n + config X86_PTDUMP_CORE def_bool n diff --git a/arch/x86/boot/compressed/kaslr_64.c b/arch/x86/boot/compressed/kaslr_64.c index 522d11431433..748456c365f4 100644 --- a/arch/x86/boot/compressed/kaslr_64.c +++ b/arch/x86/boot/compressed/kaslr_64.c @@ -69,6 +69,8 @@ static struct alloc_pgt_data pgt_data; /* The top level page table entry pointer. */ static unsigned long top_level_pgt; +phys_addr_t physical_mask = (1ULL << __PHYSICAL_MASK_SHIFT) - 1; + /* * Mapping information structure passed to kernel_ident_mapping_init(). * Due to relocation, pointers must be assigned at run time not build time. @@ -81,6 +83,9 @@ void initialize_identity_maps(void) /* If running as an SEV guest, the encryption mask is required. */ set_sev_encryption_mask(); + /* Exclude the encryption mask from __PHYSICAL_MASK */ + physical_mask &= ~sme_me_mask; + /* Init mapping_info with run-time function/buffer pointers. */ mapping_info.alloc_pgt_page = alloc_pgt_page; mapping_info.context = &pgt_data; diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index fbf6a6c3fd2d..92190879b228 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -164,6 +164,7 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags) if (cached_flags & _TIF_NOTIFY_RESUME) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); + rseq_handle_notify_resume(regs); } if (cached_flags & _TIF_USER_RETURN_NOTIFY) @@ -254,6 +255,8 @@ __visible inline void syscall_return_slowpath(struct pt_regs *regs) WARN(irqs_disabled(), "syscall %ld left IRQs disabled", regs->orig_ax)) local_irq_enable(); + rseq_syscall(regs); + /* * First do one-time work. If these work items are enabled, we * want to run them exactly once per syscall exit with IRQs on. 
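The rseq hooks wired into the x86 exit-to-usermode and syscall-return paths above are only the kernel side; userspace still has to register a per-thread struct rseq. Below is a minimal registration sketch, assuming the UAPI header shipped by this series and using the x86-64 number assigned in the syscall table that follows (334) as a fallback define; the signature value is an arbitrary example.

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/rseq.h>		/* struct rseq, from the new UAPI header */

#ifndef __NR_rseq
#define __NR_rseq 334		/* x86-64 entry added by this series */
#endif

/* The signature is chosen by userspace and must match on unregister. */
#define RSEQ_SIG 0x53053053

static __thread struct rseq rseq_area __attribute__((aligned(32)));

int main(void)
{
	memset(&rseq_area, 0, sizeof(rseq_area));
	rseq_area.cpu_id = RSEQ_CPU_ID_UNINITIALIZED;

	if (syscall(__NR_rseq, &rseq_area, sizeof(rseq_area), 0, RSEQ_SIG)) {
		perror("rseq registration");
		return 1;
	}
	/* After registration the kernel keeps cpu_id current on every return
	 * to userspace (see the notify_resume hooks above). */
	printf("registered, running on cpu %u\n", rseq_area.cpu_id);
	return 0;
}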
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 14a2f996e543..3cf7b533b3d1 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -397,3 +397,4 @@ 383 i386 statx sys_statx __ia32_sys_statx 384 i386 arch_prctl sys_arch_prctl __ia32_compat_sys_arch_prctl 385 i386 io_pgetevents sys_io_pgetevents __ia32_compat_sys_io_pgetevents +386 i386 rseq sys_rseq __ia32_sys_rseq diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index cd36232ab62f..f0b1709a5ffb 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -342,6 +342,7 @@ 331 common pkey_free __x64_sys_pkey_free 332 common statx __x64_sys_statx 333 common io_pgetevents __x64_sys_io_pgetevents +334 common rseq __x64_sys_rseq # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 08acd954f00e..74a9e06b6cfd 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -436,6 +436,8 @@ static inline void apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v)) {} #endif /* CONFIG_X86_LOCAL_APIC */ +extern void apic_ack_irq(struct irq_data *data); + static inline void ack_APIC_irq(void) { /* diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index fb00a2fca990..5701f5cecd31 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -282,7 +282,9 @@ #define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */ #define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */ #define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */ #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ +#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. 
*/ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ diff --git a/arch/x86/include/asm/mcsafe_test.h b/arch/x86/include/asm/mcsafe_test.h new file mode 100644 index 000000000000..eb59804b6201 --- /dev/null +++ b/arch/x86/include/asm/mcsafe_test.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _MCSAFE_TEST_H_ +#define _MCSAFE_TEST_H_ + +#ifndef __ASSEMBLY__ +#ifdef CONFIG_MCSAFE_TEST +extern unsigned long mcsafe_test_src; +extern unsigned long mcsafe_test_dst; + +static inline void mcsafe_inject_src(void *addr) +{ + if (addr) + mcsafe_test_src = (unsigned long) addr; + else + mcsafe_test_src = ~0UL; +} + +static inline void mcsafe_inject_dst(void *addr) +{ + if (addr) + mcsafe_test_dst = (unsigned long) addr; + else + mcsafe_test_dst = ~0UL; +} +#else /* CONFIG_MCSAFE_TEST */ +static inline void mcsafe_inject_src(void *addr) +{ +} + +static inline void mcsafe_inject_dst(void *addr) +{ +} +#endif /* CONFIG_MCSAFE_TEST */ + +#else /* __ASSEMBLY__ */ +#include <asm/export.h> + +#ifdef CONFIG_MCSAFE_TEST +.macro MCSAFE_TEST_CTL + .pushsection .data + .align 8 + .globl mcsafe_test_src + mcsafe_test_src: + .quad 0 + EXPORT_SYMBOL_GPL(mcsafe_test_src) + .globl mcsafe_test_dst + mcsafe_test_dst: + .quad 0 + EXPORT_SYMBOL_GPL(mcsafe_test_dst) + .popsection +.endm + +.macro MCSAFE_TEST_SRC reg count target + leaq \count(\reg), %r9 + cmp mcsafe_test_src, %r9 + ja \target +.endm + +.macro MCSAFE_TEST_DST reg count target + leaq \count(\reg), %r9 + cmp mcsafe_test_dst, %r9 + ja \target +.endm +#else +.macro MCSAFE_TEST_CTL +.endm + +.macro MCSAFE_TEST_SRC reg count target +.endm + +.macro MCSAFE_TEST_DST reg count target +.endm +#endif /* CONFIG_MCSAFE_TEST */ +#endif /* __ASSEMBLY__ */ +#endif /* _MCSAFE_TEST_H_ */ diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index 1e53560a84bb..c85e15010f48 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -17,7 +17,6 @@ #define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT) #define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1)) -#define __PHYSICAL_MASK ((phys_addr_t)(__sme_clr((1ULL << __PHYSICAL_MASK_SHIFT) - 1))) #define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1) /* Cast *PAGE_MASK to a signed type so that it is sign-extended if @@ -55,6 +54,13 @@ #ifndef __ASSEMBLY__ +#ifdef CONFIG_DYNAMIC_PHYSICAL_MASK +extern phys_addr_t physical_mask; +#define __PHYSICAL_MASK physical_mask +#else +#define __PHYSICAL_MASK ((phys_addr_t)((1ULL << __PHYSICAL_MASK_SHIFT) - 1)) +#endif + extern int devmem_is_allowed(unsigned long pagenr); extern unsigned long max_low_pfn_mapped; diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 3c5385f9a88f..0fdcd21dadbd 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -216,7 +216,7 @@ static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd) } #endif -static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d) +static __always_inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d) { pgd_t pgd; @@ -230,7 +230,7 @@ static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d) *p4dp = native_make_p4d(native_pgd_val(pgd)); } -static inline void native_p4d_clear(p4d_t *p4d) +static __always_inline void native_p4d_clear(p4d_t *p4d) { native_set_p4d(p4d, native_make_p4d(0)); } diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 1e5a40673953..99fff853c944 100644 
--- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -65,7 +65,6 @@ #define _PAGE_PKEY_BIT2 (_AT(pteval_t, 0)) #define _PAGE_PKEY_BIT3 (_AT(pteval_t, 0)) #endif -#define __HAVE_ARCH_PTE_SPECIAL #define _PAGE_PKEY_MASK (_PAGE_PKEY_BIT0 | \ _PAGE_PKEY_BIT1 | \ diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h index 22647a642e98..0af81b590a0c 100644 --- a/arch/x86/include/asm/trace/irq_vectors.h +++ b/arch/x86/include/asm/trace/irq_vectors.h @@ -236,7 +236,7 @@ TRACE_EVENT(vector_alloc, TP_PROTO(unsigned int irq, unsigned int vector, bool reserved, int ret), - TP_ARGS(irq, vector, ret, reserved), + TP_ARGS(irq, vector, reserved, ret), TP_STRUCT__entry( __field( unsigned int, irq ) diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 2d27236c16a3..b85a7c54c6a1 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -301,5 +301,6 @@ extern struct x86_apic_ops x86_apic_ops; extern void x86_early_init_platform_quirks(void); extern void x86_init_noop(void); extern void x86_init_uint_noop(unsigned int unused); +extern bool x86_pnpbios_disabled(void); #endif diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 7553819c74c3..3982f79d2377 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1851,7 +1851,7 @@ static void ioapic_ir_ack_level(struct irq_data *irq_data) * intr-remapping table entry. Hence for the io-apic * EOI we use the pin number. */ - ack_APIC_irq(); + apic_ack_irq(irq_data); eoi_ioapic_pin(data->entry.vector, data); } diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index bb6f7a2148d7..35aaee4fc028 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -235,6 +235,15 @@ static int allocate_vector(struct irq_data *irqd, const struct cpumask *dest) if (vector && cpu_online(cpu) && cpumask_test_cpu(cpu, dest)) return 0; + /* + * Careful here. @apicd might either have move_in_progress set or + * be enqueued for cleanup. Assigning a new vector would either + * leave a stale vector on some CPU around or in case of a pending + * cleanup corrupt the hlist. 
+ */ + if (apicd->move_in_progress || !hlist_unhashed(&apicd->clist)) + return -EBUSY; + vector = irq_matrix_alloc(vector_matrix, dest, resvd, &cpu); if (vector > 0) apic_update_vector(irqd, vector, cpu); @@ -579,8 +588,7 @@ error: static void x86_vector_debug_show(struct seq_file *m, struct irq_domain *d, struct irq_data *irqd, int ind) { - unsigned int cpu, vector, prev_cpu, prev_vector; - struct apic_chip_data *apicd; + struct apic_chip_data apicd; unsigned long flags; int irq; @@ -596,24 +604,26 @@ static void x86_vector_debug_show(struct seq_file *m, struct irq_domain *d, return; } - apicd = irqd->chip_data; - if (!apicd) { + if (!irqd->chip_data) { seq_printf(m, "%*sVector: Not assigned\n", ind, ""); return; } raw_spin_lock_irqsave(&vector_lock, flags); - cpu = apicd->cpu; - vector = apicd->vector; - prev_cpu = apicd->prev_cpu; - prev_vector = apicd->prev_vector; + memcpy(&apicd, irqd->chip_data, sizeof(apicd)); raw_spin_unlock_irqrestore(&vector_lock, flags); - seq_printf(m, "%*sVector: %5u\n", ind, "", vector); - seq_printf(m, "%*sTarget: %5u\n", ind, "", cpu); - if (prev_vector) { - seq_printf(m, "%*sPrevious vector: %5u\n", ind, "", prev_vector); - seq_printf(m, "%*sPrevious target: %5u\n", ind, "", prev_cpu); + + seq_printf(m, "%*sVector: %5u\n", ind, "", apicd.vector); + seq_printf(m, "%*sTarget: %5u\n", ind, "", apicd.cpu); + if (apicd.prev_vector) { + seq_printf(m, "%*sPrevious vector: %5u\n", ind, "", apicd.prev_vector); + seq_printf(m, "%*sPrevious target: %5u\n", ind, "", apicd.prev_cpu); } + seq_printf(m, "%*smove_in_progress: %u\n", ind, "", apicd.move_in_progress ? 1 : 0); + seq_printf(m, "%*sis_managed: %u\n", ind, "", apicd.is_managed ? 1 : 0); + seq_printf(m, "%*scan_reserve: %u\n", ind, "", apicd.can_reserve ? 1 : 0); + seq_printf(m, "%*shas_reserved: %u\n", ind, "", apicd.has_reserved ? 1 : 0); + seq_printf(m, "%*scleanup_pending: %u\n", ind, "", !hlist_unhashed(&apicd.clist)); } #endif @@ -800,13 +810,18 @@ static int apic_retrigger_irq(struct irq_data *irqd) return 1; } -void apic_ack_edge(struct irq_data *irqd) +void apic_ack_irq(struct irq_data *irqd) { - irq_complete_move(irqd_cfg(irqd)); irq_move_irq(irqd); ack_APIC_irq(); } +void apic_ack_edge(struct irq_data *irqd) +{ + irq_complete_move(irqd_cfg(irqd)); + apic_ack_irq(irqd); +} + static struct irq_chip lapic_controller = { .name = "APIC", .irq_ack = apic_ack_edge, diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 7416fc206b4a..cd0fda1fff6d 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -529,18 +529,15 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) if (mode == SPEC_STORE_BYPASS_DISABLE) { setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE); /* - * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD uses - * a completely different MSR and bit dependent on family. + * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD may + * use a completely different MSR and bit dependent on family. 
*/ - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_INTEL: + if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) + x86_amd_ssb_disable(); + else { x86_spec_ctrl_base |= SPEC_CTRL_SSBD; x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); - break; - case X86_VENDOR_AMD: - x86_amd_ssb_disable(); - break; } } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 95c8e507580d..910b47ee8078 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -803,6 +803,12 @@ static void init_speculation_control(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_STIBP); set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); } + + if (cpu_has(c, X86_FEATURE_AMD_SSBD)) { + set_cpu_cap(c, X86_FEATURE_SSBD); + set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); + clear_cpu_cap(c, X86_FEATURE_VIRT_SSBD); + } } void get_cpu_cap(struct cpuinfo_x86 *c) @@ -992,7 +998,8 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); if (!x86_match_cpu(cpu_no_spec_store_bypass) && - !(ia32_cap & ARCH_CAP_SSB_NO)) + !(ia32_cap & ARCH_CAP_SSB_NO) && + !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); if (x86_match_cpu(cpu_no_meltdown)) diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c index 24bfa63e86cf..ec4754f81cbd 100644 --- a/arch/x86/kernel/cpu/intel_rdt.c +++ b/arch/x86/kernel/cpu/intel_rdt.c @@ -845,6 +845,8 @@ static __init void rdt_quirks(void) case INTEL_FAM6_SKYLAKE_X: if (boot_cpu_data.x86_stepping <= 4) set_rdt_options("!cmt,!mbmtotal,!mbmlocal,!l3cat"); + else + set_rdt_options("!l3cat"); } } diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 77e201301528..08286269fd24 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -70,7 +70,7 @@ static DEFINE_MUTEX(microcode_mutex); /* * Serialize late loading so that CPUs get updated one-by-one. 
*/ -static DEFINE_SPINLOCK(update_lock); +static DEFINE_RAW_SPINLOCK(update_lock); struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; @@ -560,9 +560,9 @@ static int __reload_late(void *info) if (__wait_for_cpus(&late_cpus_in, NSEC_PER_SEC)) return -1; - spin_lock(&update_lock); + raw_spin_lock(&update_lock); apply_microcode_local(&err); - spin_unlock(&update_lock); + raw_spin_unlock(&update_lock); /* siblings return UCODE_OK because their engine got updated already */ if (err > UCODE_NFOUND) { diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index 558444b23923..c610f47373e4 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c @@ -106,17 +106,9 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) memset(line, 0, LINE_SIZE); - length = len; - length--; - - if (length > LINE_SIZE - 1) - length = LINE_SIZE - 1; - + length = strncpy_from_user(line, buf, LINE_SIZE - 1); if (length < 0) - return -EINVAL; - - if (copy_from_user(line, buf, length)) - return -EFAULT; + return length; linelen = strlen(line); ptr = line + linelen - 1; @@ -149,17 +141,16 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) return -EINVAL; ptr = skip_spaces(ptr + 5); - for (i = 0; i < MTRR_NUM_TYPES; ++i) { - if (strcmp(ptr, mtrr_strings[i])) - continue; - base >>= PAGE_SHIFT; - size >>= PAGE_SHIFT; - err = mtrr_add_page((unsigned long)base, (unsigned long)size, i, true); - if (err < 0) - return err; - return len; - } - return -EINVAL; + i = match_string(mtrr_strings, MTRR_NUM_TYPES, ptr); + if (i < 0) + return i; + + base >>= PAGE_SHIFT; + size >>= PAGE_SHIFT; + err = mtrr_add_page((unsigned long)base, (unsigned long)size, i, true); + if (err < 0) + return err; + return len; } static long diff --git a/arch/x86/kernel/i8237.c b/arch/x86/kernel/i8237.c index 8eeaa81de066..0a3e70fd00d6 100644 --- a/arch/x86/kernel/i8237.c +++ b/arch/x86/kernel/i8237.c @@ -9,10 +9,12 @@ * your option) any later version. */ +#include <linux/dmi.h> #include <linux/init.h> #include <linux/syscore_ops.h> #include <asm/dma.h> +#include <asm/x86_init.h> /* * This module just handles suspend/resume issues with the @@ -49,6 +51,29 @@ static struct syscore_ops i8237_syscore_ops = { static int __init i8237A_init_ops(void) { + /* + * From SKL PCH onwards, the legacy DMA device is removed in which the + * I/O ports (81h-83h, 87h, 89h-8Bh, 8Fh) related to it are removed + * as well. All removed ports must return 0xff for a inb() request. + * + * Note: DMA_PAGE_2 (port 0x81) should not be checked for detecting + * the presence of DMA device since it may be used by BIOS to decode + * LPC traffic for POST codes. Original LPC only decodes one byte of + * port 0x80 but some BIOS may choose to enhance PCH LPC port 0x8x + * decoding. + */ + if (dma_inb(DMA_PAGE_0) == 0xFF) + return -ENODEV; + + /* + * It is not required to load this driver as newer SoC may not + * support 8237 DMA or bus mastering from LPC. Platform firmware + * must announce the support for such legacy devices via + * ACPI_FADT_LEGACY_DEVICES field in FADT table. 
+ */ + if (x86_pnpbios_disabled() && dmi_get_bios_year() >= 2017) + return -ENODEV; + register_syscore_ops(&i8237_syscore_ops); return 0; } diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 2c3a1b4294eb..74383a3780dc 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -317,15 +317,12 @@ void __init idt_setup_apic_and_irq_gates(void) set_intr_gate(i, entry); } - for_each_clear_bit_from(i, system_vectors, NR_VECTORS) { #ifdef CONFIG_X86_LOCAL_APIC + for_each_clear_bit_from(i, system_vectors, NR_VECTORS) { set_bit(i, system_vectors); set_intr_gate(i, spurious_interrupt); -#else - entry = irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR); - set_intr_gate(i, entry); -#endif } +#endif } /** diff --git a/arch/x86/kernel/platform-quirks.c b/arch/x86/kernel/platform-quirks.c index 235fe6008ac8..b348a672f71d 100644 --- a/arch/x86/kernel/platform-quirks.c +++ b/arch/x86/kernel/platform-quirks.c @@ -33,9 +33,14 @@ void __init x86_early_init_platform_quirks(void) x86_platform.set_legacy_features(); } +bool __init x86_pnpbios_disabled(void) +{ + return x86_platform.legacy.devices.pnpbios == 0; +} + #if defined(CONFIG_PNPBIOS) bool __init arch_pnpbios_disabled(void) { - return x86_platform.legacy.devices.pnpbios == 0; + return x86_pnpbios_disabled(); } #endif diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index da270b95fe4d..445ca11ff863 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -688,6 +688,12 @@ setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs) sigset_t *set = sigmask_to_save(); compat_sigset_t *cset = (compat_sigset_t *) set; + /* + * Increment event counter and perform fixup for the pre-signal + * frame. + */ + rseq_signal_deliver(regs); + /* Set up the stack frame */ if (is_ia32_frame(ksig)) { if (ksig->ka.sa.sa_flags & SA_SIGINFO) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 92bf2f2e7cdd..f4f30d0c25c4 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -379,7 +379,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 0x80000008.ebx */ const u32 kvm_cpuid_8000_0008_ebx_x86_features = - F(AMD_IBPB) | F(AMD_IBRS) | F(VIRT_SSBD); + F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) | + F(AMD_SSB_NO); /* cpuid 0xC0000001.edx */ const u32 kvm_cpuid_C000_0001_edx_x86_features = @@ -664,7 +665,12 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->ebx |= F(VIRT_SSBD); entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features; cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX); - if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD)) + /* + * The preference is to use SPEC CTRL MSR instead of the + * VIRT_SPEC MSR. 
+ */ + if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) && + !boot_cpu_has(X86_FEATURE_AMD_SSBD)) entry->ebx |= F(VIRT_SSBD); break; } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 26110c202b19..950ec50f77c3 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -4115,7 +4115,8 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS)) + !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) && + !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD)) return 1; msr_info->data = svm->spec_ctrl; @@ -4217,11 +4218,12 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) break; case MSR_IA32_SPEC_CTRL: if (!msr->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS)) + !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) && + !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD)) return 1; /* The STIBP bit doesn't fault even if it's not advertised */ - if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP)) + if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD)) return 1; svm->spec_ctrl = data; diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index c3b527a9f95d..298ef1479240 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -3,6 +3,7 @@ #include <linux/linkage.h> #include <asm/errno.h> #include <asm/cpufeatures.h> +#include <asm/mcsafe_test.h> #include <asm/alternative-asm.h> #include <asm/export.h> @@ -183,6 +184,9 @@ ENTRY(memcpy_orig) ENDPROC(memcpy_orig) #ifndef CONFIG_UML + +MCSAFE_TEST_CTL + /* * __memcpy_mcsafe - memory copy with machine check exception handling * Note that we only catch machine checks when reading the source addresses. @@ -206,6 +210,8 @@ ENTRY(__memcpy_mcsafe) subl %ecx, %edx .L_read_leading_bytes: movb (%rsi), %al + MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes + MCSAFE_TEST_DST %rdi 1 .E_leading_bytes .L_write_leading_bytes: movb %al, (%rdi) incq %rsi @@ -221,6 +227,8 @@ ENTRY(__memcpy_mcsafe) .L_read_words: movq (%rsi), %r8 + MCSAFE_TEST_SRC %rsi 8 .E_read_words + MCSAFE_TEST_DST %rdi 8 .E_write_words .L_write_words: movq %r8, (%rdi) addq $8, %rsi @@ -237,6 +245,8 @@ ENTRY(__memcpy_mcsafe) movl %edx, %ecx .L_read_trailing_bytes: movb (%rsi), %al + MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes + MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes .L_write_trailing_bytes: movb %al, (%rdi) incq %rsi diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index 1b2197d13832..7ae36868aed2 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -527,6 +527,7 @@ void __init sme_enable(struct boot_params *bp) /* SEV state cannot be controlled by a command line option */ sme_me_mask = me_mask; sev_enabled = true; + physical_mask &= ~sme_me_mask; return; } @@ -561,4 +562,6 @@ void __init sme_enable(struct boot_params *bp) sme_me_mask = 0; else sme_me_mask = active_by_default ? 
me_mask : 0; + + physical_mask &= ~sme_me_mask; } diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index ffc8c13c50e4..47b5951e592b 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -8,6 +8,11 @@ #include <asm/fixmap.h> #include <asm/mtrr.h> +#ifdef CONFIG_DYNAMIC_PHYSICAL_MASK +phys_addr_t physical_mask __ro_after_init = (1ULL << __PHYSICAL_MASK_SHIFT) - 1; +EXPORT_SYMBOL(physical_mask); +#endif + #define PGALLOC_GFP (GFP_KERNEL_ACCOUNT | __GFP_ZERO) #ifdef CONFIG_HIGHPTE @@ -114,13 +119,12 @@ static inline void pgd_list_del(pgd_t *pgd) static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm) { - BUILD_BUG_ON(sizeof(virt_to_page(pgd)->index) < sizeof(mm)); - virt_to_page(pgd)->index = (pgoff_t)mm; + virt_to_page(pgd)->pt_mm = mm; } struct mm_struct *pgd_page_get_mm(struct page *page) { - return (struct mm_struct *)page->index; + return page->pt_mm; } static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd) diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c index f0114007e915..e5f753cbb1c3 100644 --- a/arch/x86/pci/early.c +++ b/arch/x86/pci/early.c @@ -59,24 +59,15 @@ int early_pci_allowed(void) void early_dump_pci_device(u8 bus, u8 slot, u8 func) { + u32 value[256 / 4]; int i; - int j; - u32 val; - printk(KERN_INFO "pci 0000:%02x:%02x.%d config space:", - bus, slot, func); + pr_info("pci 0000:%02x:%02x.%d config space:\n", bus, slot, func); - for (i = 0; i < 256; i += 4) { - if (!(i & 0x0f)) - printk("\n %02x:",i); + for (i = 0; i < 256; i += 4) + value[i / 4] = read_pci_config(bus, slot, func, i); - val = read_pci_config(bus, slot, func, i); - for (j = 0; j < 4; j++) { - printk(" %02x", val & 0xff); - val >>= 8; - } - } - printk("\n"); + print_hex_dump(KERN_INFO, "", DUMP_PREFIX_OFFSET, 16, 1, value, 256, false); } void early_dump_pci_devices(void) diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 54ef19e90705..13f4485ca388 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -636,6 +636,10 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2030, quirk_no_aersid); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2031, quirk_no_aersid); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2032, quirk_no_aersid); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2033, quirk_no_aersid); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x334a, quirk_no_aersid); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x334b, quirk_no_aersid); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x334c, quirk_no_aersid); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x334d, quirk_no_aersid); #ifdef CONFIG_PHYS_ADDR_T_64BIT diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index b36caae0fb2f..b96d38288c60 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -615,7 +615,7 @@ static int uv2_3_wait_completion(struct bau_desc *bau_desc, /* spin on the status MMR, waiting for it to go idle */ while (descriptor_stat != UV2H_DESC_IDLE) { - if ((descriptor_stat == UV2H_DESC_SOURCE_TIMEOUT)) { + if (descriptor_stat == UV2H_DESC_SOURCE_TIMEOUT) { /* * A h/w bug on the destination side may * have prevented the message being marked diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c index e4cb9f4cde8a..fc13cbbb2dce 100644 --- a/arch/x86/platform/uv/uv_irq.c +++ b/arch/x86/platform/uv/uv_irq.c @@ -47,11 +47,6 @@ static void uv_program_mmr(struct irq_cfg *cfg, struct uv_irq_2_mmr_pnode *info) static void uv_noop(struct irq_data *data) { } -static void uv_ack_apic(struct irq_data *data) -{ - ack_APIC_irq(); -} 
- static int uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask, bool force) @@ -73,7 +68,7 @@ static struct irq_chip uv_irq_chip = { .name = "UV-CORE", .irq_mask = uv_noop, .irq_unmask = uv_noop, - .irq_eoi = uv_ack_apic, + .irq_eoi = apic_ack_irq, .irq_set_affinity = uv_set_irq_affinity, }; diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 2d76106788a3..96fc2f0fdbfe 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -63,37 +63,44 @@ static noinline void xen_flush_tlb_all(void) #define REMAP_BATCH_SIZE 16 struct remap_data { - xen_pfn_t *mfn; + xen_pfn_t *pfn; bool contiguous; + bool no_translate; pgprot_t prot; struct mmu_update *mmu_update; }; -static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token, +static int remap_area_pfn_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr, void *data) { struct remap_data *rmd = data; - pte_t pte = pte_mkspecial(mfn_pte(*rmd->mfn, rmd->prot)); + pte_t pte = pte_mkspecial(mfn_pte(*rmd->pfn, rmd->prot)); - /* If we have a contiguous range, just update the mfn itself, - else update pointer to be "next mfn". */ + /* + * If we have a contiguous range, just update the pfn itself, + * else update pointer to be "next pfn". + */ if (rmd->contiguous) - (*rmd->mfn)++; + (*rmd->pfn)++; else - rmd->mfn++; + rmd->pfn++; - rmd->mmu_update->ptr = virt_to_machine(ptep).maddr | MMU_NORMAL_PT_UPDATE; + rmd->mmu_update->ptr = virt_to_machine(ptep).maddr; + rmd->mmu_update->ptr |= rmd->no_translate ? + MMU_PT_UPDATE_NO_TRANSLATE : + MMU_NORMAL_PT_UPDATE; rmd->mmu_update->val = pte_val_ma(pte); rmd->mmu_update++; return 0; } -static int do_remap_gfn(struct vm_area_struct *vma, +static int do_remap_pfn(struct vm_area_struct *vma, unsigned long addr, - xen_pfn_t *gfn, int nr, + xen_pfn_t *pfn, int nr, int *err_ptr, pgprot_t prot, - unsigned domid, + unsigned int domid, + bool no_translate, struct page **pages) { int err = 0; @@ -104,11 +111,14 @@ static int do_remap_gfn(struct vm_area_struct *vma, BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO))); - rmd.mfn = gfn; + rmd.pfn = pfn; rmd.prot = prot; - /* We use the err_ptr to indicate if there we are doing a contiguous - * mapping or a discontigious mapping. */ + /* + * We use the err_ptr to indicate if there we are doing a contiguous + * mapping or a discontigious mapping. + */ rmd.contiguous = !err_ptr; + rmd.no_translate = no_translate; while (nr) { int index = 0; @@ -119,7 +129,7 @@ static int do_remap_gfn(struct vm_area_struct *vma, rmd.mmu_update = mmu_update; err = apply_to_page_range(vma->vm_mm, addr, range, - remap_area_mfn_pte_fn, &rmd); + remap_area_pfn_pte_fn, &rmd); if (err) goto out; @@ -173,7 +183,8 @@ int xen_remap_domain_gfn_range(struct vm_area_struct *vma, if (xen_feature(XENFEAT_auto_translated_physmap)) return -EOPNOTSUPP; - return do_remap_gfn(vma, addr, &gfn, nr, NULL, prot, domid, pages); + return do_remap_pfn(vma, addr, &gfn, nr, NULL, prot, domid, false, + pages); } EXPORT_SYMBOL_GPL(xen_remap_domain_gfn_range); @@ -192,10 +203,25 @@ int xen_remap_domain_gfn_array(struct vm_area_struct *vma, * cause of "wrong memory was mapped in". 
*/ BUG_ON(err_ptr == NULL); - return do_remap_gfn(vma, addr, gfn, nr, err_ptr, prot, domid, pages); + return do_remap_pfn(vma, addr, gfn, nr, err_ptr, prot, domid, + false, pages); } EXPORT_SYMBOL_GPL(xen_remap_domain_gfn_array); +int xen_remap_domain_mfn_array(struct vm_area_struct *vma, + unsigned long addr, + xen_pfn_t *mfn, int nr, + int *err_ptr, pgprot_t prot, + unsigned int domid, struct page **pages) +{ + if (xen_feature(XENFEAT_auto_translated_physmap)) + return -EOPNOTSUPP; + + return do_remap_pfn(vma, addr, mfn, nr, err_ptr, prot, domid, + true, pages); +} +EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_array); + /* Returns: 0 success */ int xen_unmap_domain_gfn_range(struct vm_area_struct *vma, int nr, struct page **pages) diff --git a/arch/x86/xen/xen-pvh.S b/arch/x86/xen/xen-pvh.S index e1a5fbeae08d..ca2d3b2bf2af 100644 --- a/arch/x86/xen/xen-pvh.S +++ b/arch/x86/xen/xen-pvh.S @@ -54,12 +54,19 @@ * charge of setting up it's own stack, GDT and IDT. */ +#define PVH_GDT_ENTRY_CS 1 +#define PVH_GDT_ENTRY_DS 2 +#define PVH_GDT_ENTRY_CANARY 3 +#define PVH_CS_SEL (PVH_GDT_ENTRY_CS * 8) +#define PVH_DS_SEL (PVH_GDT_ENTRY_DS * 8) +#define PVH_CANARY_SEL (PVH_GDT_ENTRY_CANARY * 8) + ENTRY(pvh_start_xen) cld lgdt (_pa(gdt)) - mov $(__BOOT_DS),%eax + mov $PVH_DS_SEL,%eax mov %eax,%ds mov %eax,%es mov %eax,%ss @@ -93,11 +100,17 @@ ENTRY(pvh_start_xen) mov %eax, %cr0 /* Jump to 64-bit mode. */ - ljmp $__KERNEL_CS, $_pa(1f) + ljmp $PVH_CS_SEL, $_pa(1f) /* 64-bit entry point. */ .code64 1: + /* Set base address in stack canary descriptor. */ + mov $MSR_GS_BASE,%ecx + mov $_pa(canary), %eax + xor %edx, %edx + wrmsr + call xen_prepare_pvh /* startup_64 expects boot_params in %rsi. */ @@ -107,6 +120,17 @@ ENTRY(pvh_start_xen) #else /* CONFIG_X86_64 */ + /* Set base address in stack canary descriptor. */ + movl $_pa(gdt_start),%eax + movl $_pa(canary),%ecx + movw %cx, (PVH_GDT_ENTRY_CANARY * 8) + 2(%eax) + shrl $16, %ecx + movb %cl, (PVH_GDT_ENTRY_CANARY * 8) + 4(%eax) + movb %ch, (PVH_GDT_ENTRY_CANARY * 8) + 7(%eax) + + mov $PVH_CANARY_SEL,%eax + mov %eax,%gs + call mk_early_pgtbl_32 mov $_pa(initial_page_table), %eax @@ -116,13 +140,13 @@ ENTRY(pvh_start_xen) or $(X86_CR0_PG | X86_CR0_PE), %eax mov %eax, %cr0 - ljmp $__BOOT_CS, $1f + ljmp $PVH_CS_SEL, $1f 1: call xen_prepare_pvh mov $_pa(pvh_bootparams), %esi /* startup_32 doesn't expect paging and PAE to be on. 
*/ - ljmp $__BOOT_CS, $_pa(2f) + ljmp $PVH_CS_SEL, $_pa(2f) 2: mov %cr0, %eax and $~X86_CR0_PG, %eax @@ -131,7 +155,7 @@ ENTRY(pvh_start_xen) and $~X86_CR4_PAE, %eax mov %eax, %cr4 - ljmp $__BOOT_CS, $_pa(startup_32) + ljmp $PVH_CS_SEL, $_pa(startup_32) #endif END(pvh_start_xen) @@ -143,16 +167,19 @@ gdt: .word 0 gdt_start: .quad 0x0000000000000000 /* NULL descriptor */ - .quad 0x0000000000000000 /* reserved */ #ifdef CONFIG_X86_64 - .quad GDT_ENTRY(0xa09a, 0, 0xfffff) /* __KERNEL_CS */ + .quad GDT_ENTRY(0xa09a, 0, 0xfffff) /* PVH_CS_SEL */ #else - .quad GDT_ENTRY(0xc09a, 0, 0xfffff) /* __KERNEL_CS */ + .quad GDT_ENTRY(0xc09a, 0, 0xfffff) /* PVH_CS_SEL */ #endif - .quad GDT_ENTRY(0xc092, 0, 0xfffff) /* __KERNEL_DS */ + .quad GDT_ENTRY(0xc092, 0, 0xfffff) /* PVH_DS_SEL */ + .quad GDT_ENTRY(0x4090, 0, 0x18) /* PVH_CANARY_SEL */ gdt_end: - .balign 4 + .balign 16 +canary: + .fill 48, 1, 0 + early_stack: .fill 256, 1, 0 early_stack_end: diff --git a/arch/xtensa/include/asm/pci.h b/arch/xtensa/include/asm/pci.h index 6ddf0a30c60d..883024054b05 100644 --- a/arch/xtensa/include/asm/pci.h +++ b/arch/xtensa/include/asm/pci.h @@ -20,8 +20,6 @@ #define pcibios_assign_all_busses() 0 -extern struct pci_controller* pcibios_alloc_controller(void); - /* Assume some values. (We should revise them, if necessary) */ #define PCIBIOS_MIN_IO 0x2000 diff --git a/arch/xtensa/kernel/pci.c b/arch/xtensa/kernel/pci.c index b7c7a60c7000..21f13e9aabe1 100644 --- a/arch/xtensa/kernel/pci.c +++ b/arch/xtensa/kernel/pci.c @@ -41,8 +41,8 @@ * pci_bus_add_device */ -struct pci_controller* pci_ctrl_head; -struct pci_controller** pci_ctrl_tail = &pci_ctrl_head; +static struct pci_controller *pci_ctrl_head; +static struct pci_controller **pci_ctrl_tail = &pci_ctrl_head; static int pci_bus_count; @@ -80,50 +80,6 @@ pcibios_align_resource(void *data, const struct resource *res, return start; } -int -pcibios_enable_resources(struct pci_dev *dev, int mask) -{ - u16 cmd, old_cmd; - int idx; - struct resource *r; - - pci_read_config_word(dev, PCI_COMMAND, &cmd); - old_cmd = cmd; - for(idx=0; idx<6; idx++) { - r = &dev->resource[idx]; - if (!r->start && r->end) { - pr_err("PCI: Device %s not available because " - "of resource collisions\n", pci_name(dev)); - return -EINVAL; - } - if (r->flags & IORESOURCE_IO) - cmd |= PCI_COMMAND_IO; - if (r->flags & IORESOURCE_MEM) - cmd |= PCI_COMMAND_MEMORY; - } - if (dev->resource[PCI_ROM_RESOURCE].start) - cmd |= PCI_COMMAND_MEMORY; - if (cmd != old_cmd) { - pr_info("PCI: Enabling device %s (%04x -> %04x)\n", - pci_name(dev), old_cmd, cmd); - pci_write_config_word(dev, PCI_COMMAND, cmd); - } - return 0; -} - -struct pci_controller * __init pcibios_alloc_controller(void) -{ - struct pci_controller *pci_ctrl; - - pci_ctrl = (struct pci_controller *)alloc_bootmem(sizeof(*pci_ctrl)); - memset(pci_ctrl, 0, sizeof(struct pci_controller)); - - *pci_ctrl_tail = pci_ctrl; - pci_ctrl_tail = &pci_ctrl->next; - - return pci_ctrl; -} - static void __init pci_controller_apertures(struct pci_controller *pci_ctrl, struct list_head *resources) { @@ -223,8 +179,7 @@ int pcibios_enable_device(struct pci_dev *dev, int mask) for (idx=0; idx<6; idx++) { r = &dev->resource[idx]; if (!r->start && r->end) { - pr_err("PCI: Device %s not available because " - "of resource collisions\n", pci_name(dev)); + pci_err(dev, "can't enable device: resource collisions\n"); return -EINVAL; } if (r->flags & IORESOURCE_IO) @@ -233,29 +188,13 @@ int pcibios_enable_device(struct pci_dev *dev, int mask) cmd |= PCI_COMMAND_MEMORY; } if 
(cmd != old_cmd) { - pr_info("PCI: Enabling device %s (%04x -> %04x)\n", - pci_name(dev), old_cmd, cmd); + pci_info(dev, "enabling device (%04x -> %04x)\n", old_cmd, cmd); pci_write_config_word(dev, PCI_COMMAND, cmd); } return 0; } -#ifdef CONFIG_PROC_FS - -/* - * Return the index of the PCI controller for device pdev. - */ - -int -pci_controller_num(struct pci_dev *dev) -{ - struct pci_controller *pci_ctrl = (struct pci_controller*) dev->sysdata; - return pci_ctrl->index; -} - -#endif /* CONFIG_PROC_FS */ - /* * Platform support for /proc/bus/pci/X/Y mmap()s. * -- paulus. |