summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/arm/Kconfig1
-rw-r--r--arch/arm/boot/dts/Makefile1
-rw-r--r--arch/arm/boot/dts/am335x-wega.dtsi2
-rw-r--r--arch/arm/boot/dts/dra7.dtsi20
-rw-r--r--arch/arm/boot/dts/imx23-evk.dts1
-rw-r--r--arch/arm/boot/dts/imx6qdl-udoo.dtsi5
-rw-r--r--arch/arm/boot/dts/imx7ulp.dtsi2
-rw-r--r--arch/arm/boot/dts/meson.dtsi8
-rw-r--r--arch/arm/boot/dts/meson8.dtsi24
-rw-r--r--arch/arm/boot/dts/meson8b.dtsi24
-rw-r--r--arch/arm/boot/dts/omap3-beagle-ab4.dts47
-rw-r--r--arch/arm/boot/dts/omap3-beagle.dts33
-rw-r--r--arch/arm/boot/dts/spear320-hmi.dts1
-rw-r--r--arch/arm/boot/dts/ste-ux500-samsung-skomer.dts4
-rw-r--r--arch/arm/crypto/blake2s-shash.c4
-rw-r--r--arch/arm/include/asm/assembler.h2
-rw-r--r--arch/arm/include/asm/processor.h1
-rw-r--r--arch/arm/include/asm/uaccess.h10
-rw-r--r--arch/arm/mach-omap2/display.c2
-rw-r--r--arch/arm/mach-omap2/omap_hwmod.c4
-rw-r--r--arch/arm/mach-socfpga/Kconfig2
-rw-r--r--arch/arm/probes/kprobes/Makefile3
-rw-r--r--arch/arm64/Kconfig98
-rw-r--r--arch/arm64/Kconfig.platforms3
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi6
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts8
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2.dtsi4
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gx.dtsi6
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-sm1-bananapi-m5.dts2
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-sm1-odroid.dtsi2
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts8
-rw-r--r--arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts4
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mq-librem5.dtsi4
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mq.dtsi10
-rw-r--r--arch/arm64/boot/dts/freescale/mba8mx.dtsi2
-rw-r--r--arch/arm64/boot/dts/ti/k3-j721s2-common-proc-board.dts14
-rw-r--r--arch/arm64/boot/dts/ti/k3-j721s2.dtsi22
-rw-r--r--arch/arm64/include/asm/cputype.h4
-rw-r--r--arch/arm64/include/asm/el2_setup.h2
-rw-r--r--arch/arm64/kernel/cpu_errata.c37
-rw-r--r--arch/arm64/kernel/cpufeature.c3
-rw-r--r--arch/arm64/kernel/stacktrace.c5
-rw-r--r--arch/arm64/kernel/vdso/Makefile5
-rw-r--r--arch/arm64/kvm/arm.c51
-rw-r--r--arch/arm64/kvm/handle_exit.c8
-rw-r--r--arch/arm64/kvm/hyp/exception.c5
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/switch.h23
-rw-r--r--arch/arm64/kvm/hyp/pgtable.c18
-rw-r--r--arch/arm64/kvm/hyp/vgic-v3-sr.c3
-rw-r--r--arch/arm64/kvm/vgic/vgic-mmio.c2
-rw-r--r--arch/arm64/kvm/vgic/vgic-v3.c17
-rw-r--r--arch/arm64/mm/extable.c4
-rw-r--r--arch/arm64/tools/cpucaps4
-rw-r--r--arch/ia64/Kconfig2
-rw-r--r--arch/ia64/pci/fixup.c4
-rw-r--r--arch/mips/boot/dts/ingenic/ci20.dts15
-rw-r--r--arch/mips/cavium-octeon/octeon-memcpy.S2
-rw-r--r--arch/mips/include/asm/asm.h4
-rw-r--r--arch/mips/include/asm/ftrace.h4
-rw-r--r--arch/mips/include/asm/r4kcache.h4
-rw-r--r--arch/mips/include/asm/unaligned-emul.h176
-rw-r--r--arch/mips/kernel/mips-r2-to-r6-emul.c104
-rw-r--r--arch/mips/kernel/r2300_fpu.S6
-rw-r--r--arch/mips/kernel/r4k_fpu.S2
-rw-r--r--arch/mips/kernel/relocate_kernel.S22
-rw-r--r--arch/mips/kernel/scall32-o32.S10
-rw-r--r--arch/mips/kernel/scall64-n32.S2
-rw-r--r--arch/mips/kernel/scall64-n64.S2
-rw-r--r--arch/mips/kernel/scall64-o32.S10
-rw-r--r--arch/mips/kernel/syscall.c8
-rw-r--r--arch/mips/kvm/mips.c50
-rw-r--r--arch/mips/kvm/vz.c12
-rw-r--r--arch/mips/lib/csum_partial.S4
-rw-r--r--arch/mips/lib/memcpy.S4
-rw-r--r--arch/mips/lib/memset.S2
-rw-r--r--arch/mips/lib/strncpy_user.S4
-rw-r--r--arch/mips/lib/strnlen_user.S2
-rw-r--r--arch/mips/loongson64/vbios_quirk.c9
-rw-r--r--arch/parisc/include/asm/bitops.h8
-rw-r--r--arch/parisc/include/asm/uaccess.h29
-rw-r--r--arch/parisc/kernel/unaligned.c14
-rw-r--r--arch/parisc/lib/iomap.c18
-rw-r--r--arch/parisc/mm/init.c9
-rw-r--r--arch/powerpc/include/asm/book3s/32/mmu-hash.h2
-rw-r--r--arch/powerpc/include/asm/book3s/32/pgtable.h1
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgtable.h2
-rw-r--r--arch/powerpc/include/asm/fixmap.h6
-rw-r--r--arch/powerpc/include/asm/hw_irq.h2
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_64.h1
-rw-r--r--arch/powerpc/include/asm/kvm_host.h1
-rw-r--r--arch/powerpc/include/asm/nohash/32/pgtable.h1
-rw-r--r--arch/powerpc/include/asm/nohash/64/pgtable.h1
-rw-r--r--arch/powerpc/include/asm/ppc-opcode.h1
-rw-r--r--arch/powerpc/include/asm/syscall.h4
-rw-r--r--arch/powerpc/include/asm/thread_info.h2
-rw-r--r--arch/powerpc/kernel/head_book3s_32.S4
-rw-r--r--arch/powerpc/kernel/interrupt_64.S2
-rw-r--r--arch/powerpc/kernel/time.c5
-rw-r--r--arch/powerpc/kvm/book3s_hv.c3
-rw-r--r--arch/powerpc/kvm/book3s_hv_nested.c2
-rw-r--r--arch/powerpc/lib/sstep.c2
-rw-r--r--arch/powerpc/mm/book3s32/mmu.c10
-rw-r--r--arch/powerpc/mm/kasan/book3s_32.c59
-rw-r--r--arch/powerpc/mm/pgtable.c9
-rw-r--r--arch/powerpc/net/bpf_jit_comp.c29
-rw-r--r--arch/powerpc/net/bpf_jit_comp32.c9
-rw-r--r--arch/powerpc/net/bpf_jit_comp64.c29
-rw-r--r--arch/powerpc/perf/core-book3s.c75
-rw-r--r--arch/riscv/kvm/vcpu.c48
-rw-r--r--arch/riscv/kvm/vcpu_sbi_base.c3
-rw-r--r--arch/s390/Kconfig15
-rw-r--r--arch/s390/configs/debug_defconfig20
-rw-r--r--arch/s390/configs/defconfig16
-rw-r--r--arch/s390/configs/zfcpdump_defconfig3
-rw-r--r--arch/s390/hypfs/hypfs_vm.c6
-rw-r--r--arch/s390/include/asm/uaccess.h4
-rw-r--r--arch/s390/kernel/module.c37
-rw-r--r--arch/s390/kernel/nmi.c27
-rw-r--r--arch/s390/kvm/kvm-s390.c2
-rw-r--r--arch/s390/lib/Makefile3
-rw-r--r--arch/s390/lib/test_modules.c32
-rw-r--r--arch/s390/lib/test_modules.h53
-rw-r--r--arch/s390/lib/test_modules_helpers.c13
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/crypto/blake2s-shash.c4
-rw-r--r--arch/x86/events/intel/core.c28
-rw-r--r--arch/x86/events/intel/lbr.c168
-rw-r--r--arch/x86/events/intel/pt.c5
-rw-r--r--arch/x86/events/intel/uncore.c2
-rw-r--r--arch/x86/events/intel/uncore.h3
-rw-r--r--arch/x86/events/intel/uncore_discovery.c4
-rw-r--r--arch/x86/events/intel/uncore_discovery.h2
-rw-r--r--arch/x86/events/intel/uncore_snb.c214
-rw-r--r--arch/x86/events/intel/uncore_snbep.c2
-rw-r--r--arch/x86/events/perf_event.h10
-rw-r--r--arch/x86/events/rapl.c9
-rw-r--r--arch/x86/include/asm/bug.h20
-rw-r--r--arch/x86/include/asm/kvm-x86-ops.h2
-rw-r--r--arch/x86/include/asm/kvm_host.h9
-rw-r--r--arch/x86/include/asm/msr-index.h1
-rw-r--r--arch/x86/include/asm/svm.h36
-rw-r--r--arch/x86/include/asm/xen/cpuid.h7
-rw-r--r--arch/x86/include/asm/xen/hypervisor.h14
-rw-r--r--arch/x86/include/uapi/asm/kvm.h3
-rw-r--r--arch/x86/kernel/cpu/mce/amd.c2
-rw-r--r--arch/x86/kernel/cpu/mce/intel.c1
-rw-r--r--arch/x86/kernel/cpu/sgx/encl.c2
-rw-r--r--arch/x86/kernel/cpu/sgx/main.c10
-rw-r--r--arch/x86/kernel/fpu/regset.c9
-rw-r--r--arch/x86/kernel/fpu/xstate.c5
-rw-r--r--arch/x86/kernel/kvm.c9
-rw-r--r--arch/x86/kernel/ptrace.c4
-rw-r--r--arch/x86/kernel/resource.c23
-rw-r--r--arch/x86/kvm/cpuid.c104
-rw-r--r--arch/x86/kvm/lapic.c19
-rw-r--r--arch/x86/kvm/mmu/mmu.c13
-rw-r--r--arch/x86/kvm/pmu.c7
-rw-r--r--arch/x86/kvm/svm/avic.c93
-rw-r--r--arch/x86/kvm/svm/nested.c35
-rw-r--r--arch/x86/kvm/svm/sev.c9
-rw-r--r--arch/x86/kvm/svm/svm.c288
-rw-r--r--arch/x86/kvm/svm/svm.h22
-rw-r--r--arch/x86/kvm/svm/svm_onhyperv.h12
-rw-r--r--arch/x86/kvm/vmx/capabilities.h1
-rw-r--r--arch/x86/kvm/vmx/evmcs.c4
-rw-r--r--arch/x86/kvm/vmx/evmcs.h48
-rw-r--r--arch/x86/kvm/vmx/nested.c82
-rw-r--r--arch/x86/kvm/vmx/vmcs12.c4
-rw-r--r--arch/x86/kvm/vmx/vmcs12.h6
-rw-r--r--arch/x86/kvm/vmx/vmx.c69
-rw-r--r--arch/x86/kvm/x86.c136
-rw-r--r--arch/x86/kvm/x86.h45
-rw-r--r--arch/x86/kvm/xen.c107
-rw-r--r--arch/x86/pci/fixup.c4
-rw-r--r--arch/x86/xen/enlighten_hvm.c22
-rw-r--r--arch/x86/xen/enlighten_pv.c4
-rw-r--r--arch/x86/xen/smp_pv.c26
-rw-r--r--arch/x86/xen/vga.c16
178 files changed, 2277 insertions, 1179 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index fabe39169b12..4c97cb40eebb 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -83,6 +83,7 @@ config ARM
select HAVE_EBPF_JIT if !CPU_ENDIAN_BE32
select HAVE_CONTEXT_TRACKING
select HAVE_C_RECORDMCOUNT
+ select HAVE_BUILDTIME_MCOUNT_SORT
select HAVE_DEBUG_KMEMLEAK if !XIP_KERNEL
select HAVE_DMA_CONTIGUOUS if MMU
select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU
diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile
index 235ad559acb2..e41eca79c950 100644
--- a/arch/arm/boot/dts/Makefile
+++ b/arch/arm/boot/dts/Makefile
@@ -806,6 +806,7 @@ dtb-$(CONFIG_ARCH_OMAP3) += \
logicpd-som-lv-37xx-devkit.dtb \
omap3430-sdp.dtb \
omap3-beagle.dtb \
+ omap3-beagle-ab4.dtb \
omap3-beagle-xm.dtb \
omap3-beagle-xm-ab.dtb \
omap3-cm-t3517.dtb \
diff --git a/arch/arm/boot/dts/am335x-wega.dtsi b/arch/arm/boot/dts/am335x-wega.dtsi
index 673159d93a6a..f957fea8208e 100644
--- a/arch/arm/boot/dts/am335x-wega.dtsi
+++ b/arch/arm/boot/dts/am335x-wega.dtsi
@@ -55,7 +55,7 @@
2 1 0 0 /* # 0: INACTIVE, 1: TX, 2: RX */
>;
tx-num-evt = <16>;
- rt-num-evt = <16>;
+ rx-num-evt = <16>;
status = "okay";
};
diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi
index 6b485cbed8d5..42bff117656c 100644
--- a/arch/arm/boot/dts/dra7.dtsi
+++ b/arch/arm/boot/dts/dra7.dtsi
@@ -160,7 +160,7 @@
target-module@48210000 {
compatible = "ti,sysc-omap4-simple", "ti,sysc";
power-domains = <&prm_mpu>;
- clocks = <&mpu_clkctrl DRA7_MPU_CLKCTRL 0>;
+ clocks = <&mpu_clkctrl DRA7_MPU_MPU_CLKCTRL 0>;
clock-names = "fck";
#address-cells = <1>;
#size-cells = <1>;
@@ -875,10 +875,10 @@
<0x58000014 4>;
reg-names = "rev", "syss";
ti,syss-mask = <1>;
- clocks = <&dss_clkctrl DRA7_DSS_CORE_CLKCTRL 0>,
- <&dss_clkctrl DRA7_DSS_CORE_CLKCTRL 9>,
- <&dss_clkctrl DRA7_DSS_CORE_CLKCTRL 10>,
- <&dss_clkctrl DRA7_DSS_CORE_CLKCTRL 11>;
+ clocks = <&dss_clkctrl DRA7_DSS_DSS_CORE_CLKCTRL 0>,
+ <&dss_clkctrl DRA7_DSS_DSS_CORE_CLKCTRL 9>,
+ <&dss_clkctrl DRA7_DSS_DSS_CORE_CLKCTRL 10>,
+ <&dss_clkctrl DRA7_DSS_DSS_CORE_CLKCTRL 11>;
clock-names = "fck", "hdmi_clk", "sys_clk", "tv_clk";
#address-cells = <1>;
#size-cells = <1>;
@@ -912,7 +912,7 @@
SYSC_OMAP2_SOFTRESET |
SYSC_OMAP2_AUTOIDLE)>;
ti,syss-mask = <1>;
- clocks = <&dss_clkctrl DRA7_DSS_CORE_CLKCTRL 8>;
+ clocks = <&dss_clkctrl DRA7_DSS_DSS_CORE_CLKCTRL 8>;
clock-names = "fck";
#address-cells = <1>;
#size-cells = <1>;
@@ -939,8 +939,8 @@
<SYSC_IDLE_SMART>,
<SYSC_IDLE_SMART_WKUP>;
ti,sysc-mask = <(SYSC_OMAP4_SOFTRESET)>;
- clocks = <&dss_clkctrl DRA7_DSS_CORE_CLKCTRL 9>,
- <&dss_clkctrl DRA7_DSS_CORE_CLKCTRL 8>;
+ clocks = <&dss_clkctrl DRA7_DSS_DSS_CORE_CLKCTRL 9>,
+ <&dss_clkctrl DRA7_DSS_DSS_CORE_CLKCTRL 8>;
clock-names = "fck", "dss_clk";
#address-cells = <1>;
#size-cells = <1>;
@@ -979,7 +979,7 @@
compatible = "vivante,gc";
reg = <0x0 0x700>;
interrupts = <GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&dss_clkctrl DRA7_BB2D_CLKCTRL 0>;
+ clocks = <&dss_clkctrl DRA7_DSS_BB2D_CLKCTRL 0>;
clock-names = "core";
};
};
@@ -1333,7 +1333,7 @@
ti,no-reset-on-init;
ti,no-idle;
timer@0 {
- assigned-clocks = <&wkupaon_clkctrl DRA7_TIMER1_CLKCTRL 24>;
+ assigned-clocks = <&wkupaon_clkctrl DRA7_WKUPAON_TIMER1_CLKCTRL 24>;
assigned-clock-parents = <&sys_32k_ck>;
};
};
diff --git a/arch/arm/boot/dts/imx23-evk.dts b/arch/arm/boot/dts/imx23-evk.dts
index 8cbaf1c81174..3b609d987d88 100644
--- a/arch/arm/boot/dts/imx23-evk.dts
+++ b/arch/arm/boot/dts/imx23-evk.dts
@@ -79,7 +79,6 @@
MX23_PAD_LCD_RESET__GPIO_1_18
MX23_PAD_PWM3__GPIO_1_29
MX23_PAD_PWM4__GPIO_1_30
- MX23_PAD_SSP1_DETECT__SSP1_DETECT
>;
fsl,drive-strength = <MXS_DRIVE_4mA>;
fsl,voltage = <MXS_VOLTAGE_HIGH>;
diff --git a/arch/arm/boot/dts/imx6qdl-udoo.dtsi b/arch/arm/boot/dts/imx6qdl-udoo.dtsi
index d07d8f83456d..ccfa8e320be6 100644
--- a/arch/arm/boot/dts/imx6qdl-udoo.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-udoo.dtsi
@@ -5,6 +5,8 @@
* Author: Fabio Estevam <fabio.estevam@freescale.com>
*/
+#include <dt-bindings/gpio/gpio.h>
+
/ {
aliases {
backlight = &backlight;
@@ -226,6 +228,7 @@
MX6QDL_PAD_SD3_DAT1__SD3_DATA1 0x17059
MX6QDL_PAD_SD3_DAT2__SD3_DATA2 0x17059
MX6QDL_PAD_SD3_DAT3__SD3_DATA3 0x17059
+ MX6QDL_PAD_SD3_DAT5__GPIO7_IO00 0x1b0b0
>;
};
@@ -304,7 +307,7 @@
&usdhc3 {
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_usdhc3>;
- non-removable;
+ cd-gpios = <&gpio7 0 GPIO_ACTIVE_LOW>;
status = "okay";
};
diff --git a/arch/arm/boot/dts/imx7ulp.dtsi b/arch/arm/boot/dts/imx7ulp.dtsi
index b7ea37ad4e55..bcec98b96411 100644
--- a/arch/arm/boot/dts/imx7ulp.dtsi
+++ b/arch/arm/boot/dts/imx7ulp.dtsi
@@ -259,7 +259,7 @@
interrupts = <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&pcc2 IMX7ULP_CLK_WDG1>;
assigned-clocks = <&pcc2 IMX7ULP_CLK_WDG1>;
- assigned-clocks-parents = <&scg1 IMX7ULP_CLK_FIRC_BUS_CLK>;
+ assigned-clock-parents = <&scg1 IMX7ULP_CLK_FIRC_BUS_CLK>;
timeout-sec = <40>;
};
diff --git a/arch/arm/boot/dts/meson.dtsi b/arch/arm/boot/dts/meson.dtsi
index 3be7cba603d5..26eaba3fa96f 100644
--- a/arch/arm/boot/dts/meson.dtsi
+++ b/arch/arm/boot/dts/meson.dtsi
@@ -59,7 +59,7 @@
};
uart_A: serial@84c0 {
- compatible = "amlogic,meson6-uart", "amlogic,meson-uart";
+ compatible = "amlogic,meson6-uart";
reg = <0x84c0 0x18>;
interrupts = <GIC_SPI 26 IRQ_TYPE_EDGE_RISING>;
fifo-size = <128>;
@@ -67,7 +67,7 @@
};
uart_B: serial@84dc {
- compatible = "amlogic,meson6-uart", "amlogic,meson-uart";
+ compatible = "amlogic,meson6-uart";
reg = <0x84dc 0x18>;
interrupts = <GIC_SPI 75 IRQ_TYPE_EDGE_RISING>;
status = "disabled";
@@ -105,7 +105,7 @@
};
uart_C: serial@8700 {
- compatible = "amlogic,meson6-uart", "amlogic,meson-uart";
+ compatible = "amlogic,meson6-uart";
reg = <0x8700 0x18>;
interrupts = <GIC_SPI 93 IRQ_TYPE_EDGE_RISING>;
status = "disabled";
@@ -228,7 +228,7 @@
};
uart_AO: serial@4c0 {
- compatible = "amlogic,meson6-uart", "amlogic,meson-ao-uart", "amlogic,meson-uart";
+ compatible = "amlogic,meson6-uart", "amlogic,meson-ao-uart";
reg = <0x4c0 0x18>;
interrupts = <GIC_SPI 90 IRQ_TYPE_EDGE_RISING>;
status = "disabled";
diff --git a/arch/arm/boot/dts/meson8.dtsi b/arch/arm/boot/dts/meson8.dtsi
index f80ddc98d3a2..9997a5d0333a 100644
--- a/arch/arm/boot/dts/meson8.dtsi
+++ b/arch/arm/boot/dts/meson8.dtsi
@@ -736,27 +736,27 @@
};
&uart_AO {
- compatible = "amlogic,meson8-uart", "amlogic,meson-uart";
- clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_CLK81>;
- clock-names = "baud", "xtal", "pclk";
+ compatible = "amlogic,meson8-uart", "amlogic,meson-ao-uart";
+ clocks = <&xtal>, <&clkc CLKID_CLK81>, <&clkc CLKID_CLK81>;
+ clock-names = "xtal", "pclk", "baud";
};
&uart_A {
- compatible = "amlogic,meson8-uart", "amlogic,meson-uart";
- clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_UART0>;
- clock-names = "baud", "xtal", "pclk";
+ compatible = "amlogic,meson8-uart";
+ clocks = <&xtal>, <&clkc CLKID_UART0>, <&clkc CLKID_CLK81>;
+ clock-names = "xtal", "pclk", "baud";
};
&uart_B {
- compatible = "amlogic,meson8-uart", "amlogic,meson-uart";
- clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_UART1>;
- clock-names = "baud", "xtal", "pclk";
+ compatible = "amlogic,meson8-uart";
+ clocks = <&xtal>, <&clkc CLKID_UART0>, <&clkc CLKID_CLK81>;
+ clock-names = "xtal", "pclk", "baud";
};
&uart_C {
- compatible = "amlogic,meson8-uart", "amlogic,meson-uart";
- clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_UART2>;
- clock-names = "baud", "xtal", "pclk";
+ compatible = "amlogic,meson8-uart";
+ clocks = <&xtal>, <&clkc CLKID_UART0>, <&clkc CLKID_CLK81>;
+ clock-names = "xtal", "pclk", "baud";
};
&usb0 {
diff --git a/arch/arm/boot/dts/meson8b.dtsi b/arch/arm/boot/dts/meson8b.dtsi
index b49b7cbaed4e..94f1c03decce 100644
--- a/arch/arm/boot/dts/meson8b.dtsi
+++ b/arch/arm/boot/dts/meson8b.dtsi
@@ -724,27 +724,27 @@
};
&uart_AO {
- compatible = "amlogic,meson8b-uart", "amlogic,meson-uart";
- clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_CLK81>;
- clock-names = "baud", "xtal", "pclk";
+ compatible = "amlogic,meson8b-uart", "amlogic,meson-ao-uart";
+ clocks = <&xtal>, <&clkc CLKID_CLK81>, <&clkc CLKID_CLK81>;
+ clock-names = "xtal", "pclk", "baud";
};
&uart_A {
- compatible = "amlogic,meson8b-uart", "amlogic,meson-uart";
- clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_UART0>;
- clock-names = "baud", "xtal", "pclk";
+ compatible = "amlogic,meson8b-uart";
+ clocks = <&xtal>, <&clkc CLKID_UART0>, <&clkc CLKID_CLK81>;
+ clock-names = "xtal", "pclk", "baud";
};
&uart_B {
- compatible = "amlogic,meson8b-uart", "amlogic,meson-uart";
- clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_UART1>;
- clock-names = "baud", "xtal", "pclk";
+ compatible = "amlogic,meson8b-uart";
+ clocks = <&xtal>, <&clkc CLKID_UART0>, <&clkc CLKID_CLK81>;
+ clock-names = "xtal", "pclk", "baud";
};
&uart_C {
- compatible = "amlogic,meson8b-uart", "amlogic,meson-uart";
- clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_UART2>;
- clock-names = "baud", "xtal", "pclk";
+ compatible = "amlogic,meson8b-uart";
+ clocks = <&xtal>, <&clkc CLKID_UART0>, <&clkc CLKID_CLK81>;
+ clock-names = "xtal", "pclk", "baud";
};
&usb0 {
diff --git a/arch/arm/boot/dts/omap3-beagle-ab4.dts b/arch/arm/boot/dts/omap3-beagle-ab4.dts
new file mode 100644
index 000000000000..990ff2d84686
--- /dev/null
+++ b/arch/arm/boot/dts/omap3-beagle-ab4.dts
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/dts-v1/;
+
+#include "omap3-beagle.dts"
+
+/ {
+ model = "TI OMAP3 BeagleBoard A to B4";
+ compatible = "ti,omap3-beagle-ab4", "ti,omap3-beagle", "ti,omap3430", "ti,omap3";
+};
+
+/*
+ * Workaround for capacitor C70 issue, see "Boards revision A and < B5"
+ * section at https://elinux.org/BeagleBoard_Community
+ */
+
+/* Unusable as clocksource because of unreliable oscillator */
+&counter32k {
+ status = "disabled";
+};
+
+/* Unusable as clockevent because of unreliable oscillator, allow to idle */
+&timer1_target {
+ /delete-property/ti,no-reset-on-init;
+ /delete-property/ti,no-idle;
+ timer@0 {
+ /delete-property/ti,timer-alwon;
+ };
+};
+
+/* Preferred always-on timer for clocksource */
+&timer12_target {
+ ti,no-reset-on-init;
+ ti,no-idle;
+ timer@0 {
+ /* Always clocked by secure_32k_fck */
+ };
+};
+
+/* Preferred timer for clockevent */
+&timer2_target {
+ ti,no-reset-on-init;
+ ti,no-idle;
+ timer@0 {
+ assigned-clocks = <&gpt2_fck>;
+ assigned-clock-parents = <&sys_ck>;
+ };
+};
diff --git a/arch/arm/boot/dts/omap3-beagle.dts b/arch/arm/boot/dts/omap3-beagle.dts
index f9f34b8458e9..0548b391334f 100644
--- a/arch/arm/boot/dts/omap3-beagle.dts
+++ b/arch/arm/boot/dts/omap3-beagle.dts
@@ -304,39 +304,6 @@
phys = <0 &hsusb2_phy>;
};
-/* Unusable as clocksource because of unreliable oscillator */
-&counter32k {
- status = "disabled";
-};
-
-/* Unusable as clockevent because if unreliable oscillator, allow to idle */
-&timer1_target {
- /delete-property/ti,no-reset-on-init;
- /delete-property/ti,no-idle;
- timer@0 {
- /delete-property/ti,timer-alwon;
- };
-};
-
-/* Preferred always-on timer for clocksource */
-&timer12_target {
- ti,no-reset-on-init;
- ti,no-idle;
- timer@0 {
- /* Always clocked by secure_32k_fck */
- };
-};
-
-/* Preferred timer for clockevent */
-&timer2_target {
- ti,no-reset-on-init;
- ti,no-idle;
- timer@0 {
- assigned-clocks = <&gpt2_fck>;
- assigned-clock-parents = <&sys_ck>;
- };
-};
-
&twl_gpio {
ti,use-leds;
/* pullups: BIT(1) */
diff --git a/arch/arm/boot/dts/spear320-hmi.dts b/arch/arm/boot/dts/spear320-hmi.dts
index 367ba48aac3e..b587e4ec11e5 100644
--- a/arch/arm/boot/dts/spear320-hmi.dts
+++ b/arch/arm/boot/dts/spear320-hmi.dts
@@ -235,7 +235,6 @@
#address-cells = <1>;
#size-cells = <0>;
reg = <0x41>;
- irq-over-gpio;
irq-gpios = <&gpiopinctrl 29 0x4>;
id = <0>;
blocks = <0x5>;
diff --git a/arch/arm/boot/dts/ste-ux500-samsung-skomer.dts b/arch/arm/boot/dts/ste-ux500-samsung-skomer.dts
index 580ca497f312..f8c5899fbdba 100644
--- a/arch/arm/boot/dts/ste-ux500-samsung-skomer.dts
+++ b/arch/arm/boot/dts/ste-ux500-samsung-skomer.dts
@@ -185,10 +185,6 @@
cap-sd-highspeed;
cap-mmc-highspeed;
/* All direction control is used */
- st,sig-dir-cmd;
- st,sig-dir-dat0;
- st,sig-dir-dat2;
- st,sig-dir-dat31;
st,sig-pin-fbclk;
full-pwr-cycle;
vmmc-supply = <&ab8500_ldo_aux3_reg>;
diff --git a/arch/arm/crypto/blake2s-shash.c b/arch/arm/crypto/blake2s-shash.c
index 17c1c3bfe2f5..763c73beea2d 100644
--- a/arch/arm/crypto/blake2s-shash.c
+++ b/arch/arm/crypto/blake2s-shash.c
@@ -13,12 +13,12 @@
static int crypto_blake2s_update_arm(struct shash_desc *desc,
const u8 *in, unsigned int inlen)
{
- return crypto_blake2s_update(desc, in, inlen, blake2s_compress);
+ return crypto_blake2s_update(desc, in, inlen, false);
}
static int crypto_blake2s_final_arm(struct shash_desc *desc, u8 *out)
{
- return crypto_blake2s_final(desc, out, blake2s_compress);
+ return crypto_blake2s_final(desc, out, false);
}
#define BLAKE2S_ALG(name, driver_name, digest_size) \
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 7d23d4bb2168..6fe67963ba5a 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -288,6 +288,7 @@
*/
#define ALT_UP(instr...) \
.pushsection ".alt.smp.init", "a" ;\
+ .align 2 ;\
.long 9998b - . ;\
9997: instr ;\
.if . - 9997b == 2 ;\
@@ -299,6 +300,7 @@
.popsection
#define ALT_UP_B(label) \
.pushsection ".alt.smp.init", "a" ;\
+ .align 2 ;\
.long 9998b - . ;\
W(b) . + (label - 9998b) ;\
.popsection
diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h
index 6af68edfa53a..bdc35c0e8dfb 100644
--- a/arch/arm/include/asm/processor.h
+++ b/arch/arm/include/asm/processor.h
@@ -96,6 +96,7 @@ unsigned long __get_wchan(struct task_struct *p);
#define __ALT_SMP_ASM(smp, up) \
"9998: " smp "\n" \
" .pushsection \".alt.smp.init\", \"a\"\n" \
+ " .align 2\n" \
" .long 9998b - .\n" \
" " up "\n" \
" .popsection\n"
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 36fbc3329252..32dbfd81f42a 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -11,6 +11,7 @@
#include <linux/string.h>
#include <asm/memory.h>
#include <asm/domain.h>
+#include <asm/unaligned.h>
#include <asm/unified.h>
#include <asm/compiler.h>
@@ -497,7 +498,10 @@ do { \
} \
default: __err = __get_user_bad(); break; \
} \
- *(type *)(dst) = __val; \
+ if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) \
+ put_unaligned(__val, (type *)(dst)); \
+ else \
+ *(type *)(dst) = __val; /* aligned by caller */ \
if (__err) \
goto err_label; \
} while (0)
@@ -507,7 +511,9 @@ do { \
const type *__pk_ptr = (dst); \
unsigned long __dst = (unsigned long)__pk_ptr; \
int __err = 0; \
- type __val = *(type *)src; \
+ type __val = IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) \
+ ? get_unaligned((type *)(src)) \
+ : *(type *)(src); /* aligned by caller */ \
switch (sizeof(type)) { \
case 1: __put_user_asm_byte(__val, __dst, __err, ""); break; \
case 2: __put_user_asm_half(__val, __dst, __err, ""); break; \
diff --git a/arch/arm/mach-omap2/display.c b/arch/arm/mach-omap2/display.c
index 6daaa645ae5d..21413a9b7b6c 100644
--- a/arch/arm/mach-omap2/display.c
+++ b/arch/arm/mach-omap2/display.c
@@ -263,9 +263,9 @@ static int __init omapdss_init_of(void)
}
r = of_platform_populate(node, NULL, NULL, &pdev->dev);
+ put_device(&pdev->dev);
if (r) {
pr_err("Unable to populate DSS submodule devices\n");
- put_device(&pdev->dev);
return r;
}
diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c
index ccb0e3732c0d..31d1a21f6041 100644
--- a/arch/arm/mach-omap2/omap_hwmod.c
+++ b/arch/arm/mach-omap2/omap_hwmod.c
@@ -752,8 +752,10 @@ static int __init _init_clkctrl_providers(void)
for_each_matching_node(np, ti_clkctrl_match_table) {
ret = _setup_clkctrl_provider(np);
- if (ret)
+ if (ret) {
+ of_node_put(np);
break;
+ }
}
return ret;
diff --git a/arch/arm/mach-socfpga/Kconfig b/arch/arm/mach-socfpga/Kconfig
index 43ddec677c0b..594edf9bbea4 100644
--- a/arch/arm/mach-socfpga/Kconfig
+++ b/arch/arm/mach-socfpga/Kconfig
@@ -2,6 +2,7 @@
menuconfig ARCH_INTEL_SOCFPGA
bool "Altera SOCFPGA family"
depends on ARCH_MULTI_V7
+ select ARCH_HAS_RESET_CONTROLLER
select ARCH_SUPPORTS_BIG_ENDIAN
select ARM_AMBA
select ARM_GIC
@@ -18,6 +19,7 @@ menuconfig ARCH_INTEL_SOCFPGA
select PL310_ERRATA_727915
select PL310_ERRATA_753970 if PL310
select PL310_ERRATA_769419
+ select RESET_CONTROLLER
if ARCH_INTEL_SOCFPGA
config SOCFPGA_SUSPEND
diff --git a/arch/arm/probes/kprobes/Makefile b/arch/arm/probes/kprobes/Makefile
index 14db56f49f0a..6159010dac4a 100644
--- a/arch/arm/probes/kprobes/Makefile
+++ b/arch/arm/probes/kprobes/Makefile
@@ -1,4 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
+KASAN_SANITIZE_actions-common.o := n
+KASAN_SANITIZE_actions-arm.o := n
+KASAN_SANITIZE_actions-thumb.o := n
obj-$(CONFIG_KPROBES) += core.o actions-common.o checkers-common.o
obj-$(CONFIG_ARM_KPROBES_TEST) += test-kprobes.o
test-kprobes-objs := test-core.o
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 6978140edfa4..09b885cc4db5 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -670,15 +670,42 @@ config ARM64_ERRATUM_1508412
config ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE
bool
+config ARM64_ERRATUM_2051678
+ bool "Cortex-A510: 2051678: disable Hardware Update of the page table dirty bit"
+ default y
+ help
+ This options adds the workaround for ARM Cortex-A510 erratum ARM64_ERRATUM_2051678.
+ Affected Coretex-A510 might not respect the ordering rules for
+ hardware update of the page table's dirty bit. The workaround
+ is to not enable the feature on affected CPUs.
+
+ If unsure, say Y.
+
+config ARM64_ERRATUM_2077057
+ bool "Cortex-A510: 2077057: workaround software-step corrupting SPSR_EL2"
+ help
+ This option adds the workaround for ARM Cortex-A510 erratum 2077057.
+ Affected Cortex-A510 may corrupt SPSR_EL2 when the a step exception is
+ expected, but a Pointer Authentication trap is taken instead. The
+ erratum causes SPSR_EL1 to be copied to SPSR_EL2, which could allow
+ EL1 to cause a return to EL2 with a guest controlled ELR_EL2.
+
+ This can only happen when EL2 is stepping EL1.
+
+ When these conditions occur, the SPSR_EL2 value is unchanged from the
+ previous guest entry, and can be restored from the in-memory copy.
+
+ If unsure, say Y.
+
config ARM64_ERRATUM_2119858
- bool "Cortex-A710: 2119858: workaround TRBE overwriting trace data in FILL mode"
+ bool "Cortex-A710/X2: 2119858: workaround TRBE overwriting trace data in FILL mode"
default y
depends on CORESIGHT_TRBE
select ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE
help
- This option adds the workaround for ARM Cortex-A710 erratum 2119858.
+ This option adds the workaround for ARM Cortex-A710/X2 erratum 2119858.
- Affected Cortex-A710 cores could overwrite up to 3 cache lines of trace
+ Affected Cortex-A710/X2 cores could overwrite up to 3 cache lines of trace
data at the base of the buffer (pointed to by TRBASER_EL1) in FILL mode in
the event of a WRAP event.
@@ -761,14 +788,14 @@ config ARM64_ERRATUM_2253138
If unsure, say Y.
config ARM64_ERRATUM_2224489
- bool "Cortex-A710: 2224489: workaround TRBE writing to address out-of-range"
+ bool "Cortex-A710/X2: 2224489: workaround TRBE writing to address out-of-range"
depends on CORESIGHT_TRBE
default y
select ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE
help
- This option adds the workaround for ARM Cortex-A710 erratum 2224489.
+ This option adds the workaround for ARM Cortex-A710/X2 erratum 2224489.
- Affected Cortex-A710 cores might write to an out-of-range address, not reserved
+ Affected Cortex-A710/X2 cores might write to an out-of-range address, not reserved
for TRBE. Under some conditions, the TRBE might generate a write to the next
virtually addressed page following the last page of the TRBE address space
(i.e., the TRBLIMITR_EL1.LIMIT), instead of wrapping around to the base.
@@ -778,6 +805,65 @@ config ARM64_ERRATUM_2224489
If unsure, say Y.
+config ARM64_ERRATUM_2064142
+ bool "Cortex-A510: 2064142: workaround TRBE register writes while disabled"
+ depends on COMPILE_TEST # Until the CoreSight TRBE driver changes are in
+ default y
+ help
+ This option adds the workaround for ARM Cortex-A510 erratum 2064142.
+
+ Affected Cortex-A510 core might fail to write into system registers after the
+ TRBE has been disabled. Under some conditions after the TRBE has been disabled
+ writes into TRBE registers TRBLIMITR_EL1, TRBPTR_EL1, TRBBASER_EL1, TRBSR_EL1,
+ and TRBTRG_EL1 will be ignored and will not be effected.
+
+ Work around this in the driver by executing TSB CSYNC and DSB after collection
+ is stopped and before performing a system register write to one of the affected
+ registers.
+
+ If unsure, say Y.
+
+config ARM64_ERRATUM_2038923
+ bool "Cortex-A510: 2038923: workaround TRBE corruption with enable"
+ depends on COMPILE_TEST # Until the CoreSight TRBE driver changes are in
+ default y
+ help
+ This option adds the workaround for ARM Cortex-A510 erratum 2038923.
+
+ Affected Cortex-A510 core might cause an inconsistent view on whether trace is
+ prohibited within the CPU. As a result, the trace buffer or trace buffer state
+ might be corrupted. This happens after TRBE buffer has been enabled by setting
+ TRBLIMITR_EL1.E, followed by just a single context synchronization event before
+ execution changes from a context, in which trace is prohibited to one where it
+ isn't, or vice versa. In these mentioned conditions, the view of whether trace
+ is prohibited is inconsistent between parts of the CPU, and the trace buffer or
+ the trace buffer state might be corrupted.
+
+ Work around this in the driver by preventing an inconsistent view of whether the
+ trace is prohibited or not based on TRBLIMITR_EL1.E by immediately following a
+ change to TRBLIMITR_EL1.E with at least one ISB instruction before an ERET, or
+ two ISB instructions if no ERET is to take place.
+
+ If unsure, say Y.
+
+config ARM64_ERRATUM_1902691
+ bool "Cortex-A510: 1902691: workaround TRBE trace corruption"
+ depends on COMPILE_TEST # Until the CoreSight TRBE driver changes are in
+ default y
+ help
+ This option adds the workaround for ARM Cortex-A510 erratum 1902691.
+
+ Affected Cortex-A510 core might cause trace data corruption, when being written
+ into the memory. Effectively TRBE is broken and hence cannot be used to capture
+ trace data.
+
+ Work around this problem in the driver by just preventing TRBE initialization on
+ affected cpus. The firmware must have disabled the access to TRBE for the kernel
+ on such implementations. This will cover the kernel for any firmware that doesn't
+ do this already.
+
+ If unsure, say Y.
+
config CAVIUM_ERRATUM_22375
bool "Cavium erratum 22375, 24313"
default y
diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index 7d5d58800170..21697449d762 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -309,9 +309,6 @@ config ARCH_VISCONTI
help
This enables support for Toshiba Visconti SoCs Family.
-config ARCH_VULCAN
- def_bool n
-
config ARCH_XGENE
bool "AppliedMicro X-Gene SOC Family"
help
diff --git a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi
index 517519e6e87f..f84d4b489e0b 100644
--- a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi
@@ -107,6 +107,12 @@
no-map;
};
+ /* 32 MiB reserved for ARM Trusted Firmware (BL32) */
+ secmon_reserved_bl32: secmon@5300000 {
+ reg = <0x0 0x05300000 0x0 0x2000000>;
+ no-map;
+ };
+
linux,cma {
compatible = "shared-dma-pool";
reusable;
diff --git a/arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts b/arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts
index d8838dde0f0f..4fb31c2ba31c 100644
--- a/arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts
@@ -157,14 +157,6 @@
regulator-always-on;
};
- reserved-memory {
- /* TEE Reserved Memory */
- bl32_reserved: bl32@5000000 {
- reg = <0x0 0x05300000 0x0 0x2000000>;
- no-map;
- };
- };
-
sdio_pwrseq: sdio-pwrseq {
compatible = "mmc-pwrseq-simple";
reset-gpios = <&gpio GPIOX_6 GPIO_ACTIVE_LOW>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2.dtsi
index 3e968b244191..fd3fa82e4c33 100644
--- a/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2.dtsi
@@ -17,7 +17,7 @@
rtc1 = &vrtc;
};
- dioo2133: audio-amplifier-0 {
+ dio2133: audio-amplifier-0 {
compatible = "simple-audio-amplifier";
enable-gpios = <&gpio_ao GPIOAO_2 GPIO_ACTIVE_HIGH>;
VCC-supply = <&vcc_5v>;
@@ -219,7 +219,7 @@
audio-widgets = "Line", "Lineout";
audio-aux-devs = <&tdmout_b>, <&tdmout_c>, <&tdmin_a>,
<&tdmin_b>, <&tdmin_c>, <&tdmin_lb>,
- <&dioo2133>;
+ <&dio2133>;
audio-routing = "TDMOUT_B IN 0", "FRDDR_A OUT 1",
"TDMOUT_B IN 1", "FRDDR_B OUT 1",
"TDMOUT_B IN 2", "FRDDR_C OUT 1",
diff --git a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi
index 6b457b2c30a4..aa14ea017a61 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi
@@ -49,6 +49,12 @@
no-map;
};
+ /* 32 MiB reserved for ARM Trusted Firmware (BL32) */
+ secmon_reserved_bl32: secmon@5300000 {
+ reg = <0x0 0x05300000 0x0 0x2000000>;
+ no-map;
+ };
+
linux,cma {
compatible = "shared-dma-pool";
reusable;
diff --git a/arch/arm64/boot/dts/amlogic/meson-sm1-bananapi-m5.dts b/arch/arm64/boot/dts/amlogic/meson-sm1-bananapi-m5.dts
index 212c6aa5a3b8..5751c48620ed 100644
--- a/arch/arm64/boot/dts/amlogic/meson-sm1-bananapi-m5.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-sm1-bananapi-m5.dts
@@ -123,7 +123,7 @@
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <3300000>;
- enable-gpio = <&gpio GPIOE_2 GPIO_ACTIVE_HIGH>;
+ enable-gpio = <&gpio_ao GPIOE_2 GPIO_ACTIVE_HIGH>;
enable-active-high;
regulator-always-on;
diff --git a/arch/arm64/boot/dts/amlogic/meson-sm1-odroid.dtsi b/arch/arm64/boot/dts/amlogic/meson-sm1-odroid.dtsi
index 0bd1e98a0eef..ddb1b345397f 100644
--- a/arch/arm64/boot/dts/amlogic/meson-sm1-odroid.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-sm1-odroid.dtsi
@@ -48,7 +48,7 @@
regulator-max-microvolt = <3300000>;
vin-supply = <&vcc_5v>;
- enable-gpio = <&gpio GPIOE_2 GPIO_ACTIVE_HIGH>;
+ enable-gpio = <&gpio_ao GPIOE_2 GPIO_OPEN_DRAIN>;
enable-active-high;
regulator-always-on;
diff --git a/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts b/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts
index 427475846fc7..a5d79f2f7c19 100644
--- a/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts
+++ b/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts
@@ -203,14 +203,6 @@
regulator-always-on;
};
- reserved-memory {
- /* TEE Reserved Memory */
- bl32_reserved: bl32@5000000 {
- reg = <0x0 0x05300000 0x0 0x2000000>;
- no-map;
- };
- };
-
sdio_pwrseq: sdio-pwrseq {
compatible = "mmc-pwrseq-simple";
reset-gpios = <&gpio GPIOX_6 GPIO_ACTIVE_LOW>;
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts
index d74e738e4070..c03f4e183389 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts
@@ -157,6 +157,10 @@
};
};
+&ftm_alarm0 {
+ status = "okay";
+};
+
&gpio1 {
gpio-line-names =
"", "", "", "", "", "", "", "",
diff --git a/arch/arm64/boot/dts/freescale/imx8mq-librem5.dtsi b/arch/arm64/boot/dts/freescale/imx8mq-librem5.dtsi
index f3e3418f7edc..2d4a472af6a9 100644
--- a/arch/arm64/boot/dts/freescale/imx8mq-librem5.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mq-librem5.dtsi
@@ -1115,8 +1115,8 @@
status = "okay";
ports {
- port@1 {
- reg = <1>;
+ port@0 {
+ reg = <0>;
mipi1_sensor_ep: endpoint {
remote-endpoint = <&camera1_ep>;
diff --git a/arch/arm64/boot/dts/freescale/imx8mq.dtsi b/arch/arm64/boot/dts/freescale/imx8mq.dtsi
index 2df2510d0118..e92ebb6147e6 100644
--- a/arch/arm64/boot/dts/freescale/imx8mq.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mq.dtsi
@@ -554,7 +554,7 @@
assigned-clock-rates = <0>, <0>, <0>, <594000000>;
status = "disabled";
- port@0 {
+ port {
lcdif_mipi_dsi: endpoint {
remote-endpoint = <&mipi_dsi_lcdif_in>;
};
@@ -1151,8 +1151,8 @@
#address-cells = <1>;
#size-cells = <0>;
- port@0 {
- reg = <0>;
+ port@1 {
+ reg = <1>;
csi1_mipi_ep: endpoint {
remote-endpoint = <&csi1_ep>;
@@ -1203,8 +1203,8 @@
#address-cells = <1>;
#size-cells = <0>;
- port@0 {
- reg = <0>;
+ port@1 {
+ reg = <1>;
csi2_mipi_ep: endpoint {
remote-endpoint = <&csi2_ep>;
diff --git a/arch/arm64/boot/dts/freescale/mba8mx.dtsi b/arch/arm64/boot/dts/freescale/mba8mx.dtsi
index f27e3c8de916..ce6d5bdba0a8 100644
--- a/arch/arm64/boot/dts/freescale/mba8mx.dtsi
+++ b/arch/arm64/boot/dts/freescale/mba8mx.dtsi
@@ -91,7 +91,7 @@
sound {
compatible = "fsl,imx-audio-tlv320aic32x4";
- model = "tqm-tlv320aic32";
+ model = "imx-audio-tlv320aic32x4";
ssi-controller = <&sai3>;
audio-codec = <&tlv320aic3x04>;
};
diff --git a/arch/arm64/boot/dts/ti/k3-j721s2-common-proc-board.dts b/arch/arm64/boot/dts/ti/k3-j721s2-common-proc-board.dts
index a5a24f9f46c5..b210cc07c539 100644
--- a/arch/arm64/boot/dts/ti/k3-j721s2-common-proc-board.dts
+++ b/arch/arm64/boot/dts/ti/k3-j721s2-common-proc-board.dts
@@ -15,8 +15,18 @@
model = "Texas Instruments J721S2 EVM";
chosen {
- stdout-path = "serial10:115200n8";
- bootargs = "console=ttyS10,115200n8 earlycon=ns16550a,mmio32,2880000";
+ stdout-path = "serial2:115200n8";
+ bootargs = "console=ttyS2,115200n8 earlycon=ns16550a,mmio32,2880000";
+ };
+
+ aliases {
+ serial1 = &mcu_uart0;
+ serial2 = &main_uart8;
+ mmc0 = &main_sdhci0;
+ mmc1 = &main_sdhci1;
+ can0 = &main_mcan16;
+ can1 = &mcu_mcan0;
+ can2 = &mcu_mcan1;
};
evm_12v0: fixedregulator-evm12v0 {
diff --git a/arch/arm64/boot/dts/ti/k3-j721s2.dtsi b/arch/arm64/boot/dts/ti/k3-j721s2.dtsi
index 80d3cae03e88..fe5234c40f6c 100644
--- a/arch/arm64/boot/dts/ti/k3-j721s2.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j721s2.dtsi
@@ -21,28 +21,6 @@
#address-cells = <2>;
#size-cells = <2>;
- aliases {
- serial0 = &wkup_uart0;
- serial1 = &mcu_uart0;
- serial2 = &main_uart0;
- serial3 = &main_uart1;
- serial4 = &main_uart2;
- serial5 = &main_uart3;
- serial6 = &main_uart4;
- serial7 = &main_uart5;
- serial8 = &main_uart6;
- serial9 = &main_uart7;
- serial10 = &main_uart8;
- serial11 = &main_uart9;
- mmc0 = &main_sdhci0;
- mmc1 = &main_sdhci1;
- can0 = &main_mcan16;
- can1 = &mcu_mcan0;
- can2 = &mcu_mcan1;
- can3 = &main_mcan3;
- can4 = &main_mcan5;
- };
-
chosen { };
cpus {
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 19b8441aa8f2..999b9149f856 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -73,7 +73,9 @@
#define ARM_CPU_PART_CORTEX_A76 0xD0B
#define ARM_CPU_PART_NEOVERSE_N1 0xD0C
#define ARM_CPU_PART_CORTEX_A77 0xD0D
+#define ARM_CPU_PART_CORTEX_A510 0xD46
#define ARM_CPU_PART_CORTEX_A710 0xD47
+#define ARM_CPU_PART_CORTEX_X2 0xD48
#define ARM_CPU_PART_NEOVERSE_N2 0xD49
#define APM_CPU_PART_POTENZA 0x000
@@ -115,7 +117,9 @@
#define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76)
#define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1)
#define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77)
+#define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510)
#define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710)
+#define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2)
#define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2)
#define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
#define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index 3198acb2aad8..7f3c87f7a0ce 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -106,7 +106,7 @@
msr_s SYS_ICC_SRE_EL2, x0
isb // Make sure SRE is now set
mrs_s x0, SYS_ICC_SRE_EL2 // Read SRE back,
- tbz x0, #0, 1f // and check that it sticks
+ tbz x0, #0, .Lskip_gicv3_\@ // and check that it sticks
msr_s SYS_ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults
.Lskip_gicv3_\@:
.endm
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 9e1c1aef9ebd..b217941713a8 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -347,6 +347,7 @@ static const struct midr_range trbe_overwrite_fill_mode_cpus[] = {
#endif
#ifdef CONFIG_ARM64_ERRATUM_2119858
MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
+ MIDR_RANGE(MIDR_CORTEX_X2, 0, 0, 2, 0),
#endif
{},
};
@@ -371,6 +372,7 @@ static struct midr_range trbe_write_out_of_range_cpus[] = {
#endif
#ifdef CONFIG_ARM64_ERRATUM_2224489
MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
+ MIDR_RANGE(MIDR_CORTEX_X2, 0, 0, 2, 0),
#endif
{},
};
@@ -598,6 +600,41 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
CAP_MIDR_RANGE_LIST(trbe_write_out_of_range_cpus),
},
#endif
+#ifdef CONFIG_ARM64_ERRATUM_2077057
+ {
+ .desc = "ARM erratum 2077057",
+ .capability = ARM64_WORKAROUND_2077057,
+ .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+ ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 2),
+ },
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_2064142
+ {
+ .desc = "ARM erratum 2064142",
+ .capability = ARM64_WORKAROUND_2064142,
+
+ /* Cortex-A510 r0p0 - r0p2 */
+ ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 2)
+ },
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_2038923
+ {
+ .desc = "ARM erratum 2038923",
+ .capability = ARM64_WORKAROUND_2038923,
+
+ /* Cortex-A510 r0p0 - r0p2 */
+ ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 2)
+ },
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_1902691
+ {
+ .desc = "ARM erratum 1902691",
+ .capability = ARM64_WORKAROUND_1902691,
+
+ /* Cortex-A510 r0p0 - r0p1 */
+ ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 1)
+ },
+#endif
{
}
};
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index a46ab3b1c4d5..e5f23dab1c8d 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1646,6 +1646,9 @@ static bool cpu_has_broken_dbm(void)
/* Kryo4xx Silver (rdpe => r1p0) */
MIDR_REV(MIDR_QCOM_KRYO_4XX_SILVER, 0xd, 0xe),
#endif
+#ifdef CONFIG_ARM64_ERRATUM_2051678
+ MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 2),
+#endif
{},
};
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 0fb58fed54cb..e4103e085681 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -33,8 +33,8 @@
*/
-static void start_backtrace(struct stackframe *frame, unsigned long fp,
- unsigned long pc)
+static notrace void start_backtrace(struct stackframe *frame, unsigned long fp,
+ unsigned long pc)
{
frame->fp = fp;
frame->pc = pc;
@@ -55,6 +55,7 @@ static void start_backtrace(struct stackframe *frame, unsigned long fp,
frame->prev_fp = 0;
frame->prev_type = STACK_TYPE_UNKNOWN;
}
+NOKPROBE_SYMBOL(start_backtrace);
/*
* Unwind from one frame record (A) to the next frame record (B).
diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index 60813497a381..172452f79e46 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -29,8 +29,11 @@ ldflags-y := -shared -soname=linux-vdso.so.1 --hash-style=sysv \
ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18
ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO
+# -Wmissing-prototypes and -Wmissing-declarations are removed from
+# the CFLAGS of vgettimeofday.c to make possible to build the
+# kernel with CONFIG_WERROR enabled.
CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) $(GCC_PLUGINS_CFLAGS) \
- $(CC_FLAGS_LTO)
+ $(CC_FLAGS_LTO) -Wmissing-prototypes -Wmissing-declarations
KASAN_SANITIZE := n
KCSAN_SANITIZE := n
UBSAN_SANITIZE := n
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index a4a0063df456..ecc5958e27fe 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -797,6 +797,24 @@ static bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu, int *ret)
xfer_to_guest_mode_work_pending();
}
+/*
+ * Actually run the vCPU, entering an RCU extended quiescent state (EQS) while
+ * the vCPU is running.
+ *
+ * This must be noinstr as instrumentation may make use of RCU, and this is not
+ * safe during the EQS.
+ */
+static int noinstr kvm_arm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
+{
+ int ret;
+
+ guest_state_enter_irqoff();
+ ret = kvm_call_hyp_ret(__kvm_vcpu_run, vcpu);
+ guest_state_exit_irqoff();
+
+ return ret;
+}
+
/**
* kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code
* @vcpu: The VCPU pointer
@@ -881,9 +899,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
* Enter the guest
*/
trace_kvm_entry(*vcpu_pc(vcpu));
- guest_enter_irqoff();
+ guest_timing_enter_irqoff();
- ret = kvm_call_hyp_ret(__kvm_vcpu_run, vcpu);
+ ret = kvm_arm_vcpu_enter_exit(vcpu);
vcpu->mode = OUTSIDE_GUEST_MODE;
vcpu->stat.exits++;
@@ -918,26 +936,23 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
kvm_arch_vcpu_ctxsync_fp(vcpu);
/*
- * We may have taken a host interrupt in HYP mode (ie
- * while executing the guest). This interrupt is still
- * pending, as we haven't serviced it yet!
+ * We must ensure that any pending interrupts are taken before
+ * we exit guest timing so that timer ticks are accounted as
+ * guest time. Transiently unmask interrupts so that any
+ * pending interrupts are taken.
*
- * We're now back in SVC mode, with interrupts
- * disabled. Enabling the interrupts now will have
- * the effect of taking the interrupt again, in SVC
- * mode this time.
+ * Per ARM DDI 0487G.b section D1.13.4, an ISB (or other
+ * context synchronization event) is necessary to ensure that
+ * pending interrupts are taken.
*/
local_irq_enable();
+ isb();
+ local_irq_disable();
+
+ guest_timing_exit_irqoff();
+
+ local_irq_enable();
- /*
- * We do local_irq_enable() before calling guest_exit() so
- * that if a timer interrupt hits while running the guest we
- * account that tick as being spent in the guest. We enable
- * preemption after calling guest_exit() so that if we get
- * preempted we make sure ticks after that is not counted as
- * guest time.
- */
- guest_exit();
trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
/* Exit types that need handling before we can be preempted */
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index fd2dd26caf91..e3140abd2e2e 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -228,6 +228,14 @@ int handle_exit(struct kvm_vcpu *vcpu, int exception_index)
{
struct kvm_run *run = vcpu->run;
+ if (ARM_SERROR_PENDING(exception_index)) {
+ /*
+ * The SError is handled by handle_exit_early(). If the guest
+ * survives it will re-execute the original instruction.
+ */
+ return 1;
+ }
+
exception_index = ARM_EXCEPTION_CODE(exception_index);
switch (exception_index) {
diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c
index 0418399e0a20..c5d009715402 100644
--- a/arch/arm64/kvm/hyp/exception.c
+++ b/arch/arm64/kvm/hyp/exception.c
@@ -38,7 +38,10 @@ static inline void __vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, u64 val)
{
- write_sysreg_el1(val, SYS_SPSR);
+ if (has_vhe())
+ write_sysreg_el1(val, SYS_SPSR);
+ else
+ __vcpu_sys_reg(vcpu, SPSR_EL1) = val;
}
static void __vcpu_write_spsr_abt(struct kvm_vcpu *vcpu, u64 val)
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 58e14f8ead23..701cfb964905 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -402,6 +402,24 @@ static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
return false;
}
+static inline void synchronize_vcpu_pstate(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ /*
+ * Check for the conditions of Cortex-A510's #2077057. When these occur
+ * SPSR_EL2 can't be trusted, but isn't needed either as it is
+ * unchanged from the value in vcpu_gp_regs(vcpu)->pstate.
+ * Are we single-stepping the guest, and took a PAC exception from the
+ * active-not-pending state?
+ */
+ if (cpus_have_final_cap(ARM64_WORKAROUND_2077057) &&
+ vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
+ *vcpu_cpsr(vcpu) & DBG_SPSR_SS &&
+ ESR_ELx_EC(read_sysreg_el2(SYS_ESR)) == ESR_ELx_EC_PAC)
+ write_sysreg_el2(*vcpu_cpsr(vcpu), SYS_SPSR);
+
+ vcpu->arch.ctxt.regs.pstate = read_sysreg_el2(SYS_SPSR);
+}
+
/*
* Return true when we were able to fixup the guest exit and should return to
* the guest, false when we should restore the host state and return to the
@@ -413,7 +431,7 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
* Save PSTATE early so that we can evaluate the vcpu mode
* early on.
*/
- vcpu->arch.ctxt.regs.pstate = read_sysreg_el2(SYS_SPSR);
+ synchronize_vcpu_pstate(vcpu, exit_code);
/*
* Check whether we want to repaint the state one way or
@@ -424,7 +442,8 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR);
- if (ARM_SERROR_PENDING(*exit_code)) {
+ if (ARM_SERROR_PENDING(*exit_code) &&
+ ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ) {
u8 esr_ec = kvm_vcpu_trap_get_class(vcpu);
/*
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 844a6f003fd5..2cb3867eb7c2 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -983,13 +983,9 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
*/
stage2_put_pte(ptep, mmu, addr, level, mm_ops);
- if (need_flush) {
- kvm_pte_t *pte_follow = kvm_pte_follow(pte, mm_ops);
-
- dcache_clean_inval_poc((unsigned long)pte_follow,
- (unsigned long)pte_follow +
- kvm_granule_size(level));
- }
+ if (need_flush && mm_ops->dcache_clean_inval_poc)
+ mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops),
+ kvm_granule_size(level));
if (childp)
mm_ops->put_page(childp);
@@ -1151,15 +1147,13 @@ static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
struct kvm_pgtable *pgt = arg;
struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
kvm_pte_t pte = *ptep;
- kvm_pte_t *pte_follow;
if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pgt, pte))
return 0;
- pte_follow = kvm_pte_follow(pte, mm_ops);
- dcache_clean_inval_poc((unsigned long)pte_follow,
- (unsigned long)pte_follow +
- kvm_granule_size(level));
+ if (mm_ops->dcache_clean_inval_poc)
+ mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops),
+ kvm_granule_size(level));
return 0;
}
diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c
index 20db2f281cf2..4fb419f7b8b6 100644
--- a/arch/arm64/kvm/hyp/vgic-v3-sr.c
+++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c
@@ -983,6 +983,9 @@ static void __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
val = ((vtr >> 29) & 7) << ICC_CTLR_EL1_PRI_BITS_SHIFT;
/* IDbits */
val |= ((vtr >> 23) & 7) << ICC_CTLR_EL1_ID_BITS_SHIFT;
+ /* SEIS */
+ if (kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK)
+ val |= BIT(ICC_CTLR_EL1_SEIS_SHIFT);
/* A3V */
val |= ((vtr >> 21) & 1) << ICC_CTLR_EL1_A3V_SHIFT;
/* EOImode */
diff --git a/arch/arm64/kvm/vgic/vgic-mmio.c b/arch/arm64/kvm/vgic/vgic-mmio.c
index 7068da080799..49837d3a3ef5 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio.c
@@ -248,6 +248,8 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
IRQCHIP_STATE_PENDING,
&val);
WARN_RATELIMIT(err, "IRQ %d", irq->host_irq);
+ } else if (vgic_irq_is_mapped_level(irq)) {
+ val = vgic_get_phys_line_level(irq);
} else {
val = irq_is_pending(irq);
}
diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
index a33d4366b326..b549af8b1dc2 100644
--- a/arch/arm64/kvm/vgic/vgic-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-v3.c
@@ -609,6 +609,18 @@ static int __init early_gicv4_enable(char *buf)
}
early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable);
+static const struct midr_range broken_seis[] = {
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM),
+ {},
+};
+
+static bool vgic_v3_broken_seis(void)
+{
+ return ((kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK) &&
+ is_midr_in_range_list(read_cpuid_id(), broken_seis));
+}
+
/**
* vgic_v3_probe - probe for a VGICv3 compatible interrupt controller
* @info: pointer to the GIC description
@@ -676,9 +688,10 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
group1_trap = true;
}
- if (kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK) {
- kvm_info("GICv3 with locally generated SEI\n");
+ if (vgic_v3_broken_seis()) {
+ kvm_info("GICv3 with broken locally generated SEI\n");
+ kvm_vgic_global_state.ich_vtr_el2 &= ~ICH_VTR_SEIS_MASK;
group0_trap = true;
group1_trap = true;
if (ich_vtr_el2 & ICH_VTR_TDS_MASK)
diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c
index c0181e60cc98..489455309695 100644
--- a/arch/arm64/mm/extable.c
+++ b/arch/arm64/mm/extable.c
@@ -40,8 +40,8 @@ static bool
ex_handler_load_unaligned_zeropad(const struct exception_table_entry *ex,
struct pt_regs *regs)
{
- int reg_data = FIELD_GET(EX_DATA_REG_DATA, ex->type);
- int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->type);
+ int reg_data = FIELD_GET(EX_DATA_REG_DATA, ex->data);
+ int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->data);
unsigned long data, addr, offset;
addr = pt_regs_read_reg(regs, reg_addr);
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index 870c39537dd0..9c65b1e25a96 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -55,6 +55,10 @@ WORKAROUND_1418040
WORKAROUND_1463225
WORKAROUND_1508412
WORKAROUND_1542419
+WORKAROUND_1902691
+WORKAROUND_2038923
+WORKAROUND_2064142
+WORKAROUND_2077057
WORKAROUND_TRBE_OVERWRITE_FILL_MODE
WORKAROUND_TSB_FLUSH_FAILURE
WORKAROUND_TRBE_WRITE_OUT_OF_RANGE
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 703952819e10..a7e01573abd8 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -318,7 +318,7 @@ config ARCH_PROC_KCORE_TEXT
depends on PROC_KCORE
config IA64_MCA_RECOVERY
- tristate "MCA recovery from errors other than TLB."
+ bool "MCA recovery from errors other than TLB."
config IA64_PALINFO
tristate "/proc/pal support"
diff --git a/arch/ia64/pci/fixup.c b/arch/ia64/pci/fixup.c
index acb55a41260d..2bcdd7d3a1ad 100644
--- a/arch/ia64/pci/fixup.c
+++ b/arch/ia64/pci/fixup.c
@@ -76,5 +76,5 @@ static void pci_fixup_video(struct pci_dev *pdev)
}
}
}
-DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_ANY_ID, PCI_ANY_ID,
- PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_video);
+DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_ANY_ID, PCI_ANY_ID,
+ PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_video);
diff --git a/arch/mips/boot/dts/ingenic/ci20.dts b/arch/mips/boot/dts/ingenic/ci20.dts
index 3e336b3dbb10..ab6e3dc0bc1d 100644
--- a/arch/mips/boot/dts/ingenic/ci20.dts
+++ b/arch/mips/boot/dts/ingenic/ci20.dts
@@ -83,6 +83,8 @@
label = "HDMI OUT";
type = "a";
+ ddc-en-gpios = <&gpa 25 GPIO_ACTIVE_HIGH>;
+
port {
hdmi_con: endpoint {
remote-endpoint = <&dw_hdmi_out>;
@@ -114,17 +116,6 @@
gpio = <&gpf 14 GPIO_ACTIVE_LOW>;
enable-active-high;
};
-
- hdmi_power: fixedregulator@3 {
- compatible = "regulator-fixed";
-
- regulator-name = "hdmi_power";
- regulator-min-microvolt = <5000000>;
- regulator-max-microvolt = <5000000>;
-
- gpio = <&gpa 25 0>;
- enable-active-high;
- };
};
&ext {
@@ -576,8 +567,6 @@
pinctrl-names = "default";
pinctrl-0 = <&pins_hdmi_ddc>;
- hdmi-5v-supply = <&hdmi_power>;
-
ports {
#address-cells = <1>;
#size-cells = <0>;
diff --git a/arch/mips/cavium-octeon/octeon-memcpy.S b/arch/mips/cavium-octeon/octeon-memcpy.S
index 0a515cde1c18..25860fba6218 100644
--- a/arch/mips/cavium-octeon/octeon-memcpy.S
+++ b/arch/mips/cavium-octeon/octeon-memcpy.S
@@ -74,7 +74,7 @@
#define EXC(inst_reg,addr,handler) \
9: inst_reg, addr; \
.section __ex_table,"a"; \
- PTR 9b, handler; \
+ PTR_WD 9b, handler; \
.previous
/*
diff --git a/arch/mips/include/asm/asm.h b/arch/mips/include/asm/asm.h
index 6ffdd4b5e1d0..336ac9b65235 100644
--- a/arch/mips/include/asm/asm.h
+++ b/arch/mips/include/asm/asm.h
@@ -285,7 +285,7 @@ symbol = value
#define PTR_SCALESHIFT 2
-#define PTR .word
+#define PTR_WD .word
#define PTRSIZE 4
#define PTRLOG 2
#endif
@@ -310,7 +310,7 @@ symbol = value
#define PTR_SCALESHIFT 3
-#define PTR .dword
+#define PTR_WD .dword
#define PTRSIZE 8
#define PTRLOG 3
#endif
diff --git a/arch/mips/include/asm/ftrace.h b/arch/mips/include/asm/ftrace.h
index b463f2aa5a61..db497a8167da 100644
--- a/arch/mips/include/asm/ftrace.h
+++ b/arch/mips/include/asm/ftrace.h
@@ -32,7 +32,7 @@ do { \
".previous\n" \
\
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR) "\t1b, 3b\n\t" \
+ STR(PTR_WD) "\t1b, 3b\n\t" \
".previous\n" \
\
: [tmp_dst] "=&r" (dst), [tmp_err] "=r" (error)\
@@ -54,7 +54,7 @@ do { \
".previous\n" \
\
".section\t__ex_table,\"a\"\n\t"\
- STR(PTR) "\t1b, 3b\n\t" \
+ STR(PTR_WD) "\t1b, 3b\n\t" \
".previous\n" \
\
: [tmp_err] "=r" (error) \
diff --git a/arch/mips/include/asm/r4kcache.h b/arch/mips/include/asm/r4kcache.h
index af3788589ee6..431a1c9d53fc 100644
--- a/arch/mips/include/asm/r4kcache.h
+++ b/arch/mips/include/asm/r4kcache.h
@@ -119,7 +119,7 @@ static inline void flush_scache_line(unsigned long addr)
" j 2b \n" \
" .previous \n" \
" .section __ex_table,\"a\" \n" \
- " "STR(PTR)" 1b, 3b \n" \
+ " "STR(PTR_WD)" 1b, 3b \n" \
" .previous" \
: "+r" (__err) \
: "i" (op), "r" (addr), "i" (-EFAULT)); \
@@ -142,7 +142,7 @@ static inline void flush_scache_line(unsigned long addr)
" j 2b \n" \
" .previous \n" \
" .section __ex_table,\"a\" \n" \
- " "STR(PTR)" 1b, 3b \n" \
+ " "STR(PTR_WD)" 1b, 3b \n" \
" .previous" \
: "+r" (__err) \
: "i" (op), "r" (addr), "i" (-EFAULT)); \
diff --git a/arch/mips/include/asm/unaligned-emul.h b/arch/mips/include/asm/unaligned-emul.h
index 2022b18944b9..9af0f4d3d288 100644
--- a/arch/mips/include/asm/unaligned-emul.h
+++ b/arch/mips/include/asm/unaligned-emul.h
@@ -20,8 +20,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -41,8 +41,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -74,10 +74,10 @@ do { \
"j\t10b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 11b\n\t" \
- STR(PTR)"\t2b, 11b\n\t" \
- STR(PTR)"\t3b, 11b\n\t" \
- STR(PTR)"\t4b, 11b\n\t" \
+ STR(PTR_WD)"\t1b, 11b\n\t" \
+ STR(PTR_WD)"\t2b, 11b\n\t" \
+ STR(PTR_WD)"\t3b, 11b\n\t" \
+ STR(PTR_WD)"\t4b, 11b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -102,8 +102,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -125,8 +125,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -145,8 +145,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -178,10 +178,10 @@ do { \
"j\t10b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 11b\n\t" \
- STR(PTR)"\t2b, 11b\n\t" \
- STR(PTR)"\t3b, 11b\n\t" \
- STR(PTR)"\t4b, 11b\n\t" \
+ STR(PTR_WD)"\t1b, 11b\n\t" \
+ STR(PTR_WD)"\t2b, 11b\n\t" \
+ STR(PTR_WD)"\t3b, 11b\n\t" \
+ STR(PTR_WD)"\t4b, 11b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -223,14 +223,14 @@ do { \
"j\t10b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 11b\n\t" \
- STR(PTR)"\t2b, 11b\n\t" \
- STR(PTR)"\t3b, 11b\n\t" \
- STR(PTR)"\t4b, 11b\n\t" \
- STR(PTR)"\t5b, 11b\n\t" \
- STR(PTR)"\t6b, 11b\n\t" \
- STR(PTR)"\t7b, 11b\n\t" \
- STR(PTR)"\t8b, 11b\n\t" \
+ STR(PTR_WD)"\t1b, 11b\n\t" \
+ STR(PTR_WD)"\t2b, 11b\n\t" \
+ STR(PTR_WD)"\t3b, 11b\n\t" \
+ STR(PTR_WD)"\t4b, 11b\n\t" \
+ STR(PTR_WD)"\t5b, 11b\n\t" \
+ STR(PTR_WD)"\t6b, 11b\n\t" \
+ STR(PTR_WD)"\t7b, 11b\n\t" \
+ STR(PTR_WD)"\t8b, 11b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -255,8 +255,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=r" (res) \
: "r" (value), "r" (addr), "i" (-EFAULT));\
@@ -276,8 +276,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=r" (res) \
: "r" (value), "r" (addr), "i" (-EFAULT)); \
@@ -296,8 +296,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=r" (res) \
: "r" (value), "r" (addr), "i" (-EFAULT)); \
@@ -325,10 +325,10 @@ do { \
"j\t10b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 11b\n\t" \
- STR(PTR)"\t2b, 11b\n\t" \
- STR(PTR)"\t3b, 11b\n\t" \
- STR(PTR)"\t4b, 11b\n\t" \
+ STR(PTR_WD)"\t1b, 11b\n\t" \
+ STR(PTR_WD)"\t2b, 11b\n\t" \
+ STR(PTR_WD)"\t3b, 11b\n\t" \
+ STR(PTR_WD)"\t4b, 11b\n\t" \
".previous" \
: "=&r" (res) \
: "r" (value), "r" (addr), "i" (-EFAULT) \
@@ -365,14 +365,14 @@ do { \
"j\t10b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 11b\n\t" \
- STR(PTR)"\t2b, 11b\n\t" \
- STR(PTR)"\t3b, 11b\n\t" \
- STR(PTR)"\t4b, 11b\n\t" \
- STR(PTR)"\t5b, 11b\n\t" \
- STR(PTR)"\t6b, 11b\n\t" \
- STR(PTR)"\t7b, 11b\n\t" \
- STR(PTR)"\t8b, 11b\n\t" \
+ STR(PTR_WD)"\t1b, 11b\n\t" \
+ STR(PTR_WD)"\t2b, 11b\n\t" \
+ STR(PTR_WD)"\t3b, 11b\n\t" \
+ STR(PTR_WD)"\t4b, 11b\n\t" \
+ STR(PTR_WD)"\t5b, 11b\n\t" \
+ STR(PTR_WD)"\t6b, 11b\n\t" \
+ STR(PTR_WD)"\t7b, 11b\n\t" \
+ STR(PTR_WD)"\t8b, 11b\n\t" \
".previous" \
: "=&r" (res) \
: "r" (value), "r" (addr), "i" (-EFAULT) \
@@ -398,8 +398,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -419,8 +419,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -452,10 +452,10 @@ do { \
"j\t10b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 11b\n\t" \
- STR(PTR)"\t2b, 11b\n\t" \
- STR(PTR)"\t3b, 11b\n\t" \
- STR(PTR)"\t4b, 11b\n\t" \
+ STR(PTR_WD)"\t1b, 11b\n\t" \
+ STR(PTR_WD)"\t2b, 11b\n\t" \
+ STR(PTR_WD)"\t3b, 11b\n\t" \
+ STR(PTR_WD)"\t4b, 11b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -481,8 +481,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -504,8 +504,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -524,8 +524,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -557,10 +557,10 @@ do { \
"j\t10b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 11b\n\t" \
- STR(PTR)"\t2b, 11b\n\t" \
- STR(PTR)"\t3b, 11b\n\t" \
- STR(PTR)"\t4b, 11b\n\t" \
+ STR(PTR_WD)"\t1b, 11b\n\t" \
+ STR(PTR_WD)"\t2b, 11b\n\t" \
+ STR(PTR_WD)"\t3b, 11b\n\t" \
+ STR(PTR_WD)"\t4b, 11b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -602,14 +602,14 @@ do { \
"j\t10b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 11b\n\t" \
- STR(PTR)"\t2b, 11b\n\t" \
- STR(PTR)"\t3b, 11b\n\t" \
- STR(PTR)"\t4b, 11b\n\t" \
- STR(PTR)"\t5b, 11b\n\t" \
- STR(PTR)"\t6b, 11b\n\t" \
- STR(PTR)"\t7b, 11b\n\t" \
- STR(PTR)"\t8b, 11b\n\t" \
+ STR(PTR_WD)"\t1b, 11b\n\t" \
+ STR(PTR_WD)"\t2b, 11b\n\t" \
+ STR(PTR_WD)"\t3b, 11b\n\t" \
+ STR(PTR_WD)"\t4b, 11b\n\t" \
+ STR(PTR_WD)"\t5b, 11b\n\t" \
+ STR(PTR_WD)"\t6b, 11b\n\t" \
+ STR(PTR_WD)"\t7b, 11b\n\t" \
+ STR(PTR_WD)"\t8b, 11b\n\t" \
".previous" \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
@@ -632,8 +632,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=r" (res) \
: "r" (value), "r" (addr), "i" (-EFAULT));\
@@ -653,8 +653,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=r" (res) \
: "r" (value), "r" (addr), "i" (-EFAULT)); \
@@ -673,8 +673,8 @@ do { \
"j\t3b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 4b\n\t" \
- STR(PTR)"\t2b, 4b\n\t" \
+ STR(PTR_WD)"\t1b, 4b\n\t" \
+ STR(PTR_WD)"\t2b, 4b\n\t" \
".previous" \
: "=r" (res) \
: "r" (value), "r" (addr), "i" (-EFAULT)); \
@@ -703,10 +703,10 @@ do { \
"j\t10b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 11b\n\t" \
- STR(PTR)"\t2b, 11b\n\t" \
- STR(PTR)"\t3b, 11b\n\t" \
- STR(PTR)"\t4b, 11b\n\t" \
+ STR(PTR_WD)"\t1b, 11b\n\t" \
+ STR(PTR_WD)"\t2b, 11b\n\t" \
+ STR(PTR_WD)"\t3b, 11b\n\t" \
+ STR(PTR_WD)"\t4b, 11b\n\t" \
".previous" \
: "=&r" (res) \
: "r" (value), "r" (addr), "i" (-EFAULT) \
@@ -743,14 +743,14 @@ do { \
"j\t10b\n\t" \
".previous\n\t" \
".section\t__ex_table,\"a\"\n\t" \
- STR(PTR)"\t1b, 11b\n\t" \
- STR(PTR)"\t2b, 11b\n\t" \
- STR(PTR)"\t3b, 11b\n\t" \
- STR(PTR)"\t4b, 11b\n\t" \
- STR(PTR)"\t5b, 11b\n\t" \
- STR(PTR)"\t6b, 11b\n\t" \
- STR(PTR)"\t7b, 11b\n\t" \
- STR(PTR)"\t8b, 11b\n\t" \
+ STR(PTR_WD)"\t1b, 11b\n\t" \
+ STR(PTR_WD)"\t2b, 11b\n\t" \
+ STR(PTR_WD)"\t3b, 11b\n\t" \
+ STR(PTR_WD)"\t4b, 11b\n\t" \
+ STR(PTR_WD)"\t5b, 11b\n\t" \
+ STR(PTR_WD)"\t6b, 11b\n\t" \
+ STR(PTR_WD)"\t7b, 11b\n\t" \
+ STR(PTR_WD)"\t8b, 11b\n\t" \
".previous" \
: "=&r" (res) \
: "r" (value), "r" (addr), "i" (-EFAULT) \
diff --git a/arch/mips/kernel/mips-r2-to-r6-emul.c b/arch/mips/kernel/mips-r2-to-r6-emul.c
index a39ec755e4c2..750fe569862b 100644
--- a/arch/mips/kernel/mips-r2-to-r6-emul.c
+++ b/arch/mips/kernel/mips-r2-to-r6-emul.c
@@ -1258,10 +1258,10 @@ fpu_emul:
" j 10b\n"
" .previous\n"
" .section __ex_table,\"a\"\n"
- STR(PTR) " 1b,8b\n"
- STR(PTR) " 2b,8b\n"
- STR(PTR) " 3b,8b\n"
- STR(PTR) " 4b,8b\n"
+ STR(PTR_WD) " 1b,8b\n"
+ STR(PTR_WD) " 2b,8b\n"
+ STR(PTR_WD) " 3b,8b\n"
+ STR(PTR_WD) " 4b,8b\n"
" .previous\n"
" .set pop\n"
: "+&r"(rt), "=&r"(rs),
@@ -1333,10 +1333,10 @@ fpu_emul:
" j 10b\n"
" .previous\n"
" .section __ex_table,\"a\"\n"
- STR(PTR) " 1b,8b\n"
- STR(PTR) " 2b,8b\n"
- STR(PTR) " 3b,8b\n"
- STR(PTR) " 4b,8b\n"
+ STR(PTR_WD) " 1b,8b\n"
+ STR(PTR_WD) " 2b,8b\n"
+ STR(PTR_WD) " 3b,8b\n"
+ STR(PTR_WD) " 4b,8b\n"
" .previous\n"
" .set pop\n"
: "+&r"(rt), "=&r"(rs),
@@ -1404,10 +1404,10 @@ fpu_emul:
" j 9b\n"
" .previous\n"
" .section __ex_table,\"a\"\n"
- STR(PTR) " 1b,8b\n"
- STR(PTR) " 2b,8b\n"
- STR(PTR) " 3b,8b\n"
- STR(PTR) " 4b,8b\n"
+ STR(PTR_WD) " 1b,8b\n"
+ STR(PTR_WD) " 2b,8b\n"
+ STR(PTR_WD) " 3b,8b\n"
+ STR(PTR_WD) " 4b,8b\n"
" .previous\n"
" .set pop\n"
: "+&r"(rt), "=&r"(rs),
@@ -1474,10 +1474,10 @@ fpu_emul:
" j 9b\n"
" .previous\n"
" .section __ex_table,\"a\"\n"
- STR(PTR) " 1b,8b\n"
- STR(PTR) " 2b,8b\n"
- STR(PTR) " 3b,8b\n"
- STR(PTR) " 4b,8b\n"
+ STR(PTR_WD) " 1b,8b\n"
+ STR(PTR_WD) " 2b,8b\n"
+ STR(PTR_WD) " 3b,8b\n"
+ STR(PTR_WD) " 4b,8b\n"
" .previous\n"
" .set pop\n"
: "+&r"(rt), "=&r"(rs),
@@ -1589,14 +1589,14 @@ fpu_emul:
" j 9b\n"
" .previous\n"
" .section __ex_table,\"a\"\n"
- STR(PTR) " 1b,8b\n"
- STR(PTR) " 2b,8b\n"
- STR(PTR) " 3b,8b\n"
- STR(PTR) " 4b,8b\n"
- STR(PTR) " 5b,8b\n"
- STR(PTR) " 6b,8b\n"
- STR(PTR) " 7b,8b\n"
- STR(PTR) " 0b,8b\n"
+ STR(PTR_WD) " 1b,8b\n"
+ STR(PTR_WD) " 2b,8b\n"
+ STR(PTR_WD) " 3b,8b\n"
+ STR(PTR_WD) " 4b,8b\n"
+ STR(PTR_WD) " 5b,8b\n"
+ STR(PTR_WD) " 6b,8b\n"
+ STR(PTR_WD) " 7b,8b\n"
+ STR(PTR_WD) " 0b,8b\n"
" .previous\n"
" .set pop\n"
: "+&r"(rt), "=&r"(rs),
@@ -1708,14 +1708,14 @@ fpu_emul:
" j 9b\n"
" .previous\n"
" .section __ex_table,\"a\"\n"
- STR(PTR) " 1b,8b\n"
- STR(PTR) " 2b,8b\n"
- STR(PTR) " 3b,8b\n"
- STR(PTR) " 4b,8b\n"
- STR(PTR) " 5b,8b\n"
- STR(PTR) " 6b,8b\n"
- STR(PTR) " 7b,8b\n"
- STR(PTR) " 0b,8b\n"
+ STR(PTR_WD) " 1b,8b\n"
+ STR(PTR_WD) " 2b,8b\n"
+ STR(PTR_WD) " 3b,8b\n"
+ STR(PTR_WD) " 4b,8b\n"
+ STR(PTR_WD) " 5b,8b\n"
+ STR(PTR_WD) " 6b,8b\n"
+ STR(PTR_WD) " 7b,8b\n"
+ STR(PTR_WD) " 0b,8b\n"
" .previous\n"
" .set pop\n"
: "+&r"(rt), "=&r"(rs),
@@ -1827,14 +1827,14 @@ fpu_emul:
" j 9b\n"
" .previous\n"
" .section __ex_table,\"a\"\n"
- STR(PTR) " 1b,8b\n"
- STR(PTR) " 2b,8b\n"
- STR(PTR) " 3b,8b\n"
- STR(PTR) " 4b,8b\n"
- STR(PTR) " 5b,8b\n"
- STR(PTR) " 6b,8b\n"
- STR(PTR) " 7b,8b\n"
- STR(PTR) " 0b,8b\n"
+ STR(PTR_WD) " 1b,8b\n"
+ STR(PTR_WD) " 2b,8b\n"
+ STR(PTR_WD) " 3b,8b\n"
+ STR(PTR_WD) " 4b,8b\n"
+ STR(PTR_WD) " 5b,8b\n"
+ STR(PTR_WD) " 6b,8b\n"
+ STR(PTR_WD) " 7b,8b\n"
+ STR(PTR_WD) " 0b,8b\n"
" .previous\n"
" .set pop\n"
: "+&r"(rt), "=&r"(rs),
@@ -1945,14 +1945,14 @@ fpu_emul:
" j 9b\n"
" .previous\n"
" .section __ex_table,\"a\"\n"
- STR(PTR) " 1b,8b\n"
- STR(PTR) " 2b,8b\n"
- STR(PTR) " 3b,8b\n"
- STR(PTR) " 4b,8b\n"
- STR(PTR) " 5b,8b\n"
- STR(PTR) " 6b,8b\n"
- STR(PTR) " 7b,8b\n"
- STR(PTR) " 0b,8b\n"
+ STR(PTR_WD) " 1b,8b\n"
+ STR(PTR_WD) " 2b,8b\n"
+ STR(PTR_WD) " 3b,8b\n"
+ STR(PTR_WD) " 4b,8b\n"
+ STR(PTR_WD) " 5b,8b\n"
+ STR(PTR_WD) " 6b,8b\n"
+ STR(PTR_WD) " 7b,8b\n"
+ STR(PTR_WD) " 0b,8b\n"
" .previous\n"
" .set pop\n"
: "+&r"(rt), "=&r"(rs),
@@ -2007,7 +2007,7 @@ fpu_emul:
"j 2b\n"
".previous\n"
".section __ex_table,\"a\"\n"
- STR(PTR) " 1b,3b\n"
+ STR(PTR_WD) " 1b,3b\n"
".previous\n"
: "=&r"(res), "+&r"(err)
: "r"(vaddr), "i"(SIGSEGV)
@@ -2065,7 +2065,7 @@ fpu_emul:
"j 2b\n"
".previous\n"
".section __ex_table,\"a\"\n"
- STR(PTR) " 1b,3b\n"
+ STR(PTR_WD) " 1b,3b\n"
".previous\n"
: "+&r"(res), "+&r"(err)
: "r"(vaddr), "i"(SIGSEGV));
@@ -2126,7 +2126,7 @@ fpu_emul:
"j 2b\n"
".previous\n"
".section __ex_table,\"a\"\n"
- STR(PTR) " 1b,3b\n"
+ STR(PTR_WD) " 1b,3b\n"
".previous\n"
: "=&r"(res), "+&r"(err)
: "r"(vaddr), "i"(SIGSEGV)
@@ -2189,7 +2189,7 @@ fpu_emul:
"j 2b\n"
".previous\n"
".section __ex_table,\"a\"\n"
- STR(PTR) " 1b,3b\n"
+ STR(PTR_WD) " 1b,3b\n"
".previous\n"
: "+&r"(res), "+&r"(err)
: "r"(vaddr), "i"(SIGSEGV));
diff --git a/arch/mips/kernel/r2300_fpu.S b/arch/mips/kernel/r2300_fpu.S
index cbf6db98cfb3..2748c55820c2 100644
--- a/arch/mips/kernel/r2300_fpu.S
+++ b/arch/mips/kernel/r2300_fpu.S
@@ -23,14 +23,14 @@
#define EX(a,b) \
9: a,##b; \
.section __ex_table,"a"; \
- PTR 9b,fault; \
+ PTR_WD 9b,fault; \
.previous
#define EX2(a,b) \
9: a,##b; \
.section __ex_table,"a"; \
- PTR 9b,fault; \
- PTR 9b+4,fault; \
+ PTR_WD 9b,fault; \
+ PTR_WD 9b+4,fault; \
.previous
.set mips1
diff --git a/arch/mips/kernel/r4k_fpu.S b/arch/mips/kernel/r4k_fpu.S
index b91e91106475..2e687c60bc4f 100644
--- a/arch/mips/kernel/r4k_fpu.S
+++ b/arch/mips/kernel/r4k_fpu.S
@@ -31,7 +31,7 @@
.ex\@: \insn \reg, \src
.set pop
.section __ex_table,"a"
- PTR .ex\@, fault
+ PTR_WD .ex\@, fault
.previous
.endm
diff --git a/arch/mips/kernel/relocate_kernel.S b/arch/mips/kernel/relocate_kernel.S
index f3c908abdbb8..cfde14b48fd8 100644
--- a/arch/mips/kernel/relocate_kernel.S
+++ b/arch/mips/kernel/relocate_kernel.S
@@ -147,10 +147,10 @@ LEAF(kexec_smp_wait)
kexec_args:
EXPORT(kexec_args)
-arg0: PTR 0x0
-arg1: PTR 0x0
-arg2: PTR 0x0
-arg3: PTR 0x0
+arg0: PTR_WD 0x0
+arg1: PTR_WD 0x0
+arg2: PTR_WD 0x0
+arg3: PTR_WD 0x0
.size kexec_args,PTRSIZE*4
#ifdef CONFIG_SMP
@@ -161,10 +161,10 @@ arg3: PTR 0x0
*/
secondary_kexec_args:
EXPORT(secondary_kexec_args)
-s_arg0: PTR 0x0
-s_arg1: PTR 0x0
-s_arg2: PTR 0x0
-s_arg3: PTR 0x0
+s_arg0: PTR_WD 0x0
+s_arg1: PTR_WD 0x0
+s_arg2: PTR_WD 0x0
+s_arg3: PTR_WD 0x0
.size secondary_kexec_args,PTRSIZE*4
kexec_flag:
LONG 0x1
@@ -173,17 +173,17 @@ kexec_flag:
kexec_start_address:
EXPORT(kexec_start_address)
- PTR 0x0
+ PTR_WD 0x0
.size kexec_start_address, PTRSIZE
kexec_indirection_page:
EXPORT(kexec_indirection_page)
- PTR 0
+ PTR_WD 0
.size kexec_indirection_page, PTRSIZE
relocate_new_kernel_end:
relocate_new_kernel_size:
EXPORT(relocate_new_kernel_size)
- PTR relocate_new_kernel_end - relocate_new_kernel
+ PTR_WD relocate_new_kernel_end - relocate_new_kernel
.size relocate_new_kernel_size, PTRSIZE
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index b1b2e106f711..9bfce5f75f60 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -72,10 +72,10 @@ loads_done:
.set pop
.section __ex_table,"a"
- PTR load_a4, bad_stack_a4
- PTR load_a5, bad_stack_a5
- PTR load_a6, bad_stack_a6
- PTR load_a7, bad_stack_a7
+ PTR_WD load_a4, bad_stack_a4
+ PTR_WD load_a5, bad_stack_a5
+ PTR_WD load_a6, bad_stack_a6
+ PTR_WD load_a7, bad_stack_a7
.previous
lw t0, TI_FLAGS($28) # syscall tracing enabled?
@@ -216,7 +216,7 @@ einval: li v0, -ENOSYS
#endif /* CONFIG_MIPS_MT_FPAFF */
#define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, native)
-#define __SYSCALL(nr, entry) PTR entry
+#define __SYSCALL(nr, entry) PTR_WD entry
.align 2
.type sys_call_table, @object
EXPORT(sys_call_table)
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index f650c55a17dc..97456b2ca7dc 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -101,7 +101,7 @@ not_n32_scall:
END(handle_sysn32)
-#define __SYSCALL(nr, entry) PTR entry
+#define __SYSCALL(nr, entry) PTR_WD entry
.type sysn32_call_table, @object
EXPORT(sysn32_call_table)
#include <asm/syscall_table_n32.h>
diff --git a/arch/mips/kernel/scall64-n64.S b/arch/mips/kernel/scall64-n64.S
index 5d7bfc65e4d0..5f6ed4b4c399 100644
--- a/arch/mips/kernel/scall64-n64.S
+++ b/arch/mips/kernel/scall64-n64.S
@@ -109,7 +109,7 @@ illegal_syscall:
j n64_syscall_exit
END(handle_sys64)
-#define __SYSCALL(nr, entry) PTR entry
+#define __SYSCALL(nr, entry) PTR_WD entry
.align 3
.type sys_call_table, @object
EXPORT(sys_call_table)
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index cedc8bd88804..d3c2616cba22 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -73,10 +73,10 @@ load_a7: lw a7, 28(t0) # argument #8 from usp
loads_done:
.section __ex_table,"a"
- PTR load_a4, bad_stack_a4
- PTR load_a5, bad_stack_a5
- PTR load_a6, bad_stack_a6
- PTR load_a7, bad_stack_a7
+ PTR_WD load_a4, bad_stack_a4
+ PTR_WD load_a5, bad_stack_a5
+ PTR_WD load_a6, bad_stack_a6
+ PTR_WD load_a7, bad_stack_a7
.previous
li t1, _TIF_WORK_SYSCALL_ENTRY
@@ -214,7 +214,7 @@ einval: li v0, -ENOSYS
END(sys32_syscall)
#define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, compat)
-#define __SYSCALL(nr, entry) PTR entry
+#define __SYSCALL(nr, entry) PTR_WD entry
.align 3
.type sys32_call_table,@object
EXPORT(sys32_call_table)
diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c
index 5512cd586e6e..ae93a607ddf7 100644
--- a/arch/mips/kernel/syscall.c
+++ b/arch/mips/kernel/syscall.c
@@ -122,8 +122,8 @@ static inline int mips_atomic_set(unsigned long addr, unsigned long new)
" j 3b \n"
" .previous \n"
" .section __ex_table,\"a\" \n"
- " "STR(PTR)" 1b, 4b \n"
- " "STR(PTR)" 2b, 4b \n"
+ " "STR(PTR_WD)" 1b, 4b \n"
+ " "STR(PTR_WD)" 2b, 4b \n"
" .previous \n"
" .set pop \n"
: [old] "=&r" (old),
@@ -152,8 +152,8 @@ static inline int mips_atomic_set(unsigned long addr, unsigned long new)
" j 3b \n"
" .previous \n"
" .section __ex_table,\"a\" \n"
- " "STR(PTR)" 1b, 5b \n"
- " "STR(PTR)" 2b, 5b \n"
+ " "STR(PTR_WD)" 1b, 5b \n"
+ " "STR(PTR_WD)" 2b, 5b \n"
" .previous \n"
" .set pop \n"
: [old] "=&r" (old),
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index e59cb6246f76..a25e0b73ee70 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -414,6 +414,24 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
return -ENOIOCTLCMD;
}
+/*
+ * Actually run the vCPU, entering an RCU extended quiescent state (EQS) while
+ * the vCPU is running.
+ *
+ * This must be noinstr as instrumentation may make use of RCU, and this is not
+ * safe during the EQS.
+ */
+static int noinstr kvm_mips_vcpu_enter_exit(struct kvm_vcpu *vcpu)
+{
+ int ret;
+
+ guest_state_enter_irqoff();
+ ret = kvm_mips_callbacks->vcpu_run(vcpu);
+ guest_state_exit_irqoff();
+
+ return ret;
+}
+
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
int r = -EINTR;
@@ -434,7 +452,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
lose_fpu(1);
local_irq_disable();
- guest_enter_irqoff();
+ guest_timing_enter_irqoff();
trace_kvm_enter(vcpu);
/*
@@ -445,10 +463,23 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
*/
smp_store_mb(vcpu->mode, IN_GUEST_MODE);
- r = kvm_mips_callbacks->vcpu_run(vcpu);
+ r = kvm_mips_vcpu_enter_exit(vcpu);
+
+ /*
+ * We must ensure that any pending interrupts are taken before
+ * we exit guest timing so that timer ticks are accounted as
+ * guest time. Transiently unmask interrupts so that any
+ * pending interrupts are taken.
+ *
+ * TODO: is there a barrier which ensures that pending interrupts are
+ * recognised? Currently this just hopes that the CPU takes any pending
+ * interrupts between the enable and disable.
+ */
+ local_irq_enable();
+ local_irq_disable();
trace_kvm_out(vcpu);
- guest_exit_irqoff();
+ guest_timing_exit_irqoff();
local_irq_enable();
out:
@@ -1168,7 +1199,7 @@ static void kvm_mips_set_c0_status(void)
/*
* Return value is in the form (errcode<<2 | RESUME_FLAG_HOST | RESUME_FLAG_NV)
*/
-int kvm_mips_handle_exit(struct kvm_vcpu *vcpu)
+static int __kvm_mips_handle_exit(struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
u32 cause = vcpu->arch.host_cp0_cause;
@@ -1357,6 +1388,17 @@ int kvm_mips_handle_exit(struct kvm_vcpu *vcpu)
return ret;
}
+int noinstr kvm_mips_handle_exit(struct kvm_vcpu *vcpu)
+{
+ int ret;
+
+ guest_state_exit_irqoff();
+ ret = __kvm_mips_handle_exit(vcpu);
+ guest_state_enter_irqoff();
+
+ return ret;
+}
+
/* Enable FPU for guest and restore context */
void kvm_own_fpu(struct kvm_vcpu *vcpu)
{
diff --git a/arch/mips/kvm/vz.c b/arch/mips/kvm/vz.c
index 4adca5abbc72..c706f5890a05 100644
--- a/arch/mips/kvm/vz.c
+++ b/arch/mips/kvm/vz.c
@@ -458,8 +458,8 @@ void kvm_vz_acquire_htimer(struct kvm_vcpu *vcpu)
/**
* _kvm_vz_save_htimer() - Switch to software emulation of guest timer.
* @vcpu: Virtual CPU.
- * @compare: Pointer to write compare value to.
- * @cause: Pointer to write cause value to.
+ * @out_compare: Pointer to write compare value to.
+ * @out_cause: Pointer to write cause value to.
*
* Save VZ guest timer state and switch to software emulation of guest CP0
* timer. The hard timer must already be in use, so preemption should be
@@ -1541,11 +1541,14 @@ static int kvm_trap_vz_handle_guest_exit(struct kvm_vcpu *vcpu)
}
/**
- * kvm_trap_vz_handle_cop_unusuable() - Guest used unusable coprocessor.
+ * kvm_trap_vz_handle_cop_unusable() - Guest used unusable coprocessor.
* @vcpu: Virtual CPU context.
*
* Handle when the guest attempts to use a coprocessor which hasn't been allowed
* by the root context.
+ *
+ * Return: value indicating whether to resume the host or the guest
+ * (RESUME_HOST or RESUME_GUEST)
*/
static int kvm_trap_vz_handle_cop_unusable(struct kvm_vcpu *vcpu)
{
@@ -1592,6 +1595,9 @@ static int kvm_trap_vz_handle_cop_unusable(struct kvm_vcpu *vcpu)
*
* Handle when the guest attempts to use MSA when it is disabled in the root
* context.
+ *
+ * Return: value indicating whether to resume the host or the guest
+ * (RESUME_HOST or RESUME_GUEST)
*/
static int kvm_trap_vz_handle_msa_disabled(struct kvm_vcpu *vcpu)
{
diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
index a46db0807195..7767137c3e49 100644
--- a/arch/mips/lib/csum_partial.S
+++ b/arch/mips/lib/csum_partial.S
@@ -347,7 +347,7 @@ EXPORT_SYMBOL(csum_partial)
.if \mode == LEGACY_MODE; \
9: insn reg, addr; \
.section __ex_table,"a"; \
- PTR 9b, .L_exc; \
+ PTR_WD 9b, .L_exc; \
.previous; \
/* This is enabled in EVA mode */ \
.else; \
@@ -356,7 +356,7 @@ EXPORT_SYMBOL(csum_partial)
((\to == USEROP) && (type == ST_INSN)); \
9: __BUILD_EVA_INSN(insn##e, reg, addr); \
.section __ex_table,"a"; \
- PTR 9b, .L_exc; \
+ PTR_WD 9b, .L_exc; \
.previous; \
.else; \
/* EVA without exception */ \
diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S
index 277c32296636..18a43f2e29c8 100644
--- a/arch/mips/lib/memcpy.S
+++ b/arch/mips/lib/memcpy.S
@@ -116,7 +116,7 @@
.if \mode == LEGACY_MODE; \
9: insn reg, addr; \
.section __ex_table,"a"; \
- PTR 9b, handler; \
+ PTR_WD 9b, handler; \
.previous; \
/* This is assembled in EVA mode */ \
.else; \
@@ -125,7 +125,7 @@
((\to == USEROP) && (type == ST_INSN)); \
9: __BUILD_EVA_INSN(insn##e, reg, addr); \
.section __ex_table,"a"; \
- PTR 9b, handler; \
+ PTR_WD 9b, handler; \
.previous; \
.else; \
/* \
diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S
index b0baa3c79fad..0b342bae9a98 100644
--- a/arch/mips/lib/memset.S
+++ b/arch/mips/lib/memset.S
@@ -52,7 +52,7 @@
9: ___BUILD_EVA_INSN(insn, reg, addr); \
.endif; \
.section __ex_table,"a"; \
- PTR 9b, handler; \
+ PTR_WD 9b, handler; \
.previous
.macro f_fill64 dst, offset, val, fixup, mode
diff --git a/arch/mips/lib/strncpy_user.S b/arch/mips/lib/strncpy_user.S
index 556acf684d7b..13aaa9927ad1 100644
--- a/arch/mips/lib/strncpy_user.S
+++ b/arch/mips/lib/strncpy_user.S
@@ -15,7 +15,7 @@
#define EX(insn,reg,addr,handler) \
9: insn reg, addr; \
.section __ex_table,"a"; \
- PTR 9b, handler; \
+ PTR_WD 9b, handler; \
.previous
/*
@@ -59,7 +59,7 @@ LEAF(__strncpy_from_user_asm)
jr ra
.section __ex_table,"a"
- PTR 1b, .Lfault
+ PTR_WD 1b, .Lfault
.previous
EXPORT_SYMBOL(__strncpy_from_user_asm)
diff --git a/arch/mips/lib/strnlen_user.S b/arch/mips/lib/strnlen_user.S
index 92b63f20ec05..6de31b616f9c 100644
--- a/arch/mips/lib/strnlen_user.S
+++ b/arch/mips/lib/strnlen_user.S
@@ -14,7 +14,7 @@
#define EX(insn,reg,addr,handler) \
9: insn reg, addr; \
.section __ex_table,"a"; \
- PTR 9b, handler; \
+ PTR_WD 9b, handler; \
.previous
/*
diff --git a/arch/mips/loongson64/vbios_quirk.c b/arch/mips/loongson64/vbios_quirk.c
index 9a29e94d3db1..3115d4de982c 100644
--- a/arch/mips/loongson64/vbios_quirk.c
+++ b/arch/mips/loongson64/vbios_quirk.c
@@ -3,7 +3,7 @@
#include <linux/pci.h>
#include <loongson.h>
-static void pci_fixup_radeon(struct pci_dev *pdev)
+static void pci_fixup_video(struct pci_dev *pdev)
{
struct resource *res = &pdev->resource[PCI_ROM_RESOURCE];
@@ -22,8 +22,7 @@ static void pci_fixup_radeon(struct pci_dev *pdev)
res->flags = IORESOURCE_MEM | IORESOURCE_ROM_SHADOW |
IORESOURCE_PCI_FIXED;
- dev_info(&pdev->dev, "BAR %d: assigned %pR for Radeon ROM\n",
- PCI_ROM_RESOURCE, res);
+ dev_info(&pdev->dev, "Video device with shadowed ROM at %pR\n", res);
}
-DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_ATI, 0x9615,
- PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_radeon);
+DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_ATI, 0x9615,
+ PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_video);
diff --git a/arch/parisc/include/asm/bitops.h b/arch/parisc/include/asm/bitops.h
index 0ec9cfc5131f..56ffd260c669 100644
--- a/arch/parisc/include/asm/bitops.h
+++ b/arch/parisc/include/asm/bitops.h
@@ -12,6 +12,14 @@
#include <asm/barrier.h>
#include <linux/atomic.h>
+/* compiler build environment sanity checks: */
+#if !defined(CONFIG_64BIT) && defined(__LP64__)
+#error "Please use 'ARCH=parisc' to build the 32-bit kernel."
+#endif
+#if defined(CONFIG_64BIT) && !defined(__LP64__)
+#error "Please use 'ARCH=parisc64' to build the 64-bit kernel."
+#endif
+
/* See http://marc.theaimsgroup.com/?t=108826637900003 for discussion
* on use of volatile and __*_bit() (set/clear/change):
* *_bit() want use of volatile.
diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h
index ebf8a845b017..123d5f16cd9d 100644
--- a/arch/parisc/include/asm/uaccess.h
+++ b/arch/parisc/include/asm/uaccess.h
@@ -89,8 +89,8 @@ struct exception_table_entry {
__asm__("1: " ldx " 0(" sr "%2),%0\n" \
"9:\n" \
ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \
- : "=r"(__gu_val), "=r"(__gu_err) \
- : "r"(ptr), "1"(__gu_err)); \
+ : "=r"(__gu_val), "+r"(__gu_err) \
+ : "r"(ptr)); \
\
(val) = (__force __typeof__(*(ptr))) __gu_val; \
}
@@ -123,8 +123,8 @@ struct exception_table_entry {
"9:\n" \
ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \
ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \
- : "=&r"(__gu_tmp.l), "=r"(__gu_err) \
- : "r"(ptr), "1"(__gu_err)); \
+ : "=&r"(__gu_tmp.l), "+r"(__gu_err) \
+ : "r"(ptr)); \
\
(val) = __gu_tmp.t; \
}
@@ -135,13 +135,12 @@ struct exception_table_entry {
#define __put_user_internal(sr, x, ptr) \
({ \
ASM_EXCEPTIONTABLE_VAR(__pu_err); \
- __typeof__(*(ptr)) __x = (__typeof__(*(ptr)))(x); \
\
switch (sizeof(*(ptr))) { \
- case 1: __put_user_asm(sr, "stb", __x, ptr); break; \
- case 2: __put_user_asm(sr, "sth", __x, ptr); break; \
- case 4: __put_user_asm(sr, "stw", __x, ptr); break; \
- case 8: STD_USER(sr, __x, ptr); break; \
+ case 1: __put_user_asm(sr, "stb", x, ptr); break; \
+ case 2: __put_user_asm(sr, "sth", x, ptr); break; \
+ case 4: __put_user_asm(sr, "stw", x, ptr); break; \
+ case 8: STD_USER(sr, x, ptr); break; \
default: BUILD_BUG(); \
} \
\
@@ -150,7 +149,9 @@ struct exception_table_entry {
#define __put_user(x, ptr) \
({ \
- __put_user_internal("%%sr3,", x, ptr); \
+ __typeof__(&*(ptr)) __ptr = ptr; \
+ __typeof__(*(__ptr)) __x = (__typeof__(*(__ptr)))(x); \
+ __put_user_internal("%%sr3,", __x, __ptr); \
})
#define __put_kernel_nofault(dst, src, type, err_label) \
@@ -180,8 +181,8 @@ struct exception_table_entry {
"1: " stx " %2,0(" sr "%1)\n" \
"9:\n" \
ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \
- : "=r"(__pu_err) \
- : "r"(ptr), "r"(x), "0"(__pu_err))
+ : "+r"(__pu_err) \
+ : "r"(ptr), "r"(x))
#if !defined(CONFIG_64BIT)
@@ -193,8 +194,8 @@ struct exception_table_entry {
"9:\n" \
ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \
ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \
- : "=r"(__pu_err) \
- : "r"(ptr), "r"(__val), "0"(__pu_err)); \
+ : "+r"(__pu_err) \
+ : "r"(ptr), "r"(__val)); \
} while (0)
#endif /* !defined(CONFIG_64BIT) */
diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c
index 237d20dd5622..286cec4d86d7 100644
--- a/arch/parisc/kernel/unaligned.c
+++ b/arch/parisc/kernel/unaligned.c
@@ -340,7 +340,7 @@ static int emulate_stw(struct pt_regs *regs, int frreg, int flop)
: "r" (val), "r" (regs->ior), "r" (regs->isr)
: "r19", "r20", "r21", "r22", "r1", FIXUP_BRANCH_CLOBBER );
- return 0;
+ return ret;
}
static int emulate_std(struct pt_regs *regs, int frreg, int flop)
{
@@ -397,7 +397,7 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop)
__asm__ __volatile__ (
" mtsp %4, %%sr1\n"
" zdep %2, 29, 2, %%r19\n"
-" dep %%r0, 31, 2, %2\n"
+" dep %%r0, 31, 2, %3\n"
" mtsar %%r19\n"
" zvdepi -2, 32, %%r19\n"
"1: ldw 0(%%sr1,%3),%%r20\n"
@@ -409,7 +409,7 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop)
" andcm %%r21, %%r19, %%r21\n"
" or %1, %%r20, %1\n"
" or %2, %%r21, %2\n"
-"3: stw %1,0(%%sr1,%1)\n"
+"3: stw %1,0(%%sr1,%3)\n"
"4: stw %%r1,4(%%sr1,%3)\n"
"5: stw %2,8(%%sr1,%3)\n"
" copy %%r0, %0\n"
@@ -596,7 +596,6 @@ void handle_unaligned(struct pt_regs *regs)
ret = ERR_NOTHANDLED; /* "undefined", but lets kill them. */
break;
}
-#ifdef CONFIG_PA20
switch (regs->iir & OPCODE2_MASK)
{
case OPCODE_FLDD_L:
@@ -607,22 +606,23 @@ void handle_unaligned(struct pt_regs *regs)
flop=1;
ret = emulate_std(regs, R2(regs->iir),1);
break;
+#ifdef CONFIG_PA20
case OPCODE_LDD_L:
ret = emulate_ldd(regs, R2(regs->iir),0);
break;
case OPCODE_STD_L:
ret = emulate_std(regs, R2(regs->iir),0);
break;
- }
#endif
+ }
switch (regs->iir & OPCODE3_MASK)
{
case OPCODE_FLDW_L:
flop=1;
- ret = emulate_ldw(regs, R2(regs->iir),0);
+ ret = emulate_ldw(regs, R2(regs->iir), 1);
break;
case OPCODE_LDW_M:
- ret = emulate_ldw(regs, R2(regs->iir),1);
+ ret = emulate_ldw(regs, R2(regs->iir), 0);
break;
case OPCODE_FSTW_L:
diff --git a/arch/parisc/lib/iomap.c b/arch/parisc/lib/iomap.c
index 367f6397bda7..860385058085 100644
--- a/arch/parisc/lib/iomap.c
+++ b/arch/parisc/lib/iomap.c
@@ -346,6 +346,16 @@ u64 ioread64be(const void __iomem *addr)
return *((u64 *)addr);
}
+u64 ioread64_lo_hi(const void __iomem *addr)
+{
+ u32 low, high;
+
+ low = ioread32(addr);
+ high = ioread32(addr + sizeof(u32));
+
+ return low + ((u64)high << 32);
+}
+
u64 ioread64_hi_lo(const void __iomem *addr)
{
u32 low, high;
@@ -419,6 +429,12 @@ void iowrite64be(u64 datum, void __iomem *addr)
}
}
+void iowrite64_lo_hi(u64 val, void __iomem *addr)
+{
+ iowrite32(val, addr);
+ iowrite32(val >> 32, addr + sizeof(u32));
+}
+
void iowrite64_hi_lo(u64 val, void __iomem *addr)
{
iowrite32(val >> 32, addr + sizeof(u32));
@@ -530,6 +546,7 @@ EXPORT_SYMBOL(ioread32);
EXPORT_SYMBOL(ioread32be);
EXPORT_SYMBOL(ioread64);
EXPORT_SYMBOL(ioread64be);
+EXPORT_SYMBOL(ioread64_lo_hi);
EXPORT_SYMBOL(ioread64_hi_lo);
EXPORT_SYMBOL(iowrite8);
EXPORT_SYMBOL(iowrite16);
@@ -538,6 +555,7 @@ EXPORT_SYMBOL(iowrite32);
EXPORT_SYMBOL(iowrite32be);
EXPORT_SYMBOL(iowrite64);
EXPORT_SYMBOL(iowrite64be);
+EXPORT_SYMBOL(iowrite64_lo_hi);
EXPORT_SYMBOL(iowrite64_hi_lo);
EXPORT_SYMBOL(ioread8_rep);
EXPORT_SYMBOL(ioread16_rep);
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index 1ae31db9988f..1dc2e88e7b04 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -337,9 +337,9 @@ static void __init setup_bootmem(void)
static bool kernel_set_to_readonly;
-static void __init map_pages(unsigned long start_vaddr,
- unsigned long start_paddr, unsigned long size,
- pgprot_t pgprot, int force)
+static void __ref map_pages(unsigned long start_vaddr,
+ unsigned long start_paddr, unsigned long size,
+ pgprot_t pgprot, int force)
{
pmd_t *pmd;
pte_t *pg_table;
@@ -449,7 +449,7 @@ void __init set_kernel_text_rw(int enable_read_write)
flush_tlb_all();
}
-void __ref free_initmem(void)
+void free_initmem(void)
{
unsigned long init_begin = (unsigned long)__init_begin;
unsigned long init_end = (unsigned long)__init_end;
@@ -463,7 +463,6 @@ void __ref free_initmem(void)
/* The init text pages are marked R-X. We have to
* flush the icache and mark them RW-
*
- * This is tricky, because map_pages is in the init section.
* Do a dummy remap of the data section first (the data
* section is already PAGE_KERNEL) to pull in the TLB entries
* for map_kernel */
diff --git a/arch/powerpc/include/asm/book3s/32/mmu-hash.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h
index 7be27862329f..78c6a5fde1d6 100644
--- a/arch/powerpc/include/asm/book3s/32/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h
@@ -223,6 +223,8 @@ static __always_inline void update_user_segments(u32 val)
update_user_segment(15, val);
}
+int __init find_free_bat(void);
+unsigned int bat_block_size(unsigned long base, unsigned long top);
#endif /* !__ASSEMBLY__ */
/* We happily ignore the smaller BATs on 601, we don't actually use
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 609c80f67194..f8b94f78403f 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -178,6 +178,7 @@ static inline bool pte_user(pte_t pte)
#ifndef __ASSEMBLY__
int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);
+void unmap_kernel_page(unsigned long va);
#endif /* !__ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 33e073d6b0c4..875730d5af40 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -1082,6 +1082,8 @@ static inline int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t p
return hash__map_kernel_page(ea, pa, prot);
}
+void unmap_kernel_page(unsigned long va);
+
static inline int __meminit vmemmap_create_mapping(unsigned long start,
unsigned long page_size,
unsigned long phys)
diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h
index 947b5b9c4424..a832aeafe560 100644
--- a/arch/powerpc/include/asm/fixmap.h
+++ b/arch/powerpc/include/asm/fixmap.h
@@ -111,8 +111,10 @@ static inline void __set_fixmap(enum fixed_addresses idx,
BUILD_BUG_ON(idx >= __end_of_fixed_addresses);
else if (WARN_ON(idx >= __end_of_fixed_addresses))
return;
-
- map_kernel_page(__fix_to_virt(idx), phys, flags);
+ if (pgprot_val(flags))
+ map_kernel_page(__fix_to_virt(idx), phys, flags);
+ else
+ unmap_kernel_page(__fix_to_virt(idx));
}
#define __early_set_fixmap __set_fixmap
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index a58fb4aa6c81..674e5aaafcbd 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -473,7 +473,7 @@ static inline bool arch_irq_disabled_regs(struct pt_regs *regs)
return !(regs->msr & MSR_EE);
}
-static inline bool should_hard_irq_enable(void)
+static __always_inline bool should_hard_irq_enable(void)
{
return false;
}
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index fe07558173ef..827038a33064 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -39,7 +39,6 @@ struct kvm_nested_guest {
pgd_t *shadow_pgtable; /* our page table for this guest */
u64 l1_gr_to_hr; /* L1's addr of part'n-scoped table */
u64 process_table; /* process table entry for this guest */
- u64 hfscr; /* HFSCR that the L1 requested for this nested guest */
long refcnt; /* number of pointers to this struct */
struct mutex tlb_lock; /* serialize page faults and tlbies */
struct kvm_nested_guest *next;
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index a770443cd6e0..d9bf60bf0816 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -818,6 +818,7 @@ struct kvm_vcpu_arch {
/* For support of nested guests */
struct kvm_nested_guest *nested;
+ u64 nested_hfscr; /* HFSCR that the L1 requested for the nested guest */
u32 nested_vcpu_id;
gpa_t nested_io_gpr;
#endif
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index b67742e2a9b2..d959c2a73fbf 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -64,6 +64,7 @@ extern int icache_44x_need_flush;
#ifndef __ASSEMBLY__
int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);
+void unmap_kernel_page(unsigned long va);
#endif /* !__ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index a3313e853e5e..2816d158280a 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -308,6 +308,7 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
#define __swp_entry_to_pte(x) __pte((x).val)
int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot);
+void unmap_kernel_page(unsigned long va);
extern int __meminit vmemmap_create_mapping(unsigned long start,
unsigned long page_size,
unsigned long phys);
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index efad07081cc0..9675303b724e 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -500,6 +500,7 @@
#define PPC_RAW_LDX(r, base, b) (0x7c00002a | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b))
#define PPC_RAW_LHZ(r, base, i) (0xa0000000 | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
#define PPC_RAW_LHBRX(r, base, b) (0x7c00062c | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b))
+#define PPC_RAW_LWBRX(r, base, b) (0x7c00042c | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b))
#define PPC_RAW_LDBRX(r, base, b) (0x7c000428 | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b))
#define PPC_RAW_STWCX(s, a, b) (0x7c00012d | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b))
#define PPC_RAW_CMPWI(a, i) (0x2c000000 | ___PPC_RA(a) | IMM_L(i))
diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h
index 52d05b465e3e..25fc8ad9a27a 100644
--- a/arch/powerpc/include/asm/syscall.h
+++ b/arch/powerpc/include/asm/syscall.h
@@ -90,7 +90,7 @@ static inline void syscall_get_arguments(struct task_struct *task,
unsigned long val, mask = -1UL;
unsigned int n = 6;
- if (is_32bit_task())
+ if (is_tsk_32bit_task(task))
mask = 0xffffffff;
while (n--) {
@@ -105,7 +105,7 @@ static inline void syscall_get_arguments(struct task_struct *task,
static inline int syscall_get_arch(struct task_struct *task)
{
- if (is_32bit_task())
+ if (is_tsk_32bit_task(task))
return AUDIT_ARCH_PPC;
else if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN))
return AUDIT_ARCH_PPC64LE;
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index 5725029aaa29..d6e649b3c70b 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -168,8 +168,10 @@ static inline bool test_thread_local_flags(unsigned int flags)
#ifdef CONFIG_COMPAT
#define is_32bit_task() (test_thread_flag(TIF_32BIT))
+#define is_tsk_32bit_task(tsk) (test_tsk_thread_flag(tsk, TIF_32BIT))
#else
#define is_32bit_task() (IS_ENABLED(CONFIG_PPC32))
+#define is_tsk_32bit_task(tsk) (IS_ENABLED(CONFIG_PPC32))
#endif
#if defined(CONFIG_PPC64)
diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S
index fa84744d6b24..b876ef8c70a7 100644
--- a/arch/powerpc/kernel/head_book3s_32.S
+++ b/arch/powerpc/kernel/head_book3s_32.S
@@ -421,14 +421,14 @@ InstructionTLBMiss:
*/
/* Get PTE (linux-style) and check access */
mfspr r3,SPRN_IMISS
-#ifdef CONFIG_MODULES
+#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
lis r1, TASK_SIZE@h /* check if kernel address */
cmplw 0,r1,r3
#endif
mfspr r2, SPRN_SDR1
li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC | _PAGE_USER
rlwinm r2, r2, 28, 0xfffff000
-#ifdef CONFIG_MODULES
+#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
bgt- 112f
lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */
li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S
index 92088f848266..7bab2d7de372 100644
--- a/arch/powerpc/kernel/interrupt_64.S
+++ b/arch/powerpc/kernel/interrupt_64.S
@@ -30,6 +30,7 @@ COMPAT_SYS_CALL_TABLE:
.ifc \srr,srr
mfspr r11,SPRN_SRR0
ld r12,_NIP(r1)
+ clrrdi r11,r11,2
clrrdi r12,r12,2
100: tdne r11,r12
EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
@@ -40,6 +41,7 @@ COMPAT_SYS_CALL_TABLE:
.else
mfspr r11,SPRN_HSRR0
ld r12,_NIP(r1)
+ clrrdi r11,r11,2
clrrdi r12,r12,2
100: tdne r11,r12
EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 62361cc7281c..cd0b8b71ecdd 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -649,8 +649,9 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt)
__this_cpu_inc(irq_stat.timer_irqs_event);
} else {
now = *next_tb - now;
- if (now <= decrementer_max)
- set_dec_or_work(now);
+ if (now > decrementer_max)
+ now = decrementer_max;
+ set_dec_or_work(now);
__this_cpu_inc(irq_stat.timer_irqs_others);
}
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index d1817cd9a691..84c89f08ae9a 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1816,7 +1816,6 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
{
- struct kvm_nested_guest *nested = vcpu->arch.nested;
int r;
int srcu_idx;
@@ -1922,7 +1921,7 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
* it into a HEAI.
*/
if (!(vcpu->arch.hfscr_permitted & (1UL << cause)) ||
- (nested->hfscr & (1UL << cause))) {
+ (vcpu->arch.nested_hfscr & (1UL << cause))) {
vcpu->arch.trap = BOOK3S_INTERRUPT_H_EMUL_ASSIST;
/*
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index 8f8daaeeb3b7..9d373f8963ee 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -363,7 +363,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
/* set L1 state to L2 state */
vcpu->arch.nested = l2;
vcpu->arch.nested_vcpu_id = l2_hv.vcpu_token;
- l2->hfscr = l2_hv.hfscr;
+ vcpu->arch.nested_hfscr = l2_hv.hfscr;
vcpu->arch.regs = l2_regs;
/* Guest must always run with ME enabled, HV disabled. */
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index a94b0cd0bdc5..bd3734d5be89 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -3264,12 +3264,14 @@ void emulate_update_regs(struct pt_regs *regs, struct instruction_op *op)
case BARRIER_EIEIO:
eieio();
break;
+#ifdef CONFIG_PPC64
case BARRIER_LWSYNC:
asm volatile("lwsync" : : : "memory");
break;
case BARRIER_PTESYNC:
asm volatile("ptesync" : : : "memory");
break;
+#endif
}
break;
diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c
index 94045b265b6b..203735caf691 100644
--- a/arch/powerpc/mm/book3s32/mmu.c
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@ -76,7 +76,7 @@ unsigned long p_block_mapped(phys_addr_t pa)
return 0;
}
-static int __init find_free_bat(void)
+int __init find_free_bat(void)
{
int b;
int n = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4;
@@ -100,7 +100,7 @@ static int __init find_free_bat(void)
* - block size has to be a power of two. This is calculated by finding the
* highest bit set to 1.
*/
-static unsigned int block_size(unsigned long base, unsigned long top)
+unsigned int bat_block_size(unsigned long base, unsigned long top)
{
unsigned int max_size = SZ_256M;
unsigned int base_shift = (ffs(base) - 1) & 31;
@@ -145,7 +145,7 @@ static unsigned long __init __mmu_mapin_ram(unsigned long base, unsigned long to
int idx;
while ((idx = find_free_bat()) != -1 && base != top) {
- unsigned int size = block_size(base, top);
+ unsigned int size = bat_block_size(base, top);
if (size < 128 << 10)
break;
@@ -201,12 +201,12 @@ void mmu_mark_initmem_nx(void)
unsigned long size;
for (i = 0; i < nb - 1 && base < top;) {
- size = block_size(base, top);
+ size = bat_block_size(base, top);
setibat(i++, PAGE_OFFSET + base, base, size, PAGE_KERNEL_TEXT);
base += size;
}
if (base < top) {
- size = block_size(base, top);
+ size = bat_block_size(base, top);
if ((top - base) > size) {
size <<= 1;
if (strict_kernel_rwx_enabled() && base + size > border)
diff --git a/arch/powerpc/mm/kasan/book3s_32.c b/arch/powerpc/mm/kasan/book3s_32.c
index 35b287b0a8da..450a67ef0bbe 100644
--- a/arch/powerpc/mm/kasan/book3s_32.c
+++ b/arch/powerpc/mm/kasan/book3s_32.c
@@ -10,48 +10,51 @@ int __init kasan_init_region(void *start, size_t size)
{
unsigned long k_start = (unsigned long)kasan_mem_to_shadow(start);
unsigned long k_end = (unsigned long)kasan_mem_to_shadow(start + size);
- unsigned long k_cur = k_start;
- int k_size = k_end - k_start;
- int k_size_base = 1 << (ffs(k_size) - 1);
+ unsigned long k_nobat = k_start;
+ unsigned long k_cur;
+ phys_addr_t phys;
int ret;
- void *block;
- block = memblock_alloc(k_size, k_size_base);
-
- if (block && k_size_base >= SZ_128K && k_start == ALIGN(k_start, k_size_base)) {
- int shift = ffs(k_size - k_size_base);
- int k_size_more = shift ? 1 << (shift - 1) : 0;
-
- setbat(-1, k_start, __pa(block), k_size_base, PAGE_KERNEL);
- if (k_size_more >= SZ_128K)
- setbat(-1, k_start + k_size_base, __pa(block) + k_size_base,
- k_size_more, PAGE_KERNEL);
- if (v_block_mapped(k_start))
- k_cur = k_start + k_size_base;
- if (v_block_mapped(k_start + k_size_base))
- k_cur = k_start + k_size_base + k_size_more;
-
- update_bats();
+ while (k_nobat < k_end) {
+ unsigned int k_size = bat_block_size(k_nobat, k_end);
+ int idx = find_free_bat();
+
+ if (idx == -1)
+ break;
+ if (k_size < SZ_128K)
+ break;
+ phys = memblock_phys_alloc_range(k_size, k_size, 0,
+ MEMBLOCK_ALLOC_ANYWHERE);
+ if (!phys)
+ break;
+
+ setbat(idx, k_nobat, phys, k_size, PAGE_KERNEL);
+ k_nobat += k_size;
}
+ if (k_nobat != k_start)
+ update_bats();
- if (!block)
- block = memblock_alloc(k_size, PAGE_SIZE);
- if (!block)
- return -ENOMEM;
+ if (k_nobat < k_end) {
+ phys = memblock_phys_alloc_range(k_end - k_nobat, PAGE_SIZE, 0,
+ MEMBLOCK_ALLOC_ANYWHERE);
+ if (!phys)
+ return -ENOMEM;
+ }
ret = kasan_init_shadow_page_tables(k_start, k_end);
if (ret)
return ret;
- kasan_update_early_region(k_start, k_cur, __pte(0));
+ kasan_update_early_region(k_start, k_nobat, __pte(0));
- for (; k_cur < k_end; k_cur += PAGE_SIZE) {
+ for (k_cur = k_nobat; k_cur < k_end; k_cur += PAGE_SIZE) {
pmd_t *pmd = pmd_off_k(k_cur);
- void *va = block + k_cur - k_start;
- pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL);
+ pte_t pte = pfn_pte(PHYS_PFN(phys + k_cur - k_nobat), PAGE_KERNEL);
__set_pte_at(&init_mm, k_cur, pte_offset_kernel(pmd, k_cur), pte, 0);
}
flush_tlb_kernel_range(k_start, k_end);
+ memset(kasan_mem_to_shadow(start), 0, k_end - k_start);
+
return 0;
}
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index abb3198bd277..6ec5a7dd7913 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -206,6 +206,15 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
__set_pte_at(mm, addr, ptep, pte, 0);
}
+void unmap_kernel_page(unsigned long va)
+{
+ pmd_t *pmdp = pmd_off_k(va);
+ pte_t *ptep = pte_offset_kernel(pmdp, va);
+
+ pte_clear(&init_mm, va, ptep);
+ flush_tlb_kernel_range(va, va + PAGE_SIZE);
+}
+
/*
* This is called when relaxing access to a PTE. It's also called in the page
* fault path when we don't hit any of the major fault cases, ie, a minor
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index d6ffdd0f2309..56dd1f4e3e44 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -23,15 +23,15 @@ static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
memset32(area, BREAKPOINT_INSTRUCTION, size / 4);
}
-/* Fix the branch target addresses for subprog calls */
-static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image,
- struct codegen_context *ctx, u32 *addrs)
+/* Fix updated addresses (for subprog calls, ldimm64, et al) during extra pass */
+static int bpf_jit_fixup_addresses(struct bpf_prog *fp, u32 *image,
+ struct codegen_context *ctx, u32 *addrs)
{
const struct bpf_insn *insn = fp->insnsi;
bool func_addr_fixed;
u64 func_addr;
u32 tmp_idx;
- int i, ret;
+ int i, j, ret;
for (i = 0; i < fp->len; i++) {
/*
@@ -66,6 +66,23 @@ static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image,
* of the JITed sequence remains unchanged.
*/
ctx->idx = tmp_idx;
+ } else if (insn[i].code == (BPF_LD | BPF_IMM | BPF_DW)) {
+ tmp_idx = ctx->idx;
+ ctx->idx = addrs[i] / 4;
+#ifdef CONFIG_PPC32
+ PPC_LI32(ctx->b2p[insn[i].dst_reg] - 1, (u32)insn[i + 1].imm);
+ PPC_LI32(ctx->b2p[insn[i].dst_reg], (u32)insn[i].imm);
+ for (j = ctx->idx - addrs[i] / 4; j < 4; j++)
+ EMIT(PPC_RAW_NOP());
+#else
+ func_addr = ((u64)(u32)insn[i].imm) | (((u64)(u32)insn[i + 1].imm) << 32);
+ PPC_LI64(b2p[insn[i].dst_reg], func_addr);
+ /* overwrite rest with nops */
+ for (j = ctx->idx - addrs[i] / 4; j < 5; j++)
+ EMIT(PPC_RAW_NOP());
+#endif
+ ctx->idx = tmp_idx;
+ i++;
}
}
@@ -200,13 +217,13 @@ skip_init_ctx:
/*
* Do not touch the prologue and epilogue as they will remain
* unchanged. Only fix the branch target address for subprog
- * calls in the body.
+ * calls in the body, and ldimm64 instructions.
*
* This does not change the offsets and lengths of the subprog
* call instruction sequences and hence, the size of the JITed
* image as well.
*/
- bpf_jit_fixup_subprog_calls(fp, code_base, &cgctx, addrs);
+ bpf_jit_fixup_addresses(fp, code_base, &cgctx, addrs);
/* There is no need to perform the usual passes. */
goto skip_codegen_passes;
diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
index faaebd446cad..cf8dd8aea386 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -191,6 +191,9 @@ void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 fun
if (image && rel < 0x2000000 && rel >= -0x2000000) {
PPC_BL_ABS(func);
+ EMIT(PPC_RAW_NOP());
+ EMIT(PPC_RAW_NOP());
+ EMIT(PPC_RAW_NOP());
} else {
/* Load function address into r0 */
EMIT(PPC_RAW_LIS(_R0, IMM_H(func)));
@@ -290,6 +293,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
bool func_addr_fixed;
u64 func_addr;
u32 true_cond;
+ u32 tmp_idx;
+ int j;
/*
* addrs[] maps a BPF bytecode address into a real offset from
@@ -905,8 +910,12 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
* 16 byte instruction that uses two 'struct bpf_insn'
*/
case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
+ tmp_idx = ctx->idx;
PPC_LI32(dst_reg_h, (u32)insn[i + 1].imm);
PPC_LI32(dst_reg, (u32)insn[i].imm);
+ /* padding to allow full 4 instructions for later patching */
+ for (j = ctx->idx - tmp_idx; j < 4; j++)
+ EMIT(PPC_RAW_NOP());
/* Adjust for two bpf instructions */
addrs[++i] = ctx->idx * 4;
break;
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 9eae8d8ed340..e1e8c934308a 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -319,6 +319,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
u64 imm64;
u32 true_cond;
u32 tmp_idx;
+ int j;
/*
* addrs[] maps a BPF bytecode address into a real offset from
@@ -633,17 +634,21 @@ bpf_alu32_trunc:
EMIT(PPC_RAW_MR(dst_reg, b2p[TMP_REG_1]));
break;
case 64:
- /*
- * Way easier and faster(?) to store the value
- * into stack and then use ldbrx
- *
- * ctx->seen will be reliable in pass2, but
- * the instructions generated will remain the
- * same across all passes
- */
+ /* Store the value to stack and then use byte-reverse loads */
PPC_BPF_STL(dst_reg, 1, bpf_jit_stack_local(ctx));
EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx)));
- EMIT(PPC_RAW_LDBRX(dst_reg, 0, b2p[TMP_REG_1]));
+ if (cpu_has_feature(CPU_FTR_ARCH_206)) {
+ EMIT(PPC_RAW_LDBRX(dst_reg, 0, b2p[TMP_REG_1]));
+ } else {
+ EMIT(PPC_RAW_LWBRX(dst_reg, 0, b2p[TMP_REG_1]));
+ if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN))
+ EMIT(PPC_RAW_SLDI(dst_reg, dst_reg, 32));
+ EMIT(PPC_RAW_LI(b2p[TMP_REG_2], 4));
+ EMIT(PPC_RAW_LWBRX(b2p[TMP_REG_2], b2p[TMP_REG_2], b2p[TMP_REG_1]));
+ if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
+ EMIT(PPC_RAW_SLDI(b2p[TMP_REG_2], b2p[TMP_REG_2], 32));
+ EMIT(PPC_RAW_OR(dst_reg, dst_reg, b2p[TMP_REG_2]));
+ }
break;
}
break;
@@ -848,9 +853,13 @@ emit_clear:
case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
imm64 = ((u64)(u32) insn[i].imm) |
(((u64)(u32) insn[i+1].imm) << 32);
+ tmp_idx = ctx->idx;
+ PPC_LI64(dst_reg, imm64);
+ /* padding to allow full 5 instructions for later patching */
+ for (j = ctx->idx - tmp_idx; j < 5; j++)
+ EMIT(PPC_RAW_NOP());
/* Adjust for two bpf instructions */
addrs[++i] = ctx->idx * 4;
- PPC_LI64(dst_reg, imm64);
break;
/*
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index a684901b6965..b5b42cf0a703 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -776,6 +776,34 @@ static void pmao_restore_workaround(bool ebb)
mtspr(SPRN_PMC6, pmcs[5]);
}
+/*
+ * If the perf subsystem wants performance monitor interrupts as soon as
+ * possible (e.g., to sample the instruction address and stack chain),
+ * this should return true. The IRQ masking code can then enable MSR[EE]
+ * in some places (e.g., interrupt handlers) that allows PMI interrupts
+ * through to improve accuracy of profiles, at the cost of some performance.
+ *
+ * The PMU counters can be enabled by other means (e.g., sysfs raw SPR
+ * access), but in that case there is no need for prompt PMI handling.
+ *
+ * This currently returns true if any perf counter is being used. It
+ * could possibly return false if only events are being counted rather than
+ * samples being taken, but for now this is good enough.
+ */
+bool power_pmu_wants_prompt_pmi(void)
+{
+ struct cpu_hw_events *cpuhw;
+
+ /*
+ * This could simply test local_paca->pmcregs_in_use if that were not
+ * under ifdef KVM.
+ */
+ if (!ppmu)
+ return false;
+
+ cpuhw = this_cpu_ptr(&cpu_hw_events);
+ return cpuhw->n_events;
+}
#endif /* CONFIG_PPC64 */
static void perf_event_interrupt(struct pt_regs *regs);
@@ -1327,9 +1355,20 @@ static void power_pmu_disable(struct pmu *pmu)
* Otherwise provide a warning if there is PMI pending, but
* no counter is found overflown.
*/
- if (any_pmc_overflown(cpuhw))
- clear_pmi_irq_pending();
- else
+ if (any_pmc_overflown(cpuhw)) {
+ /*
+ * Since power_pmu_disable runs under local_irq_save, it
+ * could happen that code hits a PMC overflow without PMI
+ * pending in paca. Hence only clear PMI pending if it was
+ * set.
+ *
+ * If a PMI is pending, then MSR[EE] must be disabled (because
+ * the masked PMI handler disabling EE). So it is safe to
+ * call clear_pmi_irq_pending().
+ */
+ if (pmi_irq_pending())
+ clear_pmi_irq_pending();
+ } else
WARN_ON(pmi_irq_pending());
val = mmcra = cpuhw->mmcr.mmcra;
@@ -2438,36 +2477,6 @@ static void perf_event_interrupt(struct pt_regs *regs)
perf_sample_event_took(sched_clock() - start_clock);
}
-/*
- * If the perf subsystem wants performance monitor interrupts as soon as
- * possible (e.g., to sample the instruction address and stack chain),
- * this should return true. The IRQ masking code can then enable MSR[EE]
- * in some places (e.g., interrupt handlers) that allows PMI interrupts
- * though to improve accuracy of profiles, at the cost of some performance.
- *
- * The PMU counters can be enabled by other means (e.g., sysfs raw SPR
- * access), but in that case there is no need for prompt PMI handling.
- *
- * This currently returns true if any perf counter is being used. It
- * could possibly return false if only events are being counted rather than
- * samples being taken, but for now this is good enough.
- */
-bool power_pmu_wants_prompt_pmi(void)
-{
- struct cpu_hw_events *cpuhw;
-
- /*
- * This could simply test local_paca->pmcregs_in_use if that were not
- * under ifdef KVM.
- */
-
- if (!ppmu)
- return false;
-
- cpuhw = this_cpu_ptr(&cpu_hw_events);
- return cpuhw->n_events;
-}
-
static int power_pmu_prepare_cpu(unsigned int cpu)
{
struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index 0c5239e05721..624166004e36 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -90,6 +90,7 @@ int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
{
struct kvm_cpu_context *cntx;
+ struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr;
/* Mark this VCPU never ran */
vcpu->arch.ran_atleast_once = false;
@@ -106,6 +107,9 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
cntx->hstatus |= HSTATUS_SPVP;
cntx->hstatus |= HSTATUS_SPV;
+ /* By default, make CY, TM, and IR counters accessible in VU mode */
+ reset_csr->scounteren = 0x7;
+
/* Setup VCPU timer */
kvm_riscv_vcpu_timer_init(vcpu);
@@ -699,6 +703,20 @@ static void kvm_riscv_update_hvip(struct kvm_vcpu *vcpu)
csr_write(CSR_HVIP, csr->hvip);
}
+/*
+ * Actually run the vCPU, entering an RCU extended quiescent state (EQS) while
+ * the vCPU is running.
+ *
+ * This must be noinstr as instrumentation may make use of RCU, and this is not
+ * safe during the EQS.
+ */
+static void noinstr kvm_riscv_vcpu_enter_exit(struct kvm_vcpu *vcpu)
+{
+ guest_state_enter_irqoff();
+ __kvm_riscv_switch_to(&vcpu->arch);
+ guest_state_exit_irqoff();
+}
+
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
int ret;
@@ -790,9 +808,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
continue;
}
- guest_enter_irqoff();
+ guest_timing_enter_irqoff();
- __kvm_riscv_switch_to(&vcpu->arch);
+ kvm_riscv_vcpu_enter_exit(vcpu);
vcpu->mode = OUTSIDE_GUEST_MODE;
vcpu->stat.exits++;
@@ -812,25 +830,21 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
kvm_riscv_vcpu_sync_interrupts(vcpu);
/*
- * We may have taken a host interrupt in VS/VU-mode (i.e.
- * while executing the guest). This interrupt is still
- * pending, as we haven't serviced it yet!
+ * We must ensure that any pending interrupts are taken before
+ * we exit guest timing so that timer ticks are accounted as
+ * guest time. Transiently unmask interrupts so that any
+ * pending interrupts are taken.
*
- * We're now back in HS-mode with interrupts disabled
- * so enabling the interrupts now will have the effect
- * of taking the interrupt again, in HS-mode this time.
+ * There's no barrier which ensures that pending interrupts are
+ * recognised, so we just hope that the CPU takes any pending
+ * interrupts between the enable and disable.
*/
local_irq_enable();
+ local_irq_disable();
- /*
- * We do local_irq_enable() before calling guest_exit() so
- * that if a timer interrupt hits while running the guest
- * we account that tick as being spent in the guest. We
- * enable preemption after calling guest_exit() so that if
- * we get preempted we make sure ticks after that is not
- * counted as guest time.
- */
- guest_exit();
+ guest_timing_exit_irqoff();
+
+ local_irq_enable();
preempt_enable();
diff --git a/arch/riscv/kvm/vcpu_sbi_base.c b/arch/riscv/kvm/vcpu_sbi_base.c
index 4ecf377f483b..48f431091cdb 100644
--- a/arch/riscv/kvm/vcpu_sbi_base.c
+++ b/arch/riscv/kvm/vcpu_sbi_base.c
@@ -9,6 +9,7 @@
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kvm_host.h>
+#include <linux/version.h>
#include <asm/csr.h>
#include <asm/sbi.h>
#include <asm/kvm_vcpu_timer.h>
@@ -32,7 +33,7 @@ static int kvm_sbi_ext_base_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
*out_val = KVM_SBI_IMPID;
break;
case SBI_EXT_BASE_GET_IMP_VERSION:
- *out_val = 0;
+ *out_val = LINUX_VERSION_CODE;
break;
case SBI_EXT_BASE_PROBE_EXT:
if ((cp->a0 >= SBI_EXT_EXPERIMENTAL_START &&
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 9750f92380f5..be9f39fd06df 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -945,6 +945,9 @@ config S390_GUEST
endmenu
+config S390_MODULES_SANITY_TEST_HELPERS
+ def_bool n
+
menu "Selftests"
config S390_UNWIND_SELFTEST
@@ -971,4 +974,16 @@ config S390_KPROBES_SANITY_TEST
Say N if you are unsure.
+config S390_MODULES_SANITY_TEST
+ def_tristate n
+ depends on KUNIT
+ default KUNIT_ALL_TESTS
+ prompt "Enable s390 specific modules tests"
+ select S390_MODULES_SANITY_TEST_HELPERS
+ help
+ This option enables an s390 specific modules test. This option is
+ not useful for distributions or general kernels, but only for
+ kernel developers working on architecture code.
+
+ Say N if you are unsure.
endmenu
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 7fe8975b49ec..498bed9b261b 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -63,6 +63,7 @@ CONFIG_APPLDATA_BASE=y
CONFIG_KVM=m
CONFIG_S390_UNWIND_SELFTEST=m
CONFIG_S390_KPROBES_SANITY_TEST=m
+CONFIG_S390_MODULES_SANITY_TEST=m
CONFIG_KPROBES=y
CONFIG_JUMP_LABEL=y
CONFIG_STATIC_KEYS_SELFTEST=y
@@ -96,7 +97,6 @@ CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
CONFIG_KSM=y
CONFIG_TRANSPARENT_HUGEPAGE=y
-CONFIG_FRONTSWAP=y
CONFIG_CMA_DEBUG=y
CONFIG_CMA_DEBUGFS=y
CONFIG_CMA_SYSFS=y
@@ -109,6 +109,7 @@ CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
CONFIG_IDLE_PAGE_TRACKING=y
CONFIG_PERCPU_STATS=y
CONFIG_GUP_TEST=y
+CONFIG_ANON_VMA_NAME=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_PACKET_DIAG=m
@@ -116,7 +117,6 @@ CONFIG_UNIX=y
CONFIG_UNIX_DIAG=m
CONFIG_XFRM_USER=m
CONFIG_NET_KEY=m
-CONFIG_NET_SWITCHDEV=y
CONFIG_SMC=m
CONFIG_SMC_DIAG=m
CONFIG_INET=y
@@ -185,7 +185,6 @@ CONFIG_NF_CT_NETLINK_TIMEOUT=m
CONFIG_NF_TABLES=m
CONFIG_NF_TABLES_INET=y
CONFIG_NFT_CT=m
-CONFIG_NFT_COUNTER=m
CONFIG_NFT_LOG=m
CONFIG_NFT_LIMIT=m
CONFIG_NFT_NAT=m
@@ -391,6 +390,7 @@ CONFIG_OPENVSWITCH=m
CONFIG_VSOCKETS=m
CONFIG_VIRTIO_VSOCKETS=m
CONFIG_NETLINK_DIAG=m
+CONFIG_NET_SWITCHDEV=y
CONFIG_CGROUP_NET_PRIO=y
CONFIG_NET_PKTGEN=m
CONFIG_PCI=y
@@ -400,6 +400,7 @@ CONFIG_PCI_IOV=y
CONFIG_HOTPLUG_PCI=y
CONFIG_HOTPLUG_PCI_S390=y
CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_SAFE=y
CONFIG_CONNECTOR=y
CONFIG_ZRAM=y
CONFIG_BLK_DEV_LOOP=m
@@ -501,6 +502,7 @@ CONFIG_NLMON=m
# CONFIG_NET_VENDOR_DEC is not set
# CONFIG_NET_VENDOR_DLINK is not set
# CONFIG_NET_VENDOR_EMULEX is not set
+# CONFIG_NET_VENDOR_ENGLEDER is not set
# CONFIG_NET_VENDOR_EZCHIP is not set
# CONFIG_NET_VENDOR_GOOGLE is not set
# CONFIG_NET_VENDOR_HUAWEI is not set
@@ -511,7 +513,6 @@ CONFIG_NLMON=m
CONFIG_MLX4_EN=m
CONFIG_MLX5_CORE=m
CONFIG_MLX5_CORE_EN=y
-CONFIG_MLX5_ESWITCH=y
# CONFIG_NET_VENDOR_MICREL is not set
# CONFIG_NET_VENDOR_MICROCHIP is not set
# CONFIG_NET_VENDOR_MICROSEMI is not set
@@ -542,6 +543,7 @@ CONFIG_MLX5_ESWITCH=y
# CONFIG_NET_VENDOR_SYNOPSYS is not set
# CONFIG_NET_VENDOR_TEHUTI is not set
# CONFIG_NET_VENDOR_TI is not set
+# CONFIG_NET_VENDOR_VERTEXCOM is not set
# CONFIG_NET_VENDOR_VIA is not set
# CONFIG_NET_VENDOR_WIZNET is not set
# CONFIG_NET_VENDOR_XILINX is not set
@@ -592,6 +594,7 @@ CONFIG_VIRTIO_BALLOON=m
CONFIG_VIRTIO_INPUT=y
CONFIG_VHOST_NET=m
CONFIG_VHOST_VSOCK=m
+# CONFIG_SURFACE_PLATFORMS is not set
CONFIG_S390_CCW_IOMMU=y
CONFIG_S390_AP_IOMMU=y
CONFIG_EXT4_FS=y
@@ -756,9 +759,6 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
CONFIG_CRYPTO_STATS=y
-CONFIG_CRYPTO_LIB_BLAKE2S=m
-CONFIG_CRYPTO_LIB_CURVE25519=m
-CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
CONFIG_ZCRYPT=m
CONFIG_PKEY=m
CONFIG_CRYPTO_PAES_S390=m
@@ -774,6 +774,8 @@ CONFIG_CRYPTO_GHASH_S390=m
CONFIG_CRYPTO_CRC32_S390=y
CONFIG_CRYPTO_DEV_VIRTIO=m
CONFIG_CORDIC=m
+CONFIG_CRYPTO_LIB_CURVE25519=m
+CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
CONFIG_CRC32_SELFTEST=y
CONFIG_CRC4=m
CONFIG_CRC7=m
@@ -807,7 +809,6 @@ CONFIG_SLUB_DEBUG_ON=y
CONFIG_SLUB_STATS=y
CONFIG_DEBUG_STACK_USAGE=y
CONFIG_DEBUG_VM=y
-CONFIG_DEBUG_VM_VMACACHE=y
CONFIG_DEBUG_VM_PGFLAGS=y
CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m
@@ -819,12 +820,11 @@ CONFIG_PANIC_ON_OOPS=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_WQ_WATCHDOG=y
CONFIG_TEST_LOCKUP=m
-CONFIG_DEBUG_TIMEKEEPING=y
CONFIG_PROVE_LOCKING=y
CONFIG_LOCK_STAT=y
-CONFIG_DEBUG_LOCKDEP=y
CONFIG_DEBUG_ATOMIC_SLEEP=y
CONFIG_DEBUG_LOCKING_API_SELFTESTS=y
+CONFIG_DEBUG_IRQFLAGS=y
CONFIG_DEBUG_SG=y
CONFIG_DEBUG_NOTIFIERS=y
CONFIG_BUG_ON_DATA_CORRUPTION=y
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index 466780c465f5..61e36b999f67 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -61,6 +61,7 @@ CONFIG_APPLDATA_BASE=y
CONFIG_KVM=m
CONFIG_S390_UNWIND_SELFTEST=m
CONFIG_S390_KPROBES_SANITY_TEST=m
+CONFIG_S390_MODULES_SANITY_TEST=m
CONFIG_KPROBES=y
CONFIG_JUMP_LABEL=y
# CONFIG_GCC_PLUGINS is not set
@@ -91,7 +92,6 @@ CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
CONFIG_KSM=y
CONFIG_TRANSPARENT_HUGEPAGE=y
-CONFIG_FRONTSWAP=y
CONFIG_CMA_SYSFS=y
CONFIG_CMA_AREAS=7
CONFIG_MEM_SOFT_DIRTY=y
@@ -101,6 +101,7 @@ CONFIG_ZSMALLOC_STAT=y
CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
CONFIG_IDLE_PAGE_TRACKING=y
CONFIG_PERCPU_STATS=y
+CONFIG_ANON_VMA_NAME=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_PACKET_DIAG=m
@@ -108,7 +109,6 @@ CONFIG_UNIX=y
CONFIG_UNIX_DIAG=m
CONFIG_XFRM_USER=m
CONFIG_NET_KEY=m
-CONFIG_NET_SWITCHDEV=y
CONFIG_SMC=m
CONFIG_SMC_DIAG=m
CONFIG_INET=y
@@ -177,7 +177,6 @@ CONFIG_NF_CT_NETLINK_TIMEOUT=m
CONFIG_NF_TABLES=m
CONFIG_NF_TABLES_INET=y
CONFIG_NFT_CT=m
-CONFIG_NFT_COUNTER=m
CONFIG_NFT_LOG=m
CONFIG_NFT_LIMIT=m
CONFIG_NFT_NAT=m
@@ -382,6 +381,7 @@ CONFIG_OPENVSWITCH=m
CONFIG_VSOCKETS=m
CONFIG_VIRTIO_VSOCKETS=m
CONFIG_NETLINK_DIAG=m
+CONFIG_NET_SWITCHDEV=y
CONFIG_CGROUP_NET_PRIO=y
CONFIG_NET_PKTGEN=m
CONFIG_PCI=y
@@ -391,6 +391,7 @@ CONFIG_HOTPLUG_PCI=y
CONFIG_HOTPLUG_PCI_S390=y
CONFIG_UEVENT_HELPER=y
CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_SAFE=y
CONFIG_CONNECTOR=y
CONFIG_ZRAM=y
CONFIG_BLK_DEV_LOOP=m
@@ -492,6 +493,7 @@ CONFIG_NLMON=m
# CONFIG_NET_VENDOR_DEC is not set
# CONFIG_NET_VENDOR_DLINK is not set
# CONFIG_NET_VENDOR_EMULEX is not set
+# CONFIG_NET_VENDOR_ENGLEDER is not set
# CONFIG_NET_VENDOR_EZCHIP is not set
# CONFIG_NET_VENDOR_GOOGLE is not set
# CONFIG_NET_VENDOR_HUAWEI is not set
@@ -502,7 +504,6 @@ CONFIG_NLMON=m
CONFIG_MLX4_EN=m
CONFIG_MLX5_CORE=m
CONFIG_MLX5_CORE_EN=y
-CONFIG_MLX5_ESWITCH=y
# CONFIG_NET_VENDOR_MICREL is not set
# CONFIG_NET_VENDOR_MICROCHIP is not set
# CONFIG_NET_VENDOR_MICROSEMI is not set
@@ -533,6 +534,7 @@ CONFIG_MLX5_ESWITCH=y
# CONFIG_NET_VENDOR_SYNOPSYS is not set
# CONFIG_NET_VENDOR_TEHUTI is not set
# CONFIG_NET_VENDOR_TI is not set
+# CONFIG_NET_VENDOR_VERTEXCOM is not set
# CONFIG_NET_VENDOR_VIA is not set
# CONFIG_NET_VENDOR_WIZNET is not set
# CONFIG_NET_VENDOR_XILINX is not set
@@ -582,6 +584,7 @@ CONFIG_VIRTIO_BALLOON=m
CONFIG_VIRTIO_INPUT=y
CONFIG_VHOST_NET=m
CONFIG_VHOST_VSOCK=m
+# CONFIG_SURFACE_PLATFORMS is not set
CONFIG_S390_CCW_IOMMU=y
CONFIG_S390_AP_IOMMU=y
CONFIG_EXT4_FS=y
@@ -743,9 +746,6 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
CONFIG_CRYPTO_STATS=y
-CONFIG_CRYPTO_LIB_BLAKE2S=m
-CONFIG_CRYPTO_LIB_CURVE25519=m
-CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
CONFIG_ZCRYPT=m
CONFIG_PKEY=m
CONFIG_CRYPTO_PAES_S390=m
@@ -762,6 +762,8 @@ CONFIG_CRYPTO_CRC32_S390=y
CONFIG_CRYPTO_DEV_VIRTIO=m
CONFIG_CORDIC=m
CONFIG_PRIME_NUMBERS=m
+CONFIG_CRYPTO_LIB_CURVE25519=m
+CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
CONFIG_CRC4=m
CONFIG_CRC7=m
CONFIG_CRC8=m
diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig
index eed3b9acfa71..c55c668dc3c7 100644
--- a/arch/s390/configs/zfcpdump_defconfig
+++ b/arch/s390/configs/zfcpdump_defconfig
@@ -1,6 +1,7 @@
# CONFIG_SWAP is not set
CONFIG_NO_HZ_IDLE=y
CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BPF_SYSCALL=y
# CONFIG_CPU_ISOLATION is not set
# CONFIG_UTS_NS is not set
# CONFIG_TIME_NS is not set
@@ -34,6 +35,7 @@ CONFIG_NET=y
# CONFIG_PCPU_DEV_REFCNT is not set
# CONFIG_ETHTOOL_NETLINK is not set
CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_SAFE=y
CONFIG_BLK_DEV_RAM=y
# CONFIG_DCSSBLK is not set
# CONFIG_DASD is not set
@@ -58,6 +60,7 @@ CONFIG_ZFCP=y
# CONFIG_HID is not set
# CONFIG_VIRTIO_MENU is not set
# CONFIG_VHOST_MENU is not set
+# CONFIG_SURFACE_PLATFORMS is not set
# CONFIG_IOMMU_SUPPORT is not set
# CONFIG_DNOTIFY is not set
# CONFIG_INOTIFY_USER is not set
diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c
index 33f973ff9744..e8f15dbb89d0 100644
--- a/arch/s390/hypfs/hypfs_vm.c
+++ b/arch/s390/hypfs/hypfs_vm.c
@@ -20,6 +20,7 @@
static char local_guest[] = " ";
static char all_guests[] = "* ";
+static char *all_groups = all_guests;
static char *guest_query;
struct diag2fc_data {
@@ -62,10 +63,11 @@ static int diag2fc(int size, char* query, void *addr)
memcpy(parm_list.userid, query, NAME_LEN);
ASCEBC(parm_list.userid, NAME_LEN);
- parm_list.addr = (unsigned long) addr ;
+ memcpy(parm_list.aci_grp, all_groups, NAME_LEN);
+ ASCEBC(parm_list.aci_grp, NAME_LEN);
+ parm_list.addr = (unsigned long)addr;
parm_list.size = size;
parm_list.fmt = 0x02;
- memset(parm_list.aci_grp, 0x40, NAME_LEN);
rc = -1;
diag_stat_inc(DIAG_STAT_X2FC);
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index 147cb3534ce4..d74e26b48604 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -47,8 +47,6 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n);
int __put_user_bad(void) __attribute__((noreturn));
int __get_user_bad(void) __attribute__((noreturn));
-#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
-
union oac {
unsigned int val;
struct {
@@ -71,6 +69,8 @@ union oac {
};
};
+#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
+
#define __put_get_user_asm(to, from, size, oac_spec) \
({ \
int __rc; \
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index d52d85367bf7..b032e556eeb7 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -33,7 +33,7 @@
#define DEBUGP(fmt , ...)
#endif
-#define PLT_ENTRY_SIZE 20
+#define PLT_ENTRY_SIZE 22
void *module_alloc(unsigned long size)
{
@@ -341,27 +341,26 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
case R_390_PLTOFF32: /* 32 bit offset from GOT to PLT. */
case R_390_PLTOFF64: /* 16 bit offset from GOT to PLT. */
if (info->plt_initialized == 0) {
- unsigned int insn[5];
- unsigned int *ip = me->core_layout.base +
- me->arch.plt_offset +
- info->plt_offset;
-
- insn[0] = 0x0d10e310; /* basr 1,0 */
- insn[1] = 0x100a0004; /* lg 1,10(1) */
+ unsigned char insn[PLT_ENTRY_SIZE];
+ char *plt_base;
+ char *ip;
+
+ plt_base = me->core_layout.base + me->arch.plt_offset;
+ ip = plt_base + info->plt_offset;
+ *(int *)insn = 0x0d10e310; /* basr 1,0 */
+ *(int *)&insn[4] = 0x100c0004; /* lg 1,12(1) */
if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_disable) {
- unsigned int *ij;
- ij = me->core_layout.base +
- me->arch.plt_offset +
- me->arch.plt_size - PLT_ENTRY_SIZE;
- insn[2] = 0xa7f40000 + /* j __jump_r1 */
- (unsigned int)(u16)
- (((unsigned long) ij - 8 -
- (unsigned long) ip) / 2);
+ char *jump_r1;
+
+ jump_r1 = plt_base + me->arch.plt_size -
+ PLT_ENTRY_SIZE;
+ /* brcl 0xf,__jump_r1 */
+ *(short *)&insn[8] = 0xc0f4;
+ *(int *)&insn[10] = (jump_r1 - (ip + 8)) / 2;
} else {
- insn[2] = 0x07f10000; /* br %r1 */
+ *(int *)&insn[8] = 0x07f10000; /* br %r1 */
}
- insn[3] = (unsigned int) (val >> 32);
- insn[4] = (unsigned int) val;
+ *(long *)&insn[14] = val;
write(ip, insn, sizeof(insn));
info->plt_initialized = 1;
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 0c9e894913dc..651a51914e34 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -264,7 +264,14 @@ static int notrace s390_validate_registers(union mci mci, int umode)
/* Validate vector registers */
union ctlreg0 cr0;
- if (!mci.vr) {
+ /*
+ * The vector validity must only be checked if not running a
+ * KVM guest. For KVM guests the machine check is forwarded by
+ * KVM and it is the responsibility of the guest to take
+ * appropriate actions. The host vector or FPU values have been
+ * saved by KVM and will be restored by KVM.
+ */
+ if (!mci.vr && !test_cpu_flag(CIF_MCCK_GUEST)) {
/*
* Vector registers can't be restored. If the kernel
* currently uses vector registers the system is
@@ -307,11 +314,21 @@ static int notrace s390_validate_registers(union mci mci, int umode)
if (cr2.gse) {
if (!mci.gs) {
/*
- * Guarded storage register can't be restored and
- * the current processes uses guarded storage.
- * It has to be terminated.
+ * 2 cases:
+ * - machine check in kernel or userspace
+ * - machine check while running SIE (KVM guest)
+ * For kernel or userspace the userspace values of
+ * guarded storage control can not be recreated, the
+ * process must be terminated.
+ * For SIE the guest values of guarded storage can not
+ * be recreated. This is either due to a bug or due to
+ * GS being disabled in the guest. The guest will be
+ * notified by KVM code and the guests machine check
+ * handling must take care of this. The host values
+ * are saved by KVM and are not affected.
*/
- kill_task = 1;
+ if (!test_cpu_flag(CIF_MCCK_GUEST))
+ kill_task = 1;
} else {
load_gs_cb((struct gs_cb *)mcesa->guarded_storage_save_area);
}
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 577f1ead6a51..2296b1ff1e02 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -4667,6 +4667,8 @@ static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
return -EINVAL;
if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
return -E2BIG;
+ if (!kvm_s390_pv_cpu_is_protected(vcpu))
+ return -EINVAL;
switch (mop->op) {
case KVM_S390_MEMOP_SIDA_READ:
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index 707cd4622c13..69feb8ed3312 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -17,4 +17,7 @@ KASAN_SANITIZE_uaccess.o := n
obj-$(CONFIG_S390_UNWIND_SELFTEST) += test_unwind.o
CFLAGS_test_unwind.o += -fno-optimize-sibling-calls
+obj-$(CONFIG_S390_MODULES_SANITY_TEST) += test_modules.o
+obj-$(CONFIG_S390_MODULES_SANITY_TEST_HELPERS) += test_modules_helpers.o
+
lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
diff --git a/arch/s390/lib/test_modules.c b/arch/s390/lib/test_modules.c
new file mode 100644
index 000000000000..9894009fc1f2
--- /dev/null
+++ b/arch/s390/lib/test_modules.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <kunit/test.h>
+#include <linux/module.h>
+
+#include "test_modules.h"
+
+/*
+ * Test that modules with many relocations are loaded properly.
+ */
+static void test_modules_many_vmlinux_relocs(struct kunit *test)
+{
+ int result = 0;
+
+#define CALL_RETURN(i) result += test_modules_return_ ## i()
+ REPEAT_10000(CALL_RETURN);
+ KUNIT_ASSERT_EQ(test, result, 49995000);
+}
+
+static struct kunit_case modules_testcases[] = {
+ KUNIT_CASE(test_modules_many_vmlinux_relocs),
+ {}
+};
+
+static struct kunit_suite modules_test_suite = {
+ .name = "modules_test_s390",
+ .test_cases = modules_testcases,
+};
+
+kunit_test_suites(&modules_test_suite);
+
+MODULE_LICENSE("GPL");
diff --git a/arch/s390/lib/test_modules.h b/arch/s390/lib/test_modules.h
new file mode 100644
index 000000000000..6371fcf17684
--- /dev/null
+++ b/arch/s390/lib/test_modules.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+#ifndef TEST_MODULES_H
+#define TEST_MODULES_H
+
+#define __REPEAT_10000_3(f, x) \
+ f(x ## 0); \
+ f(x ## 1); \
+ f(x ## 2); \
+ f(x ## 3); \
+ f(x ## 4); \
+ f(x ## 5); \
+ f(x ## 6); \
+ f(x ## 7); \
+ f(x ## 8); \
+ f(x ## 9)
+#define __REPEAT_10000_2(f, x) \
+ __REPEAT_10000_3(f, x ## 0); \
+ __REPEAT_10000_3(f, x ## 1); \
+ __REPEAT_10000_3(f, x ## 2); \
+ __REPEAT_10000_3(f, x ## 3); \
+ __REPEAT_10000_3(f, x ## 4); \
+ __REPEAT_10000_3(f, x ## 5); \
+ __REPEAT_10000_3(f, x ## 6); \
+ __REPEAT_10000_3(f, x ## 7); \
+ __REPEAT_10000_3(f, x ## 8); \
+ __REPEAT_10000_3(f, x ## 9)
+#define __REPEAT_10000_1(f, x) \
+ __REPEAT_10000_2(f, x ## 0); \
+ __REPEAT_10000_2(f, x ## 1); \
+ __REPEAT_10000_2(f, x ## 2); \
+ __REPEAT_10000_2(f, x ## 3); \
+ __REPEAT_10000_2(f, x ## 4); \
+ __REPEAT_10000_2(f, x ## 5); \
+ __REPEAT_10000_2(f, x ## 6); \
+ __REPEAT_10000_2(f, x ## 7); \
+ __REPEAT_10000_2(f, x ## 8); \
+ __REPEAT_10000_2(f, x ## 9)
+#define REPEAT_10000(f) \
+ __REPEAT_10000_1(f, 0); \
+ __REPEAT_10000_1(f, 1); \
+ __REPEAT_10000_1(f, 2); \
+ __REPEAT_10000_1(f, 3); \
+ __REPEAT_10000_1(f, 4); \
+ __REPEAT_10000_1(f, 5); \
+ __REPEAT_10000_1(f, 6); \
+ __REPEAT_10000_1(f, 7); \
+ __REPEAT_10000_1(f, 8); \
+ __REPEAT_10000_1(f, 9)
+
+#define DECLARE_RETURN(i) int test_modules_return_ ## i(void)
+REPEAT_10000(DECLARE_RETURN);
+
+#endif
diff --git a/arch/s390/lib/test_modules_helpers.c b/arch/s390/lib/test_modules_helpers.c
new file mode 100644
index 000000000000..1670349a03eb
--- /dev/null
+++ b/arch/s390/lib/test_modules_helpers.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <linux/export.h>
+
+#include "test_modules.h"
+
+#define DEFINE_RETURN(i) \
+ int test_modules_return_ ## i(void) \
+ { \
+ return 1 ## i - 10000; \
+ } \
+ EXPORT_SYMBOL_GPL(test_modules_return_ ## i)
+REPEAT_10000(DEFINE_RETURN);
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ebe8fc76949a..9f5bd41bf660 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -186,6 +186,7 @@ config X86
select HAVE_CONTEXT_TRACKING_OFFSTACK if HAVE_CONTEXT_TRACKING
select HAVE_C_RECORDMCOUNT
select HAVE_OBJTOOL_MCOUNT if STACK_VALIDATION
+ select HAVE_BUILDTIME_MCOUNT_SORT
select HAVE_DEBUG_KMEMLEAK
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
diff --git a/arch/x86/crypto/blake2s-shash.c b/arch/x86/crypto/blake2s-shash.c
index f9e2fecdb761..59ae28abe35c 100644
--- a/arch/x86/crypto/blake2s-shash.c
+++ b/arch/x86/crypto/blake2s-shash.c
@@ -18,12 +18,12 @@
static int crypto_blake2s_update_x86(struct shash_desc *desc,
const u8 *in, unsigned int inlen)
{
- return crypto_blake2s_update(desc, in, inlen, blake2s_compress);
+ return crypto_blake2s_update(desc, in, inlen, false);
}
static int crypto_blake2s_final_x86(struct shash_desc *desc, u8 *out)
{
- return crypto_blake2s_final(desc, out, blake2s_compress);
+ return crypto_blake2s_final(desc, out, false);
}
#define BLAKE2S_ALG(name, driver_name, digest_size) \
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index fd9f908debe5..a3c7ca876aeb 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -4703,6 +4703,19 @@ static __initconst const struct x86_pmu intel_pmu = {
.lbr_read = intel_pmu_lbr_read_64,
.lbr_save = intel_pmu_lbr_save,
.lbr_restore = intel_pmu_lbr_restore,
+
+ /*
+ * SMM has access to all 4 rings and while traditionally SMM code only
+ * ran in CPL0, 2021-era firmware is starting to make use of CPL3 in SMM.
+ *
+ * Since the EVENTSEL.{USR,OS} CPL filtering makes no distinction
+ * between SMM or not, this results in what should be pure userspace
+ * counters including SMM data.
+ *
+ * This is a clear privilege issue, therefore globally disable
+ * counting SMM by default.
+ */
+ .attr_freeze_on_smi = 1,
};
static __init void intel_clovertown_quirk(void)
@@ -6236,6 +6249,19 @@ __init int intel_pmu_init(void)
pmu->num_counters = x86_pmu.num_counters;
pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
}
+
+ /*
+ * Quirk: For some Alder Lake machine, when all E-cores are disabled in
+ * a BIOS, the leaf 0xA will enumerate all counters of P-cores. However,
+ * the X86_FEATURE_HYBRID_CPU is still set. The above codes will
+ * mistakenly add extra counters for P-cores. Correct the number of
+ * counters here.
+ */
+ if ((pmu->num_counters > 8) || (pmu->num_counters_fixed > 4)) {
+ pmu->num_counters = x86_pmu.num_counters;
+ pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
+ }
+
pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters);
pmu->unconstrained = (struct event_constraint)
__EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
@@ -6340,6 +6366,8 @@ __init int intel_pmu_init(void)
}
if (x86_pmu.lbr_nr) {
+ intel_pmu_lbr_init();
+
pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr);
/* only support branch_stack snapshot for perfmon >= v2 */
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 8043213b75a5..669c2be14784 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -8,14 +8,6 @@
#include "../perf_event.h"
-static const enum {
- LBR_EIP_FLAGS = 1,
- LBR_TSX = 2,
-} lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = {
- [LBR_FORMAT_EIP_FLAGS] = LBR_EIP_FLAGS,
- [LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX,
-};
-
/*
* Intel LBR_SELECT bits
* Intel Vol3a, April 2011, Section 16.7 Table 16-10
@@ -243,7 +235,7 @@ void intel_pmu_lbr_reset_64(void)
for (i = 0; i < x86_pmu.lbr_nr; i++) {
wrmsrl(x86_pmu.lbr_from + i, 0);
wrmsrl(x86_pmu.lbr_to + i, 0);
- if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
+ if (x86_pmu.lbr_has_info)
wrmsrl(x86_pmu.lbr_info + i, 0);
}
}
@@ -305,11 +297,10 @@ enum {
*/
static inline bool lbr_from_signext_quirk_needed(void)
{
- int lbr_format = x86_pmu.intel_cap.lbr_format;
bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) ||
boot_cpu_has(X86_FEATURE_RTM);
- return !tsx_support && (lbr_desc[lbr_format] & LBR_TSX);
+ return !tsx_support && x86_pmu.lbr_has_tsx;
}
static DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key);
@@ -427,12 +418,12 @@ rdlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info)
void intel_pmu_lbr_restore(void *ctx)
{
- bool need_info = x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO;
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct x86_perf_task_context *task_ctx = ctx;
- int i;
- unsigned lbr_idx, mask;
+ bool need_info = x86_pmu.lbr_has_info;
u64 tos = task_ctx->tos;
+ unsigned lbr_idx, mask;
+ int i;
mask = x86_pmu.lbr_nr - 1;
for (i = 0; i < task_ctx->valid_lbrs; i++) {
@@ -444,7 +435,7 @@ void intel_pmu_lbr_restore(void *ctx)
lbr_idx = (tos - i) & mask;
wrlbr_from(lbr_idx, 0);
wrlbr_to(lbr_idx, 0);
- if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
+ if (need_info)
wrlbr_info(lbr_idx, 0);
}
@@ -519,9 +510,9 @@ static void __intel_pmu_lbr_restore(void *ctx)
void intel_pmu_lbr_save(void *ctx)
{
- bool need_info = x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO;
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct x86_perf_task_context *task_ctx = ctx;
+ bool need_info = x86_pmu.lbr_has_info;
unsigned lbr_idx, mask;
u64 tos;
int i;
@@ -816,7 +807,6 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
{
bool need_info = false, call_stack = false;
unsigned long mask = x86_pmu.lbr_nr - 1;
- int lbr_format = x86_pmu.intel_cap.lbr_format;
u64 tos = intel_pmu_lbr_tos();
int i;
int out = 0;
@@ -831,9 +821,7 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
for (i = 0; i < num; i++) {
unsigned long lbr_idx = (tos - i) & mask;
u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
- int skip = 0;
u16 cycles = 0;
- int lbr_flags = lbr_desc[lbr_format];
from = rdlbr_from(lbr_idx, NULL);
to = rdlbr_to(lbr_idx, NULL);
@@ -845,37 +833,39 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
if (call_stack && !from)
break;
- if (lbr_format == LBR_FORMAT_INFO && need_info) {
- u64 info;
-
- info = rdlbr_info(lbr_idx, NULL);
- mis = !!(info & LBR_INFO_MISPRED);
- pred = !mis;
- in_tx = !!(info & LBR_INFO_IN_TX);
- abort = !!(info & LBR_INFO_ABORT);
- cycles = (info & LBR_INFO_CYCLES);
- }
-
- if (lbr_format == LBR_FORMAT_TIME) {
- mis = !!(from & LBR_FROM_FLAG_MISPRED);
- pred = !mis;
- skip = 1;
- cycles = ((to >> 48) & LBR_INFO_CYCLES);
-
- to = (u64)((((s64)to) << 16) >> 16);
- }
-
- if (lbr_flags & LBR_EIP_FLAGS) {
- mis = !!(from & LBR_FROM_FLAG_MISPRED);
- pred = !mis;
- skip = 1;
- }
- if (lbr_flags & LBR_TSX) {
- in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
- abort = !!(from & LBR_FROM_FLAG_ABORT);
- skip = 3;
+ if (x86_pmu.lbr_has_info) {
+ if (need_info) {
+ u64 info;
+
+ info = rdlbr_info(lbr_idx, NULL);
+ mis = !!(info & LBR_INFO_MISPRED);
+ pred = !mis;
+ cycles = (info & LBR_INFO_CYCLES);
+ if (x86_pmu.lbr_has_tsx) {
+ in_tx = !!(info & LBR_INFO_IN_TX);
+ abort = !!(info & LBR_INFO_ABORT);
+ }
+ }
+ } else {
+ int skip = 0;
+
+ if (x86_pmu.lbr_from_flags) {
+ mis = !!(from & LBR_FROM_FLAG_MISPRED);
+ pred = !mis;
+ skip = 1;
+ }
+ if (x86_pmu.lbr_has_tsx) {
+ in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
+ abort = !!(from & LBR_FROM_FLAG_ABORT);
+ skip = 3;
+ }
+ from = (u64)((((s64)from) << skip) >> skip);
+
+ if (x86_pmu.lbr_to_cycles) {
+ cycles = ((to >> 48) & LBR_INFO_CYCLES);
+ to = (u64)((((s64)to) << 16) >> 16);
+ }
}
- from = (u64)((((s64)from) << skip) >> skip);
/*
* Some CPUs report duplicated abort records,
@@ -903,37 +893,40 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
cpuc->lbr_stack.hw_idx = tos;
}
+static DEFINE_STATIC_KEY_FALSE(x86_lbr_mispred);
+static DEFINE_STATIC_KEY_FALSE(x86_lbr_cycles);
+static DEFINE_STATIC_KEY_FALSE(x86_lbr_type);
+
static __always_inline int get_lbr_br_type(u64 info)
{
- if (!static_cpu_has(X86_FEATURE_ARCH_LBR) || !x86_pmu.lbr_br_type)
- return 0;
+ int type = 0;
- return (info & LBR_INFO_BR_TYPE) >> LBR_INFO_BR_TYPE_OFFSET;
+ if (static_branch_likely(&x86_lbr_type))
+ type = (info & LBR_INFO_BR_TYPE) >> LBR_INFO_BR_TYPE_OFFSET;
+
+ return type;
}
static __always_inline bool get_lbr_mispred(u64 info)
{
- if (static_cpu_has(X86_FEATURE_ARCH_LBR) && !x86_pmu.lbr_mispred)
- return 0;
+ bool mispred = 0;
- return !!(info & LBR_INFO_MISPRED);
-}
+ if (static_branch_likely(&x86_lbr_mispred))
+ mispred = !!(info & LBR_INFO_MISPRED);
-static __always_inline bool get_lbr_predicted(u64 info)
-{
- if (static_cpu_has(X86_FEATURE_ARCH_LBR) && !x86_pmu.lbr_mispred)
- return 0;
-
- return !(info & LBR_INFO_MISPRED);
+ return mispred;
}
static __always_inline u16 get_lbr_cycles(u64 info)
{
+ u16 cycles = info & LBR_INFO_CYCLES;
+
if (static_cpu_has(X86_FEATURE_ARCH_LBR) &&
- !(x86_pmu.lbr_timed_lbr && info & LBR_INFO_CYC_CNT_VALID))
- return 0;
+ (!static_branch_likely(&x86_lbr_cycles) ||
+ !(info & LBR_INFO_CYC_CNT_VALID)))
+ cycles = 0;
- return info & LBR_INFO_CYCLES;
+ return cycles;
}
static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
@@ -961,7 +954,7 @@ static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
e->from = from;
e->to = to;
e->mispred = get_lbr_mispred(info);
- e->predicted = get_lbr_predicted(info);
+ e->predicted = !e->mispred;
e->in_tx = !!(info & LBR_INFO_IN_TX);
e->abort = !!(info & LBR_INFO_ABORT);
e->cycles = get_lbr_cycles(info);
@@ -1120,7 +1113,7 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) &&
(br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) &&
- (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO))
+ x86_pmu.lbr_has_info)
reg->config |= LBR_NO_INFO;
return 0;
@@ -1706,6 +1699,38 @@ void intel_pmu_lbr_init_knl(void)
x86_pmu.intel_cap.lbr_format = LBR_FORMAT_EIP_FLAGS;
}
+void intel_pmu_lbr_init(void)
+{
+ switch (x86_pmu.intel_cap.lbr_format) {
+ case LBR_FORMAT_EIP_FLAGS2:
+ x86_pmu.lbr_has_tsx = 1;
+ fallthrough;
+ case LBR_FORMAT_EIP_FLAGS:
+ x86_pmu.lbr_from_flags = 1;
+ break;
+
+ case LBR_FORMAT_INFO:
+ x86_pmu.lbr_has_tsx = 1;
+ fallthrough;
+ case LBR_FORMAT_INFO2:
+ x86_pmu.lbr_has_info = 1;
+ break;
+
+ case LBR_FORMAT_TIME:
+ x86_pmu.lbr_from_flags = 1;
+ x86_pmu.lbr_to_cycles = 1;
+ break;
+ }
+
+ if (x86_pmu.lbr_has_info) {
+ /*
+ * Only used in combination with baseline pebs.
+ */
+ static_branch_enable(&x86_lbr_mispred);
+ static_branch_enable(&x86_lbr_cycles);
+ }
+}
+
/*
* LBR state size is variable based on the max number of registers.
* This calculates the expected state size, which should match
@@ -1726,6 +1751,9 @@ static bool is_arch_lbr_xsave_available(void)
* Check the LBR state with the corresponding software structure.
* Disable LBR XSAVES support if the size doesn't match.
*/
+ if (xfeature_size(XFEATURE_LBR) == 0)
+ return false;
+
if (WARN_ON(xfeature_size(XFEATURE_LBR) != get_lbr_state_size()))
return false;
@@ -1765,6 +1793,12 @@ void __init intel_pmu_arch_lbr_init(void)
x86_pmu.lbr_br_type = ecx.split.lbr_br_type;
x86_pmu.lbr_nr = lbr_nr;
+ if (x86_pmu.lbr_mispred)
+ static_branch_enable(&x86_lbr_mispred);
+ if (x86_pmu.lbr_timed_lbr)
+ static_branch_enable(&x86_lbr_cycles);
+ if (x86_pmu.lbr_br_type)
+ static_branch_enable(&x86_lbr_type);
arch_lbr_xsave = is_arch_lbr_xsave_available();
if (arch_lbr_xsave) {
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 7f406c14715f..2d33bba9a144 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -897,8 +897,9 @@ static void pt_handle_status(struct pt *pt)
* means we are already losing data; need to let the decoder
* know.
*/
- if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries) ||
- buf->output_off == pt_buffer_region_size(buf)) {
+ if (!buf->single &&
+ (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries) ||
+ buf->output_off == pt_buffer_region_size(buf))) {
perf_aux_output_flag(&pt->handle,
PERF_AUX_FLAG_TRUNCATED);
advance++;
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index f1ba6ab2e97e..e497da9bf427 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1762,7 +1762,7 @@ static const struct intel_uncore_init_fun rkl_uncore_init __initconst = {
static const struct intel_uncore_init_fun adl_uncore_init __initconst = {
.cpu_init = adl_uncore_cpu_init,
- .mmio_init = tgl_uncore_mmio_init,
+ .mmio_init = adl_uncore_mmio_init,
};
static const struct intel_uncore_init_fun icx_uncore_init __initconst = {
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index b9687980aab6..2adeaf4de4df 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -584,10 +584,11 @@ void snb_uncore_cpu_init(void);
void nhm_uncore_cpu_init(void);
void skl_uncore_cpu_init(void);
void icl_uncore_cpu_init(void);
-void adl_uncore_cpu_init(void);
void tgl_uncore_cpu_init(void);
+void adl_uncore_cpu_init(void);
void tgl_uncore_mmio_init(void);
void tgl_l_uncore_mmio_init(void);
+void adl_uncore_mmio_init(void);
int snb_pci2phy_map_init(int devid);
/* uncore_snbep.c */
diff --git a/arch/x86/events/intel/uncore_discovery.c b/arch/x86/events/intel/uncore_discovery.c
index 3049c646fa20..6ddadb482f68 100644
--- a/arch/x86/events/intel/uncore_discovery.c
+++ b/arch/x86/events/intel/uncore_discovery.c
@@ -494,8 +494,8 @@ void intel_generic_uncore_mmio_enable_box(struct intel_uncore_box *box)
writel(0, box->io_addr);
}
-static void intel_generic_uncore_mmio_enable_event(struct intel_uncore_box *box,
- struct perf_event *event)
+void intel_generic_uncore_mmio_enable_event(struct intel_uncore_box *box,
+ struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
diff --git a/arch/x86/events/intel/uncore_discovery.h b/arch/x86/events/intel/uncore_discovery.h
index 6d735611c281..cfaf558bdb6b 100644
--- a/arch/x86/events/intel/uncore_discovery.h
+++ b/arch/x86/events/intel/uncore_discovery.h
@@ -139,6 +139,8 @@ void intel_generic_uncore_mmio_disable_box(struct intel_uncore_box *box);
void intel_generic_uncore_mmio_enable_box(struct intel_uncore_box *box);
void intel_generic_uncore_mmio_disable_event(struct intel_uncore_box *box,
struct perf_event *event);
+void intel_generic_uncore_mmio_enable_event(struct intel_uncore_box *box,
+ struct perf_event *event);
void intel_generic_uncore_pci_init_box(struct intel_uncore_box *box);
void intel_generic_uncore_pci_disable_box(struct intel_uncore_box *box);
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index 0f63706cdadf..f698a55bde81 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Nehalem/SandBridge/Haswell/Broadwell/Skylake uncore support */
#include "uncore.h"
+#include "uncore_discovery.h"
/* Uncore IMC PCI IDs */
#define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100
@@ -64,6 +65,20 @@
#define PCI_DEVICE_ID_INTEL_RKL_2_IMC 0x4c53
#define PCI_DEVICE_ID_INTEL_ADL_1_IMC 0x4660
#define PCI_DEVICE_ID_INTEL_ADL_2_IMC 0x4641
+#define PCI_DEVICE_ID_INTEL_ADL_3_IMC 0x4601
+#define PCI_DEVICE_ID_INTEL_ADL_4_IMC 0x4602
+#define PCI_DEVICE_ID_INTEL_ADL_5_IMC 0x4609
+#define PCI_DEVICE_ID_INTEL_ADL_6_IMC 0x460a
+#define PCI_DEVICE_ID_INTEL_ADL_7_IMC 0x4621
+#define PCI_DEVICE_ID_INTEL_ADL_8_IMC 0x4623
+#define PCI_DEVICE_ID_INTEL_ADL_9_IMC 0x4629
+#define PCI_DEVICE_ID_INTEL_ADL_10_IMC 0x4637
+#define PCI_DEVICE_ID_INTEL_ADL_11_IMC 0x463b
+#define PCI_DEVICE_ID_INTEL_ADL_12_IMC 0x4648
+#define PCI_DEVICE_ID_INTEL_ADL_13_IMC 0x4649
+#define PCI_DEVICE_ID_INTEL_ADL_14_IMC 0x4650
+#define PCI_DEVICE_ID_INTEL_ADL_15_IMC 0x4668
+#define PCI_DEVICE_ID_INTEL_ADL_16_IMC 0x4670
/* SNB event control */
#define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff
@@ -155,6 +170,7 @@
DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(chmask, chmask, "config:8-11");
DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23");
DEFINE_UNCORE_FORMAT_ATTR(cmask5, cmask, "config:24-28");
@@ -1334,6 +1350,62 @@ static const struct pci_device_id tgl_uncore_pci_ids[] = {
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_2_IMC),
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
},
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_3_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_4_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_5_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_6_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_7_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_8_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_9_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_10_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_11_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_12_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_13_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_14_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_15_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_16_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
{ /* end: all zeroes */ }
};
@@ -1390,7 +1462,8 @@ static struct pci_dev *tgl_uncore_get_mc_dev(void)
#define TGL_UNCORE_MMIO_IMC_MEM_OFFSET 0x10000
#define TGL_UNCORE_PCI_IMC_MAP_SIZE 0xe000
-static void tgl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box)
+static void __uncore_imc_init_box(struct intel_uncore_box *box,
+ unsigned int base_offset)
{
struct pci_dev *pdev = tgl_uncore_get_mc_dev();
struct intel_uncore_pmu *pmu = box->pmu;
@@ -1417,11 +1490,17 @@ static void tgl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box)
addr |= ((resource_size_t)mch_bar << 32);
#endif
+ addr += base_offset;
box->io_addr = ioremap(addr, type->mmio_map_size);
if (!box->io_addr)
pr_warn("perf uncore: Failed to ioremap for %s.\n", type->name);
}
+static void tgl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box)
+{
+ __uncore_imc_init_box(box, 0);
+}
+
static struct intel_uncore_ops tgl_uncore_imc_freerunning_ops = {
.init_box = tgl_uncore_imc_freerunning_init_box,
.exit_box = uncore_mmio_exit_box,
@@ -1469,3 +1548,136 @@ void tgl_uncore_mmio_init(void)
}
/* end of Tiger Lake MMIO uncore support */
+
+/* Alder Lake MMIO uncore support */
+#define ADL_UNCORE_IMC_BASE 0xd900
+#define ADL_UNCORE_IMC_MAP_SIZE 0x200
+#define ADL_UNCORE_IMC_CTR 0xe8
+#define ADL_UNCORE_IMC_CTRL 0xd0
+#define ADL_UNCORE_IMC_GLOBAL_CTL 0xc0
+#define ADL_UNCORE_IMC_BOX_CTL 0xc4
+#define ADL_UNCORE_IMC_FREERUNNING_BASE 0xd800
+#define ADL_UNCORE_IMC_FREERUNNING_MAP_SIZE 0x100
+
+#define ADL_UNCORE_IMC_CTL_FRZ (1 << 0)
+#define ADL_UNCORE_IMC_CTL_RST_CTRL (1 << 1)
+#define ADL_UNCORE_IMC_CTL_RST_CTRS (1 << 2)
+#define ADL_UNCORE_IMC_CTL_INT (ADL_UNCORE_IMC_CTL_RST_CTRL | \
+ ADL_UNCORE_IMC_CTL_RST_CTRS)
+
+static void adl_uncore_imc_init_box(struct intel_uncore_box *box)
+{
+ __uncore_imc_init_box(box, ADL_UNCORE_IMC_BASE);
+
+ /* The global control in MC1 can control both MCs. */
+ if (box->io_addr && (box->pmu->pmu_idx == 1))
+ writel(ADL_UNCORE_IMC_CTL_INT, box->io_addr + ADL_UNCORE_IMC_GLOBAL_CTL);
+}
+
+static void adl_uncore_mmio_disable_box(struct intel_uncore_box *box)
+{
+ if (!box->io_addr)
+ return;
+
+ writel(ADL_UNCORE_IMC_CTL_FRZ, box->io_addr + uncore_mmio_box_ctl(box));
+}
+
+static void adl_uncore_mmio_enable_box(struct intel_uncore_box *box)
+{
+ if (!box->io_addr)
+ return;
+
+ writel(0, box->io_addr + uncore_mmio_box_ctl(box));
+}
+
+static struct intel_uncore_ops adl_uncore_mmio_ops = {
+ .init_box = adl_uncore_imc_init_box,
+ .exit_box = uncore_mmio_exit_box,
+ .disable_box = adl_uncore_mmio_disable_box,
+ .enable_box = adl_uncore_mmio_enable_box,
+ .disable_event = intel_generic_uncore_mmio_disable_event,
+ .enable_event = intel_generic_uncore_mmio_enable_event,
+ .read_counter = uncore_mmio_read_counter,
+};
+
+#define ADL_UNC_CTL_CHMASK_MASK 0x00000f00
+#define ADL_UNC_IMC_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \
+ ADL_UNC_CTL_CHMASK_MASK | \
+ SNB_UNC_CTL_EDGE_DET)
+
+static struct attribute *adl_uncore_imc_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_chmask.attr,
+ &format_attr_edge.attr,
+ NULL,
+};
+
+static const struct attribute_group adl_uncore_imc_format_group = {
+ .name = "format",
+ .attrs = adl_uncore_imc_formats_attr,
+};
+
+static struct intel_uncore_type adl_uncore_imc = {
+ .name = "imc",
+ .num_counters = 5,
+ .num_boxes = 2,
+ .perf_ctr_bits = 64,
+ .perf_ctr = ADL_UNCORE_IMC_CTR,
+ .event_ctl = ADL_UNCORE_IMC_CTRL,
+ .event_mask = ADL_UNC_IMC_EVENT_MASK,
+ .box_ctl = ADL_UNCORE_IMC_BOX_CTL,
+ .mmio_offset = 0,
+ .mmio_map_size = ADL_UNCORE_IMC_MAP_SIZE,
+ .ops = &adl_uncore_mmio_ops,
+ .format_group = &adl_uncore_imc_format_group,
+};
+
+enum perf_adl_uncore_imc_freerunning_types {
+ ADL_MMIO_UNCORE_IMC_DATA_TOTAL,
+ ADL_MMIO_UNCORE_IMC_DATA_READ,
+ ADL_MMIO_UNCORE_IMC_DATA_WRITE,
+ ADL_MMIO_UNCORE_IMC_FREERUNNING_TYPE_MAX
+};
+
+static struct freerunning_counters adl_uncore_imc_freerunning[] = {
+ [ADL_MMIO_UNCORE_IMC_DATA_TOTAL] = { 0x40, 0x0, 0x0, 1, 64 },
+ [ADL_MMIO_UNCORE_IMC_DATA_READ] = { 0x58, 0x0, 0x0, 1, 64 },
+ [ADL_MMIO_UNCORE_IMC_DATA_WRITE] = { 0xA0, 0x0, 0x0, 1, 64 },
+};
+
+static void adl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box)
+{
+ __uncore_imc_init_box(box, ADL_UNCORE_IMC_FREERUNNING_BASE);
+}
+
+static struct intel_uncore_ops adl_uncore_imc_freerunning_ops = {
+ .init_box = adl_uncore_imc_freerunning_init_box,
+ .exit_box = uncore_mmio_exit_box,
+ .read_counter = uncore_mmio_read_counter,
+ .hw_config = uncore_freerunning_hw_config,
+};
+
+static struct intel_uncore_type adl_uncore_imc_free_running = {
+ .name = "imc_free_running",
+ .num_counters = 3,
+ .num_boxes = 2,
+ .num_freerunning_types = ADL_MMIO_UNCORE_IMC_FREERUNNING_TYPE_MAX,
+ .mmio_map_size = ADL_UNCORE_IMC_FREERUNNING_MAP_SIZE,
+ .freerunning = adl_uncore_imc_freerunning,
+ .ops = &adl_uncore_imc_freerunning_ops,
+ .event_descs = tgl_uncore_imc_events,
+ .format_group = &tgl_uncore_imc_format_group,
+};
+
+static struct intel_uncore_type *adl_mmio_uncores[] = {
+ &adl_uncore_imc,
+ &adl_uncore_imc_free_running,
+ NULL
+};
+
+void adl_uncore_mmio_init(void)
+{
+ uncore_mmio_uncores = adl_mmio_uncores;
+}
+
+/* end of Alder Lake MMIO uncore support */
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 3660f698fb2a..ed869443efb2 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -5482,7 +5482,7 @@ static struct intel_uncore_type icx_uncore_imc = {
.fixed_ctr_bits = 48,
.fixed_ctr = SNR_IMC_MMIO_PMON_FIXED_CTR,
.fixed_ctl = SNR_IMC_MMIO_PMON_FIXED_CTL,
- .event_descs = hswep_uncore_imc_events,
+ .event_descs = snr_uncore_imc_events,
.perf_ctr = SNR_IMC_MMIO_PMON_CTR0,
.event_ctl = SNR_IMC_MMIO_PMON_CTL0,
.event_mask = SNBEP_PMON_RAW_EVENT_MASK,
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 9d376e528dfc..150261d929b9 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -215,7 +215,8 @@ enum {
LBR_FORMAT_EIP_FLAGS2 = 0x04,
LBR_FORMAT_INFO = 0x05,
LBR_FORMAT_TIME = 0x06,
- LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_TIME,
+ LBR_FORMAT_INFO2 = 0x07,
+ LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_INFO2,
};
enum {
@@ -840,6 +841,11 @@ struct x86_pmu {
bool lbr_double_abort; /* duplicated lbr aborts */
bool lbr_pt_coexist; /* (LBR|BTS) may coexist with PT */
+ unsigned int lbr_has_info:1;
+ unsigned int lbr_has_tsx:1;
+ unsigned int lbr_from_flags:1;
+ unsigned int lbr_to_cycles:1;
+
/*
* Intel Architectural LBR CPUID Enumeration
*/
@@ -1392,6 +1398,8 @@ void intel_pmu_lbr_init_skl(void);
void intel_pmu_lbr_init_knl(void);
+void intel_pmu_lbr_init(void);
+
void intel_pmu_arch_lbr_init(void);
void intel_pmu_pebs_data_source_nhm(void);
diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c
index 85feafacc445..77e3a47af5ad 100644
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -536,11 +536,14 @@ static struct perf_msr intel_rapl_spr_msrs[] = {
* - perf_msr_probe(PERF_RAPL_MAX)
* - want to use same event codes across both architectures
*/
-static struct perf_msr amd_rapl_msrs[PERF_RAPL_MAX] = {
- [PERF_RAPL_PKG] = { MSR_AMD_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr },
+static struct perf_msr amd_rapl_msrs[] = {
+ [PERF_RAPL_PP0] = { 0, &rapl_events_cores_group, 0, false, 0 },
+ [PERF_RAPL_PKG] = { MSR_AMD_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr, false, RAPL_MSR_MASK },
+ [PERF_RAPL_RAM] = { 0, &rapl_events_ram_group, 0, false, 0 },
+ [PERF_RAPL_PP1] = { 0, &rapl_events_gpu_group, 0, false, 0 },
+ [PERF_RAPL_PSYS] = { 0, &rapl_events_psys_group, 0, false, 0 },
};
-
static int rapl_cpu_offline(unsigned int cpu)
{
struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index 84b87538a15d..bab883c0b6fe 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -22,7 +22,7 @@
#ifdef CONFIG_DEBUG_BUGVERBOSE
-#define _BUG_FLAGS(ins, flags) \
+#define _BUG_FLAGS(ins, flags, extra) \
do { \
asm_inline volatile("1:\t" ins "\n" \
".pushsection __bug_table,\"aw\"\n" \
@@ -31,7 +31,8 @@ do { \
"\t.word %c1" "\t# bug_entry::line\n" \
"\t.word %c2" "\t# bug_entry::flags\n" \
"\t.org 2b+%c3\n" \
- ".popsection" \
+ ".popsection\n" \
+ extra \
: : "i" (__FILE__), "i" (__LINE__), \
"i" (flags), \
"i" (sizeof(struct bug_entry))); \
@@ -39,14 +40,15 @@ do { \
#else /* !CONFIG_DEBUG_BUGVERBOSE */
-#define _BUG_FLAGS(ins, flags) \
+#define _BUG_FLAGS(ins, flags, extra) \
do { \
asm_inline volatile("1:\t" ins "\n" \
".pushsection __bug_table,\"aw\"\n" \
"2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n" \
"\t.word %c0" "\t# bug_entry::flags\n" \
"\t.org 2b+%c1\n" \
- ".popsection" \
+ ".popsection\n" \
+ extra \
: : "i" (flags), \
"i" (sizeof(struct bug_entry))); \
} while (0)
@@ -55,7 +57,7 @@ do { \
#else
-#define _BUG_FLAGS(ins, flags) asm volatile(ins)
+#define _BUG_FLAGS(ins, flags, extra) asm volatile(ins)
#endif /* CONFIG_GENERIC_BUG */
@@ -63,8 +65,8 @@ do { \
#define BUG() \
do { \
instrumentation_begin(); \
- _BUG_FLAGS(ASM_UD2, 0); \
- unreachable(); \
+ _BUG_FLAGS(ASM_UD2, 0, ""); \
+ __builtin_unreachable(); \
} while (0)
/*
@@ -75,9 +77,9 @@ do { \
*/
#define __WARN_FLAGS(flags) \
do { \
+ __auto_type f = BUGFLAG_WARNING|(flags); \
instrumentation_begin(); \
- _BUG_FLAGS(ASM_UD2, BUGFLAG_WARNING|(flags)); \
- annotate_reachable(); \
+ _BUG_FLAGS(ASM_UD2, f, ASM_REACHABLE); \
instrumentation_end(); \
} while (0)
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 631d5040b31e..d39e0de06be2 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -82,7 +82,7 @@ KVM_X86_OP_NULL(guest_apic_has_interrupt)
KVM_X86_OP(load_eoi_exitmap)
KVM_X86_OP(set_virtual_apic_mode)
KVM_X86_OP_NULL(set_apic_access_page_addr)
-KVM_X86_OP(deliver_posted_interrupt)
+KVM_X86_OP(deliver_interrupt)
KVM_X86_OP_NULL(sync_pir_to_irr)
KVM_X86_OP(set_tss_addr)
KVM_X86_OP(set_identity_map_addr)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1384517d7709..ec9830d2aabf 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -703,7 +703,6 @@ struct kvm_vcpu_arch {
struct fpu_guest guest_fpu;
u64 xcr0;
- u64 guest_supported_xcr0;
struct kvm_pio_request pio;
void *pio_data;
@@ -1410,7 +1409,8 @@ struct kvm_x86_ops {
void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu);
void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu);
- int (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
+ void (*deliver_interrupt)(struct kvm_lapic *apic, int delivery_mode,
+ int trig_mode, int vector);
int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr);
@@ -1483,7 +1483,8 @@ struct kvm_x86_ops {
int (*get_msr_feature)(struct kvm_msr_entry *entry);
- bool (*can_emulate_instruction)(struct kvm_vcpu *vcpu, void *insn, int insn_len);
+ bool (*can_emulate_instruction)(struct kvm_vcpu *vcpu, int emul_type,
+ void *insn, int insn_len);
bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu);
int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
@@ -1496,6 +1497,7 @@ struct kvm_x86_ops {
};
struct kvm_x86_nested_ops {
+ void (*leave_nested)(struct kvm_vcpu *vcpu);
int (*check_events)(struct kvm_vcpu *vcpu);
bool (*hv_timer_pending)(struct kvm_vcpu *vcpu);
void (*triple_fault)(struct kvm_vcpu *vcpu);
@@ -1861,7 +1863,6 @@ int kvm_cpu_has_extint(struct kvm_vcpu *v);
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
-void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu);
int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
unsigned long ipi_bitmap_high, u32 min,
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 3faf0f97edb1..a4a39c3e0f19 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -476,6 +476,7 @@
#define MSR_AMD64_ICIBSEXTDCTL 0xc001103c
#define MSR_AMD64_IBSOPDATA4 0xc001103d
#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
+#define MSR_AMD64_SVM_AVIC_DOORBELL 0xc001011b
#define MSR_AMD64_VM_PAGE_FLUSH 0xc001011e
#define MSR_AMD64_SEV_ES_GHCB 0xc0010130
#define MSR_AMD64_SEV 0xc0010131
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index b00dbc5fac2b..bb2fb78523ce 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -220,6 +220,42 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
#define SVM_NESTED_CTL_SEV_ENABLE BIT(1)
#define SVM_NESTED_CTL_SEV_ES_ENABLE BIT(2)
+
+/* AVIC */
+#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF)
+#define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31
+#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31)
+
+#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK (0xFFULL)
+#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK (0xFFFFFFFFFFULL << 12)
+#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62)
+#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63)
+#define AVIC_PHYSICAL_ID_TABLE_SIZE_MASK (0xFF)
+
+#define AVIC_DOORBELL_PHYSICAL_ID_MASK (0xFF)
+
+#define AVIC_UNACCEL_ACCESS_WRITE_MASK 1
+#define AVIC_UNACCEL_ACCESS_OFFSET_MASK 0xFF0
+#define AVIC_UNACCEL_ACCESS_VECTOR_MASK 0xFFFFFFFF
+
+enum avic_ipi_failure_cause {
+ AVIC_IPI_FAILURE_INVALID_INT_TYPE,
+ AVIC_IPI_FAILURE_TARGET_NOT_RUNNING,
+ AVIC_IPI_FAILURE_INVALID_TARGET,
+ AVIC_IPI_FAILURE_INVALID_BACKING_PAGE,
+};
+
+
+/*
+ * 0xff is broadcast, so the max index allowed for physical APIC ID
+ * table is 0xfe. APIC IDs above 0xff are reserved.
+ */
+#define AVIC_MAX_PHYSICAL_ID_COUNT 0xff
+
+#define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF)
+#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL
+
+
struct vmcb_seg {
u16 selector;
u16 attrib;
diff --git a/arch/x86/include/asm/xen/cpuid.h b/arch/x86/include/asm/xen/cpuid.h
index a9630104f1c4..78e667a31d6c 100644
--- a/arch/x86/include/asm/xen/cpuid.h
+++ b/arch/x86/include/asm/xen/cpuid.h
@@ -100,6 +100,13 @@
/* Memory mapped from other domains has valid IOMMU entries */
#define XEN_HVM_CPUID_IOMMU_MAPPINGS (1u << 2)
#define XEN_HVM_CPUID_VCPU_ID_PRESENT (1u << 3) /* vcpu id is present in EBX */
+#define XEN_HVM_CPUID_DOMID_PRESENT (1u << 4) /* domid is present in ECX */
+/*
+ * Bits 55:49 from the IO-APIC RTE and bits 11:5 from the MSI address can be
+ * used to store high bits for the Destination ID. This expands the Destination
+ * ID field from 8 to 15 bits, allowing to target APIC IDs up 32768.
+ */
+#define XEN_HVM_CPUID_EXT_DEST_ID (1u << 5)
/*
* Leaf 6 (0x40000x05)
diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h
index 1bf2ad34188a..16f548a661cf 100644
--- a/arch/x86/include/asm/xen/hypervisor.h
+++ b/arch/x86/include/asm/xen/hypervisor.h
@@ -43,20 +43,6 @@ static inline uint32_t xen_cpuid_base(void)
return hypervisor_cpuid_base("XenVMMXenVMM", 2);
}
-#ifdef CONFIG_XEN
-extern bool __init xen_hvm_need_lapic(void);
-
-static inline bool __init xen_x2apic_para_available(void)
-{
- return xen_hvm_need_lapic();
-}
-#else
-static inline bool __init xen_x2apic_para_available(void)
-{
- return (xen_cpuid_base() != 0);
-}
-#endif
-
struct pci_dev;
#ifdef CONFIG_XEN_PV_DOM0
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 2da3316bb559..bf6e96011dfe 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -452,6 +452,9 @@ struct kvm_sync_regs {
#define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001
+/* attributes for system fd (group 0) */
+#define KVM_X86_XCOMP_GUEST_SUPP 0
+
struct kvm_vmx_nested_state_data {
__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
__u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index a1e2f41796dc..9f4b508886dd 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -423,7 +423,7 @@ static void threshold_restart_bank(void *_tr)
u32 hi, lo;
/* sysfs write might race against an offline operation */
- if (this_cpu_read(threshold_banks))
+ if (!this_cpu_read(threshold_banks) && !tr->set_lvt_off)
return;
rdmsr(tr->b->address, lo, hi);
diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
index bb9a46a804bf..baafbb37be67 100644
--- a/arch/x86/kernel/cpu/mce/intel.c
+++ b/arch/x86/kernel/cpu/mce/intel.c
@@ -486,6 +486,7 @@ static void intel_ppin_init(struct cpuinfo_x86 *c)
case INTEL_FAM6_BROADWELL_X:
case INTEL_FAM6_SKYLAKE_X:
case INTEL_FAM6_ICELAKE_X:
+ case INTEL_FAM6_ICELAKE_D:
case INTEL_FAM6_SAPPHIRERAPIDS_X:
case INTEL_FAM6_XEON_PHI_KNL:
case INTEL_FAM6_XEON_PHI_KNM:
diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
index 001808e3901c..48afe96ae0f0 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -410,6 +410,8 @@ void sgx_encl_release(struct kref *ref)
}
kfree(entry);
+ /* Invoke scheduler to prevent soft lockups. */
+ cond_resched();
}
xa_destroy(&encl->page_array);
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index 4b41efc9e367..8e4bc6453d26 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -344,10 +344,8 @@ static void sgx_reclaim_pages(void)
{
struct sgx_epc_page *chunk[SGX_NR_TO_SCAN];
struct sgx_backing backing[SGX_NR_TO_SCAN];
- struct sgx_epc_section *section;
struct sgx_encl_page *encl_page;
struct sgx_epc_page *epc_page;
- struct sgx_numa_node *node;
pgoff_t page_index;
int cnt = 0;
int ret;
@@ -418,13 +416,7 @@ skip:
kref_put(&encl_page->encl->refcount, sgx_encl_release);
epc_page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED;
- section = &sgx_epc_sections[epc_page->section];
- node = section->node;
-
- spin_lock(&node->lock);
- list_add_tail(&epc_page->list, &node->free_page_list);
- spin_unlock(&node->lock);
- atomic_long_inc(&sgx_nr_free_pages);
+ sgx_free_epc_page(epc_page);
}
}
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index 437d7c930c0b..75ffaef8c299 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -91,11 +91,9 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
const void *kbuf, const void __user *ubuf)
{
struct fpu *fpu = &target->thread.fpu;
- struct user32_fxsr_struct newstate;
+ struct fxregs_state newstate;
int ret;
- BUILD_BUG_ON(sizeof(newstate) != sizeof(struct fxregs_state));
-
if (!cpu_feature_enabled(X86_FEATURE_FXSR))
return -ENODEV;
@@ -116,9 +114,10 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
/* Copy the state */
memcpy(&fpu->fpstate->regs.fxsave, &newstate, sizeof(newstate));
- /* Clear xmm8..15 */
+ /* Clear xmm8..15 for 32-bit callers */
BUILD_BUG_ON(sizeof(fpu->__fpstate.regs.fxsave.xmm_space) != 16 * 16);
- memset(&fpu->fpstate->regs.fxsave.xmm_space[8], 0, 8 * 16);
+ if (in_ia32_syscall())
+ memset(&fpu->fpstate->regs.fxsave.xmm_space[8*4], 0, 8 * 16);
/* Mark FP and SSE as in use when XSAVE is enabled */
if (use_xsave())
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 02b3ddaf4f75..7c7824ae7862 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1558,7 +1558,10 @@ static int fpstate_realloc(u64 xfeatures, unsigned int ksize,
fpregs_restore_userregs();
newfps->xfeatures = curfps->xfeatures | xfeatures;
- newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
+
+ if (!guest_fpu)
+ newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
+
newfps->xfd = curfps->xfd & ~xfeatures;
/* Do the final updates within the locked region */
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index a438217cbfac..f734e3b0cfec 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -462,19 +462,22 @@ static bool pv_tlb_flush_supported(void)
{
return (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
!kvm_para_has_hint(KVM_HINTS_REALTIME) &&
- kvm_para_has_feature(KVM_FEATURE_STEAL_TIME));
+ kvm_para_has_feature(KVM_FEATURE_STEAL_TIME) &&
+ (num_possible_cpus() != 1));
}
static bool pv_ipi_supported(void)
{
- return kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI);
+ return (kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI) &&
+ (num_possible_cpus() != 1));
}
static bool pv_sched_yield_supported(void)
{
return (kvm_para_has_feature(KVM_FEATURE_PV_SCHED_YIELD) &&
!kvm_para_has_hint(KVM_HINTS_REALTIME) &&
- kvm_para_has_feature(KVM_FEATURE_STEAL_TIME));
+ kvm_para_has_feature(KVM_FEATURE_STEAL_TIME) &&
+ (num_possible_cpus() != 1));
}
#define KVM_IPI_CLUSTER_SIZE (2 * BITS_PER_LONG)
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 6d2244c94799..8d2f2f995539 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1224,7 +1224,7 @@ static struct user_regset x86_64_regsets[] __ro_after_init = {
},
[REGSET_FP] = {
.core_note_type = NT_PRFPREG,
- .n = sizeof(struct user_i387_struct) / sizeof(long),
+ .n = sizeof(struct fxregs_state) / sizeof(long),
.size = sizeof(long), .align = sizeof(long),
.active = regset_xregset_fpregs_active, .regset_get = xfpregs_get, .set = xfpregs_set
},
@@ -1271,7 +1271,7 @@ static struct user_regset x86_32_regsets[] __ro_after_init = {
},
[REGSET_XFP] = {
.core_note_type = NT_PRXFPREG,
- .n = sizeof(struct user32_fxsr_struct) / sizeof(u32),
+ .n = sizeof(struct fxregs_state) / sizeof(u32),
.size = sizeof(u32), .align = sizeof(u32),
.active = regset_xregset_fpregs_active, .regset_get = xfpregs_get, .set = xfpregs_set
},
diff --git a/arch/x86/kernel/resource.c b/arch/x86/kernel/resource.c
index 9ae64f9af956..9b9fb7882c20 100644
--- a/arch/x86/kernel/resource.c
+++ b/arch/x86/kernel/resource.c
@@ -1,5 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/dmi.h>
#include <linux/ioport.h>
#include <asm/e820/api.h>
@@ -24,31 +23,11 @@ static void resource_clip(struct resource *res, resource_size_t start,
res->start = end + 1;
}
-/*
- * Some BIOS-es contain a bug where they add addresses which map to
- * system RAM in the PCI host bridge window returned by the ACPI _CRS
- * method, see commit 4dc2287c1805 ("x86: avoid E820 regions when
- * allocating address space"). To avoid this Linux by default excludes
- * E820 reservations when allocating addresses since 2010.
- * In 2019 some systems have shown-up with E820 reservations which cover
- * the entire _CRS returned PCI host bridge window, causing all attempts
- * to assign memory to PCI BARs to fail if Linux uses E820 reservations.
- *
- * Ideally Linux would fully stop using E820 reservations, but then
- * the old systems this was added for will regress.
- * Instead keep the old behavior for old systems, while ignoring the
- * E820 reservations for any systems from now on.
- */
static void remove_e820_regions(struct resource *avail)
{
- int i, year = dmi_get_bios_year();
+ int i;
struct e820_entry *entry;
- if (year >= 2018)
- return;
-
- pr_info_once("PCI: Removing E820 reservations from host bridge windows\n");
-
for (i = 0; i < e820_table->nr_entries; i++) {
entry = &e820_table->entries[i];
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 3902c28fb6cb..b8f8d268d058 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -133,6 +133,7 @@ static int kvm_cpuid_check_equal(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2
orig = &vcpu->arch.cpuid_entries[i];
if (e2[i].function != orig->function ||
e2[i].index != orig->index ||
+ e2[i].flags != orig->flags ||
e2[i].eax != orig->eax || e2[i].ebx != orig->ebx ||
e2[i].ecx != orig->ecx || e2[i].edx != orig->edx)
return -EINVAL;
@@ -196,10 +197,26 @@ void kvm_update_pv_runtime(struct kvm_vcpu *vcpu)
vcpu->arch.pv_cpuid.features = best->eax;
}
+/*
+ * Calculate guest's supported XCR0 taking into account guest CPUID data and
+ * supported_xcr0 (comprised of host configuration and KVM_SUPPORTED_XCR0).
+ */
+static u64 cpuid_get_supported_xcr0(struct kvm_cpuid_entry2 *entries, int nent)
+{
+ struct kvm_cpuid_entry2 *best;
+
+ best = cpuid_entry2_find(entries, nent, 0xd, 0);
+ if (!best)
+ return 0;
+
+ return (best->eax | ((u64)best->edx << 32)) & supported_xcr0;
+}
+
static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries,
int nent)
{
struct kvm_cpuid_entry2 *best;
+ u64 guest_supported_xcr0 = cpuid_get_supported_xcr0(entries, nent);
best = cpuid_entry2_find(entries, nent, 1, 0);
if (best) {
@@ -238,6 +255,21 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e
vcpu->arch.ia32_misc_enable_msr &
MSR_IA32_MISC_ENABLE_MWAIT);
}
+
+ /*
+ * Bits 127:0 of the allowed SECS.ATTRIBUTES (CPUID.0x12.0x1) enumerate
+ * the supported XSAVE Feature Request Mask (XFRM), i.e. the enclave's
+ * requested XCR0 value. The enclave's XFRM must be a subset of XCRO
+ * at the time of EENTER, thus adjust the allowed XFRM by the guest's
+ * supported XCR0. Similar to XCR0 handling, FP and SSE are forced to
+ * '1' even on CPUs that don't support XSAVE.
+ */
+ best = cpuid_entry2_find(entries, nent, 0x12, 0x1);
+ if (best) {
+ best->ecx &= guest_supported_xcr0 & 0xffffffff;
+ best->edx &= guest_supported_xcr0 >> 32;
+ best->ecx |= XFEATURE_MASK_FPSSE;
+ }
}
void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
@@ -250,6 +282,7 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
struct kvm_cpuid_entry2 *best;
+ u64 guest_supported_xcr0;
best = kvm_find_cpuid_entry(vcpu, 1, 0);
if (best && apic) {
@@ -261,27 +294,10 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
kvm_apic_set_version(vcpu);
}
- best = kvm_find_cpuid_entry(vcpu, 0xD, 0);
- if (!best)
- vcpu->arch.guest_supported_xcr0 = 0;
- else
- vcpu->arch.guest_supported_xcr0 =
- (best->eax | ((u64)best->edx << 32)) & supported_xcr0;
+ guest_supported_xcr0 =
+ cpuid_get_supported_xcr0(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent);
- /*
- * Bits 127:0 of the allowed SECS.ATTRIBUTES (CPUID.0x12.0x1) enumerate
- * the supported XSAVE Feature Request Mask (XFRM), i.e. the enclave's
- * requested XCR0 value. The enclave's XFRM must be a subset of XCRO
- * at the time of EENTER, thus adjust the allowed XFRM by the guest's
- * supported XCR0. Similar to XCR0 handling, FP and SSE are forced to
- * '1' even on CPUs that don't support XSAVE.
- */
- best = kvm_find_cpuid_entry(vcpu, 0x12, 0x1);
- if (best) {
- best->ecx &= vcpu->arch.guest_supported_xcr0 & 0xffffffff;
- best->edx &= vcpu->arch.guest_supported_xcr0 >> 32;
- best->ecx |= XFEATURE_MASK_FPSSE;
- }
+ vcpu->arch.guest_fpu.fpstate->user_xfeatures = guest_supported_xcr0;
kvm_update_pv_runtime(vcpu);
@@ -346,8 +362,14 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
* KVM_SET_CPUID{,2} again. To support this legacy behavior, check
* whether the supplied CPUID data is equal to what's already set.
*/
- if (vcpu->arch.last_vmentry_cpu != -1)
- return kvm_cpuid_check_equal(vcpu, e2, nent);
+ if (vcpu->arch.last_vmentry_cpu != -1) {
+ r = kvm_cpuid_check_equal(vcpu, e2, nent);
+ if (r)
+ return r;
+
+ kvfree(e2);
+ return 0;
+ }
r = kvm_check_cpuid(vcpu, e2, nent);
if (r)
@@ -535,12 +557,13 @@ void kvm_set_cpu_caps(void)
);
kvm_cpu_cap_mask(CPUID_7_0_EBX,
- F(FSGSBASE) | F(SGX) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
- F(BMI2) | F(ERMS) | F(INVPCID) | F(RTM) | 0 /*MPX*/ | F(RDSEED) |
- F(ADX) | F(SMAP) | F(AVX512IFMA) | F(AVX512F) | F(AVX512PF) |
- F(AVX512ER) | F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) |
- F(SHA_NI) | F(AVX512BW) | F(AVX512VL) | 0 /*INTEL_PT*/
- );
+ F(FSGSBASE) | F(SGX) | F(BMI1) | F(HLE) | F(AVX2) |
+ F(FDP_EXCPTN_ONLY) | F(SMEP) | F(BMI2) | F(ERMS) | F(INVPCID) |
+ F(RTM) | F(ZERO_FCS_FDS) | 0 /*MPX*/ | F(AVX512F) |
+ F(AVX512DQ) | F(RDSEED) | F(ADX) | F(SMAP) | F(AVX512IFMA) |
+ F(CLFLUSHOPT) | F(CLWB) | 0 /*INTEL_PT*/ | F(AVX512PF) |
+ F(AVX512ER) | F(AVX512CD) | F(SHA_NI) | F(AVX512BW) |
+ F(AVX512VL));
kvm_cpu_cap_mask(CPUID_7_ECX,
F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | F(RDPID) |
@@ -887,13 +910,14 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
}
break;
case 0xd: {
- u64 guest_perm = xstate_get_guest_group_perm();
+ u64 permitted_xcr0 = supported_xcr0 & xstate_get_guest_group_perm();
+ u64 permitted_xss = supported_xss;
- entry->eax &= supported_xcr0 & guest_perm;
- entry->ebx = xstate_required_size(supported_xcr0, false);
+ entry->eax &= permitted_xcr0;
+ entry->ebx = xstate_required_size(permitted_xcr0, false);
entry->ecx = entry->ebx;
- entry->edx &= (supported_xcr0 & guest_perm) >> 32;
- if (!supported_xcr0)
+ entry->edx &= permitted_xcr0 >> 32;
+ if (!permitted_xcr0)
break;
entry = do_host_cpuid(array, function, 1);
@@ -902,20 +926,20 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
cpuid_entry_override(entry, CPUID_D_1_EAX);
if (entry->eax & (F(XSAVES)|F(XSAVEC)))
- entry->ebx = xstate_required_size(supported_xcr0 | supported_xss,
+ entry->ebx = xstate_required_size(permitted_xcr0 | permitted_xss,
true);
else {
- WARN_ON_ONCE(supported_xss != 0);
+ WARN_ON_ONCE(permitted_xss != 0);
entry->ebx = 0;
}
- entry->ecx &= supported_xss;
- entry->edx &= supported_xss >> 32;
+ entry->ecx &= permitted_xss;
+ entry->edx &= permitted_xss >> 32;
for (i = 2; i < 64; ++i) {
bool s_state;
- if (supported_xcr0 & BIT_ULL(i))
+ if (permitted_xcr0 & BIT_ULL(i))
s_state = false;
- else if (supported_xss & BIT_ULL(i))
+ else if (permitted_xss & BIT_ULL(i))
s_state = true;
else
continue;
@@ -929,7 +953,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
* invalid sub-leafs. Only valid sub-leafs should
* reach this point, and they should have a non-zero
* save state size. Furthermore, check whether the
- * processor agrees with supported_xcr0/supported_xss
+ * processor agrees with permitted_xcr0/permitted_xss
* on whether this is an XCR0- or IA32_XSS-managed area.
*/
if (WARN_ON_ONCE(!entry->eax || (entry->ecx & 0x1) != s_state)) {
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index baca9fa37a91..9322e6340a74 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1096,14 +1096,8 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
apic->regs + APIC_TMR);
}
- if (static_call(kvm_x86_deliver_posted_interrupt)(vcpu, vector)) {
- kvm_lapic_set_irr(vector, apic);
- kvm_make_request(KVM_REQ_EVENT, vcpu);
- kvm_vcpu_kick(vcpu);
- } else {
- trace_kvm_apicv_accept_irq(vcpu->vcpu_id, delivery_mode,
- trig_mode, vector);
- }
+ static_call(kvm_x86_deliver_interrupt)(apic, delivery_mode,
+ trig_mode, vector);
break;
case APIC_DM_REMRD:
@@ -2312,7 +2306,12 @@ void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
apic->irr_pending = true;
apic->isr_count = 1;
} else {
- apic->irr_pending = (apic_search_irr(apic) != -1);
+ /*
+ * Don't clear irr_pending, searching the IRR can race with
+ * updates from the CPU as APICv is still active from hardware's
+ * perspective. The flag will be cleared as appropriate when
+ * KVM injects the interrupt.
+ */
apic->isr_count = count_vectors(apic->regs + APIC_ISR);
}
}
@@ -2629,7 +2628,7 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
kvm_apic_set_version(vcpu);
apic_update_ppr(apic);
- hrtimer_cancel(&apic->lapic_timer.timer);
+ cancel_apic_timer(apic);
apic->lapic_timer.expired_tscdeadline = 0;
apic_update_lvtt(apic);
apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 593093b52395..8e24f73bf60b 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3889,12 +3889,23 @@ static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr)
walk_shadow_page_lockless_end(vcpu);
}
+static u32 alloc_apf_token(struct kvm_vcpu *vcpu)
+{
+ /* make sure the token value is not 0 */
+ u32 id = vcpu->arch.apf.id;
+
+ if (id << 12 == 0)
+ vcpu->arch.apf.id = 1;
+
+ return (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id;
+}
+
static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
gfn_t gfn)
{
struct kvm_arch_async_pf arch;
- arch.token = (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id;
+ arch.token = alloc_apf_token(vcpu);
arch.gfn = gfn;
arch.direct_map = vcpu->arch.mmu->direct_map;
arch.cr3 = vcpu->arch.mmu->get_guest_pgd(vcpu);
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index f614f95acc6b..b1a02993782b 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -95,7 +95,7 @@ static void kvm_perf_overflow(struct perf_event *perf_event,
}
static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
- unsigned config, bool exclude_user,
+ u64 config, bool exclude_user,
bool exclude_kernel, bool intr,
bool in_tx, bool in_tx_cp)
{
@@ -181,7 +181,8 @@ static int cmp_u64(const void *a, const void *b)
void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
{
- unsigned config, type = PERF_TYPE_RAW;
+ u64 config;
+ u32 type = PERF_TYPE_RAW;
struct kvm *kvm = pmc->vcpu->kvm;
struct kvm_pmu_event_filter *filter;
bool allow_event = true;
@@ -220,7 +221,7 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
}
if (type == PERF_TYPE_RAW)
- config = eventsel & X86_RAW_EVENT_MASK;
+ config = eventsel & AMD64_RAW_EVENT_MASK;
if (pmc->current_config == eventsel && pmc_resume_counter(pmc))
return;
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 90364d02f22a..fb3e20791338 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -27,20 +27,6 @@
#include "irq.h"
#include "svm.h"
-#define SVM_AVIC_DOORBELL 0xc001011b
-
-#define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF)
-
-/*
- * 0xff is broadcast, so the max index allowed for physical APIC ID
- * table is 0xfe. APIC IDs above 0xff are reserved.
- */
-#define AVIC_MAX_PHYSICAL_ID_COUNT 255
-
-#define AVIC_UNACCEL_ACCESS_WRITE_MASK 1
-#define AVIC_UNACCEL_ACCESS_OFFSET_MASK 0xFF0
-#define AVIC_UNACCEL_ACCESS_VECTOR_MASK 0xFFFFFFFF
-
/* AVIC GATAG is encoded using VM and VCPU IDs */
#define AVIC_VCPU_ID_BITS 8
#define AVIC_VCPU_ID_MASK ((1 << AVIC_VCPU_ID_BITS) - 1)
@@ -73,12 +59,6 @@ struct amd_svm_iommu_ir {
void *data; /* Storing pointer to struct amd_ir_data */
};
-enum avic_ipi_failure_cause {
- AVIC_IPI_FAILURE_INVALID_INT_TYPE,
- AVIC_IPI_FAILURE_TARGET_NOT_RUNNING,
- AVIC_IPI_FAILURE_INVALID_TARGET,
- AVIC_IPI_FAILURE_INVALID_BACKING_PAGE,
-};
/* Note:
* This function is called from IOMMU driver to notify
@@ -289,6 +269,22 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu)
return 0;
}
+void avic_ring_doorbell(struct kvm_vcpu *vcpu)
+{
+ /*
+ * Note, the vCPU could get migrated to a different pCPU at any point,
+ * which could result in signalling the wrong/previous pCPU. But if
+ * that happens the vCPU is guaranteed to do a VMRUN (after being
+ * migrated) and thus will process pending interrupts, i.e. a doorbell
+ * is not needed (and the spurious one is harmless).
+ */
+ int cpu = READ_ONCE(vcpu->cpu);
+
+ if (cpu != get_cpu())
+ wrmsrl(MSR_AMD64_SVM_AVIC_DOORBELL, kvm_cpu_get_apicid(cpu));
+ put_cpu();
+}
+
static void avic_kick_target_vcpus(struct kvm *kvm, struct kvm_lapic *source,
u32 icrl, u32 icrh)
{
@@ -304,8 +300,13 @@ static void avic_kick_target_vcpus(struct kvm *kvm, struct kvm_lapic *source,
kvm_for_each_vcpu(i, vcpu, kvm) {
if (kvm_apic_match_dest(vcpu, source, icrl & APIC_SHORT_MASK,
GET_APIC_DEST_FIELD(icrh),
- icrl & APIC_DEST_MASK))
- kvm_vcpu_wake_up(vcpu);
+ icrl & APIC_DEST_MASK)) {
+ vcpu->arch.apic->irr_pending = true;
+ svm_complete_interrupt_delivery(vcpu,
+ icrl & APIC_MODE_MASK,
+ icrl & APIC_INT_LEVELTRIG,
+ icrl & APIC_VECTOR_MASK);
+ }
}
}
@@ -345,8 +346,6 @@ int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu)
avic_kick_target_vcpus(vcpu->kvm, apic, icrl, icrh);
break;
case AVIC_IPI_FAILURE_INVALID_TARGET:
- WARN_ONCE(1, "Invalid IPI target: index=%u, vcpu=%d, icr=%#0x:%#0x\n",
- index, vcpu->vcpu_id, icrh, icrl);
break;
case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
WARN_ONCE(1, "Invalid backing page\n");
@@ -669,52 +668,6 @@ void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
return;
}
-int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
-{
- if (!vcpu->arch.apicv_active)
- return -1;
-
- kvm_lapic_set_irr(vec, vcpu->arch.apic);
-
- /*
- * Pairs with the smp_mb_*() after setting vcpu->guest_mode in
- * vcpu_enter_guest() to ensure the write to the vIRR is ordered before
- * the read of guest_mode, which guarantees that either VMRUN will see
- * and process the new vIRR entry, or that the below code will signal
- * the doorbell if the vCPU is already running in the guest.
- */
- smp_mb__after_atomic();
-
- /*
- * Signal the doorbell to tell hardware to inject the IRQ if the vCPU
- * is in the guest. If the vCPU is not in the guest, hardware will
- * automatically process AVIC interrupts at VMRUN.
- */
- if (vcpu->mode == IN_GUEST_MODE) {
- int cpu = READ_ONCE(vcpu->cpu);
-
- /*
- * Note, the vCPU could get migrated to a different pCPU at any
- * point, which could result in signalling the wrong/previous
- * pCPU. But if that happens the vCPU is guaranteed to do a
- * VMRUN (after being migrated) and thus will process pending
- * interrupts, i.e. a doorbell is not needed (and the spurious
- * one is harmless).
- */
- if (cpu != get_cpu())
- wrmsrl(SVM_AVIC_DOORBELL, kvm_cpu_get_apicid(cpu));
- put_cpu();
- } else {
- /*
- * Wake the vCPU if it was blocking. KVM will then detect the
- * pending IRQ when checking if the vCPU has a wake event.
- */
- kvm_vcpu_wake_up(vcpu);
- }
-
- return 0;
-}
-
bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu)
{
return false;
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index cf206855ebf0..39d280e7e80e 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -983,9 +983,9 @@ void svm_free_nested(struct vcpu_svm *svm)
/*
* Forcibly leave nested mode in order to be able to reset the VCPU later on.
*/
-void svm_leave_nested(struct vcpu_svm *svm)
+void svm_leave_nested(struct kvm_vcpu *vcpu)
{
- struct kvm_vcpu *vcpu = &svm->vcpu;
+ struct vcpu_svm *svm = to_svm(vcpu);
if (is_guest_mode(vcpu)) {
svm->nested.nested_run_pending = 0;
@@ -1411,7 +1411,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
return -EINVAL;
if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) {
- svm_leave_nested(svm);
+ svm_leave_nested(vcpu);
svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET));
return 0;
}
@@ -1457,18 +1457,6 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
!__nested_vmcb_check_save(vcpu, &save_cached))
goto out_free;
- /*
- * While the nested guest CR3 is already checked and set by
- * KVM_SET_SREGS, it was set when nested state was yet loaded,
- * thus MMU might not be initialized correctly.
- * Set it again to fix this.
- */
-
- ret = nested_svm_load_cr3(&svm->vcpu, vcpu->arch.cr3,
- nested_npt_enabled(svm), false);
- if (WARN_ON_ONCE(ret))
- goto out_free;
-
/*
* All checks done, we can enter guest mode. Userspace provides
@@ -1478,7 +1466,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
*/
if (is_guest_mode(vcpu))
- svm_leave_nested(svm);
+ svm_leave_nested(vcpu);
else
svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save;
@@ -1494,6 +1482,20 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
svm_switch_vmcb(svm, &svm->nested.vmcb02);
nested_vmcb02_prepare_control(svm);
+
+ /*
+ * While the nested guest CR3 is already checked and set by
+ * KVM_SET_SREGS, it was set when nested state was yet loaded,
+ * thus MMU might not be initialized correctly.
+ * Set it again to fix this.
+ */
+
+ ret = nested_svm_load_cr3(&svm->vcpu, vcpu->arch.cr3,
+ nested_npt_enabled(svm), false);
+ if (WARN_ON_ONCE(ret))
+ goto out_free;
+
+
kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
ret = 0;
out_free:
@@ -1532,6 +1534,7 @@ static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu)
}
struct kvm_x86_nested_ops svm_nested_ops = {
+ .leave_nested = svm_leave_nested,
.check_events = svm_check_nested_events,
.triple_fault = nested_svm_triple_fault,
.get_nested_state_pages = svm_get_nested_state_pages,
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 6a22798eaaee..17b53457d866 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -2100,8 +2100,13 @@ void __init sev_hardware_setup(void)
if (!sev_enabled || !npt_enabled)
goto out;
- /* Does the CPU support SEV? */
- if (!boot_cpu_has(X86_FEATURE_SEV))
+ /*
+ * SEV must obviously be supported in hardware. Sanity check that the
+ * CPU supports decode assists, which is mandatory for SEV guests to
+ * support instruction emulation.
+ */
+ if (!boot_cpu_has(X86_FEATURE_SEV) ||
+ WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_DECODEASSISTS)))
goto out;
/* Retrieve SEV CPUID information */
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 2c99b18d76c0..fd3a00c892c7 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -290,7 +290,7 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
if ((old_efer & EFER_SVME) != (efer & EFER_SVME)) {
if (!(efer & EFER_SVME)) {
- svm_leave_nested(svm);
+ svm_leave_nested(vcpu);
svm_set_gif(svm, true);
/* #GP intercept is still needed for vmware backdoor */
if (!enable_vmware_backdoor)
@@ -312,7 +312,11 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
return ret;
}
- if (svm_gp_erratum_intercept)
+ /*
+ * Never intercept #GP for SEV guests, KVM can't
+ * decrypt guest memory to workaround the erratum.
+ */
+ if (svm_gp_erratum_intercept && !sev_guest(vcpu->kvm))
set_exception_intercept(svm, GP_VECTOR);
}
}
@@ -1010,9 +1014,10 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
* Guest access to VMware backdoor ports could legitimately
* trigger #GP because of TSS I/O permission bitmap.
* We intercept those #GP and allow access to them anyway
- * as VMware does.
+ * as VMware does. Don't intercept #GP for SEV guests as KVM can't
+ * decrypt guest memory to decode the faulting instruction.
*/
- if (enable_vmware_backdoor)
+ if (enable_vmware_backdoor && !sev_guest(vcpu->kvm))
set_exception_intercept(svm, GP_VECTOR);
svm_set_intercept(svm, INTERCEPT_INTR);
@@ -1580,6 +1585,7 @@ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
struct vcpu_svm *svm = to_svm(vcpu);
u64 hcr0 = cr0;
+ bool old_paging = is_paging(vcpu);
#ifdef CONFIG_X86_64
if (vcpu->arch.efer & EFER_LME && !vcpu->arch.guest_state_protected) {
@@ -1596,8 +1602,11 @@ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
#endif
vcpu->arch.cr0 = cr0;
- if (!npt_enabled)
+ if (!npt_enabled) {
hcr0 |= X86_CR0_PG | X86_CR0_WP;
+ if (old_paging != is_paging(vcpu))
+ svm_set_cr4(vcpu, kvm_read_cr4(vcpu));
+ }
/*
* re-enable caching here because the QEMU bios
@@ -1641,8 +1650,12 @@ void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
svm_flush_tlb(vcpu);
vcpu->arch.cr4 = cr4;
- if (!npt_enabled)
+ if (!npt_enabled) {
cr4 |= X86_CR4_PAE;
+
+ if (!is_paging(vcpu))
+ cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE);
+ }
cr4 |= host_cr4_mce;
to_svm(vcpu)->vmcb->save.cr4 = cr4;
vmcb_mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
@@ -2091,10 +2104,6 @@ static int gp_interception(struct kvm_vcpu *vcpu)
if (error_code)
goto reinject;
- /* All SVM instructions expect page aligned RAX */
- if (svm->vmcb->save.rax & ~PAGE_MASK)
- goto reinject;
-
/* Decode the instruction for usage later */
if (x86_decode_emulated_instruction(vcpu, 0, NULL, 0) != EMULATION_OK)
goto reinject;
@@ -2112,8 +2121,13 @@ static int gp_interception(struct kvm_vcpu *vcpu)
if (!is_guest_mode(vcpu))
return kvm_emulate_instruction(vcpu,
EMULTYPE_VMWARE_GP | EMULTYPE_NO_DECODE);
- } else
+ } else {
+ /* All SVM instructions expect page aligned RAX */
+ if (svm->vmcb->save.rax & ~PAGE_MASK)
+ goto reinject;
+
return emulate_svm_instr(vcpu, opcode);
+ }
reinject:
kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
@@ -2679,8 +2693,23 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
u64 data = msr->data;
switch (ecx) {
case MSR_AMD64_TSC_RATIO:
- if (!msr->host_initiated && !svm->tsc_scaling_enabled)
- return 1;
+
+ if (!svm->tsc_scaling_enabled) {
+
+ if (!msr->host_initiated)
+ return 1;
+ /*
+ * In case TSC scaling is not enabled, always
+ * leave this MSR at the default value.
+ *
+ * Due to bug in qemu 6.2.0, it would try to set
+ * this msr to 0 if tsc scaling is not enabled.
+ * Ignore this value as well.
+ */
+ if (data != 0 && data != svm->tsc_ratio_msr)
+ return 1;
+ break;
+ }
if (data & TSC_RATIO_RSVD)
return 1;
@@ -3285,6 +3314,55 @@ static void svm_set_irq(struct kvm_vcpu *vcpu)
SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
}
+void svm_complete_interrupt_delivery(struct kvm_vcpu *vcpu, int delivery_mode,
+ int trig_mode, int vector)
+{
+ /*
+ * vcpu->arch.apicv_active must be read after vcpu->mode.
+ * Pairs with smp_store_release in vcpu_enter_guest.
+ */
+ bool in_guest_mode = (smp_load_acquire(&vcpu->mode) == IN_GUEST_MODE);
+
+ if (!READ_ONCE(vcpu->arch.apicv_active)) {
+ /* Process the interrupt via inject_pending_event */
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+ kvm_vcpu_kick(vcpu);
+ return;
+ }
+
+ trace_kvm_apicv_accept_irq(vcpu->vcpu_id, delivery_mode, trig_mode, vector);
+ if (in_guest_mode) {
+ /*
+ * Signal the doorbell to tell hardware to inject the IRQ. If
+ * the vCPU exits the guest before the doorbell chimes, hardware
+ * will automatically process AVIC interrupts at the next VMRUN.
+ */
+ avic_ring_doorbell(vcpu);
+ } else {
+ /*
+ * Wake the vCPU if it was blocking. KVM will then detect the
+ * pending IRQ when checking if the vCPU has a wake event.
+ */
+ kvm_vcpu_wake_up(vcpu);
+ }
+}
+
+static void svm_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
+ int trig_mode, int vector)
+{
+ kvm_lapic_set_irr(vector, apic);
+
+ /*
+ * Pairs with the smp_mb_*() after setting vcpu->guest_mode in
+ * vcpu_enter_guest() to ensure the write to the vIRR is ordered before
+ * the read of guest_mode. This guarantees that either VMRUN will see
+ * and process the new vIRR entry, or that svm_complete_interrupt_delivery
+ * will signal the doorbell if the CPU has already entered the guest.
+ */
+ smp_mb__after_atomic();
+ svm_complete_interrupt_delivery(apic->vcpu, delivery_mode, trig_mode, vector);
+}
+
static void svm_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -3332,11 +3410,13 @@ static int svm_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
if (svm->nested.nested_run_pending)
return -EBUSY;
+ if (svm_nmi_blocked(vcpu))
+ return 0;
+
/* An NMI must not be injected into L2 if it's supposed to VM-Exit. */
if (for_injection && is_guest_mode(vcpu) && nested_exit_on_nmi(svm))
return -EBUSY;
-
- return !svm_nmi_blocked(vcpu);
+ return 1;
}
static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
@@ -3388,9 +3468,13 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu)
static int svm_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection)
{
struct vcpu_svm *svm = to_svm(vcpu);
+
if (svm->nested.nested_run_pending)
return -EBUSY;
+ if (svm_interrupt_blocked(vcpu))
+ return 0;
+
/*
* An IRQ must not be injected into L2 if it's supposed to VM-Exit,
* e.g. if the IRQ arrived asynchronously after checking nested events.
@@ -3398,7 +3482,7 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection)
if (for_injection && is_guest_mode(vcpu) && nested_exit_on_intr(svm))
return -EBUSY;
- return !svm_interrupt_blocked(vcpu);
+ return 1;
}
static void svm_enable_irq_window(struct kvm_vcpu *vcpu)
@@ -3609,7 +3693,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
unsigned long vmcb_pa = svm->current_vmcb->pa;
- kvm_guest_enter_irqoff();
+ guest_state_enter_irqoff();
if (sev_es_guest(vcpu->kvm)) {
__svm_sev_es_vcpu_run(vmcb_pa);
@@ -3629,7 +3713,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
vmload(__sme_page_pa(sd->save_area));
}
- kvm_guest_exit_irqoff();
+ guest_state_exit_irqoff();
}
static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
@@ -4129,11 +4213,14 @@ static int svm_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
if (svm->nested.nested_run_pending)
return -EBUSY;
+ if (svm_smi_blocked(vcpu))
+ return 0;
+
/* An SMI must not be injected into L2 if it's supposed to VM-Exit. */
if (for_injection && is_guest_mode(vcpu) && nested_exit_on_smi(svm))
return -EBUSY;
- return !svm_smi_blocked(vcpu);
+ return 1;
}
static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
@@ -4227,11 +4314,18 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
* Enter the nested guest now
*/
+ vmcb_mark_all_dirty(svm->vmcb01.ptr);
+
vmcb12 = map.hva;
nested_copy_vmcb_control_to_cache(svm, &vmcb12->control);
nested_copy_vmcb_save_to_cache(svm, &vmcb12->save);
ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12, false);
+ if (ret)
+ goto unmap_save;
+
+ svm->nested.nested_run_pending = 1;
+
unmap_save:
kvm_vcpu_unmap(vcpu, &map_save, true);
unmap_map:
@@ -4252,79 +4346,140 @@ static void svm_enable_smi_window(struct kvm_vcpu *vcpu)
}
}
-static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int insn_len)
+static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
+ void *insn, int insn_len)
{
bool smep, smap, is_user;
unsigned long cr4;
+ u64 error_code;
+
+ /* Emulation is always possible when KVM has access to all guest state. */
+ if (!sev_guest(vcpu->kvm))
+ return true;
+
+ /* #UD and #GP should never be intercepted for SEV guests. */
+ WARN_ON_ONCE(emul_type & (EMULTYPE_TRAP_UD |
+ EMULTYPE_TRAP_UD_FORCED |
+ EMULTYPE_VMWARE_GP));
/*
- * When the guest is an SEV-ES guest, emulation is not possible.
+ * Emulation is impossible for SEV-ES guests as KVM doesn't have access
+ * to guest register state.
*/
if (sev_es_guest(vcpu->kvm))
return false;
/*
+ * Emulation is possible if the instruction is already decoded, e.g.
+ * when completing I/O after returning from userspace.
+ */
+ if (emul_type & EMULTYPE_NO_DECODE)
+ return true;
+
+ /*
+ * Emulation is possible for SEV guests if and only if a prefilled
+ * buffer containing the bytes of the intercepted instruction is
+ * available. SEV guest memory is encrypted with a guest specific key
+ * and cannot be decrypted by KVM, i.e. KVM would read cyphertext and
+ * decode garbage.
+ *
+ * Inject #UD if KVM reached this point without an instruction buffer.
+ * In practice, this path should never be hit by a well-behaved guest,
+ * e.g. KVM doesn't intercept #UD or #GP for SEV guests, but this path
+ * is still theoretically reachable, e.g. via unaccelerated fault-like
+ * AVIC access, and needs to be handled by KVM to avoid putting the
+ * guest into an infinite loop. Injecting #UD is somewhat arbitrary,
+ * but its the least awful option given lack of insight into the guest.
+ */
+ if (unlikely(!insn)) {
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return false;
+ }
+
+ /*
+ * Emulate for SEV guests if the insn buffer is not empty. The buffer
+ * will be empty if the DecodeAssist microcode cannot fetch bytes for
+ * the faulting instruction because the code fetch itself faulted, e.g.
+ * the guest attempted to fetch from emulated MMIO or a guest page
+ * table used to translate CS:RIP resides in emulated MMIO.
+ */
+ if (likely(insn_len))
+ return true;
+
+ /*
* Detect and workaround Errata 1096 Fam_17h_00_0Fh.
*
* Errata:
- * When CPU raise #NPF on guest data access and vCPU CR4.SMAP=1, it is
- * possible that CPU microcode implementing DecodeAssist will fail
- * to read bytes of instruction which caused #NPF. In this case,
- * GuestIntrBytes field of the VMCB on a VMEXIT will incorrectly
- * return 0 instead of the correct guest instruction bytes.
- *
- * This happens because CPU microcode reading instruction bytes
- * uses a special opcode which attempts to read data using CPL=0
- * privileges. The microcode reads CS:RIP and if it hits a SMAP
- * fault, it gives up and returns no instruction bytes.
+ * When CPU raises #NPF on guest data access and vCPU CR4.SMAP=1, it is
+ * possible that CPU microcode implementing DecodeAssist will fail to
+ * read guest memory at CS:RIP and vmcb.GuestIntrBytes will incorrectly
+ * be '0'. This happens because microcode reads CS:RIP using a _data_
+ * loap uop with CPL=0 privileges. If the load hits a SMAP #PF, ucode
+ * gives up and does not fill the instruction bytes buffer.
*
- * Detection:
- * We reach here in case CPU supports DecodeAssist, raised #NPF and
- * returned 0 in GuestIntrBytes field of the VMCB.
- * First, errata can only be triggered in case vCPU CR4.SMAP=1.
- * Second, if vCPU CR4.SMEP=1, errata could only be triggered
- * in case vCPU CPL==3 (Because otherwise guest would have triggered
- * a SMEP fault instead of #NPF).
- * Otherwise, vCPU CR4.SMEP=0, errata could be triggered by any vCPU CPL.
- * As most guests enable SMAP if they have also enabled SMEP, use above
- * logic in order to attempt minimize false-positive of detecting errata
- * while still preserving all cases semantic correctness.
+ * As above, KVM reaches this point iff the VM is an SEV guest, the CPU
+ * supports DecodeAssist, a #NPF was raised, KVM's page fault handler
+ * triggered emulation (e.g. for MMIO), and the CPU returned 0 in the
+ * GuestIntrBytes field of the VMCB.
*
- * Workaround:
- * To determine what instruction the guest was executing, the hypervisor
- * will have to decode the instruction at the instruction pointer.
+ * This does _not_ mean that the erratum has been encountered, as the
+ * DecodeAssist will also fail if the load for CS:RIP hits a legitimate
+ * #PF, e.g. if the guest attempt to execute from emulated MMIO and
+ * encountered a reserved/not-present #PF.
*
- * In non SEV guest, hypervisor will be able to read the guest
- * memory to decode the instruction pointer when insn_len is zero
- * so we return true to indicate that decoding is possible.
+ * To hit the erratum, the following conditions must be true:
+ * 1. CR4.SMAP=1 (obviously).
+ * 2. CR4.SMEP=0 || CPL=3. If SMEP=1 and CPL<3, the erratum cannot
+ * have been hit as the guest would have encountered a SMEP
+ * violation #PF, not a #NPF.
+ * 3. The #NPF is not due to a code fetch, in which case failure to
+ * retrieve the instruction bytes is legitimate (see abvoe).
*
- * But in the SEV guest, the guest memory is encrypted with the
- * guest specific key and hypervisor will not be able to decode the
- * instruction pointer so we will not able to workaround it. Lets
- * print the error and request to kill the guest.
- */
- if (likely(!insn || insn_len))
- return true;
-
- /*
- * If RIP is invalid, go ahead with emulation which will cause an
- * internal error exit.
+ * In addition, don't apply the erratum workaround if the #NPF occurred
+ * while translating guest page tables (see below).
*/
- if (!kvm_vcpu_gfn_to_memslot(vcpu, kvm_rip_read(vcpu) >> PAGE_SHIFT))
- return true;
+ error_code = to_svm(vcpu)->vmcb->control.exit_info_1;
+ if (error_code & (PFERR_GUEST_PAGE_MASK | PFERR_FETCH_MASK))
+ goto resume_guest;
cr4 = kvm_read_cr4(vcpu);
smep = cr4 & X86_CR4_SMEP;
smap = cr4 & X86_CR4_SMAP;
is_user = svm_get_cpl(vcpu) == 3;
if (smap && (!smep || is_user)) {
- if (!sev_guest(vcpu->kvm))
- return true;
-
pr_err_ratelimited("KVM: SEV Guest triggered AMD Erratum 1096\n");
- kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+
+ /*
+ * If the fault occurred in userspace, arbitrarily inject #GP
+ * to avoid killing the guest and to hopefully avoid confusing
+ * the guest kernel too much, e.g. injecting #PF would not be
+ * coherent with respect to the guest's page tables. Request
+ * triple fault if the fault occurred in the kernel as there's
+ * no fault that KVM can inject without confusing the guest.
+ * In practice, the triple fault is moot as no sane SEV kernel
+ * will execute from user memory while also running with SMAP=1.
+ */
+ if (is_user)
+ kvm_inject_gp(vcpu, 0);
+ else
+ kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
}
+resume_guest:
+ /*
+ * If the erratum was not hit, simply resume the guest and let it fault
+ * again. While awful, e.g. the vCPU may get stuck in an infinite loop
+ * if the fault is at CPL=0, it's the lesser of all evils. Exiting to
+ * userspace will kill the guest, and letting the emulator read garbage
+ * will yield random behavior and potentially corrupt the guest.
+ *
+ * Simply resuming the guest is technically not a violation of the SEV
+ * architecture. AMD's APM states that all code fetches and page table
+ * accesses for SEV guest are encrypted, regardless of the C-Bit. The
+ * APM also states that encrypted accesses to MMIO are "ignored", but
+ * doesn't explicitly define "ignored", i.e. doing nothing and letting
+ * the guest spin is technically "ignoring" the access.
+ */
return false;
}
@@ -4478,7 +4633,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.pmu_ops = &amd_pmu_ops,
.nested_ops = &svm_nested_ops,
- .deliver_posted_interrupt = svm_deliver_avic_intr,
+ .deliver_interrupt = svm_deliver_interrupt,
.dy_apicv_has_pending_interrupt = svm_dy_apicv_has_pending_interrupt,
.update_pi_irte = svm_update_pi_irte,
.setup_mce = svm_setup_mce,
@@ -4555,6 +4710,7 @@ static __init void svm_set_cpu_caps(void)
/* CPUID 0x80000001 and 0x8000000A (SVM features) */
if (nested) {
kvm_cpu_cap_set(X86_FEATURE_SVM);
+ kvm_cpu_cap_set(X86_FEATURE_VMCBCLEAN);
if (nrips)
kvm_cpu_cap_set(X86_FEATURE_NRIPS);
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 47ef8f4a9358..fa98d6844728 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -304,11 +304,6 @@ static inline void vmcb_mark_all_clean(struct vmcb *vmcb)
& ~VMCB_ALWAYS_DIRTY_MASK;
}
-static inline bool vmcb_is_clean(struct vmcb *vmcb, int bit)
-{
- return (vmcb->control.clean & (1 << bit));
-}
-
static inline void vmcb_mark_dirty(struct vmcb *vmcb, int bit)
{
vmcb->control.clean &= ~(1 << bit);
@@ -494,6 +489,8 @@ void svm_set_gif(struct vcpu_svm *svm, bool value);
int svm_invoke_exit_handler(struct kvm_vcpu *vcpu, u64 exit_code);
void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr,
int read, int write);
+void svm_complete_interrupt_delivery(struct kvm_vcpu *vcpu, int delivery_mode,
+ int trig_mode, int vec);
/* nested.c */
@@ -525,7 +522,7 @@ static inline bool nested_exit_on_nmi(struct vcpu_svm *svm)
int enter_svm_guest_mode(struct kvm_vcpu *vcpu,
u64 vmcb_gpa, struct vmcb *vmcb12, bool from_vmrun);
-void svm_leave_nested(struct vcpu_svm *svm);
+void svm_leave_nested(struct kvm_vcpu *vcpu);
void svm_free_nested(struct vcpu_svm *svm);
int svm_allocate_nested(struct vcpu_svm *svm);
int nested_svm_vmrun(struct kvm_vcpu *vcpu);
@@ -561,17 +558,6 @@ extern struct kvm_x86_nested_ops svm_nested_ops;
/* avic.c */
-#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF)
-#define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31
-#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31)
-
-#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK (0xFFULL)
-#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK (0xFFFFFFFFFFULL << 12)
-#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62)
-#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63)
-
-#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL
-
int avic_ga_log_notifier(u32 ga_tag);
void avic_vm_destroy(struct kvm *kvm);
int avic_vm_init(struct kvm *kvm);
@@ -588,12 +574,12 @@ bool svm_check_apicv_inhibit_reasons(ulong bit);
void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr);
void svm_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr);
-int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec);
bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu);
int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
uint32_t guest_irq, bool set);
void avic_vcpu_blocking(struct kvm_vcpu *vcpu);
void avic_vcpu_unblocking(struct kvm_vcpu *vcpu);
+void avic_ring_doorbell(struct kvm_vcpu *vcpu);
/* sev.c */
diff --git a/arch/x86/kvm/svm/svm_onhyperv.h b/arch/x86/kvm/svm/svm_onhyperv.h
index c53b8bf8d013..489ca56212c6 100644
--- a/arch/x86/kvm/svm/svm_onhyperv.h
+++ b/arch/x86/kvm/svm/svm_onhyperv.h
@@ -46,6 +46,9 @@ static inline void svm_hv_init_vmcb(struct vmcb *vmcb)
if (npt_enabled &&
ms_hyperv.nested_features & HV_X64_NESTED_ENLIGHTENED_TLB)
hve->hv_enlightenments_control.enlightened_npt_tlb = 1;
+
+ if (ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)
+ hve->hv_enlightenments_control.msr_bitmap = 1;
}
static inline void svm_hv_hardware_setup(void)
@@ -83,14 +86,7 @@ static inline void svm_hv_vmcb_dirty_nested_enlightenments(
struct hv_enlightenments *hve =
(struct hv_enlightenments *)vmcb->control.reserved_sw;
- /*
- * vmcb can be NULL if called during early vcpu init.
- * And its okay not to mark vmcb dirty during vcpu init
- * as we mark it dirty unconditionally towards end of vcpu
- * init phase.
- */
- if (vmcb_is_clean(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS) &&
- hve->hv_enlightenments_control.msr_bitmap)
+ if (hve->hv_enlightenments_control.msr_bitmap)
vmcb_mark_dirty(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS);
}
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index 959b59d13b5a..3f430e218375 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -54,7 +54,6 @@ struct nested_vmx_msrs {
struct vmcs_config {
int size;
- int order;
u32 basic_cap;
u32 revision_id;
u32 pin_based_exec_ctrl;
diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c
index ba6f99f584ac..87e3dc10edf4 100644
--- a/arch/x86/kvm/vmx/evmcs.c
+++ b/arch/x86/kvm/vmx/evmcs.c
@@ -12,8 +12,6 @@
DEFINE_STATIC_KEY_FALSE(enable_evmcs);
-#if IS_ENABLED(CONFIG_HYPERV)
-
#define EVMCS1_OFFSET(x) offsetof(struct hv_enlightened_vmcs, x)
#define EVMCS1_FIELD(number, name, clean_field)[ROL16(number, 6)] = \
{EVMCS1_OFFSET(name), clean_field}
@@ -296,6 +294,7 @@ const struct evmcs_field vmcs_field_to_evmcs_1[] = {
};
const unsigned int nr_evmcs_1_fields = ARRAY_SIZE(vmcs_field_to_evmcs_1);
+#if IS_ENABLED(CONFIG_HYPERV)
__init void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf)
{
vmcs_conf->pin_based_exec_ctrl &= ~EVMCS1_UNSUPPORTED_PINCTRL;
@@ -362,6 +361,7 @@ void nested_evmcs_filter_control_msr(u32 msr_index, u64 *pdata)
case MSR_IA32_VMX_PROCBASED_CTLS2:
ctl_high &= ~EVMCS1_UNSUPPORTED_2NDEXEC;
break;
+ case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
case MSR_IA32_VMX_PINBASED_CTLS:
ctl_high &= ~EVMCS1_UNSUPPORTED_PINCTRL;
break;
diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/evmcs.h
index 16731d2cf231..8d70f9aea94b 100644
--- a/arch/x86/kvm/vmx/evmcs.h
+++ b/arch/x86/kvm/vmx/evmcs.h
@@ -59,12 +59,12 @@ DECLARE_STATIC_KEY_FALSE(enable_evmcs);
SECONDARY_EXEC_SHADOW_VMCS | \
SECONDARY_EXEC_TSC_SCALING | \
SECONDARY_EXEC_PAUSE_LOOP_EXITING)
-#define EVMCS1_UNSUPPORTED_VMEXIT_CTRL (VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
+#define EVMCS1_UNSUPPORTED_VMEXIT_CTRL \
+ (VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | \
+ VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)
#define EVMCS1_UNSUPPORTED_VMENTRY_CTRL (VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
#define EVMCS1_UNSUPPORTED_VMFUNC (VMX_VMFUNC_EPTP_SWITCHING)
-#if IS_ENABLED(CONFIG_HYPERV)
-
struct evmcs_field {
u16 offset;
u16 clean_field;
@@ -73,26 +73,56 @@ struct evmcs_field {
extern const struct evmcs_field vmcs_field_to_evmcs_1[];
extern const unsigned int nr_evmcs_1_fields;
-static __always_inline int get_evmcs_offset(unsigned long field,
- u16 *clean_field)
+static __always_inline int evmcs_field_offset(unsigned long field,
+ u16 *clean_field)
{
unsigned int index = ROL16(field, 6);
const struct evmcs_field *evmcs_field;
- if (unlikely(index >= nr_evmcs_1_fields)) {
- WARN_ONCE(1, "KVM: accessing unsupported EVMCS field %lx\n",
- field);
+ if (unlikely(index >= nr_evmcs_1_fields))
return -ENOENT;
- }
evmcs_field = &vmcs_field_to_evmcs_1[index];
+ /*
+ * Use offset=0 to detect holes in eVMCS. This offset belongs to
+ * 'revision_id' but this field has no encoding and is supposed to
+ * be accessed directly.
+ */
+ if (unlikely(!evmcs_field->offset))
+ return -ENOENT;
+
if (clean_field)
*clean_field = evmcs_field->clean_field;
return evmcs_field->offset;
}
+static inline u64 evmcs_read_any(struct hv_enlightened_vmcs *evmcs,
+ unsigned long field, u16 offset)
+{
+ /*
+ * vmcs12_read_any() doesn't care whether the supplied structure
+ * is 'struct vmcs12' or 'struct hv_enlightened_vmcs' as it takes
+ * the exact offset of the required field, use it for convenience
+ * here.
+ */
+ return vmcs12_read_any((void *)evmcs, field, offset);
+}
+
+#if IS_ENABLED(CONFIG_HYPERV)
+
+static __always_inline int get_evmcs_offset(unsigned long field,
+ u16 *clean_field)
+{
+ int offset = evmcs_field_offset(field, clean_field);
+
+ WARN_ONCE(offset < 0, "KVM: accessing unsupported EVMCS field %lx\n",
+ field);
+
+ return offset;
+}
+
static __always_inline void evmcs_write64(unsigned long field, u64 value)
{
u16 clean_field;
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index f235f77cbc03..ba34e94049c7 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -7,6 +7,7 @@
#include <asm/mmu_context.h>
#include "cpuid.h"
+#include "evmcs.h"
#include "hyperv.h"
#include "mmu.h"
#include "nested.h"
@@ -4851,18 +4852,20 @@ static struct vmcs *alloc_shadow_vmcs(struct kvm_vcpu *vcpu)
struct loaded_vmcs *loaded_vmcs = vmx->loaded_vmcs;
/*
- * We should allocate a shadow vmcs for vmcs01 only when L1
- * executes VMXON and free it when L1 executes VMXOFF.
- * As it is invalid to execute VMXON twice, we shouldn't reach
- * here when vmcs01 already have an allocated shadow vmcs.
+ * KVM allocates a shadow VMCS only when L1 executes VMXON and frees it
+ * when L1 executes VMXOFF or the vCPU is forced out of nested
+ * operation. VMXON faults if the CPU is already post-VMXON, so it
+ * should be impossible to already have an allocated shadow VMCS. KVM
+ * doesn't support virtualization of VMCS shadowing, so vmcs01 should
+ * always be the loaded VMCS.
*/
- WARN_ON(loaded_vmcs == &vmx->vmcs01 && loaded_vmcs->shadow_vmcs);
+ if (WARN_ON(loaded_vmcs != &vmx->vmcs01 || loaded_vmcs->shadow_vmcs))
+ return loaded_vmcs->shadow_vmcs;
+
+ loaded_vmcs->shadow_vmcs = alloc_vmcs(true);
+ if (loaded_vmcs->shadow_vmcs)
+ vmcs_clear(loaded_vmcs->shadow_vmcs);
- if (!loaded_vmcs->shadow_vmcs) {
- loaded_vmcs->shadow_vmcs = alloc_vmcs(true);
- if (loaded_vmcs->shadow_vmcs)
- vmcs_clear(loaded_vmcs->shadow_vmcs);
- }
return loaded_vmcs->shadow_vmcs;
}
@@ -5099,27 +5102,49 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
if (!nested_vmx_check_permission(vcpu))
return 1;
- /*
- * In VMX non-root operation, when the VMCS-link pointer is INVALID_GPA,
- * any VMREAD sets the ALU flags for VMfailInvalid.
- */
- if (vmx->nested.current_vmptr == INVALID_GPA ||
- (is_guest_mode(vcpu) &&
- get_vmcs12(vcpu)->vmcs_link_pointer == INVALID_GPA))
- return nested_vmx_failInvalid(vcpu);
-
/* Decode instruction info and find the field to read */
field = kvm_register_read(vcpu, (((instr_info) >> 28) & 0xf));
- offset = vmcs_field_to_offset(field);
- if (offset < 0)
- return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
+ if (!evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) {
+ /*
+ * In VMX non-root operation, when the VMCS-link pointer is INVALID_GPA,
+ * any VMREAD sets the ALU flags for VMfailInvalid.
+ */
+ if (vmx->nested.current_vmptr == INVALID_GPA ||
+ (is_guest_mode(vcpu) &&
+ get_vmcs12(vcpu)->vmcs_link_pointer == INVALID_GPA))
+ return nested_vmx_failInvalid(vcpu);
- if (!is_guest_mode(vcpu) && is_vmcs12_ext_field(field))
- copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
+ offset = get_vmcs12_field_offset(field);
+ if (offset < 0)
+ return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
+
+ if (!is_guest_mode(vcpu) && is_vmcs12_ext_field(field))
+ copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
- /* Read the field, zero-extended to a u64 value */
- value = vmcs12_read_any(vmcs12, field, offset);
+ /* Read the field, zero-extended to a u64 value */
+ value = vmcs12_read_any(vmcs12, field, offset);
+ } else {
+ /*
+ * Hyper-V TLFS (as of 6.0b) explicitly states, that while an
+ * enlightened VMCS is active VMREAD/VMWRITE instructions are
+ * unsupported. Unfortunately, certain versions of Windows 11
+ * don't comply with this requirement which is not enforced in
+ * genuine Hyper-V. Allow VMREAD from an enlightened VMCS as a
+ * workaround, as misbehaving guests will panic on VM-Fail.
+ * Note, enlightened VMCS is incompatible with shadow VMCS so
+ * all VMREADs from L2 should go to L1.
+ */
+ if (WARN_ON_ONCE(is_guest_mode(vcpu)))
+ return nested_vmx_failInvalid(vcpu);
+
+ offset = evmcs_field_offset(field, NULL);
+ if (offset < 0)
+ return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
+
+ /* Read the field, zero-extended to a u64 value */
+ value = evmcs_read_any(vmx->nested.hv_evmcs, field, offset);
+ }
/*
* Now copy part of this value to register or memory, as requested.
@@ -5214,7 +5239,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
field = kvm_register_read(vcpu, (((instr_info) >> 28) & 0xf));
- offset = vmcs_field_to_offset(field);
+ offset = get_vmcs12_field_offset(field);
if (offset < 0)
return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
@@ -6462,7 +6487,7 @@ static u64 nested_vmx_calc_vmcs_enum_msr(void)
max_idx = 0;
for (i = 0; i < nr_vmcs12_fields; i++) {
/* The vmcs12 table is very, very sparsely populated. */
- if (!vmcs_field_to_offset_table[i])
+ if (!vmcs12_field_offsets[i])
continue;
idx = vmcs_field_index(VMCS12_IDX_TO_ENC(i));
@@ -6771,6 +6796,7 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *))
}
struct kvm_x86_nested_ops vmx_nested_ops = {
+ .leave_nested = vmx_leave_nested,
.check_events = vmx_check_nested_events,
.hv_timer_pending = nested_vmx_preemption_timer_pending,
.triple_fault = nested_vmx_triple_fault,
diff --git a/arch/x86/kvm/vmx/vmcs12.c b/arch/x86/kvm/vmx/vmcs12.c
index cab6ba7a5005..2251b60920f8 100644
--- a/arch/x86/kvm/vmx/vmcs12.c
+++ b/arch/x86/kvm/vmx/vmcs12.c
@@ -8,7 +8,7 @@
FIELD(number, name), \
[ROL16(number##_HIGH, 6)] = VMCS12_OFFSET(name) + sizeof(u32)
-const unsigned short vmcs_field_to_offset_table[] = {
+const unsigned short vmcs12_field_offsets[] = {
FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id),
FIELD(POSTED_INTR_NV, posted_intr_nv),
FIELD(GUEST_ES_SELECTOR, guest_es_selector),
@@ -151,4 +151,4 @@ const unsigned short vmcs_field_to_offset_table[] = {
FIELD(HOST_RSP, host_rsp),
FIELD(HOST_RIP, host_rip),
};
-const unsigned int nr_vmcs12_fields = ARRAY_SIZE(vmcs_field_to_offset_table);
+const unsigned int nr_vmcs12_fields = ARRAY_SIZE(vmcs12_field_offsets);
diff --git a/arch/x86/kvm/vmx/vmcs12.h b/arch/x86/kvm/vmx/vmcs12.h
index 2a45f026ee11..746129ddd5ae 100644
--- a/arch/x86/kvm/vmx/vmcs12.h
+++ b/arch/x86/kvm/vmx/vmcs12.h
@@ -361,10 +361,10 @@ static inline void vmx_check_vmcs12_offsets(void)
CHECK_OFFSET(guest_pml_index, 996);
}
-extern const unsigned short vmcs_field_to_offset_table[];
+extern const unsigned short vmcs12_field_offsets[];
extern const unsigned int nr_vmcs12_fields;
-static inline short vmcs_field_to_offset(unsigned long field)
+static inline short get_vmcs12_field_offset(unsigned long field)
{
unsigned short offset;
unsigned int index;
@@ -377,7 +377,7 @@ static inline short vmcs_field_to_offset(unsigned long field)
return -ENOENT;
index = array_index_nospec(index, nr_vmcs12_fields);
- offset = vmcs_field_to_offset_table[index];
+ offset = vmcs12_field_offsets[index];
if (offset == 0)
return -ENOENT;
return offset;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 4ac676066d60..efda5e4d6247 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1487,11 +1487,12 @@ static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data)
return 0;
}
-static bool vmx_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int insn_len)
+static bool vmx_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
+ void *insn, int insn_len)
{
/*
* Emulation of instructions in SGX enclaves is impossible as RIP does
- * not point tthe failing instruction, and even if it did, the code
+ * not point at the failing instruction, and even if it did, the code
* stream is inaccessible. Inject #UD instead of exiting to userspace
* so that guest userspace can't DoS the guest simply by triggering
* emulation (enclaves are CPL3 only).
@@ -2603,7 +2604,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
return -EIO;
vmcs_conf->size = vmx_msr_high & 0x1fff;
- vmcs_conf->order = get_order(vmcs_conf->size);
vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff;
vmcs_conf->revision_id = vmx_msr_low;
@@ -2628,7 +2628,7 @@ struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags)
struct page *pages;
struct vmcs *vmcs;
- pages = __alloc_pages_node(node, flags, vmcs_config.order);
+ pages = __alloc_pages_node(node, flags, 0);
if (!pages)
return NULL;
vmcs = page_address(pages);
@@ -2647,7 +2647,7 @@ struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags)
void free_vmcs(struct vmcs *vmcs)
{
- free_pages((unsigned long)vmcs, vmcs_config.order);
+ free_page((unsigned long)vmcs);
}
/*
@@ -4041,6 +4041,21 @@ static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
return 0;
}
+static void vmx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
+ int trig_mode, int vector)
+{
+ struct kvm_vcpu *vcpu = apic->vcpu;
+
+ if (vmx_deliver_posted_interrupt(vcpu, vector)) {
+ kvm_lapic_set_irr(vector, apic);
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+ kvm_vcpu_kick(vcpu);
+ } else {
+ trace_kvm_apicv_accept_irq(vcpu->vcpu_id, delivery_mode,
+ trig_mode, vector);
+ }
+}
+
/*
* Set up the vmcs's constant host-state fields, i.e., host-state fields that
* will not change in the lifetime of the guest.
@@ -4094,10 +4109,14 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
vmcs_write32(HOST_IA32_SYSENTER_CS, low32);
/*
- * If 32-bit syscall is enabled, vmx_vcpu_load_vcms rewrites
- * HOST_IA32_SYSENTER_ESP.
+ * SYSENTER is used for 32-bit system calls on either 32-bit or
+ * 64-bit kernels. It is always zero If neither is allowed, otherwise
+ * vmx_vcpu_load_vmcs loads it with the per-CPU entry stack (and may
+ * have already done so!).
*/
- vmcs_writel(HOST_IA32_SYSENTER_ESP, 0);
+ if (!IS_ENABLED(CONFIG_IA32_EMULATION) && !IS_ENABLED(CONFIG_X86_32))
+ vmcs_writel(HOST_IA32_SYSENTER_ESP, 0);
+
rdmsrl(MSR_IA32_SYSENTER_EIP, tmpl);
vmcs_writel(HOST_IA32_SYSENTER_EIP, tmpl); /* 22.2.3 */
@@ -4901,8 +4920,33 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
dr6 = vmx_get_exit_qual(vcpu);
if (!(vcpu->guest_debug &
(KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
+ /*
+ * If the #DB was due to ICEBP, a.k.a. INT1, skip the
+ * instruction. ICEBP generates a trap-like #DB, but
+ * despite its interception control being tied to #DB,
+ * is an instruction intercept, i.e. the VM-Exit occurs
+ * on the ICEBP itself. Note, skipping ICEBP also
+ * clears STI and MOVSS blocking.
+ *
+ * For all other #DBs, set vmcs.PENDING_DBG_EXCEPTIONS.BS
+ * if single-step is enabled in RFLAGS and STI or MOVSS
+ * blocking is active, as the CPU doesn't set the bit
+ * on VM-Exit due to #DB interception. VM-Entry has a
+ * consistency check that a single-step #DB is pending
+ * in this scenario as the previous instruction cannot
+ * have toggled RFLAGS.TF 0=>1 (because STI and POP/MOV
+ * don't modify RFLAGS), therefore the one instruction
+ * delay when activating single-step breakpoints must
+ * have already expired. Note, the CPU sets/clears BS
+ * as appropriate for all other VM-Exits types.
+ */
if (is_icebp(intr_info))
WARN_ON(!skip_emulated_instruction(vcpu));
+ else if ((vmx_get_rflags(vcpu) & X86_EFLAGS_TF) &&
+ (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
+ (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)))
+ vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
+ vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS) | DR6_BS);
kvm_queue_exception_p(vcpu, DB_VECTOR, dr6);
return 1;
@@ -5397,7 +5441,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
{
gpa_t gpa;
- if (!vmx_can_emulate_instruction(vcpu, NULL, 0))
+ if (!vmx_can_emulate_instruction(vcpu, EMULTYPE_PF, NULL, 0))
return 1;
/*
@@ -6725,7 +6769,7 @@ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
struct vcpu_vmx *vmx)
{
- kvm_guest_enter_irqoff();
+ guest_state_enter_irqoff();
/* L1D Flush includes CPU buffer clear to mitigate MDS */
if (static_branch_unlikely(&vmx_l1d_should_flush))
@@ -6741,7 +6785,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
vcpu->arch.cr2 = native_read_cr2();
- kvm_guest_exit_irqoff();
+ guest_state_exit_irqoff();
}
static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
@@ -7615,6 +7659,7 @@ static int vmx_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
if (ret)
return ret;
+ vmx->nested.nested_run_pending = 1;
vmx->nested.smm.guest_mode = false;
}
return 0;
@@ -7739,7 +7784,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.hwapic_isr_update = vmx_hwapic_isr_update,
.guest_apic_has_interrupt = vmx_guest_apic_has_interrupt,
.sync_pir_to_irr = vmx_sync_pir_to_irr,
- .deliver_posted_interrupt = vmx_deliver_posted_interrupt,
+ .deliver_interrupt = vmx_deliver_interrupt,
.dy_apicv_has_pending_interrupt = pi_has_pending_interrupt,
.set_tss_addr = vmx_set_tss_addr,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9e43d756312f..82a9dcd8c67f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -90,6 +90,8 @@
u64 __read_mostly kvm_mce_cap_supported = MCG_CTL_P | MCG_SER_P;
EXPORT_SYMBOL_GPL(kvm_mce_cap_supported);
+#define ERR_PTR_USR(e) ((void __user *)ERR_PTR(e))
+
#define emul_to_vcpu(ctxt) \
((struct kvm_vcpu *)(ctxt)->vcpu)
@@ -982,6 +984,18 @@ void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_load_host_xsave_state);
+static inline u64 kvm_guest_supported_xcr0(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.guest_fpu.fpstate->user_xfeatures;
+}
+
+#ifdef CONFIG_X86_64
+static inline u64 kvm_guest_supported_xfd(struct kvm_vcpu *vcpu)
+{
+ return kvm_guest_supported_xcr0(vcpu) & XFEATURE_MASK_USER_DYNAMIC;
+}
+#endif
+
static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
{
u64 xcr0 = xcr;
@@ -1001,7 +1015,7 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
* saving. However, xcr0 bit 0 is always set, even if the
* emulated CPU does not support XSAVE (see kvm_vcpu_reset()).
*/
- valid_bits = vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FP;
+ valid_bits = kvm_guest_supported_xcr0(vcpu) | XFEATURE_MASK_FP;
if (xcr0 & ~valid_bits)
return 1;
@@ -2349,10 +2363,12 @@ static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
return tsc;
}
+#ifdef CONFIG_X86_64
static inline int gtod_is_based_on_tsc(int mode)
{
return mode == VDSO_CLOCKMODE_TSC || mode == VDSO_CLOCKMODE_HVCLOCK;
}
+#endif
static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
{
@@ -3535,6 +3551,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (data & ~supported_xss)
return 1;
vcpu->arch.ia32_xss = data;
+ kvm_update_cpuid_runtime(vcpu);
break;
case MSR_SMI_COUNT:
if (!msr_info->host_initiated)
@@ -3703,8 +3720,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
!guest_cpuid_has(vcpu, X86_FEATURE_XFD))
return 1;
- if (data & ~(XFEATURE_MASK_USER_DYNAMIC &
- vcpu->arch.guest_supported_xcr0))
+ if (data & ~kvm_guest_supported_xfd(vcpu))
return 1;
fpu_update_guest_xfd(&vcpu->arch.guest_fpu, data);
@@ -3714,8 +3730,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
!guest_cpuid_has(vcpu, X86_FEATURE_XFD))
return 1;
- if (data & ~(XFEATURE_MASK_USER_DYNAMIC &
- vcpu->arch.guest_supported_xcr0))
+ if (data & ~kvm_guest_supported_xfd(vcpu))
return 1;
vcpu->arch.guest_fpu.xfd_err = data;
@@ -4229,6 +4244,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_SREGS2:
case KVM_CAP_EXIT_ON_EMULATION_FAILURE:
case KVM_CAP_VCPU_ATTRIBUTES:
+ case KVM_CAP_SYS_ATTRIBUTES:
+ case KVM_CAP_ENABLE_CAP:
r = 1;
break;
case KVM_CAP_EXIT_HYPERCALL:
@@ -4331,7 +4348,49 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
break;
}
return r;
+}
+
+static inline void __user *kvm_get_attr_addr(struct kvm_device_attr *attr)
+{
+ void __user *uaddr = (void __user*)(unsigned long)attr->addr;
+ if ((u64)(unsigned long)uaddr != attr->addr)
+ return ERR_PTR_USR(-EFAULT);
+ return uaddr;
+}
+
+static int kvm_x86_dev_get_attr(struct kvm_device_attr *attr)
+{
+ u64 __user *uaddr = kvm_get_attr_addr(attr);
+
+ if (attr->group)
+ return -ENXIO;
+
+ if (IS_ERR(uaddr))
+ return PTR_ERR(uaddr);
+
+ switch (attr->attr) {
+ case KVM_X86_XCOMP_GUEST_SUPP:
+ if (put_user(supported_xcr0, uaddr))
+ return -EFAULT;
+ return 0;
+ default:
+ return -ENXIO;
+ break;
+ }
+}
+
+static int kvm_x86_dev_has_attr(struct kvm_device_attr *attr)
+{
+ if (attr->group)
+ return -ENXIO;
+
+ switch (attr->attr) {
+ case KVM_X86_XCOMP_GUEST_SUPP:
+ return 0;
+ default:
+ return -ENXIO;
+ }
}
long kvm_arch_dev_ioctl(struct file *filp,
@@ -4422,6 +4481,22 @@ long kvm_arch_dev_ioctl(struct file *filp,
case KVM_GET_SUPPORTED_HV_CPUID:
r = kvm_ioctl_get_supported_hv_cpuid(NULL, argp);
break;
+ case KVM_GET_DEVICE_ATTR: {
+ struct kvm_device_attr attr;
+ r = -EFAULT;
+ if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
+ break;
+ r = kvm_x86_dev_get_attr(&attr);
+ break;
+ }
+ case KVM_HAS_DEVICE_ATTR: {
+ struct kvm_device_attr attr;
+ r = -EFAULT;
+ if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
+ break;
+ r = kvm_x86_dev_has_attr(&attr);
+ break;
+ }
default:
r = -EINVAL;
break;
@@ -4860,8 +4935,10 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
vcpu->arch.apic->sipi_vector = events->sipi_vector;
if (events->flags & KVM_VCPUEVENT_VALID_SMM) {
- if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm)
+ if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) {
+ kvm_x86_ops.nested_ops->leave_nested(vcpu);
kvm_smm_changed(vcpu, events->smi.smm);
+ }
vcpu->arch.smi_pending = events->smi.pending;
@@ -5022,11 +5099,11 @@ static int kvm_arch_tsc_has_attr(struct kvm_vcpu *vcpu,
static int kvm_arch_tsc_get_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr)
{
- u64 __user *uaddr = (u64 __user *)(unsigned long)attr->addr;
+ u64 __user *uaddr = kvm_get_attr_addr(attr);
int r;
- if ((u64)(unsigned long)uaddr != attr->addr)
- return -EFAULT;
+ if (IS_ERR(uaddr))
+ return PTR_ERR(uaddr);
switch (attr->attr) {
case KVM_VCPU_TSC_OFFSET:
@@ -5045,12 +5122,12 @@ static int kvm_arch_tsc_get_attr(struct kvm_vcpu *vcpu,
static int kvm_arch_tsc_set_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr)
{
- u64 __user *uaddr = (u64 __user *)(unsigned long)attr->addr;
+ u64 __user *uaddr = kvm_get_attr_addr(attr);
struct kvm *kvm = vcpu->kvm;
int r;
- if ((u64)(unsigned long)uaddr != attr->addr)
- return -EFAULT;
+ if (IS_ERR(uaddr))
+ return PTR_ERR(uaddr);
switch (attr->attr) {
case KVM_VCPU_TSC_OFFSET: {
@@ -6810,6 +6887,13 @@ int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val,
}
EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
+static int kvm_can_emulate_insn(struct kvm_vcpu *vcpu, int emul_type,
+ void *insn, int insn_len)
+{
+ return static_call(kvm_x86_can_emulate_instruction)(vcpu, emul_type,
+ insn, insn_len);
+}
+
int handle_ud(struct kvm_vcpu *vcpu)
{
static const char kvm_emulate_prefix[] = { __KVM_EMULATE_PREFIX };
@@ -6817,7 +6901,7 @@ int handle_ud(struct kvm_vcpu *vcpu)
char sig[5]; /* ud2; .ascii "kvm" */
struct x86_exception e;
- if (unlikely(!static_call(kvm_x86_can_emulate_instruction)(vcpu, NULL, 0)))
+ if (unlikely(!kvm_can_emulate_insn(vcpu, emul_type, NULL, 0)))
return 1;
if (force_emulation_prefix &&
@@ -8193,7 +8277,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
bool writeback = true;
bool write_fault_to_spt;
- if (unlikely(!static_call(kvm_x86_can_emulate_instruction)(vcpu, insn, insn_len)))
+ if (unlikely(!kvm_can_emulate_insn(vcpu, emulation_type, insn, insn_len)))
return 1;
vcpu->arch.l1tf_flush_l1d = true;
@@ -8871,6 +8955,13 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr,
if (clock_type != KVM_CLOCK_PAIRING_WALLCLOCK)
return -KVM_EOPNOTSUPP;
+ /*
+ * When tsc is in permanent catchup mode guests won't be able to use
+ * pvclock_read_retry loop to get consistent view of pvclock
+ */
+ if (vcpu->arch.tsc_always_catchup)
+ return -KVM_EOPNOTSUPP;
+
if (!kvm_get_walltime_and_clockread(&ts, &cycle))
return -KVM_EOPNOTSUPP;
@@ -9706,7 +9797,7 @@ void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
}
-void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
+static void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
{
if (!lapic_in_kernel(vcpu))
return;
@@ -9912,7 +10003,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
* result in virtual interrupt delivery.
*/
local_irq_disable();
- vcpu->mode = IN_GUEST_MODE;
+
+ /* Store vcpu->apicv_active before vcpu->mode. */
+ smp_store_release(&vcpu->mode, IN_GUEST_MODE);
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
@@ -9972,6 +10065,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
set_debugreg(0, 7);
}
+ guest_timing_enter_irqoff();
+
for (;;) {
/*
* Assert that vCPU vs. VM APICv state is consistent. An APICv
@@ -10056,7 +10151,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
* of accounting via context tracking, but the loss of accuracy is
* acceptable for all known use cases.
*/
- vtime_account_guest_exit();
+ guest_timing_exit_irqoff();
if (lapic_in_kernel(vcpu)) {
s64 delta = vcpu->arch.apic->lapic_timer.advance_expire_delta;
@@ -11209,7 +11304,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vcpu->arch.msr_misc_features_enables = 0;
- vcpu->arch.xcr0 = XFEATURE_MASK_FP;
+ __kvm_set_xcr(vcpu, 0, XFEATURE_MASK_FP);
+ __kvm_set_msr(vcpu, MSR_IA32_XSS, 0, true);
}
/* All GPRs except RDX (handled below) are zeroed on RESET/INIT. */
@@ -11226,8 +11322,6 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
cpuid_0x1 = kvm_find_cpuid_entry(vcpu, 1, 0);
kvm_rdx_write(vcpu, cpuid_0x1 ? cpuid_0x1->eax : 0x600);
- vcpu->arch.ia32_xss = 0;
-
static_call(kvm_x86_vcpu_reset)(vcpu, init_event);
kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
@@ -11571,8 +11665,6 @@ void kvm_arch_sync_events(struct kvm *kvm)
kvm_free_pit(kvm);
}
-#define ERR_PTR_USR(e) ((void __user *)ERR_PTR(e))
-
/**
* __x86_set_memory_region: Setup KVM internal memory slot
*
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 635b75f9e145..767ec7f99516 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -10,51 +10,6 @@
void kvm_spurious_fault(void);
-static __always_inline void kvm_guest_enter_irqoff(void)
-{
- /*
- * VMENTER enables interrupts (host state), but the kernel state is
- * interrupts disabled when this is invoked. Also tell RCU about
- * it. This is the same logic as for exit_to_user_mode().
- *
- * This ensures that e.g. latency analysis on the host observes
- * guest mode as interrupt enabled.
- *
- * guest_enter_irqoff() informs context tracking about the
- * transition to guest mode and if enabled adjusts RCU state
- * accordingly.
- */
- instrumentation_begin();
- trace_hardirqs_on_prepare();
- lockdep_hardirqs_on_prepare(CALLER_ADDR0);
- instrumentation_end();
-
- guest_enter_irqoff();
- lockdep_hardirqs_on(CALLER_ADDR0);
-}
-
-static __always_inline void kvm_guest_exit_irqoff(void)
-{
- /*
- * VMEXIT disables interrupts (host state), but tracing and lockdep
- * have them in state 'on' as recorded before entering guest mode.
- * Same as enter_from_user_mode().
- *
- * context_tracking_guest_exit() restores host context and reinstates
- * RCU if enabled and required.
- *
- * This needs to be done immediately after VM-Exit, before any code
- * that might contain tracepoints or call out to the greater world,
- * e.g. before x86_spec_ctrl_restore_host().
- */
- lockdep_hardirqs_off(CALLER_ADDR0);
- context_tracking_guest_exit();
-
- instrumentation_begin();
- trace_hardirqs_off_finish();
- instrumentation_end();
-}
-
#define KVM_NESTED_VMENTER_CONSISTENCY_CHECK(consistency_check) \
({ \
bool failed = (consistency_check); \
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 0e3f7d6e9fd7..74be1fda58e3 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -133,32 +133,57 @@ static void kvm_xen_update_runstate(struct kvm_vcpu *v, int state)
void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
{
struct kvm_vcpu_xen *vx = &v->arch.xen;
+ struct gfn_to_hva_cache *ghc = &vx->runstate_cache;
+ struct kvm_memslots *slots = kvm_memslots(v->kvm);
+ bool atomic = (state == RUNSTATE_runnable);
uint64_t state_entry_time;
- unsigned int offset;
+ int __user *user_state;
+ uint64_t __user *user_times;
kvm_xen_update_runstate(v, state);
if (!vx->runstate_set)
return;
- BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c);
+ if (unlikely(slots->generation != ghc->generation || kvm_is_error_hva(ghc->hva)) &&
+ kvm_gfn_to_hva_cache_init(v->kvm, ghc, ghc->gpa, ghc->len))
+ return;
+
+ /* We made sure it fits in a single page */
+ BUG_ON(!ghc->memslot);
+
+ if (atomic)
+ pagefault_disable();
- offset = offsetof(struct compat_vcpu_runstate_info, state_entry_time);
-#ifdef CONFIG_X86_64
/*
- * The only difference is alignment of uint64_t in 32-bit.
- * So the first field 'state' is accessed directly using
- * offsetof() (where its offset happens to be zero), while the
- * remaining fields which are all uint64_t, start at 'offset'
- * which we tweak here by adding 4.
+ * The only difference between 32-bit and 64-bit versions of the
+ * runstate struct us the alignment of uint64_t in 32-bit, which
+ * means that the 64-bit version has an additional 4 bytes of
+ * padding after the first field 'state'.
+ *
+ * So we use 'int __user *user_state' to point to the state field,
+ * and 'uint64_t __user *user_times' for runstate_entry_time. So
+ * the actual array of time[] in each state starts at user_times[1].
*/
+ BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) != 0);
+ BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state) != 0);
+ user_state = (int __user *)ghc->hva;
+
+ BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c);
+
+ user_times = (uint64_t __user *)(ghc->hva +
+ offsetof(struct compat_vcpu_runstate_info,
+ state_entry_time));
+#ifdef CONFIG_X86_64
BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
offsetof(struct compat_vcpu_runstate_info, state_entry_time) + 4);
BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, time) !=
offsetof(struct compat_vcpu_runstate_info, time) + 4);
if (v->kvm->arch.xen.long_mode)
- offset = offsetof(struct vcpu_runstate_info, state_entry_time);
+ user_times = (uint64_t __user *)(ghc->hva +
+ offsetof(struct vcpu_runstate_info,
+ state_entry_time));
#endif
/*
* First write the updated state_entry_time at the appropriate
@@ -172,10 +197,8 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state_entry_time) !=
sizeof(state_entry_time));
- if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
- &state_entry_time, offset,
- sizeof(state_entry_time)))
- return;
+ if (__put_user(state_entry_time, user_times))
+ goto out;
smp_wmb();
/*
@@ -189,11 +212,8 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state) !=
sizeof(vx->current_runstate));
- if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
- &vx->current_runstate,
- offsetof(struct vcpu_runstate_info, state),
- sizeof(vx->current_runstate)))
- return;
+ if (__put_user(vx->current_runstate, user_state))
+ goto out;
/*
* Write the actual runstate times immediately after the
@@ -208,24 +228,23 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) !=
sizeof(vx->runstate_times));
- if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
- &vx->runstate_times[0],
- offset + sizeof(u64),
- sizeof(vx->runstate_times)))
- return;
-
+ if (__copy_to_user(user_times + 1, vx->runstate_times, sizeof(vx->runstate_times)))
+ goto out;
smp_wmb();
/*
* Finally, clear the XEN_RUNSTATE_UPDATE bit in the guest's
* runstate_entry_time field.
*/
-
state_entry_time &= ~XEN_RUNSTATE_UPDATE;
- if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
- &state_entry_time, offset,
- sizeof(state_entry_time)))
- return;
+ __put_user(state_entry_time, user_times);
+ smp_wmb();
+
+ out:
+ mark_page_dirty_in_slot(v->kvm, ghc->memslot, ghc->gpa >> PAGE_SHIFT);
+
+ if (atomic)
+ pagefault_enable();
}
int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
@@ -316,10 +335,7 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
"\tnotq %0\n"
"\t" LOCK_PREFIX "andq %0, %2\n"
"2:\n"
- "\t.section .fixup,\"ax\"\n"
- "3:\tjmp\t2b\n"
- "\t.previous\n"
- _ASM_EXTABLE_UA(1b, 3b)
+ _ASM_EXTABLE_UA(1b, 2b)
: "=r" (evtchn_pending_sel),
"+m" (vi->evtchn_pending_sel),
"+m" (v->arch.xen.evtchn_pending_sel)
@@ -335,10 +351,7 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
"\tnotl %0\n"
"\t" LOCK_PREFIX "andl %0, %2\n"
"2:\n"
- "\t.section .fixup,\"ax\"\n"
- "3:\tjmp\t2b\n"
- "\t.previous\n"
- _ASM_EXTABLE_UA(1b, 3b)
+ _ASM_EXTABLE_UA(1b, 2b)
: "=r" (evtchn_pending_sel32),
"+m" (vi->evtchn_pending_sel),
"+m" (v->arch.xen.evtchn_pending_sel)
@@ -449,6 +462,12 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
break;
}
+ /* It must fit within a single page */
+ if ((data->u.gpa & ~PAGE_MASK) + sizeof(struct vcpu_info) > PAGE_SIZE) {
+ r = -EINVAL;
+ break;
+ }
+
r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
&vcpu->arch.xen.vcpu_info_cache,
data->u.gpa,
@@ -466,6 +485,12 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
break;
}
+ /* It must fit within a single page */
+ if ((data->u.gpa & ~PAGE_MASK) + sizeof(struct pvclock_vcpu_time_info) > PAGE_SIZE) {
+ r = -EINVAL;
+ break;
+ }
+
r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
&vcpu->arch.xen.vcpu_time_info_cache,
data->u.gpa,
@@ -487,6 +512,12 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
break;
}
+ /* It must fit within a single page */
+ if ((data->u.gpa & ~PAGE_MASK) + sizeof(struct vcpu_runstate_info) > PAGE_SIZE) {
+ r = -EINVAL;
+ break;
+ }
+
r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
&vcpu->arch.xen.runstate_cache,
data->u.gpa,
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 2edd86649468..615a76d70019 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -353,8 +353,8 @@ static void pci_fixup_video(struct pci_dev *pdev)
}
}
}
-DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_ANY_ID, PCI_ANY_ID,
- PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_video);
+DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_ANY_ID, PCI_ANY_ID,
+ PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_video);
static const struct dmi_system_id msi_k8t_dmi_table[] = {
diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c
index 42300941ec29..517a9d8d8f94 100644
--- a/arch/x86/xen/enlighten_hvm.c
+++ b/arch/x86/xen/enlighten_hvm.c
@@ -9,6 +9,7 @@
#include <xen/events.h>
#include <xen/interface/memory.h>
+#include <asm/apic.h>
#include <asm/cpu.h>
#include <asm/smp.h>
#include <asm/io_apic.h>
@@ -184,8 +185,7 @@ static int xen_cpu_dead_hvm(unsigned int cpu)
if (xen_have_vector_callback && xen_feature(XENFEAT_hvm_safe_pvclock))
xen_teardown_timer(cpu);
-
- return 0;
+ return 0;
}
static bool no_vector_callback __initdata;
@@ -242,15 +242,14 @@ static __init int xen_parse_no_vector_callback(char *arg)
}
early_param("xen_no_vector_callback", xen_parse_no_vector_callback);
-bool __init xen_hvm_need_lapic(void)
+static __init bool xen_x2apic_available(void)
{
- if (xen_pv_domain())
- return false;
- if (!xen_hvm_domain())
- return false;
- if (xen_feature(XENFEAT_hvm_pirqs) && xen_have_vector_callback)
- return false;
- return true;
+ return x2apic_supported();
+}
+
+static bool __init msi_ext_dest_id(void)
+{
+ return cpuid_eax(xen_cpuid_base() + 4) & XEN_HVM_CPUID_EXT_DEST_ID;
}
static __init void xen_hvm_guest_late_init(void)
@@ -312,9 +311,10 @@ struct hypervisor_x86 x86_hyper_xen_hvm __initdata = {
.detect = xen_platform_hvm,
.type = X86_HYPER_XEN_HVM,
.init.init_platform = xen_hvm_guest_init,
- .init.x2apic_available = xen_x2apic_para_available,
+ .init.x2apic_available = xen_x2apic_available,
.init.init_mem_mapping = xen_hvm_init_mem_mapping,
.init.guest_late_init = xen_hvm_guest_late_init,
+ .init.msi_ext_dest_id = msi_ext_dest_id,
.runtime.pin_vcpu = xen_pin_vcpu,
.ignore_nopv = true,
};
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 5004feb16783..d47c3d176ae4 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1341,10 +1341,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
xen_acpi_sleep_register();
- /* Avoid searching for BIOS MP tables */
- x86_init.mpparse.find_smp_config = x86_init_noop;
- x86_init.mpparse.get_smp_config = x86_init_uint_noop;
-
xen_boot_params_init_edd();
#ifdef CONFIG_ACPI
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index 6a8f3b53ab83..4a6019238ee7 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -148,28 +148,12 @@ int xen_smp_intr_init_pv(unsigned int cpu)
return rc;
}
-static void __init xen_fill_possible_map(void)
-{
- int i, rc;
-
- if (xen_initial_domain())
- return;
-
- for (i = 0; i < nr_cpu_ids; i++) {
- rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
- if (rc >= 0) {
- num_processors++;
- set_cpu_possible(i, true);
- }
- }
-}
-
-static void __init xen_filter_cpu_maps(void)
+static void __init _get_smp_config(unsigned int early)
{
int i, rc;
unsigned int subtract = 0;
- if (!xen_initial_domain())
+ if (early)
return;
num_processors = 0;
@@ -210,7 +194,6 @@ static void __init xen_pv_smp_prepare_boot_cpu(void)
* sure the old memory can be recycled. */
make_lowmem_page_readwrite(xen_initial_gdt);
- xen_filter_cpu_maps();
xen_setup_vcpu_info_placement();
/*
@@ -476,5 +459,8 @@ static const struct smp_ops xen_smp_ops __initconst = {
void __init xen_smp_init(void)
{
smp_ops = xen_smp_ops;
- xen_fill_possible_map();
+
+ /* Avoid searching for BIOS MP tables */
+ x86_init.mpparse.find_smp_config = x86_init_noop;
+ x86_init.mpparse.get_smp_config = _get_smp_config;
}
diff --git a/arch/x86/xen/vga.c b/arch/x86/xen/vga.c
index 31b1e3477cb6..14ea32e734d5 100644
--- a/arch/x86/xen/vga.c
+++ b/arch/x86/xen/vga.c
@@ -57,6 +57,14 @@ void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size)
screen_info->rsvd_size = info->u.vesa_lfb.rsvd_size;
screen_info->rsvd_pos = info->u.vesa_lfb.rsvd_pos;
+ if (size >= offsetof(struct dom0_vga_console_info,
+ u.vesa_lfb.ext_lfb_base)
+ + sizeof(info->u.vesa_lfb.ext_lfb_base)
+ && info->u.vesa_lfb.ext_lfb_base) {
+ screen_info->ext_lfb_base = info->u.vesa_lfb.ext_lfb_base;
+ screen_info->capabilities |= VIDEO_CAPABILITY_64BIT_BASE;
+ }
+
if (info->video_type == XEN_VGATYPE_EFI_LFB) {
screen_info->orig_video_isVGA = VIDEO_TYPE_EFI;
break;
@@ -66,14 +74,6 @@ void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size)
u.vesa_lfb.mode_attrs)
+ sizeof(info->u.vesa_lfb.mode_attrs))
screen_info->vesa_attributes = info->u.vesa_lfb.mode_attrs;
-
- if (size >= offsetof(struct dom0_vga_console_info,
- u.vesa_lfb.ext_lfb_base)
- + sizeof(info->u.vesa_lfb.ext_lfb_base)
- && info->u.vesa_lfb.ext_lfb_base) {
- screen_info->ext_lfb_base = info->u.vesa_lfb.ext_lfb_base;
- screen_info->capabilities |= VIDEO_CAPABILITY_64BIT_BASE;
- }
break;
}
}