summary | refs | log | tree | commit | diff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r-- arch/x86/.gitignore | 1
-rw-r--r-- arch/x86/Kconfig | 77
-rw-r--r-- arch/x86/Kconfig.cpu | 4
-rw-r--r-- arch/x86/boot/compressed/eboot.c | 4
-rw-r--r-- arch/x86/entry/entry_64.S | 2
-rw-r--r-- arch/x86/events/core.c | 3
-rw-r--r-- arch/x86/events/intel/core.c | 14
-rw-r--r-- arch/x86/events/intel/ds.c | 6
-rw-r--r-- arch/x86/events/intel/uncore_snbep.c | 34
-rw-r--r-- arch/x86/events/perf_event.h | 6
-rw-r--r-- arch/x86/include/asm/bitops.h | 29
-rw-r--r-- arch/x86/include/asm/cpufeature.h | 79
-rw-r--r-- arch/x86/include/asm/kvm_host.h | 6
-rw-r--r-- arch/x86/include/asm/nospec-branch.h | 37
-rw-r--r-- arch/x86/include/asm/page_64.h | 4
-rw-r--r-- arch/x86/include/asm/percpu.h | 2
-rw-r--r-- arch/x86/include/asm/refcount.h | 2
-rw-r--r-- arch/x86/include/asm/smp.h | 1
-rw-r--r-- arch/x86/include/asm/vmx.h | 1
-rw-r--r-- arch/x86/include/uapi/asm/hyperv.h | 18
-rw-r--r-- arch/x86/include/uapi/asm/kvm_para.h | 1
-rw-r--r-- arch/x86/include/uapi/asm/mce.h | 1
-rw-r--r-- arch/x86/kernel/apic/vector.c | 25
-rw-r--r-- arch/x86/kernel/apic/x2apic_uv_x.c | 15
-rw-r--r-- arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 1
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce-internal.h | 15
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce.c | 45
-rw-r--r-- arch/x86/kernel/kvm.c | 20
-rw-r--r-- arch/x86/kernel/machine_kexec_64.c | 1
-rw-r--r-- arch/x86/kernel/module.c | 1
-rw-r--r-- arch/x86/kernel/signal_compat.c | 65
-rw-r--r-- arch/x86/kernel/smpboot.c | 12
-rw-r--r-- arch/x86/kvm/cpuid.c | 3
-rw-r--r-- arch/x86/kvm/lapic.c | 11
-rw-r--r-- arch/x86/kvm/mmu.c | 6
-rw-r--r-- arch/x86/kvm/svm.c | 81
-rw-r--r-- arch/x86/kvm/vmx.c | 26
-rw-r--r-- arch/x86/kvm/x86.c | 107
-rw-r--r-- arch/x86/lib/error-inject.c | 1
-rw-r--r-- arch/x86/mm/fault.c | 6
-rw-r--r-- arch/x86/mm/init_64.c | 64
-rw-r--r-- arch/x86/mm/pgtable.c | 48
-rw-r--r-- arch/x86/net/bpf_jit_comp.c | 12
-rw-r--r-- arch/x86/oprofile/nmi_int.c | 2
-rw-r--r-- arch/x86/platform/intel-mid/intel-mid.c | 2
-rw-r--r-- arch/x86/tools/relocs.c | 3
-rw-r--r-- arch/x86/xen/enlighten_pv.c | 6
-rw-r--r-- arch/x86/xen/smp.c | 2
48 files changed, 677 insertions, 235 deletions
diff --git a/arch/x86/.gitignore b/arch/x86/.gitignore
index aff152c87cf4..5a82bac5e0bc 100644
--- a/arch/x86/.gitignore
+++ b/arch/x86/.gitignore
@@ -1,6 +1,7 @@
boot/compressed/vmlinux
tools/test_get_len
tools/insn_sanity
+tools/insn_decoder_test
purgatory/kexec-purgatory.c
purgatory/purgatory.ro
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 09c599e0900d..0fa71a78ec99 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -423,12 +423,6 @@ config X86_MPPARSE
For old smp systems that do not have proper acpi support. Newer systems
(esp with 64bit cpus) with acpi support, MADT and DSDT will override it
-config X86_BIGSMP
- bool "Support for big SMP systems with more than 8 CPUs"
- depends on X86_32 && SMP
- ---help---
- This option is needed for the systems that have more than 8 CPUs
-
config GOLDFISH
def_bool y
depends on X86_GOLDFISH
@@ -461,6 +455,12 @@ config INTEL_RDT
Say N if unsure.
if X86_32
+config X86_BIGSMP
+ bool "Support for big SMP systems with more than 8 CPUs"
+ depends on SMP
+ ---help---
+ This option is needed for the systems that have more than 8 CPUs
+
config X86_EXTENDED_PLATFORM
bool "Support for extended (non-PC) x86 platforms"
default y
@@ -950,25 +950,66 @@ config MAXSMP
Enable maximum number of CPUS and NUMA Nodes for this architecture.
If unsure, say N.
+#
+# The maximum number of CPUs supported:
+#
+# The main config value is NR_CPUS, which defaults to NR_CPUS_DEFAULT,
+# and which can be configured interactively in the
+# [NR_CPUS_RANGE_BEGIN ... NR_CPUS_RANGE_END] range.
+#
+# The ranges are different on 32-bit and 64-bit kernels, depending on
+# hardware capabilities and scalability features of the kernel.
+#
+# ( If MAXSMP is enabled we just use the highest possible value and disable
+# interactive configuration. )
+#
+
+config NR_CPUS_RANGE_BEGIN
+ int
+ default NR_CPUS_RANGE_END if MAXSMP
+ default 1 if !SMP
+ default 2
+
+config NR_CPUS_RANGE_END
+ int
+ depends on X86_32
+ default 64 if SMP && X86_BIGSMP
+ default 8 if SMP && !X86_BIGSMP
+ default 1 if !SMP
+
+config NR_CPUS_RANGE_END
+ int
+ depends on X86_64
+ default 8192 if SMP && ( MAXSMP || CPUMASK_OFFSTACK)
+ default 512 if SMP && (!MAXSMP && !CPUMASK_OFFSTACK)
+ default 1 if !SMP
+
+config NR_CPUS_DEFAULT
+ int
+ depends on X86_32
+ default 32 if X86_BIGSMP
+ default 8 if SMP
+ default 1 if !SMP
+
+config NR_CPUS_DEFAULT
+ int
+ depends on X86_64
+ default 8192 if MAXSMP
+ default 64 if SMP
+ default 1 if !SMP
+
config NR_CPUS
int "Maximum number of CPUs" if SMP && !MAXSMP
- range 2 8 if SMP && X86_32 && !X86_BIGSMP
- range 2 64 if SMP && X86_32 && X86_BIGSMP
- range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK && X86_64
- range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64
- default "1" if !SMP
- default "8192" if MAXSMP
- default "32" if SMP && X86_BIGSMP
- default "8" if SMP && X86_32
- default "64" if SMP
+ range NR_CPUS_RANGE_BEGIN NR_CPUS_RANGE_END
+ default NR_CPUS_DEFAULT
---help---
This allows you to specify the maximum number of CPUs which this
kernel will support. If CPUMASK_OFFSTACK is enabled, the maximum
supported value is 8192, otherwise the maximum value is 512. The
minimum value which makes sense is 2.
- This is purely to save memory - each supported CPU adds
- approximately eight kilobytes to the kernel image.
+ This is purely to save memory: each supported CPU adds about 8KB
+ to the kernel image.
config SCHED_SMT
bool "SMT (Hyperthreading) scheduler support"
@@ -1364,7 +1405,7 @@ config HIGHMEM4G
config HIGHMEM64G
bool "64GB"
- depends on !M486
+ depends on !M486 && !M586 && !M586TSC && !M586MMX && !MGEODE_LX && !MGEODEGX1 && !MCYRIXIII && !MELAN && !MWINCHIPC6 && !WINCHIP3D && !MK6
select X86_PAE
---help---
Select this if you have a 32-bit processor and more than 4
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index f0c5ef578153..638411f22267 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -361,7 +361,7 @@ config X86_TSC
config X86_CMPXCHG64
def_bool y
- depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM
+ depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586TSC || M586MMX || MATOM || MGEODE_LX || MGEODEGX1 || MK6 || MK7 || MK8
# this should be set for all -march=.. options where the compiler
# generates cmov.
@@ -372,7 +372,7 @@ config X86_CMOV
config X86_MINIMUM_CPU_FAMILY
int
default "64" if X86_64
- default "6" if X86_32 && X86_P6_NOP
+ default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCRUSOE || MCORE2 || MK7 || MK8)
default "5" if X86_32 && X86_CMPXCHG64
default "4"
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 353e20c3f114..886a9115af62 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -439,7 +439,7 @@ setup_uga32(void **uga_handle, unsigned long size, u32 *width, u32 *height)
struct efi_uga_draw_protocol *uga = NULL, *first_uga;
efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID;
unsigned long nr_ugas;
- u32 *handles = (u32 *)uga_handle;;
+ u32 *handles = (u32 *)uga_handle;
efi_status_t status = EFI_INVALID_PARAMETER;
int i;
@@ -484,7 +484,7 @@ setup_uga64(void **uga_handle, unsigned long size, u32 *width, u32 *height)
struct efi_uga_draw_protocol *uga = NULL, *first_uga;
efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID;
unsigned long nr_ugas;
- u64 *handles = (u64 *)uga_handle;;
+ u64 *handles = (u64 *)uga_handle;
efi_status_t status = EFI_INVALID_PARAMETER;
int i;
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 9b114675fbc0..18ed349b4f83 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -55,7 +55,7 @@ END(native_usergs_sysret64)
.macro TRACE_IRQS_FLAGS flags:req
#ifdef CONFIG_TRACE_IRQFLAGS
- bt $9, \flags /* interrupts off? */
+ btl $9, \flags /* interrupts off? */
jnc 1f
TRACE_IRQS_ON
1:
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 140d33288e78..88797c80b3e0 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2118,7 +2118,8 @@ static int x86_pmu_event_init(struct perf_event *event)
event->destroy(event);
}
- if (READ_ONCE(x86_pmu.attr_rdpmc))
+ if (READ_ONCE(x86_pmu.attr_rdpmc) &&
+ !(event->hw.flags & PERF_X86_EVENT_LARGE_PEBS))
event->hw.flags |= PERF_X86_EVENT_RDPMC_ALLOWED;
return err;
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 56457cb73448..1e41d7508d99 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2952,9 +2952,9 @@ static void intel_pebs_aliases_skl(struct perf_event *event)
return intel_pebs_aliases_precdist(event);
}
-static unsigned long intel_pmu_free_running_flags(struct perf_event *event)
+static unsigned long intel_pmu_large_pebs_flags(struct perf_event *event)
{
- unsigned long flags = x86_pmu.free_running_flags;
+ unsigned long flags = x86_pmu.large_pebs_flags;
if (event->attr.use_clockid)
flags &= ~PERF_SAMPLE_TIME;
@@ -2976,8 +2976,8 @@ static int intel_pmu_hw_config(struct perf_event *event)
if (!event->attr.freq) {
event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
if (!(event->attr.sample_type &
- ~intel_pmu_free_running_flags(event)))
- event->hw.flags |= PERF_X86_EVENT_FREERUNNING;
+ ~intel_pmu_large_pebs_flags(event)))
+ event->hw.flags |= PERF_X86_EVENT_LARGE_PEBS;
}
if (x86_pmu.pebs_aliases)
x86_pmu.pebs_aliases(event);
@@ -3194,7 +3194,7 @@ static unsigned bdw_limit_period(struct perf_event *event, unsigned left)
X86_CONFIG(.event=0xc0, .umask=0x01)) {
if (left < 128)
left = 128;
- left &= ~0x3fu;
+ left &= ~0x3fULL;
}
return left;
}
@@ -3460,7 +3460,7 @@ static __initconst const struct x86_pmu core_pmu = {
.event_map = intel_pmu_event_map,
.max_events = ARRAY_SIZE(intel_perfmon_event_map),
.apic = 1,
- .free_running_flags = PEBS_FREERUNNING_FLAGS,
+ .large_pebs_flags = LARGE_PEBS_FLAGS,
/*
* Intel PMCs cannot be accessed sanely above 32-bit width,
@@ -3502,7 +3502,7 @@ static __initconst const struct x86_pmu intel_pmu = {
.event_map = intel_pmu_event_map,
.max_events = ARRAY_SIZE(intel_perfmon_event_map),
.apic = 1,
- .free_running_flags = PEBS_FREERUNNING_FLAGS,
+ .large_pebs_flags = LARGE_PEBS_FLAGS,
/*
* Intel PMCs cannot be accessed sanely above 32 bit width,
* so we install an artificial 1<<31 period regardless of
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 18c25ab28557..d8015235ba76 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -935,7 +935,7 @@ void intel_pmu_pebs_add(struct perf_event *event)
bool needed_cb = pebs_needs_sched_cb(cpuc);
cpuc->n_pebs++;
- if (hwc->flags & PERF_X86_EVENT_FREERUNNING)
+ if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
cpuc->n_large_pebs++;
pebs_update_state(needed_cb, cpuc, event->ctx->pmu);
@@ -975,7 +975,7 @@ void intel_pmu_pebs_del(struct perf_event *event)
bool needed_cb = pebs_needs_sched_cb(cpuc);
cpuc->n_pebs--;
- if (hwc->flags & PERF_X86_EVENT_FREERUNNING)
+ if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
cpuc->n_large_pebs--;
pebs_update_state(needed_cb, cpuc, event->ctx->pmu);
@@ -1530,7 +1530,7 @@ void __init intel_ds_init(void)
x86_pmu.pebs_record_size =
sizeof(struct pebs_record_skl);
x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
- x86_pmu.free_running_flags |= PERF_SAMPLE_TIME;
+ x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME;
break;
default:
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 6d8044ab1060..c98b943e58b4 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -3343,6 +3343,7 @@ static struct extra_reg skx_uncore_cha_extra_regs[] = {
SNBEP_CBO_EVENT_EXTRA_REG(0x9134, 0xffff, 0x4),
SNBEP_CBO_EVENT_EXTRA_REG(0x35, 0xff, 0x8),
SNBEP_CBO_EVENT_EXTRA_REG(0x36, 0xff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x38, 0xff, 0x3),
EVENT_EXTRA_END
};
@@ -3562,24 +3563,27 @@ static struct intel_uncore_type *skx_msr_uncores[] = {
NULL,
};
+/*
+ * To determine the number of CHAs, read bits 27:0 of the CAPID6 register,
+ * which is located at Device 30, Function 3, Offset 0x9C (PCI ID 0x2083).
+ */
+#define SKX_CAPID6 0x9c
+#define SKX_CHA_BIT_MASK GENMASK(27, 0)
+
static int skx_count_chabox(void)
{
- struct pci_dev *chabox_dev = NULL;
- int bus, count = 0;
+ struct pci_dev *dev = NULL;
+ u32 val = 0;
- while (1) {
- chabox_dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x208d, chabox_dev);
- if (!chabox_dev)
- break;
- if (count == 0)
- bus = chabox_dev->bus->number;
- if (bus != chabox_dev->bus->number)
- break;
- count++;
- }
+ dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x2083, dev);
+ if (!dev)
+ goto out;
- pci_dev_put(chabox_dev);
- return count;
+ pci_read_config_dword(dev, SKX_CAPID6, &val);
+ val &= SKX_CHA_BIT_MASK;
+out:
+ pci_dev_put(dev);
+ return hweight32(val);
}
void skx_uncore_cpu_init(void)
@@ -3606,7 +3610,7 @@ static struct intel_uncore_type skx_uncore_imc = {
};
static struct attribute *skx_upi_uncore_formats_attr[] = {
- &format_attr_event_ext.attr,
+ &format_attr_event.attr,
&format_attr_umask_ext.attr,
&format_attr_edge.attr,
&format_attr_inv.attr,
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 78f91ec1056e..39cd0615f04f 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -69,7 +69,7 @@ struct event_constraint {
#define PERF_X86_EVENT_RDPMC_ALLOWED 0x0100 /* grant rdpmc permission */
#define PERF_X86_EVENT_EXCL_ACCT 0x0200 /* accounted EXCL event */
#define PERF_X86_EVENT_AUTO_RELOAD 0x0400 /* use PEBS auto-reload */
-#define PERF_X86_EVENT_FREERUNNING 0x0800 /* use freerunning PEBS */
+#define PERF_X86_EVENT_LARGE_PEBS 0x0800 /* use large PEBS */
struct amd_nb {
@@ -88,7 +88,7 @@ struct amd_nb {
* REGS_USER can be handled for events limited to ring 3.
*
*/
-#define PEBS_FREERUNNING_FLAGS \
+#define LARGE_PEBS_FLAGS \
(PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \
PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \
PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \
@@ -608,7 +608,7 @@ struct x86_pmu {
struct event_constraint *pebs_constraints;
void (*pebs_aliases)(struct perf_event *event);
int max_pebs_events;
- unsigned long free_running_flags;
+ unsigned long large_pebs_flags;
/*
* Intel LBR
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 3fa039855b8f..9f645ba57dbb 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -78,7 +78,7 @@ set_bit(long nr, volatile unsigned long *addr)
: "iq" ((u8)CONST_MASK(nr))
: "memory");
} else {
- asm volatile(LOCK_PREFIX "bts %1,%0"
+ asm volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0"
: BITOP_ADDR(addr) : "Ir" (nr) : "memory");
}
}
@@ -94,7 +94,7 @@ set_bit(long nr, volatile unsigned long *addr)
*/
static __always_inline void __set_bit(long nr, volatile unsigned long *addr)
{
- asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory");
+ asm volatile(__ASM_SIZE(bts) " %1,%0" : ADDR : "Ir" (nr) : "memory");
}
/**
@@ -115,7 +115,7 @@ clear_bit(long nr, volatile unsigned long *addr)
: CONST_MASK_ADDR(nr, addr)
: "iq" ((u8)~CONST_MASK(nr)));
} else {
- asm volatile(LOCK_PREFIX "btr %1,%0"
+ asm volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0"
: BITOP_ADDR(addr)
: "Ir" (nr));
}
@@ -137,7 +137,7 @@ static __always_inline void clear_bit_unlock(long nr, volatile unsigned long *ad
static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
{
- asm volatile("btr %1,%0" : ADDR : "Ir" (nr));
+ asm volatile(__ASM_SIZE(btr) " %1,%0" : ADDR : "Ir" (nr));
}
static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
@@ -182,7 +182,7 @@ static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *
*/
static __always_inline void __change_bit(long nr, volatile unsigned long *addr)
{
- asm volatile("btc %1,%0" : ADDR : "Ir" (nr));
+ asm volatile(__ASM_SIZE(btc) " %1,%0" : ADDR : "Ir" (nr));
}
/**
@@ -201,7 +201,7 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr)
: CONST_MASK_ADDR(nr, addr)
: "iq" ((u8)CONST_MASK(nr)));
} else {
- asm volatile(LOCK_PREFIX "btc %1,%0"
+ asm volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0"
: BITOP_ADDR(addr)
: "Ir" (nr));
}
@@ -217,7 +217,8 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr)
*/
static __always_inline bool test_and_set_bit(long nr, volatile unsigned long *addr)
{
- GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", c);
+ GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(bts),
+ *addr, "Ir", nr, "%0", c);
}
/**
@@ -246,7 +247,7 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *
{
bool oldbit;
- asm("bts %2,%1"
+ asm(__ASM_SIZE(bts) " %2,%1"
CC_SET(c)
: CC_OUT(c) (oldbit), ADDR
: "Ir" (nr));
@@ -263,7 +264,8 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *
*/
static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long *addr)
{
- GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", c);
+ GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btr),
+ *addr, "Ir", nr, "%0", c);
}
/**
@@ -286,7 +288,7 @@ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long
{
bool oldbit;
- asm volatile("btr %2,%1"
+ asm volatile(__ASM_SIZE(btr) " %2,%1"
CC_SET(c)
: CC_OUT(c) (oldbit), ADDR
: "Ir" (nr));
@@ -298,7 +300,7 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon
{
bool oldbit;
- asm volatile("btc %2,%1"
+ asm volatile(__ASM_SIZE(btc) " %2,%1"
CC_SET(c)
: CC_OUT(c) (oldbit), ADDR
: "Ir" (nr) : "memory");
@@ -316,7 +318,8 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon
*/
static __always_inline bool test_and_change_bit(long nr, volatile unsigned long *addr)
{
- GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", c);
+ GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btc),
+ *addr, "Ir", nr, "%0", c);
}
static __always_inline bool constant_test_bit(long nr, const volatile unsigned long *addr)
@@ -329,7 +332,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l
{
bool oldbit;
- asm volatile("bt %2,%1"
+ asm volatile(__ASM_SIZE(bt) " %2,%1"
CC_SET(c)
: CC_OUT(c) (oldbit)
: "m" (*(unsigned long *)addr), "Ir" (nr));
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 70eddb3922ff..736771c9822e 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -148,45 +148,46 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
*/
static __always_inline __pure bool _static_cpu_has(u16 bit)
{
- asm_volatile_goto("1: jmp 6f\n"
- "2:\n"
- ".skip -(((5f-4f) - (2b-1b)) > 0) * "
- "((5f-4f) - (2b-1b)),0x90\n"
- "3:\n"
- ".section .altinstructions,\"a\"\n"
- " .long 1b - .\n" /* src offset */
- " .long 4f - .\n" /* repl offset */
- " .word %P1\n" /* always replace */
- " .byte 3b - 1b\n" /* src len */
- " .byte 5f - 4f\n" /* repl len */
- " .byte 3b - 2b\n" /* pad len */
- ".previous\n"
- ".section .altinstr_replacement,\"ax\"\n"
- "4: jmp %l[t_no]\n"
- "5:\n"
- ".previous\n"
- ".section .altinstructions,\"a\"\n"
- " .long 1b - .\n" /* src offset */
- " .long 0\n" /* no replacement */
- " .word %P0\n" /* feature bit */
- " .byte 3b - 1b\n" /* src len */
- " .byte 0\n" /* repl len */
- " .byte 0\n" /* pad len */
- ".previous\n"
- ".section .altinstr_aux,\"ax\"\n"
- "6:\n"
- " testb %[bitnum],%[cap_byte]\n"
- " jnz %l[t_yes]\n"
- " jmp %l[t_no]\n"
- ".previous\n"
- : : "i" (bit), "i" (X86_FEATURE_ALWAYS),
- [bitnum] "i" (1 << (bit & 7)),
- [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
- : : t_yes, t_no);
- t_yes:
- return true;
- t_no:
- return false;
+ asm_volatile_goto("1: jmp 6f\n"
+ "2:\n"
+ ".skip -(((5f-4f) - (2b-1b)) > 0) * "
+ "((5f-4f) - (2b-1b)),0x90\n"
+ "3:\n"
+ ".section .altinstructions,\"a\"\n"
+ " .long 1b - .\n" /* src offset */
+ " .long 4f - .\n" /* repl offset */
+ " .word %P[always]\n" /* always replace */
+ " .byte 3b - 1b\n" /* src len */
+ " .byte 5f - 4f\n" /* repl len */
+ " .byte 3b - 2b\n" /* pad len */
+ ".previous\n"
+ ".section .altinstr_replacement,\"ax\"\n"
+ "4: jmp %l[t_no]\n"
+ "5:\n"
+ ".previous\n"
+ ".section .altinstructions,\"a\"\n"
+ " .long 1b - .\n" /* src offset */
+ " .long 0\n" /* no replacement */
+ " .word %P[feature]\n" /* feature bit */
+ " .byte 3b - 1b\n" /* src len */
+ " .byte 0\n" /* repl len */
+ " .byte 0\n" /* pad len */
+ ".previous\n"
+ ".section .altinstr_aux,\"ax\"\n"
+ "6:\n"
+ " testb %[bitnum],%[cap_byte]\n"
+ " jnz %l[t_yes]\n"
+ " jmp %l[t_no]\n"
+ ".previous\n"
+ : : [feature] "i" (bit),
+ [always] "i" (X86_FEATURE_ALWAYS),
+ [bitnum] "i" (1 << (bit & 7)),
+ [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
+ : : t_yes, t_no);
+t_yes:
+ return true;
+t_no:
+ return false;
}
#define static_cpu_has(bit) \
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index dd6f57a54a26..b605a5b6a30c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -507,6 +507,7 @@ struct kvm_vcpu_arch {
u64 smi_count;
bool tpr_access_reporting;
u64 ia32_xss;
+ u64 microcode_version;
/*
* Paging state of the vcpu
@@ -1095,6 +1096,8 @@ struct kvm_x86_ops {
int (*mem_enc_op)(struct kvm *kvm, void __user *argp);
int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
+
+ int (*get_msr_feature)(struct kvm_msr_entry *entry);
};
struct kvm_arch_async_pf {
@@ -1464,7 +1467,4 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
#define put_smstate(type, buf, offset, val) \
*(type *)((buf) + (offset) - 0x7e00) = val
-void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
- unsigned long start, unsigned long end);
-
#endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index b3996d60f981..f928ad9b143f 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -278,4 +278,41 @@ do { \
} while (0)
#endif /* __ASSEMBLY__ */
+
+/*
+ * Below is used in the eBPF JIT compiler and emits the byte sequence
+ * for the following assembly:
+ *
+ * With retpolines configured:
+ *
+ * callq do_rop
+ * spec_trap:
+ * pause
+ * lfence
+ * jmp spec_trap
+ * do_rop:
+ * mov %rax,(%rsp)
+ * retq
+ *
+ * Without retpolines configured:
+ *
+ * jmp *%rax
+ */
+#ifdef CONFIG_RETPOLINE
+# define RETPOLINE_RAX_BPF_JIT_SIZE 17
+# define RETPOLINE_RAX_BPF_JIT() \
+ EMIT1_off32(0xE8, 7); /* callq do_rop */ \
+ /* spec_trap: */ \
+ EMIT2(0xF3, 0x90); /* pause */ \
+ EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \
+ EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \
+ /* do_rop: */ \
+ EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */ \
+ EMIT1(0xC3); /* retq */
+#else
+# define RETPOLINE_RAX_BPF_JIT_SIZE 2
+# define RETPOLINE_RAX_BPF_JIT() \
+ EMIT2(0xFF, 0xE0); /* jmp *%rax */
+#endif
+
#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 4baa6bceb232..d652a3808065 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -52,10 +52,6 @@ static inline void clear_page(void *page)
void copy_page(void *to, void *from);
-#ifdef CONFIG_X86_MCE
-#define arch_unmap_kpfn arch_unmap_kpfn
-#endif
-
#endif /* !__ASSEMBLY__ */
#ifdef CONFIG_X86_VSYSCALL_EMULATION
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index ba3c523aaf16..a06b07399d17 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -526,7 +526,7 @@ static inline bool x86_this_cpu_variable_test_bit(int nr,
{
bool oldbit;
- asm volatile("bt "__percpu_arg(2)",%1"
+ asm volatile("btl "__percpu_arg(2)",%1"
CC_SET(c)
: CC_OUT(c) (oldbit)
: "m" (*(unsigned long __percpu *)addr), "Ir" (nr));
diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h
index d65171120e90..4cf11d88d3b3 100644
--- a/arch/x86/include/asm/refcount.h
+++ b/arch/x86/include/asm/refcount.h
@@ -17,7 +17,7 @@
#define _REFCOUNT_EXCEPTION \
".pushsection .text..refcount\n" \
"111:\tlea %[counter], %%" _ASM_CX "\n" \
- "112:\t" ASM_UD0 "\n" \
+ "112:\t" ASM_UD2 "\n" \
ASM_UNREACHABLE \
".popsection\n" \
"113:\n" \
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 461f53d27708..a4189762b266 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -129,6 +129,7 @@ static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
void cpu_disable_common(void);
void native_smp_prepare_boot_cpu(void);
void native_smp_prepare_cpus(unsigned int max_cpus);
+void calculate_max_logical_packages(void);
void native_smp_cpus_done(unsigned int max_cpus);
void common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
int native_cpu_up(unsigned int cpunum, struct task_struct *tidle);
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 8b6780751132..5db8b0b10766 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -352,6 +352,7 @@ enum vmcs_field {
#define INTR_TYPE_NMI_INTR (2 << 8) /* NMI */
#define INTR_TYPE_HARD_EXCEPTION (3 << 8) /* processor exception */
#define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */
+#define INTR_TYPE_PRIV_SW_EXCEPTION (5 << 8) /* ICE breakpoint - undocumented */
#define INTR_TYPE_SOFT_EXCEPTION (6 << 8) /* software exception */
/* GUEST_INTERRUPTIBILITY_INFO flags. */
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
index 197c2e6c7376..099414345865 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -241,24 +241,24 @@
#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106
struct hv_reenlightenment_control {
- u64 vector:8;
- u64 reserved1:8;
- u64 enabled:1;
- u64 reserved2:15;
- u64 target_vp:32;
+ __u64 vector:8;
+ __u64 reserved1:8;
+ __u64 enabled:1;
+ __u64 reserved2:15;
+ __u64 target_vp:32;
};
#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107
#define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108
struct hv_tsc_emulation_control {
- u64 enabled:1;
- u64 reserved:63;
+ __u64 enabled:1;
+ __u64 reserved:63;
};
struct hv_tsc_emulation_status {
- u64 inprogress:1;
- u64 reserved:63;
+ __u64 inprogress:1;
+ __u64 reserved:63;
};
#define HV_X64_MSR_HYPERCALL_ENABLE 0x00000001
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 7a2ade4aa235..6cfa9c8cb7d6 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -26,6 +26,7 @@
#define KVM_FEATURE_PV_EOI 6
#define KVM_FEATURE_PV_UNHALT 7
#define KVM_FEATURE_PV_TLB_FLUSH 9
+#define KVM_FEATURE_ASYNC_PF_VMEXIT 10
/* The last 8 bits are used to indicate how to interpret the flags field
* in pvclock structure. If no bits are set, all flags are ignored.
diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h
index 91723461dc1f..435db58a7bad 100644
--- a/arch/x86/include/uapi/asm/mce.h
+++ b/arch/x86/include/uapi/asm/mce.h
@@ -30,6 +30,7 @@ struct mce {
__u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */
__u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */
__u64 ppin; /* Protected Processor Inventory Number */
+ __u32 microcode;/* Microcode revision */
};
#define MCE_GET_RECORD_LEN _IOR('M', 1, int)
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 3cc471beb50b..bb6f7a2148d7 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -134,21 +134,40 @@ static void apic_update_vector(struct irq_data *irqd, unsigned int newvec,
{
struct apic_chip_data *apicd = apic_chip_data(irqd);
struct irq_desc *desc = irq_data_to_desc(irqd);
+ bool managed = irqd_affinity_is_managed(irqd);
lockdep_assert_held(&vector_lock);
trace_vector_update(irqd->irq, newvec, newcpu, apicd->vector,
apicd->cpu);
- /* Setup the vector move, if required */
- if (apicd->vector && cpu_online(apicd->cpu)) {
+ /*
+ * If there is no vector associated or if the associated vector is
+ * the shutdown vector, which is associated to make PCI/MSI
+ * shutdown mode work, then there is nothing to release. Clear out
+ * prev_vector for this and the offlined target case.
+ */
+ apicd->prev_vector = 0;
+ if (!apicd->vector || apicd->vector == MANAGED_IRQ_SHUTDOWN_VECTOR)
+ goto setnew;
+ /*
+ * If the target CPU of the previous vector is online, then mark
+ * the vector as move in progress and store it for cleanup when the
+ * first interrupt on the new vector arrives. If the target CPU is
+ * offline then the regular release mechanism via the cleanup
+ * vector is not possible and the vector can be immediately freed
+ * in the underlying matrix allocator.
+ */
+ if (cpu_online(apicd->cpu)) {
apicd->move_in_progress = true;
apicd->prev_vector = apicd->vector;
apicd->prev_cpu = apicd->cpu;
} else {
- apicd->prev_vector = 0;
+ irq_matrix_free(vector_matrix, apicd->cpu, apicd->vector,
+ managed);
}
+setnew:
apicd->vector = newvec;
apicd->cpu = newcpu;
BUG_ON(!IS_ERR_OR_NULL(per_cpu(vector_irq, newcpu)[newvec]));
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 46b675aaf20b..f11910b44638 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -1176,16 +1176,25 @@ static void __init decode_gam_rng_tbl(unsigned long ptr)
uv_gre_table = gre;
for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
+ unsigned long size = ((unsigned long)(gre->limit - lgre)
+ << UV_GAM_RANGE_SHFT);
+ int order = 0;
+ char suffix[] = " KMGTPE";
+
+ while (size > 9999 && order < sizeof(suffix)) {
+ size /= 1024;
+ order++;
+ }
+
if (!index) {
pr_info("UV: GAM Range Table...\n");
pr_info("UV: # %20s %14s %5s %4s %5s %3s %2s\n", "Range", "", "Size", "Type", "NASID", "SID", "PN");
}
- pr_info("UV: %2d: 0x%014lx-0x%014lx %5luG %3d %04x %02x %02x\n",
+ pr_info("UV: %2d: 0x%014lx-0x%014lx %5lu%c %3d %04x %02x %02x\n",
index++,
(unsigned long)lgre << UV_GAM_RANGE_SHFT,
(unsigned long)gre->limit << UV_GAM_RANGE_SHFT,
- ((unsigned long)(gre->limit - lgre)) >>
- (30 - UV_GAM_RANGE_SHFT), /* 64M -> 1G */
+ size, suffix[order],
gre->type, gre->nasid, gre->sockid, gre->pnode);
lgre = gre->limit;
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index bdab7d2f51af..fca759d272a1 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -1804,6 +1804,7 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
goto out_common_fail;
}
closid = ret;
+ ret = 0;
rdtgrp->closid = closid;
list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups);
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index aa0d5df9dc60..e956eb267061 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -115,4 +115,19 @@ static inline void mce_unregister_injector_chain(struct notifier_block *nb) { }
extern struct mca_config mca_cfg;
+#ifndef CONFIG_X86_64
+/*
+ * On 32-bit systems it would be difficult to safely unmap a poison page
+ * from the kernel 1:1 map because there are no non-canonical addresses that
+ * we can use to refer to the address without risking a speculative access.
+ * However, this isn't much of an issue because:
+ * 1) Few unmappable pages are in the 1:1 map. Most are in HIGHMEM, which
+ * is only mapped into the kernel as needed
+ * 2) Few people would run a 32-bit kernel on a machine that supports
+ * recoverable errors because they have too much memory to boot 32-bit.
+ */
+static inline void mce_unmap_kpfn(unsigned long pfn) {}
+#define mce_unmap_kpfn mce_unmap_kpfn
+#endif
+
#endif /* __X86_MCE_INTERNAL_H__ */
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 3a8e88a611eb..466f47301334 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -56,6 +56,9 @@
static DEFINE_MUTEX(mce_log_mutex);
+/* sysfs synchronization */
+static DEFINE_MUTEX(mce_sysfs_mutex);
+
#define CREATE_TRACE_POINTS
#include <trace/events/mce.h>
@@ -105,6 +108,10 @@ static struct irq_work mce_irq_work;
static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
+#ifndef mce_unmap_kpfn
+static void mce_unmap_kpfn(unsigned long pfn);
+#endif
+
/*
* CPU/chipset specific EDAC code can register a notifier call here to print
* MCE errors in a human-readable form.
@@ -126,6 +133,8 @@ void mce_setup(struct mce *m)
if (this_cpu_has(X86_FEATURE_INTEL_PPIN))
rdmsrl(MSR_PPIN, m->ppin);
+
+ m->microcode = boot_cpu_data.microcode;
}
DEFINE_PER_CPU(struct mce, injectm);
@@ -234,7 +243,7 @@ static void __print_mce(struct mce *m)
m->cs, m->ip);
if (m->cs == __KERNEL_CS)
- pr_cont("{%pS}", (void *)m->ip);
+ pr_cont("{%pS}", (void *)(unsigned long)m->ip);
pr_cont("\n");
}
@@ -258,7 +267,7 @@ static void __print_mce(struct mce *m)
*/
pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x microcode %x\n",
m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid,
- cpu_data(m->extcpu).microcode);
+ m->microcode);
}
static void print_mce(struct mce *m)
@@ -590,7 +599,8 @@ static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) {
pfn = mce->addr >> PAGE_SHIFT;
- memory_failure(pfn, 0);
+ if (!memory_failure(pfn, 0))
+ mce_unmap_kpfn(pfn);
}
return NOTIFY_OK;
@@ -1057,12 +1067,13 @@ static int do_memory_failure(struct mce *m)
ret = memory_failure(m->addr >> PAGE_SHIFT, flags);
if (ret)
pr_err("Memory error not recovered");
+ else
+ mce_unmap_kpfn(m->addr >> PAGE_SHIFT);
return ret;
}
-#if defined(arch_unmap_kpfn) && defined(CONFIG_MEMORY_FAILURE)
-
-void arch_unmap_kpfn(unsigned long pfn)
+#ifndef mce_unmap_kpfn
+static void mce_unmap_kpfn(unsigned long pfn)
{
unsigned long decoy_addr;
@@ -1073,7 +1084,7 @@ void arch_unmap_kpfn(unsigned long pfn)
* We would like to just call:
* set_memory_np((unsigned long)pfn_to_kaddr(pfn), 1);
* but doing that would radically increase the odds of a
- * speculative access to the posion page because we'd have
+ * speculative access to the poison page because we'd have
* the virtual address of the kernel 1:1 mapping sitting
* around in registers.
* Instead we get tricky. We create a non-canonical address
@@ -1098,7 +1109,6 @@ void arch_unmap_kpfn(unsigned long pfn)
if (set_memory_np(decoy_addr, 1))
pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
-
}
#endif
@@ -2081,6 +2091,7 @@ static ssize_t set_ignore_ce(struct device *s,
if (kstrtou64(buf, 0, &new) < 0)
return -EINVAL;
+ mutex_lock(&mce_sysfs_mutex);
if (mca_cfg.ignore_ce ^ !!new) {
if (new) {
/* disable ce features */
@@ -2093,6 +2104,8 @@ static ssize_t set_ignore_ce(struct device *s,
on_each_cpu(mce_enable_ce, (void *)1, 1);
}
}
+ mutex_unlock(&mce_sysfs_mutex);
+
return size;
}
@@ -2105,6 +2118,7 @@ static ssize_t set_cmci_disabled(struct device *s,
if (kstrtou64(buf, 0, &new) < 0)
return -EINVAL;
+ mutex_lock(&mce_sysfs_mutex);
if (mca_cfg.cmci_disabled ^ !!new) {
if (new) {
/* disable cmci */
@@ -2116,6 +2130,8 @@ static ssize_t set_cmci_disabled(struct device *s,
on_each_cpu(mce_enable_ce, NULL, 1);
}
}
+ mutex_unlock(&mce_sysfs_mutex);
+
return size;
}
@@ -2123,8 +2139,19 @@ static ssize_t store_int_with_restart(struct device *s,
struct device_attribute *attr,
const char *buf, size_t size)
{
- ssize_t ret = device_store_int(s, attr, buf, size);
+ unsigned long old_check_interval = check_interval;
+ ssize_t ret = device_store_ulong(s, attr, buf, size);
+
+ if (check_interval == old_check_interval)
+ return ret;
+
+ if (check_interval < 1)
+ check_interval = 1;
+
+ mutex_lock(&mce_sysfs_mutex);
mce_restart();
+ mutex_unlock(&mce_sysfs_mutex);
+
return ret;
}
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 4e37d1a851a6..bc1a27280c4b 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -49,7 +49,7 @@
static int kvmapf = 1;
-static int parse_no_kvmapf(char *arg)
+static int __init parse_no_kvmapf(char *arg)
{
kvmapf = 0;
return 0;
@@ -58,7 +58,7 @@ static int parse_no_kvmapf(char *arg)
early_param("no-kvmapf", parse_no_kvmapf);
static int steal_acc = 1;
-static int parse_no_stealacc(char *arg)
+static int __init parse_no_stealacc(char *arg)
{
steal_acc = 0;
return 0;
@@ -67,7 +67,7 @@ static int parse_no_stealacc(char *arg)
early_param("no-steal-acc", parse_no_stealacc);
static int kvmclock_vsyscall = 1;
-static int parse_no_kvmclock_vsyscall(char *arg)
+static int __init parse_no_kvmclock_vsyscall(char *arg)
{
kvmclock_vsyscall = 0;
return 0;
@@ -341,10 +341,10 @@ static void kvm_guest_cpu_init(void)
#endif
pa |= KVM_ASYNC_PF_ENABLED;
- /* Async page fault support for L1 hypervisor is optional */
- if (wrmsr_safe(MSR_KVM_ASYNC_PF_EN,
- (pa | KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT) & 0xffffffff, pa >> 32) < 0)
- wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
+ if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT))
+ pa |= KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
+
+ wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
__this_cpu_write(apf_reason.enabled, 1);
printk(KERN_INFO"KVM setup async PF for cpu %d\n",
smp_processor_id());
@@ -545,7 +545,8 @@ static void __init kvm_guest_init(void)
pv_time_ops.steal_clock = kvm_steal_clock;
}
- if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH))
+ if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
+ !kvm_para_has_feature(KVM_FEATURE_STEAL_TIME))
pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others;
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
@@ -633,7 +634,8 @@ static __init int kvm_setup_pv_tlb_flush(void)
{
int cpu;
- if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH)) {
+ if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
+ !kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
for_each_possible_cpu(cpu) {
zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu),
GFP_KERNEL, cpu_to_node(cpu));
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 1f790cf9d38f..3b7427aa7d85 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -542,6 +542,7 @@ int arch_kexec_apply_relocations_add(const Elf64_Ehdr *ehdr,
goto overflow;
break;
case R_X86_64_PC32:
+ case R_X86_64_PLT32:
value -= (u64)address;
*(u32 *)location = value;
break;
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index da0c160e5589..f58336af095c 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -191,6 +191,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
goto overflow;
break;
case R_X86_64_PC32:
+ case R_X86_64_PLT32:
if (*(u32 *)loc != 0)
goto invalid_relocation;
val -= (u64)loc;
diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c
index ac057f9b0763..0d930d8987cc 100644
--- a/arch/x86/kernel/signal_compat.c
+++ b/arch/x86/kernel/signal_compat.c
@@ -43,6 +43,13 @@ static inline void signal_compat_build_tests(void)
BUILD_BUG_ON(offsetof(compat_siginfo_t, _sifields) != 3 * sizeof(int));
#define CHECK_CSI_OFFSET(name) BUILD_BUG_ON(_sifields_offset != offsetof(compat_siginfo_t, _sifields.name))
+ BUILD_BUG_ON(offsetof(siginfo_t, si_signo) != 0);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_errno) != 4);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_code) != 8);
+
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_signo) != 0);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_errno) != 4);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_code) != 8);
/*
* Ensure that the size of each si_field never changes.
* If it does, it is a sign that the
@@ -63,36 +70,94 @@ static inline void signal_compat_build_tests(void)
CHECK_CSI_SIZE (_kill, 2*sizeof(int));
CHECK_SI_SIZE (_kill, 2*sizeof(int));
+ BUILD_BUG_ON(offsetof(siginfo_t, si_pid) != 0x10);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_uid) != 0x14);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_pid) != 0xC);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_uid) != 0x10);
+
CHECK_CSI_OFFSET(_timer);
CHECK_CSI_SIZE (_timer, 3*sizeof(int));
CHECK_SI_SIZE (_timer, 6*sizeof(int));
+ BUILD_BUG_ON(offsetof(siginfo_t, si_tid) != 0x10);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_overrun) != 0x14);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_value) != 0x18);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_tid) != 0x0C);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_overrun) != 0x10);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_value) != 0x14);
+
CHECK_CSI_OFFSET(_rt);
CHECK_CSI_SIZE (_rt, 3*sizeof(int));
CHECK_SI_SIZE (_rt, 4*sizeof(int));
+ BUILD_BUG_ON(offsetof(siginfo_t, si_pid) != 0x10);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_uid) != 0x14);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_value) != 0x18);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_pid) != 0x0C);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_uid) != 0x10);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_value) != 0x14);
+
CHECK_CSI_OFFSET(_sigchld);
CHECK_CSI_SIZE (_sigchld, 5*sizeof(int));
CHECK_SI_SIZE (_sigchld, 8*sizeof(int));
+ BUILD_BUG_ON(offsetof(siginfo_t, si_pid) != 0x10);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_uid) != 0x14);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_status) != 0x18);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_utime) != 0x20);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_stime) != 0x28);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_pid) != 0x0C);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_uid) != 0x10);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_status) != 0x14);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_utime) != 0x18);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_stime) != 0x1C);
+
#ifdef CONFIG_X86_X32_ABI
CHECK_CSI_OFFSET(_sigchld_x32);
CHECK_CSI_SIZE (_sigchld_x32, 7*sizeof(int));
/* no _sigchld_x32 in the generic siginfo_t */
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, _sifields._sigchld_x32._utime) != 0x18);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, _sifields._sigchld_x32._stime) != 0x20);
#endif
CHECK_CSI_OFFSET(_sigfault);
CHECK_CSI_SIZE (_sigfault, 4*sizeof(int));
CHECK_SI_SIZE (_sigfault, 8*sizeof(int));
+ BUILD_BUG_ON(offsetof(siginfo_t, si_addr) != 0x10);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_addr) != 0x0C);
+
+ BUILD_BUG_ON(offsetof(siginfo_t, si_addr_lsb) != 0x18);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_addr_lsb) != 0x10);
+
+ BUILD_BUG_ON(offsetof(siginfo_t, si_lower) != 0x20);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_upper) != 0x28);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_lower) != 0x14);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_upper) != 0x18);
+
+ BUILD_BUG_ON(offsetof(siginfo_t, si_pkey) != 0x20);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_pkey) != 0x14);
+
CHECK_CSI_OFFSET(_sigpoll);
CHECK_CSI_SIZE (_sigpoll, 2*sizeof(int));
CHECK_SI_SIZE (_sigpoll, 4*sizeof(int));
+ BUILD_BUG_ON(offsetof(siginfo_t, si_band) != 0x10);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_fd) != 0x18);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_band) != 0x0C);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_fd) != 0x10);
+
CHECK_CSI_OFFSET(_sigsys);
CHECK_CSI_SIZE (_sigsys, 3*sizeof(int));
CHECK_SI_SIZE (_sigsys, 4*sizeof(int));
+ BUILD_BUG_ON(offsetof(siginfo_t, si_call_addr) != 0x10);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_syscall) != 0x18);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_arch) != 0x1C);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_call_addr) != 0x0C);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_syscall) != 0x10);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_arch) != 0x14);
+
/* any new si_fields should be added here */
}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 6f27facbaa9b..ff99e2b6fc54 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1281,11 +1281,10 @@ void __init native_smp_prepare_boot_cpu(void)
cpu_set_state_online(me);
}
-void __init native_smp_cpus_done(unsigned int max_cpus)
+void __init calculate_max_logical_packages(void)
{
int ncpus;
- pr_debug("Boot done\n");
/*
* Today neither Intel nor AMD support heterogenous systems so
* extrapolate the boot cpu's data to all packages.
@@ -1293,6 +1292,13 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
ncpus = cpu_data(0).booted_cores * topology_max_smt_threads();
__max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus);
pr_info("Max logical packages: %u\n", __max_logical_packages);
+}
+
+void __init native_smp_cpus_done(unsigned int max_cpus)
+{
+ pr_debug("Boot done\n");
+
+ calculate_max_logical_packages();
if (x86_has_numa_in_package)
set_sched_topology(x86_numa_in_package_topology);
@@ -1430,8 +1436,8 @@ static void remove_siblinginfo(int cpu)
cpumask_clear(cpu_llc_shared_mask(cpu));
cpumask_clear(topology_sibling_cpumask(cpu));
cpumask_clear(topology_core_cpumask(cpu));
- c->phys_proc_id = 0;
c->cpu_core_id = 0;
+ c->booted_cores = 0;
cpumask_clear_cpu(cpu, cpu_sibling_setup_mask);
recompute_smt_state();
}
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index a0c5a69bc7c4..b671fc2d0422 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -607,7 +607,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
(1 << KVM_FEATURE_PV_EOI) |
(1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) |
(1 << KVM_FEATURE_PV_UNHALT) |
- (1 << KVM_FEATURE_PV_TLB_FLUSH);
+ (1 << KVM_FEATURE_PV_TLB_FLUSH) |
+ (1 << KVM_FEATURE_ASYNC_PF_VMEXIT);
if (sched_info_on())
entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 924ac8ce9d50..391dda8d43b7 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2002,14 +2002,13 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
{
- struct kvm_lapic *apic;
+ struct kvm_lapic *apic = vcpu->arch.apic;
int i;
- apic_debug("%s\n", __func__);
+ if (!apic)
+ return;
- ASSERT(vcpu);
- apic = vcpu->arch.apic;
- ASSERT(apic != NULL);
+ apic_debug("%s\n", __func__);
/* Stop the timer in case it's a reset to an active apic */
hrtimer_cancel(&apic->lapic_timer.timer);
@@ -2165,7 +2164,6 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
*/
vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
- kvm_lapic_reset(vcpu, false);
kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
return 0;
@@ -2569,7 +2567,6 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
pe = xchg(&apic->pending_events, 0);
if (test_bit(KVM_APIC_INIT, &pe)) {
- kvm_lapic_reset(vcpu, true);
kvm_vcpu_reset(vcpu, true);
if (kvm_vcpu_is_bsp(apic->vcpu))
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 46ff304140c7..763bb3bade63 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2770,8 +2770,10 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
else
pte_access &= ~ACC_WRITE_MASK;
+ if (!kvm_is_mmio_pfn(pfn))
+ spte |= shadow_me_mask;
+
spte |= (u64)pfn << PAGE_SHIFT;
- spte |= shadow_me_mask;
if (pte_access & ACC_WRITE_MASK) {
@@ -3029,7 +3031,7 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn)
return RET_PF_RETRY;
}
- return -EFAULT;
+ return RET_PF_EMULATE;
}
static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 24c9521ebc24..be9c839e2c89 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -179,6 +179,8 @@ struct vcpu_svm {
uint64_t sysenter_eip;
uint64_t tsc_aux;
+ u64 msr_decfg;
+
u64 next_rip;
u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
@@ -301,6 +303,8 @@ module_param(vgif, int, 0444);
static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
module_param(sev, int, 0444);
+static u8 rsm_ins_bytes[] = "\x0f\xaa";
+
static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa);
static void svm_complete_interrupts(struct vcpu_svm *svm);
@@ -1384,6 +1388,7 @@ static void init_vmcb(struct vcpu_svm *svm)
set_intercept(svm, INTERCEPT_SKINIT);
set_intercept(svm, INTERCEPT_WBINVD);
set_intercept(svm, INTERCEPT_XSETBV);
+ set_intercept(svm, INTERCEPT_RSM);
if (!kvm_mwait_in_guest()) {
set_intercept(svm, INTERCEPT_MONITOR);
@@ -1903,6 +1908,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
u32 dummy;
u32 eax = 1;
+ vcpu->arch.microcode_version = 0x01000065;
svm->spec_ctrl = 0;
if (!init_event) {
@@ -3700,6 +3706,12 @@ static int emulate_on_interception(struct vcpu_svm *svm)
return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
}
+static int rsm_interception(struct vcpu_svm *svm)
+{
+ return x86_emulate_instruction(&svm->vcpu, 0, 0,
+ rsm_ins_bytes, 2) == EMULATE_DONE;
+}
+
static int rdpmc_interception(struct vcpu_svm *svm)
{
int err;
@@ -3861,6 +3873,22 @@ static int cr8_write_interception(struct vcpu_svm *svm)
return 0;
}
+static int svm_get_msr_feature(struct kvm_msr_entry *msr)
+{
+ msr->data = 0;
+
+ switch (msr->index) {
+ case MSR_F10H_DECFG:
+ if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC))
+ msr->data |= MSR_F10H_DECFG_LFENCE_SERIALIZE;
+ break;
+ default:
+ return 1;
+ }
+
+ return 0;
+}
+
static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -3936,9 +3964,6 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = svm->spec_ctrl;
break;
- case MSR_IA32_UCODE_REV:
- msr_info->data = 0x01000065;
- break;
case MSR_F15H_IC_CFG: {
int family, model;
@@ -3956,6 +3981,9 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = 0x1E;
}
break;
+ case MSR_F10H_DECFG:
+ msr_info->data = svm->msr_decfg;
+ break;
default:
return kvm_get_msr_common(vcpu, msr_info);
}
@@ -4134,6 +4162,24 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
case MSR_VM_IGNNE:
vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
break;
+ case MSR_F10H_DECFG: {
+ struct kvm_msr_entry msr_entry;
+
+ msr_entry.index = msr->index;
+ if (svm_get_msr_feature(&msr_entry))
+ return 1;
+
+ /* Check the supported bits */
+ if (data & ~msr_entry.data)
+ return 1;
+
+ /* Don't allow the guest to change a bit, #GP */
+ if (!msr->host_initiated && (data ^ msr_entry.data))
+ return 1;
+
+ svm->msr_decfg = data;
+ break;
+ }
case MSR_IA32_APICBASE:
if (kvm_vcpu_apicv_active(vcpu))
avic_update_vapic_bar(to_svm(vcpu), data);
@@ -4542,7 +4588,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_MWAIT] = mwait_interception,
[SVM_EXIT_XSETBV] = xsetbv_interception,
[SVM_EXIT_NPF] = npf_interception,
- [SVM_EXIT_RSM] = emulate_on_interception,
+ [SVM_EXIT_RSM] = rsm_interception,
[SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception,
[SVM_EXIT_AVIC_UNACCELERATED_ACCESS] = avic_unaccelerated_access_interception,
};
@@ -6237,16 +6283,18 @@ e_free:
static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
+ void __user *measure = (void __user *)(uintptr_t)argp->data;
struct kvm_sev_info *sev = &kvm->arch.sev_info;
struct sev_data_launch_measure *data;
struct kvm_sev_launch_measure params;
+ void __user *p = NULL;
void *blob = NULL;
int ret;
if (!sev_guest(kvm))
return -ENOTTY;
- if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
+ if (copy_from_user(&params, measure, sizeof(params)))
return -EFAULT;
data = kzalloc(sizeof(*data), GFP_KERNEL);
@@ -6257,17 +6305,13 @@ static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (!params.len)
goto cmd;
- if (params.uaddr) {
+ p = (void __user *)(uintptr_t)params.uaddr;
+ if (p) {
if (params.len > SEV_FW_BLOB_MAX_SIZE) {
ret = -EINVAL;
goto e_free;
}
- if (!access_ok(VERIFY_WRITE, params.uaddr, params.len)) {
- ret = -EFAULT;
- goto e_free;
- }
-
ret = -ENOMEM;
blob = kmalloc(params.len, GFP_KERNEL);
if (!blob)
@@ -6291,13 +6335,13 @@ cmd:
goto e_free_blob;
if (blob) {
- if (copy_to_user((void __user *)(uintptr_t)params.uaddr, blob, params.len))
+ if (copy_to_user(p, blob, params.len))
ret = -EFAULT;
}
done:
params.len = data->len;
- if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params)))
+ if (copy_to_user(measure, &params, sizeof(params)))
ret = -EFAULT;
e_free_blob:
kfree(blob);
@@ -6598,7 +6642,7 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
struct page **pages;
void *blob, *hdr;
unsigned long n;
- int ret;
+ int ret, offset;
if (!sev_guest(kvm))
return -ENOTTY;
@@ -6624,6 +6668,10 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (!data)
goto e_unpin_memory;
+ offset = params.guest_uaddr & (PAGE_SIZE - 1);
+ data->guest_address = __sme_page_pa(pages[0]) + offset;
+ data->guest_len = params.guest_len;
+
blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len);
if (IS_ERR(blob)) {
ret = PTR_ERR(blob);
@@ -6638,8 +6686,8 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
ret = PTR_ERR(hdr);
goto e_free_blob;
}
- data->trans_address = __psp_pa(blob);
- data->trans_len = params.trans_len;
+ data->hdr_address = __psp_pa(hdr);
+ data->hdr_len = params.hdr_len;
data->handle = sev->handle;
ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, data, &argp->error);
@@ -6822,6 +6870,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.vcpu_unblocking = svm_vcpu_unblocking,
.update_bp_intercept = update_bp_intercept,
+ .get_msr_feature = svm_get_msr_feature,
.get_msr = svm_get_msr,
.set_msr = svm_set_msr,
.get_segment_base = svm_get_segment_base,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7f8401d05939..2d87603f9179 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1045,6 +1045,13 @@ static inline bool is_machine_check(u32 intr_info)
(INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK);
}
+/* Undocumented: icebp/int1 */
+static inline bool is_icebp(u32 intr_info)
+{
+ return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
+ == (INTR_TYPE_PRIV_SW_EXCEPTION | INTR_INFO_VALID_MASK);
+}
+
static inline bool cpu_has_vmx_msr_bitmap(void)
{
return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS;
@@ -3227,6 +3234,11 @@ static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu,
return !(val & ~valid_bits);
}
+static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
+{
+ return 1;
+}
+
/*
* Reads an msr value (of 'msr_index') into 'pdata'.
* Returns 0 on success, non-0 otherwise.
@@ -4486,7 +4498,8 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
SECONDARY_EXEC_DESC);
hw_cr4 &= ~X86_CR4_UMIP;
- } else
+ } else if (!is_guest_mode(vcpu) ||
+ !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC))
vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
SECONDARY_EXEC_DESC);
@@ -5766,6 +5779,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vmx->rmode.vm86_active = 0;
vmx->spec_ctrl = 0;
+ vcpu->arch.microcode_version = 0x100000000ULL;
vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
kvm_set_cr8(vcpu, 0);
@@ -6172,7 +6186,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
(KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
vcpu->arch.dr6 &= ~15;
vcpu->arch.dr6 |= dr6 | DR6_RTM;
- if (!(dr6 & ~DR6_RESERVED)) /* icebp */
+ if (is_icebp(intr_info))
skip_emulated_instruction(vcpu);
kvm_queue_exception(vcpu, DB_VECTOR);
@@ -11200,7 +11214,12 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
if (ret)
return ret;
- if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT)
+ /*
+ * If we're entering a halted L2 vcpu and the L2 vcpu won't be woken
+ * by event injection, halt vcpu.
+ */
+ if ((vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) &&
+ !(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK))
return kvm_vcpu_halt(vcpu);
vmx->nested.nested_run_pending = 1;
@@ -12291,6 +12310,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.vcpu_put = vmx_vcpu_put,
.update_bp_intercept = update_exception_bitmap,
+ .get_msr_feature = vmx_get_msr_feature,
.get_msr = vmx_get_msr,
.set_msr = vmx_set_msr,
.get_segment_base = vmx_get_segment_base,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c8a0b545ac20..18b5ca7a3197 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1049,6 +1049,45 @@ static u32 emulated_msrs[] = {
static unsigned num_emulated_msrs;
+/*
+ * List of msr numbers which are used to expose MSR-based features that
+ * can be used by a hypervisor to validate requested CPU features.
+ */
+static u32 msr_based_features[] = {
+ MSR_F10H_DECFG,
+ MSR_IA32_UCODE_REV,
+};
+
+static unsigned int num_msr_based_features;
+
+static int kvm_get_msr_feature(struct kvm_msr_entry *msr)
+{
+ switch (msr->index) {
+ case MSR_IA32_UCODE_REV:
+ rdmsrl(msr->index, msr->data);
+ break;
+ default:
+ if (kvm_x86_ops->get_msr_feature(msr))
+ return 1;
+ }
+ return 0;
+}
+
+static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
+{
+ struct kvm_msr_entry msr;
+ int r;
+
+ msr.index = index;
+ r = kvm_get_msr_feature(&msr);
+ if (r)
+ return r;
+
+ *data = msr.data;
+
+ return 0;
+}
+
bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
{
if (efer & efer_reserved_bits)
@@ -2222,7 +2261,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
switch (msr) {
case MSR_AMD64_NB_CFG:
- case MSR_IA32_UCODE_REV:
case MSR_IA32_UCODE_WRITE:
case MSR_VM_HSAVE_PA:
case MSR_AMD64_PATCH_LOADER:
@@ -2230,6 +2268,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_AMD64_DC_CFG:
break;
+ case MSR_IA32_UCODE_REV:
+ if (msr_info->host_initiated)
+ vcpu->arch.microcode_version = data;
+ break;
case MSR_EFER:
return set_efer(vcpu, data);
case MSR_K7_HWCR:
@@ -2525,7 +2567,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = 0;
break;
case MSR_IA32_UCODE_REV:
- msr_info->data = 0x100000000ULL;
+ msr_info->data = vcpu->arch.microcode_version;
break;
case MSR_MTRRcap:
case 0x200 ... 0x2ff:
@@ -2680,13 +2722,11 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
int (*do_msr)(struct kvm_vcpu *vcpu,
unsigned index, u64 *data))
{
- int i, idx;
+ int i;
- idx = srcu_read_lock(&vcpu->kvm->srcu);
for (i = 0; i < msrs->nmsrs; ++i)
if (do_msr(vcpu, entries[i].index, &entries[i].data))
break;
- srcu_read_unlock(&vcpu->kvm->srcu, idx);
return i;
}
@@ -2785,6 +2825,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_SET_BOOT_CPU_ID:
case KVM_CAP_SPLIT_IRQCHIP:
case KVM_CAP_IMMEDIATE_EXIT:
+ case KVM_CAP_GET_MSR_FEATURES:
r = 1;
break;
case KVM_CAP_ADJUST_CLOCK:
@@ -2899,6 +2940,31 @@ long kvm_arch_dev_ioctl(struct file *filp,
goto out;
r = 0;
break;
+ case KVM_GET_MSR_FEATURE_INDEX_LIST: {
+ struct kvm_msr_list __user *user_msr_list = argp;
+ struct kvm_msr_list msr_list;
+ unsigned int n;
+
+ r = -EFAULT;
+ if (copy_from_user(&msr_list, user_msr_list, sizeof(msr_list)))
+ goto out;
+ n = msr_list.nmsrs;
+ msr_list.nmsrs = num_msr_based_features;
+ if (copy_to_user(user_msr_list, &msr_list, sizeof(msr_list)))
+ goto out;
+ r = -E2BIG;
+ if (n < msr_list.nmsrs)
+ goto out;
+ r = -EFAULT;
+ if (copy_to_user(user_msr_list->indices, &msr_based_features,
+ num_msr_based_features * sizeof(u32)))
+ goto out;
+ r = 0;
+ break;
+ }
+ case KVM_GET_MSRS:
+ r = msr_io(NULL, argp, do_get_msr_feature, 1);
+ break;
}
default:
r = -EINVAL;
@@ -3636,12 +3702,18 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = 0;
break;
}
- case KVM_GET_MSRS:
+ case KVM_GET_MSRS: {
+ int idx = srcu_read_lock(&vcpu->kvm->srcu);
r = msr_io(vcpu, argp, do_get_msr, 1);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
break;
- case KVM_SET_MSRS:
+ }
+ case KVM_SET_MSRS: {
+ int idx = srcu_read_lock(&vcpu->kvm->srcu);
r = msr_io(vcpu, argp, do_set_msr, 0);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
break;
+ }
case KVM_TPR_ACCESS_REPORTING: {
struct kvm_tpr_access_ctl tac;
@@ -4464,6 +4536,19 @@ static void kvm_init_msr_list(void)
j++;
}
num_emulated_msrs = j;
+
+ for (i = j = 0; i < ARRAY_SIZE(msr_based_features); i++) {
+ struct kvm_msr_entry msr;
+
+ msr.index = msr_based_features[i];
+ if (kvm_get_msr_feature(&msr))
+ continue;
+
+ if (j < i)
+ msr_based_features[j] = msr_based_features[i];
+ j++;
+ }
+ num_msr_based_features = j;
}
static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
@@ -8017,6 +8102,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
{
+ kvm_lapic_reset(vcpu, init_event);
+
vcpu->arch.hflags = 0;
vcpu->arch.smi_pending = 0;
@@ -8460,10 +8547,8 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
return r;
}
- if (!size) {
- r = vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE);
- WARN_ON(r < 0);
- }
+ if (!size)
+ vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE);
return 0;
}
diff --git a/arch/x86/lib/error-inject.c b/arch/x86/lib/error-inject.c
index 7b881d03d0dd..3cdf06128d13 100644
--- a/arch/x86/lib/error-inject.c
+++ b/arch/x86/lib/error-inject.c
@@ -7,6 +7,7 @@ asmlinkage void just_return_func(void);
asm(
".type just_return_func, @function\n"
+ ".globl just_return_func\n"
"just_return_func:\n"
" ret\n"
".size just_return_func, .-just_return_func\n"
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index c88573d90f3e..25a30b5d6582 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -330,7 +330,7 @@ static noinline int vmalloc_fault(unsigned long address)
if (!pmd_k)
return -1;
- if (pmd_huge(*pmd_k))
+ if (pmd_large(*pmd_k))
return 0;
pte_k = pte_offset_kernel(pmd_k, address);
@@ -475,7 +475,7 @@ static noinline int vmalloc_fault(unsigned long address)
if (pud_none(*pud) || pud_pfn(*pud) != pud_pfn(*pud_ref))
BUG();
- if (pud_huge(*pud))
+ if (pud_large(*pud))
return 0;
pmd = pmd_offset(pud, address);
@@ -486,7 +486,7 @@ static noinline int vmalloc_fault(unsigned long address)
if (pmd_none(*pmd) || pmd_pfn(*pmd) != pmd_pfn(*pmd_ref))
BUG();
- if (pmd_huge(*pmd))
+ if (pmd_large(*pmd))
return 0;
pte_ref = pte_offset_kernel(pmd_ref, address);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index fecb0c0a6077..af11a2890235 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -800,17 +800,11 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
#define PAGE_INUSE 0xFD
-static void __meminit free_pagetable(struct page *page, int order,
- struct vmem_altmap *altmap)
+static void __meminit free_pagetable(struct page *page, int order)
{
unsigned long magic;
unsigned int nr_pages = 1 << order;
- if (altmap) {
- vmem_altmap_free(altmap, nr_pages);
- return;
- }
-
/* bootmem page has reserved flag */
if (PageReserved(page)) {
__ClearPageReserved(page);
@@ -826,9 +820,17 @@ static void __meminit free_pagetable(struct page *page, int order,
free_pages((unsigned long)page_address(page), order);
}
-static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd,
+static void __meminit free_hugepage_table(struct page *page,
struct vmem_altmap *altmap)
{
+ if (altmap)
+ vmem_altmap_free(altmap, PMD_SIZE / PAGE_SIZE);
+ else
+ free_pagetable(page, get_order(PMD_SIZE));
+}
+
+static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd)
+{
pte_t *pte;
int i;
@@ -839,14 +841,13 @@ static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd,
}
/* free a pte talbe */
- free_pagetable(pmd_page(*pmd), 0, altmap);
+ free_pagetable(pmd_page(*pmd), 0);
spin_lock(&init_mm.page_table_lock);
pmd_clear(pmd);
spin_unlock(&init_mm.page_table_lock);
}
-static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud,
- struct vmem_altmap *altmap)
+static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud)
{
pmd_t *pmd;
int i;
@@ -858,14 +859,13 @@ static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud,
}
/* free a pmd talbe */
- free_pagetable(pud_page(*pud), 0, altmap);
+ free_pagetable(pud_page(*pud), 0);
spin_lock(&init_mm.page_table_lock);
pud_clear(pud);
spin_unlock(&init_mm.page_table_lock);
}
-static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d,
- struct vmem_altmap *altmap)
+static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d)
{
pud_t *pud;
int i;
@@ -877,7 +877,7 @@ static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d,
}
/* free a pud talbe */
- free_pagetable(p4d_page(*p4d), 0, altmap);
+ free_pagetable(p4d_page(*p4d), 0);
spin_lock(&init_mm.page_table_lock);
p4d_clear(p4d);
spin_unlock(&init_mm.page_table_lock);
@@ -885,7 +885,7 @@ static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d,
static void __meminit
remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
- struct vmem_altmap *altmap, bool direct)
+ bool direct)
{
unsigned long next, pages = 0;
pte_t *pte;
@@ -916,7 +916,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
* freed when offlining, or simplely not in use.
*/
if (!direct)
- free_pagetable(pte_page(*pte), 0, altmap);
+ free_pagetable(pte_page(*pte), 0);
spin_lock(&init_mm.page_table_lock);
pte_clear(&init_mm, addr, pte);
@@ -939,7 +939,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
page_addr = page_address(pte_page(*pte));
if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) {
- free_pagetable(pte_page(*pte), 0, altmap);
+ free_pagetable(pte_page(*pte), 0);
spin_lock(&init_mm.page_table_lock);
pte_clear(&init_mm, addr, pte);
@@ -974,9 +974,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
if (IS_ALIGNED(addr, PMD_SIZE) &&
IS_ALIGNED(next, PMD_SIZE)) {
if (!direct)
- free_pagetable(pmd_page(*pmd),
- get_order(PMD_SIZE),
- altmap);
+ free_hugepage_table(pmd_page(*pmd),
+ altmap);
spin_lock(&init_mm.page_table_lock);
pmd_clear(pmd);
@@ -989,9 +988,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
page_addr = page_address(pmd_page(*pmd));
if (!memchr_inv(page_addr, PAGE_INUSE,
PMD_SIZE)) {
- free_pagetable(pmd_page(*pmd),
- get_order(PMD_SIZE),
- altmap);
+ free_hugepage_table(pmd_page(*pmd),
+ altmap);
spin_lock(&init_mm.page_table_lock);
pmd_clear(pmd);
@@ -1003,8 +1001,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
}
pte_base = (pte_t *)pmd_page_vaddr(*pmd);
- remove_pte_table(pte_base, addr, next, altmap, direct);
- free_pte_table(pte_base, pmd, altmap);
+ remove_pte_table(pte_base, addr, next, direct);
+ free_pte_table(pte_base, pmd);
}
/* Call free_pmd_table() in remove_pud_table(). */
@@ -1033,8 +1031,7 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
IS_ALIGNED(next, PUD_SIZE)) {
if (!direct)
free_pagetable(pud_page(*pud),
- get_order(PUD_SIZE),
- altmap);
+ get_order(PUD_SIZE));
spin_lock(&init_mm.page_table_lock);
pud_clear(pud);
@@ -1048,8 +1045,7 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
if (!memchr_inv(page_addr, PAGE_INUSE,
PUD_SIZE)) {
free_pagetable(pud_page(*pud),
- get_order(PUD_SIZE),
- altmap);
+ get_order(PUD_SIZE));
spin_lock(&init_mm.page_table_lock);
pud_clear(pud);
@@ -1062,7 +1058,7 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
pmd_base = pmd_offset(pud, 0);
remove_pmd_table(pmd_base, addr, next, direct, altmap);
- free_pmd_table(pmd_base, pud, altmap);
+ free_pmd_table(pmd_base, pud);
}
if (direct)
@@ -1094,7 +1090,7 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end,
* to adapt for boot-time switching between 4 and 5 level page tables.
*/
if (CONFIG_PGTABLE_LEVELS == 5)
- free_pud_table(pud_base, p4d, altmap);
+ free_pud_table(pud_base, p4d);
}
if (direct)
@@ -1193,8 +1189,8 @@ void __init mem_init(void)
register_page_bootmem_info();
/* Register memory areas for /proc/kcore */
- kclist_add(&kcore_vsyscall, (void *)VSYSCALL_ADDR,
- PAGE_SIZE, KCORE_OTHER);
+ if (get_gate_vma(&init_mm))
+ kclist_add(&kcore_vsyscall, (void *)VSYSCALL_ADDR, PAGE_SIZE, KCORE_USER);
mem_init_print_info(NULL);
}
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 004abf9ebf12..34cda7e0551b 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -702,4 +702,52 @@ int pmd_clear_huge(pmd_t *pmd)
return 0;
}
+
+/**
+ * pud_free_pmd_page - Clear pud entry and free pmd page.
+ * @pud: Pointer to a PUD.
+ *
+ * Context: The pud range has been unmapped and TLB purged.
+ * Return: 1 if clearing the entry succeeded. 0 otherwise.
+ */
+int pud_free_pmd_page(pud_t *pud)
+{
+ pmd_t *pmd;
+ int i;
+
+ if (pud_none(*pud))
+ return 1;
+
+ pmd = (pmd_t *)pud_page_vaddr(*pud);
+
+ for (i = 0; i < PTRS_PER_PMD; i++)
+ if (!pmd_free_pte_page(&pmd[i]))
+ return 0;
+
+ pud_clear(pud);
+ free_page((unsigned long)pmd);
+
+ return 1;
+}
+
+/**
+ * pmd_free_pte_page - Clear pmd entry and free pte page.
+ * @pmd: Pointer to a PMD.
+ *
+ * Context: The pmd range has been unmapped and TLB purged.
+ * Return: 1 if clearing the entry succeeded. 0 otherwise.
+ */
+int pmd_free_pte_page(pmd_t *pmd)
+{
+ pte_t *pte;
+
+ if (pmd_none(*pmd))
+ return 1;
+
+ pte = (pte_t *)pmd_page_vaddr(*pmd);
+ pmd_clear(pmd);
+ free_page((unsigned long)pte);
+
+ return 1;
+}
#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 4923d92f918d..ce5b2ebd5701 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -13,6 +13,7 @@
#include <linux/if_vlan.h>
#include <asm/cacheflush.h>
#include <asm/set_memory.h>
+#include <asm/nospec-branch.h>
#include <linux/bpf.h>
/*
@@ -290,7 +291,7 @@ static void emit_bpf_tail_call(u8 **pprog)
EMIT2(0x89, 0xD2); /* mov edx, edx */
EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */
offsetof(struct bpf_array, map.max_entries));
-#define OFFSET1 43 /* number of bytes to jump */
+#define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* number of bytes to jump */
EMIT2(X86_JBE, OFFSET1); /* jbe out */
label1 = cnt;
@@ -299,7 +300,7 @@ static void emit_bpf_tail_call(u8 **pprog)
*/
EMIT2_off32(0x8B, 0x85, 36); /* mov eax, dword ptr [rbp + 36] */
EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
-#define OFFSET2 32
+#define OFFSET2 (30 + RETPOLINE_RAX_BPF_JIT_SIZE)
EMIT2(X86_JA, OFFSET2); /* ja out */
label2 = cnt;
EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
@@ -313,7 +314,7 @@ static void emit_bpf_tail_call(u8 **pprog)
* goto out;
*/
EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */
-#define OFFSET3 10
+#define OFFSET3 (8 + RETPOLINE_RAX_BPF_JIT_SIZE)
EMIT2(X86_JE, OFFSET3); /* je out */
label3 = cnt;
@@ -326,7 +327,7 @@ static void emit_bpf_tail_call(u8 **pprog)
* rdi == ctx (1st arg)
* rax == prog->bpf_func + prologue_size
*/
- EMIT2(0xFF, 0xE0); /* jmp rax */
+ RETPOLINE_RAX_BPF_JIT();
/* out: */
BUILD_BUG_ON(cnt - label1 != OFFSET1);
@@ -1187,7 +1188,7 @@ skip_init_addrs:
* may converge on the last pass. In such case do one more
* pass to emit the final image
*/
- for (pass = 0; pass < 10 || image; pass++) {
+ for (pass = 0; pass < 20 || image; pass++) {
proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
if (proglen <= 0) {
image = NULL;
@@ -1214,6 +1215,7 @@ skip_init_addrs:
}
}
oldproglen = proglen;
+ cond_resched();
}
if (bpf_jit_enable > 1)
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 174c59774cc9..a7a7677265b6 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -460,7 +460,7 @@ static int nmi_setup(void)
goto fail;
for_each_possible_cpu(cpu) {
- if (!cpu)
+ if (!IS_ENABLED(CONFIG_SMP) || !cpu)
continue;
memcpy(per_cpu(cpu_msrs, cpu).counters,
diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c
index 2c67bae6bb53..fb1df9488e98 100644
--- a/arch/x86/platform/intel-mid/intel-mid.c
+++ b/arch/x86/platform/intel-mid/intel-mid.c
@@ -79,7 +79,7 @@ static void intel_mid_power_off(void)
static void intel_mid_reboot(void)
{
- intel_scu_ipc_simple_command(IPCMSG_COLD_BOOT, 0);
+ intel_scu_ipc_simple_command(IPCMSG_COLD_RESET, 0);
}
static unsigned long __init intel_mid_calibrate_tsc(void)
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index 5d73c443e778..220e97841e49 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -770,9 +770,12 @@ static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym,
break;
case R_X86_64_PC32:
+ case R_X86_64_PLT32:
/*
* PC relative relocations don't need to be adjusted unless
* referencing a percpu symbol.
+ *
+ * NB: R_X86_64_PLT32 can be treated as R_X86_64_PC32.
*/
if (is_percpu_sym(sym, symname))
add_reloc(&relocs32neg, offset);
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index c047f42552e1..3c2c2530737e 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1376,8 +1376,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
if (!xen_initial_domain()) {
add_preferred_console("xenboot", 0, NULL);
- add_preferred_console("tty", 0, NULL);
- add_preferred_console("hvc", 0, NULL);
if (pci_xen)
x86_init.pci.arch_init = pci_xen_init;
} else {
@@ -1410,6 +1408,10 @@ asmlinkage __visible void __init xen_start_kernel(void)
xen_boot_params_init_edd();
}
+
+ add_preferred_console("tty", 0, NULL);
+ add_preferred_console("hvc", 0, NULL);
+
#ifdef CONFIG_PCI
/* PCI BIOS service won't work from a PV guest. */
pci_probe &= ~PCI_PROBE_BIOS;
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 77c959cf81e7..7a43b2ae19f1 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -122,6 +122,8 @@ void __init xen_smp_cpus_done(unsigned int max_cpus)
if (xen_hvm_domain())
native_smp_cpus_done(max_cpus);
+ else
+ calculate_max_logical_packages();
if (xen_have_vcpu_info_placement)
return;