-rw-r--r--  arch/um/include/shared/os.h              |   4
-rw-r--r--  arch/x86/entry/syscalls/syscall_32.tbl   |   1
-rw-r--r--  arch/x86/include/asm/cpufeatures.h       |   1
-rw-r--r--  arch/x86/include/asm/kvm_host.h          |   1
-rw-r--r--  arch/x86/include/asm/msr-index.h         |  11
-rw-r--r--  arch/x86/include/asm/processor.h         |   2
-rw-r--r--  arch/x86/include/asm/proto.h             |   4
-rw-r--r--  arch/x86/include/asm/thread_info.h       |   6
-rw-r--r--  arch/x86/include/asm/tlbflush.h          |  10
-rw-r--r--  arch/x86/include/uapi/asm/prctl.h        |  11
-rw-r--r--  arch/x86/kernel/cpu/intel.c              |  40
-rw-r--r--  arch/x86/kernel/kvm.c                    |   4
-rw-r--r--  arch/x86/kernel/process.c                | 151
-rw-r--r--  arch/x86/kernel/process_32.c             |   7
-rw-r--r--  arch/x86/kernel/process_64.c             |  48
-rw-r--r--  arch/x86/kernel/ptrace.c                 |   8
-rw-r--r--  arch/x86/kvm/i8259.c                     |  72
-rw-r--r--  arch/x86/kvm/ioapic.c                    |  28
-rw-r--r--  arch/x86/kvm/ioapic.h                    |  16
-rw-r--r--  arch/x86/kvm/irq.c                       |   2
-rw-r--r--  arch/x86/kvm/irq.h                       |  32
-rw-r--r--  arch/x86/kvm/irq_comm.c                  |  45
-rw-r--r--  arch/x86/kvm/vmx.c                       |  40
-rw-r--r--  arch/x86/kvm/x86.c                       | 108
-rw-r--r--  arch/x86/um/Makefile                     |   2
-rw-r--r--  arch/x86/um/asm/ptrace.h                 |   2
-rw-r--r--  arch/x86/um/os-Linux/prctl.c             |   4
-rw-r--r--  arch/x86/um/syscalls_32.c                |   7
-rw-r--r--  arch/x86/um/syscalls_64.c                |  20
-rw-r--r--  fs/exec.c                                |   1
-rw-r--r--  include/linux/compat.h                   |   2
-rw-r--r--  include/linux/kvm_host.h                 |   4
-rw-r--r--  include/linux/thread_info.h              |   4
-rwxr-xr-x  scripts/checksyscalls.sh                 |   1
-rw-r--r--  virt/kvm/eventfd.c                       |   4
-rw-r--r--  virt/kvm/irqchip.c                       |  11
-rw-r--r--  virt/kvm/kvm_main.c                      |   3
37 files changed, 410 insertions, 307 deletions
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index de5d572225f3..cd1fa97776c3 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -302,8 +302,8 @@ extern int ignore_sigio_fd(int fd);
extern void maybe_sigio_broken(int fd, int read);
extern void sigio_broken(int fd, int read);
-/* sys-x86_64/prctl.c */
-extern int os_arch_prctl(int pid, int code, unsigned long *addr);
+/* prctl.c */
+extern int os_arch_prctl(int pid, int option, unsigned long *arg2);
/* tty.c */
extern int get_pty(void);
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 9ba050fe47f3..0af59fa789ea 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -390,3 +390,4 @@
381 i386 pkey_alloc sys_pkey_alloc
382 i386 pkey_free sys_pkey_free
383 i386 statx sys_statx
+384 i386 arch_prctl sys_arch_prctl compat_sys_arch_prctl
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index b04bb6dfed7f..0fe00446f9ca 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -187,6 +187,7 @@
* Reuse free bits when adding new feature flags!
*/
#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */
+#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */
#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d962fa998a6f..2cc5ec7cc6f5 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -726,6 +726,7 @@ struct kvm_hv {
enum kvm_irqchip_mode {
KVM_IRQCHIP_NONE,
+ KVM_IRQCHIP_INIT_IN_PROGRESS, /* temporarily set during creation */
KVM_IRQCHIP_KERNEL, /* created with KVM_CREATE_IRQCHIP */
KVM_IRQCHIP_SPLIT, /* created with KVM_CAP_SPLIT_IRQCHIP */
};
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index d8b5f8ab8ef9..673f9ac50f6d 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -45,6 +45,8 @@
#define MSR_IA32_PERFCTR1 0x000000c2
#define MSR_FSB_FREQ 0x000000cd
#define MSR_PLATFORM_INFO 0x000000ce
+#define MSR_PLATFORM_INFO_CPUID_FAULT_BIT 31
+#define MSR_PLATFORM_INFO_CPUID_FAULT BIT_ULL(MSR_PLATFORM_INFO_CPUID_FAULT_BIT)
#define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2
#define NHM_C3_AUTO_DEMOTE (1UL << 25)
@@ -127,6 +129,7 @@
/* DEBUGCTLMSR bits (others vary by model): */
#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */
+#define DEBUGCTLMSR_BTF_SHIFT 1
#define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */
#define DEBUGCTLMSR_TR (1UL << 6)
#define DEBUGCTLMSR_BTS (1UL << 7)
@@ -552,10 +555,12 @@
#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT 39
#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT)
-/* MISC_FEATURE_ENABLES non-architectural features */
-#define MSR_MISC_FEATURE_ENABLES 0x00000140
+/* MISC_FEATURES_ENABLES non-architectural features */
+#define MSR_MISC_FEATURES_ENABLES 0x00000140
-#define MSR_MISC_FEATURE_ENABLES_RING3MWAIT_BIT 1
+#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT 0
+#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT BIT_ULL(MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT)
+#define MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT 1
#define MSR_IA32_TSC_DEADLINE 0x000006E0
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index f385eca5407a..a80c1b3997ed 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -884,6 +884,8 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
extern int get_tsc_mode(unsigned long adr);
extern int set_tsc_mode(unsigned int val);
+DECLARE_PER_CPU(u64, msr_misc_features_shadow);
+
/* Register/unregister a process' MPX related resource */
#define MPX_ENABLE_MANAGEMENT() mpx_enable_management()
#define MPX_DISABLE_MANAGEMENT() mpx_disable_management()
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 9b9b30b19441..8d3964fc5f91 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -9,6 +9,7 @@ void syscall_init(void);
#ifdef CONFIG_X86_64
void entry_SYSCALL_64(void);
+long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2);
#endif
#ifdef CONFIG_X86_32
@@ -30,6 +31,7 @@ void x86_report_nx(void);
extern int reboot_force;
-long do_arch_prctl(struct task_struct *task, int code, unsigned long addr);
+long do_arch_prctl_common(struct task_struct *task, int option,
+ unsigned long cpuid_enabled);
#endif /* _ASM_X86_PROTO_H */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index ad6f5eb07a95..9fc44b95f7cb 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -87,6 +87,7 @@ struct thread_info {
#define TIF_SECCOMP 8 /* secure computing */
#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
#define TIF_UPROBE 12 /* breakpointed or singlestepping */
+#define TIF_NOCPUID 15 /* CPUID is not accessible in userland */
#define TIF_NOTSC 16 /* TSC is not accessible in userland */
#define TIF_IA32 17 /* IA32 compatibility process */
#define TIF_NOHZ 19 /* in adaptive nohz mode */
@@ -110,6 +111,7 @@ struct thread_info {
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
#define _TIF_UPROBE (1 << TIF_UPROBE)
+#define _TIF_NOCPUID (1 << TIF_NOCPUID)
#define _TIF_NOTSC (1 << TIF_NOTSC)
#define _TIF_IA32 (1 << TIF_IA32)
#define _TIF_NOHZ (1 << TIF_NOHZ)
@@ -138,7 +140,7 @@ struct thread_info {
/* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW \
- (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP)
+ (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP)
#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
@@ -239,6 +241,8 @@ static inline int arch_within_stack_frames(const void * const stack,
extern void arch_task_cache_init(void);
extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
extern void arch_release_task_struct(struct task_struct *tsk);
+extern void arch_setup_new_exec(void);
+#define arch_setup_new_exec arch_setup_new_exec
#endif /* !__ASSEMBLY__ */
#endif /* _ASM_X86_THREAD_INFO_H */
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index fc5abff9b7fd..75d002bdb3f3 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -110,6 +110,16 @@ static inline void cr4_clear_bits(unsigned long mask)
}
}
+static inline void cr4_toggle_bits(unsigned long mask)
+{
+ unsigned long cr4;
+
+ cr4 = this_cpu_read(cpu_tlbstate.cr4);
+ cr4 ^= mask;
+ this_cpu_write(cpu_tlbstate.cr4, cr4);
+ __write_cr4(cr4);
+}
+
/* Read the CR4 shadow. */
static inline unsigned long cr4_read_shadow(void)
{
diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h
index 835aa51c7f6e..c45765517092 100644
--- a/arch/x86/include/uapi/asm/prctl.h
+++ b/arch/x86/include/uapi/asm/prctl.h
@@ -1,10 +1,13 @@
#ifndef _ASM_X86_PRCTL_H
#define _ASM_X86_PRCTL_H
-#define ARCH_SET_GS 0x1001
-#define ARCH_SET_FS 0x1002
-#define ARCH_GET_FS 0x1003
-#define ARCH_GET_GS 0x1004
+#define ARCH_SET_GS 0x1001
+#define ARCH_SET_FS 0x1002
+#define ARCH_GET_FS 0x1003
+#define ARCH_GET_GS 0x1004
+
+#define ARCH_GET_CPUID 0x1011
+#define ARCH_SET_CPUID 0x1012
#define ARCH_MAP_VDSO_X32 0x2001
#define ARCH_MAP_VDSO_32 0x2002
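
The uapi additions above are easiest to see from user space. Below is a minimal, hypothetical test program (not part of this commit) that exercises the new ARCH_GET_CPUID/ARCH_SET_CPUID options through the raw syscall; the constant values are copied from the header above, and per the arch/x86/kernel/process.c changes further down, ARCH_SET_CPUID is expected to fail with ENODEV on hardware without CPUID faulting:

#define _GNU_SOURCE
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

/* Values from arch/x86/include/uapi/asm/prctl.h in this series */
#define ARCH_GET_CPUID 0x1011
#define ARCH_SET_CPUID 0x1012

int main(void)
{
	/* 1 = CPUID allowed for this task, 0 = CPUID faulting in effect */
	long enabled = syscall(SYS_arch_prctl, ARCH_GET_CPUID, 0UL);
	printf("CPUID currently %s\n", enabled == 1 ? "enabled" : "disabled");

	/* Fails with ENODEV when the CPU lacks CPUID faulting */
	if (syscall(SYS_arch_prctl, ARCH_SET_CPUID, 0UL) == 0)
		printf("CPUID disabled; a user-mode CPUID now faults (SIGSEGV)\n");
	else
		perror("ARCH_SET_CPUID");

	return 0;
}

Re-enabling is the mirror image, arch_prctl(ARCH_SET_CPUID, 1), and the flag is reset on the next execve() via the arch_setup_new_exec() hook added in this series.
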
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 063197771b8d..dfa90a3a5145 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -90,16 +90,12 @@ static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c)
return;
}
- if (ring3mwait_disabled) {
- msr_clear_bit(MSR_MISC_FEATURE_ENABLES,
- MSR_MISC_FEATURE_ENABLES_RING3MWAIT_BIT);
+ if (ring3mwait_disabled)
return;
- }
-
- msr_set_bit(MSR_MISC_FEATURE_ENABLES,
- MSR_MISC_FEATURE_ENABLES_RING3MWAIT_BIT);
set_cpu_cap(c, X86_FEATURE_RING3MWAIT);
+ this_cpu_or(msr_misc_features_shadow,
+ 1UL << MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT);
if (c == &boot_cpu_data)
ELF_HWCAP2 |= HWCAP2_RING3MWAIT;
@@ -488,6 +484,34 @@ static void intel_bsp_resume(struct cpuinfo_x86 *c)
init_intel_energy_perf(c);
}
+static void init_cpuid_fault(struct cpuinfo_x86 *c)
+{
+ u64 msr;
+
+ if (!rdmsrl_safe(MSR_PLATFORM_INFO, &msr)) {
+ if (msr & MSR_PLATFORM_INFO_CPUID_FAULT)
+ set_cpu_cap(c, X86_FEATURE_CPUID_FAULT);
+ }
+}
+
+static void init_intel_misc_features(struct cpuinfo_x86 *c)
+{
+ u64 msr;
+
+ if (rdmsrl_safe(MSR_MISC_FEATURES_ENABLES, &msr))
+ return;
+
+ /* Clear all MISC features */
+ this_cpu_write(msr_misc_features_shadow, 0);
+
+ /* Check features and update capabilities and shadow control bits */
+ init_cpuid_fault(c);
+ probe_xeon_phi_r3mwait(c);
+
+ msr = this_cpu_read(msr_misc_features_shadow);
+ wrmsrl(MSR_MISC_FEATURES_ENABLES, msr);
+}
+
static void init_intel(struct cpuinfo_x86 *c)
{
unsigned int l2 = 0;
@@ -602,7 +626,7 @@ static void init_intel(struct cpuinfo_x86 *c)
init_intel_energy_perf(c);
- probe_xeon_phi_r3mwait(c);
+ init_intel_misc_features(c);
}
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 14f65a5f938e..da5c09789984 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -396,9 +396,9 @@ static u64 kvm_steal_clock(int cpu)
src = &per_cpu(steal_time, cpu);
do {
version = src->version;
- rmb();
+ virt_rmb();
steal = src->steal;
- rmb();
+ virt_rmb();
} while ((version & 1) || (version != src->version));
return steal;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index f67591561711..0bb88428cbf2 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -37,6 +37,7 @@
#include <asm/vm86.h>
#include <asm/switch_to.h>
#include <asm/desc.h>
+#include <asm/prctl.h>
/*
* per-CPU TSS segments. Threads are completely 'soft' on Linux,
@@ -124,11 +125,6 @@ void flush_thread(void)
fpu__clear(&tsk->thread.fpu);
}
-static void hard_disable_TSC(void)
-{
- cr4_set_bits(X86_CR4_TSD);
-}
-
void disable_TSC(void)
{
preempt_disable();
@@ -137,15 +133,10 @@ void disable_TSC(void)
* Must flip the CPU state synchronously with
* TIF_NOTSC in the current running context.
*/
- hard_disable_TSC();
+ cr4_set_bits(X86_CR4_TSD);
preempt_enable();
}
-static void hard_enable_TSC(void)
-{
- cr4_clear_bits(X86_CR4_TSD);
-}
-
static void enable_TSC(void)
{
preempt_disable();
@@ -154,7 +145,7 @@ static void enable_TSC(void)
* Must flip the CPU state synchronously with
* TIF_NOTSC in the current running context.
*/
- hard_enable_TSC();
+ cr4_clear_bits(X86_CR4_TSD);
preempt_enable();
}
@@ -182,54 +173,129 @@ int set_tsc_mode(unsigned int val)
return 0;
}
-void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
- struct tss_struct *tss)
-{
- struct thread_struct *prev, *next;
-
- prev = &prev_p->thread;
- next = &next_p->thread;
+DEFINE_PER_CPU(u64, msr_misc_features_shadow);
- if (test_tsk_thread_flag(prev_p, TIF_BLOCKSTEP) ^
- test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) {
- unsigned long debugctl = get_debugctlmsr();
+static void set_cpuid_faulting(bool on)
+{
+ u64 msrval;
- debugctl &= ~DEBUGCTLMSR_BTF;
- if (test_tsk_thread_flag(next_p, TIF_BLOCKSTEP))
- debugctl |= DEBUGCTLMSR_BTF;
+ msrval = this_cpu_read(msr_misc_features_shadow);
+ msrval &= ~MSR_MISC_FEATURES_ENABLES_CPUID_FAULT;
+ msrval |= (on << MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT);
+ this_cpu_write(msr_misc_features_shadow, msrval);
+ wrmsrl(MSR_MISC_FEATURES_ENABLES, msrval);
+}
- update_debugctlmsr(debugctl);
+static void disable_cpuid(void)
+{
+ preempt_disable();
+ if (!test_and_set_thread_flag(TIF_NOCPUID)) {
+ /*
+ * Must flip the CPU state synchronously with
+ * TIF_NOCPUID in the current running context.
+ */
+ set_cpuid_faulting(true);
}
+ preempt_enable();
+}
- if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
- test_tsk_thread_flag(next_p, TIF_NOTSC)) {
- /* prev and next are different */
- if (test_tsk_thread_flag(next_p, TIF_NOTSC))
- hard_disable_TSC();
- else
- hard_enable_TSC();
+static void enable_cpuid(void)
+{
+ preempt_disable();
+ if (test_and_clear_thread_flag(TIF_NOCPUID)) {
+ /*
+ * Must flip the CPU state synchronously with
+ * TIF_NOCPUID in the current running context.
+ */
+ set_cpuid_faulting(false);
}
+ preempt_enable();
+}
+
+static int get_cpuid_mode(void)
+{
+ return !test_thread_flag(TIF_NOCPUID);
+}
+
+static int set_cpuid_mode(struct task_struct *task, unsigned long cpuid_enabled)
+{
+ if (!static_cpu_has(X86_FEATURE_CPUID_FAULT))
+ return -ENODEV;
+
+ if (cpuid_enabled)
+ enable_cpuid();
+ else
+ disable_cpuid();
+
+ return 0;
+}
+
+/*
+ * Called immediately after a successful exec.
+ */
+void arch_setup_new_exec(void)
+{
+ /* If cpuid was previously disabled for this task, re-enable it. */
+ if (test_thread_flag(TIF_NOCPUID))
+ enable_cpuid();
+}
- if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
+static inline void switch_to_bitmap(struct tss_struct *tss,
+ struct thread_struct *prev,
+ struct thread_struct *next,
+ unsigned long tifp, unsigned long tifn)
+{
+ if (tifn & _TIF_IO_BITMAP) {
/*
* Copy the relevant range of the IO bitmap.
* Normally this is 128 bytes or less:
*/
memcpy(tss->io_bitmap, next->io_bitmap_ptr,
max(prev->io_bitmap_max, next->io_bitmap_max));
-
/*
* Make sure that the TSS limit is correct for the CPU
* to notice the IO bitmap.
*/
refresh_tss_limit();
- } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
+ } else if (tifp & _TIF_IO_BITMAP) {
/*
* Clear any possible leftover bits:
*/
memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
}
+}
+
+void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
+ struct tss_struct *tss)
+{
+ struct thread_struct *prev, *next;
+ unsigned long tifp, tifn;
+
+ prev = &prev_p->thread;
+ next = &next_p->thread;
+
+ tifn = READ_ONCE(task_thread_info(next_p)->flags);
+ tifp = READ_ONCE(task_thread_info(prev_p)->flags);
+ switch_to_bitmap(tss, prev, next, tifp, tifn);
+
propagate_user_return_notify(prev_p, next_p);
+
+ if ((tifp & _TIF_BLOCKSTEP || tifn & _TIF_BLOCKSTEP) &&
+ arch_has_block_step()) {
+ unsigned long debugctl, msk;
+
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+ debugctl &= ~DEBUGCTLMSR_BTF;
+ msk = tifn & _TIF_BLOCKSTEP;
+ debugctl |= (msk >> TIF_BLOCKSTEP) << DEBUGCTLMSR_BTF_SHIFT;
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+ }
+
+ if ((tifp ^ tifn) & _TIF_NOTSC)
+ cr4_toggle_bits(X86_CR4_TSD);
+
+ if ((tifp ^ tifn) & _TIF_NOCPUID)
+ set_cpuid_faulting(!!(tifn & _TIF_NOCPUID));
}
/*
@@ -550,3 +616,16 @@ out:
put_task_stack(p);
return ret;
}
+
+long do_arch_prctl_common(struct task_struct *task, int option,
+ unsigned long cpuid_enabled)
+{
+ switch (option) {
+ case ARCH_GET_CPUID:
+ return get_cpuid_mode();
+ case ARCH_SET_CPUID:
+ return set_cpuid_mode(task, cpuid_enabled);
+ }
+
+ return -EINVAL;
+}
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 4c818f8bc135..ff40e74c9181 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -37,6 +37,7 @@
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/kdebug.h>
+#include <linux/syscalls.h>
#include <asm/pgtable.h>
#include <asm/ldt.h>
@@ -56,6 +57,7 @@
#include <asm/switch_to.h>
#include <asm/vm86.h>
#include <asm/intel_rdt.h>
+#include <asm/proto.h>
void __show_regs(struct pt_regs *regs, int all)
{
@@ -304,3 +306,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
return prev_p;
}
+
+SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
+{
+ return do_arch_prctl_common(current, option, arg2);
+}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index d6b784a5520d..ea1a6180bf39 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -37,6 +37,7 @@
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/ftrace.h>
+#include <linux/syscalls.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
@@ -204,7 +205,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
(struct user_desc __user *)tls, 0);
else
#endif
- err = do_arch_prctl(p, ARCH_SET_FS, tls);
+ err = do_arch_prctl_64(p, ARCH_SET_FS, tls);
if (err)
goto out;
}
@@ -547,70 +548,72 @@ static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr)
}
#endif
-long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
+long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
{
int ret = 0;
int doit = task == current;
int cpu;
- switch (code) {
+ switch (option) {
case ARCH_SET_GS:
- if (addr >= TASK_SIZE_MAX)
+ if (arg2 >= TASK_SIZE_MAX)
return -EPERM;
cpu = get_cpu();
task->thread.gsindex = 0;
- task->thread.gsbase = addr;
+ task->thread.gsbase = arg2;
if (doit) {
load_gs_index(0);
- ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);
+ ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, arg2);
}
put_cpu();
break;
case ARCH_SET_FS:
/* Not strictly needed for fs, but do it for symmetry
with gs */
- if (addr >= TASK_SIZE_MAX)
+ if (arg2 >= TASK_SIZE_MAX)
return -EPERM;
cpu = get_cpu();
task->thread.fsindex = 0;
- task->thread.fsbase = addr;
+ task->thread.fsbase = arg2;
if (doit) {
/* set the selector to 0 to not confuse __switch_to */
loadsegment(fs, 0);
- ret = wrmsrl_safe(MSR_FS_BASE, addr);
+ ret = wrmsrl_safe(MSR_FS_BASE, arg2);
}
put_cpu();
break;
case ARCH_GET_FS: {
unsigned long base;
+
if (doit)
rdmsrl(MSR_FS_BASE, base);
else
base = task->thread.fsbase;
- ret = put_user(base, (unsigned long __user *)addr);
+ ret = put_user(base, (unsigned long __user *)arg2);
break;
}
case ARCH_GET_GS: {
unsigned long base;
+
if (doit)
rdmsrl(MSR_KERNEL_GS_BASE, base);
else
base = task->thread.gsbase;
- ret = put_user(base, (unsigned long __user *)addr);
+ ret = put_user(base, (unsigned long __user *)arg2);
break;
}
#ifdef CONFIG_CHECKPOINT_RESTORE
# ifdef CONFIG_X86_X32_ABI
case ARCH_MAP_VDSO_X32:
- return prctl_map_vdso(&vdso_image_x32, addr);
+ return prctl_map_vdso(&vdso_image_x32, arg2);
# endif
# if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
case ARCH_MAP_VDSO_32:
- return prctl_map_vdso(&vdso_image_32, addr);
+ return prctl_map_vdso(&vdso_image_32, arg2);
# endif
case ARCH_MAP_VDSO_64:
- return prctl_map_vdso(&vdso_image_64, addr);
+ return prctl_map_vdso(&vdso_image_64, arg2);
#endif
default:
@@ -621,10 +624,23 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
return ret;
}
-long sys_arch_prctl(int code, unsigned long addr)
+SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
+{
+ long ret;
+
+ ret = do_arch_prctl_64(current, option, arg2);
+ if (ret == -EINVAL)
+ ret = do_arch_prctl_common(current, option, arg2);
+
+ return ret;
+}
+
+#ifdef CONFIG_IA32_EMULATION
+COMPAT_SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
{
- return do_arch_prctl(current, code, addr);
+ return do_arch_prctl_common(current, option, arg2);
}
+#endif
unsigned long KSTK_ESP(struct task_struct *task)
{
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 2364b23ea3e5..f37d18124648 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -396,12 +396,12 @@ static int putreg(struct task_struct *child,
if (value >= TASK_SIZE_MAX)
return -EIO;
/*
- * When changing the segment base, use do_arch_prctl
+ * When changing the segment base, use do_arch_prctl_64
* to set either thread.fs or thread.fsindex and the
* corresponding GDT slot.
*/
if (child->thread.fsbase != value)
- return do_arch_prctl(child, ARCH_SET_FS, value);
+ return do_arch_prctl_64(child, ARCH_SET_FS, value);
return 0;
case offsetof(struct user_regs_struct,gs_base):
/*
@@ -410,7 +410,7 @@ static int putreg(struct task_struct *child,
if (value >= TASK_SIZE_MAX)
return -EIO;
if (child->thread.gsbase != value)
- return do_arch_prctl(child, ARCH_SET_GS, value);
+ return do_arch_prctl_64(child, ARCH_SET_GS, value);
return 0;
#endif
}
@@ -869,7 +869,7 @@ long arch_ptrace(struct task_struct *child, long request,
Works just like arch_prctl, except that the arguments
are reversed. */
case PTRACE_ARCH_PRCTL:
- ret = do_arch_prctl(child, data, addr);
+ ret = do_arch_prctl_64(child, data, addr);
break;
#endif
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 047b17a26269..bdcd4139eca9 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -49,7 +49,7 @@ static void pic_unlock(struct kvm_pic *s)
__releases(&s->lock)
{
bool wakeup = s->wakeup_needed;
- struct kvm_vcpu *vcpu, *found = NULL;
+ struct kvm_vcpu *vcpu;
int i;
s->wakeup_needed = false;
@@ -59,16 +59,11 @@ static void pic_unlock(struct kvm_pic *s)
if (wakeup) {
kvm_for_each_vcpu(i, vcpu, s->kvm) {
if (kvm_apic_accept_pic_intr(vcpu)) {
- found = vcpu;
- break;
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+ kvm_vcpu_kick(vcpu);
+ return;
}
}
-
- if (!found)
- return;
-
- kvm_make_request(KVM_REQ_EVENT, found);
- kvm_vcpu_kick(found);
}
}
@@ -239,7 +234,7 @@ static inline void pic_intack(struct kvm_kpic_state *s, int irq)
int kvm_pic_read_irq(struct kvm *kvm)
{
int irq, irq2, intno;
- struct kvm_pic *s = pic_irqchip(kvm);
+ struct kvm_pic *s = kvm->arch.vpic;
s->output = 0;
@@ -273,7 +268,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
return intno;
}
-void kvm_pic_reset(struct kvm_kpic_state *s)
+static void kvm_pic_reset(struct kvm_kpic_state *s)
{
int irq, i;
struct kvm_vcpu *vcpu;
@@ -422,19 +417,16 @@ static u32 pic_poll_read(struct kvm_kpic_state *s, u32 addr1)
return ret;
}
-static u32 pic_ioport_read(void *opaque, u32 addr1)
+static u32 pic_ioport_read(void *opaque, u32 addr)
{
struct kvm_kpic_state *s = opaque;
- unsigned int addr;
int ret;
- addr = addr1;
- addr &= 1;
if (s->poll) {
- ret = pic_poll_read(s, addr1);
+ ret = pic_poll_read(s, addr);
s->poll = 0;
} else
- if (addr == 0)
+ if ((addr & 1) == 0)
if (s->read_reg_select)
ret = s->isr;
else
@@ -456,76 +448,64 @@ static u32 elcr_ioport_read(void *opaque, u32 addr1)
return s->elcr;
}
-static int picdev_in_range(gpa_t addr)
-{
- switch (addr) {
- case 0x20:
- case 0x21:
- case 0xa0:
- case 0xa1:
- case 0x4d0:
- case 0x4d1:
- return 1;
- default:
- return 0;
- }
-}
-
static int picdev_write(struct kvm_pic *s,
gpa_t addr, int len, const void *val)
{
unsigned char data = *(unsigned char *)val;
- if (!picdev_in_range(addr))
- return -EOPNOTSUPP;
if (len != 1) {
pr_pic_unimpl("non byte write\n");
return 0;
}
- pic_lock(s);
switch (addr) {
case 0x20:
case 0x21:
case 0xa0:
case 0xa1:
+ pic_lock(s);
pic_ioport_write(&s->pics[addr >> 7], addr, data);
+ pic_unlock(s);
break;
case 0x4d0:
case 0x4d1:
+ pic_lock(s);
elcr_ioport_write(&s->pics[addr & 1], addr, data);
+ pic_unlock(s);
break;
+ default:
+ return -EOPNOTSUPP;
}
- pic_unlock(s);
return 0;
}
static int picdev_read(struct kvm_pic *s,
gpa_t addr, int len, void *val)
{
- unsigned char data = 0;
- if (!picdev_in_range(addr))
- return -EOPNOTSUPP;
+ unsigned char *data = (unsigned char *)val;
if (len != 1) {
memset(val, 0, len);
pr_pic_unimpl("non byte read\n");
return 0;
}
- pic_lock(s);
switch (addr) {
case 0x20:
case 0x21:
case 0xa0:
case 0xa1:
- data = pic_ioport_read(&s->pics[addr >> 7], addr);
+ pic_lock(s);
+ *data = pic_ioport_read(&s->pics[addr >> 7], addr);
+ pic_unlock(s);
break;
case 0x4d0:
case 0x4d1:
- data = elcr_ioport_read(&s->pics[addr & 1], addr);
+ pic_lock(s);
+ *data = elcr_ioport_read(&s->pics[addr & 1], addr);
+ pic_unlock(s);
break;
+ default:
+ return -EOPNOTSUPP;
}
- *(unsigned char *)val = data;
- pic_unlock(s);
return 0;
}
@@ -576,7 +556,7 @@ static int picdev_eclr_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
*/
static void pic_irq_request(struct kvm *kvm, int level)
{
- struct kvm_pic *s = pic_irqchip(kvm);
+ struct kvm_pic *s = kvm->arch.vpic;
if (!s->output)
s->wakeup_needed = true;
@@ -660,9 +640,11 @@ void kvm_pic_destroy(struct kvm *kvm)
if (!vpic)
return;
+ mutex_lock(&kvm->slots_lock);
kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_master);
kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_slave);
kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_eclr);
+ mutex_unlock(&kvm->slots_lock);
kvm->arch.vpic = NULL;
kfree(vpic);
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 289270a6aecb..bdff437acbcb 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -266,11 +266,9 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors)
spin_unlock(&ioapic->lock);
}
-void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
+void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm)
{
- struct kvm_ioapic *ioapic = kvm->arch.vioapic;
-
- if (!ioapic)
+ if (!ioapic_in_kernel(kvm))
return;
kvm_make_scan_ioapic_request(kvm);
}
@@ -315,7 +313,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG
&& ioapic->irr & (1 << index))
ioapic_service(ioapic, index, false);
- kvm_vcpu_request_scan_ioapic(ioapic->kvm);
+ kvm_make_scan_ioapic_request(ioapic->kvm);
break;
}
}
@@ -624,10 +622,8 @@ int kvm_ioapic_init(struct kvm *kvm)
if (ret < 0) {
kvm->arch.vioapic = NULL;
kfree(ioapic);
- return ret;
}
- kvm_vcpu_request_scan_ioapic(kvm);
return ret;
}
@@ -639,36 +635,32 @@ void kvm_ioapic_destroy(struct kvm *kvm)
return;
cancel_delayed_work_sync(&ioapic->eoi_inject);
+ mutex_lock(&kvm->slots_lock);
kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev);
+ mutex_unlock(&kvm->slots_lock);
kvm->arch.vioapic = NULL;
kfree(ioapic);
}
-int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
+void kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
{
- struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
- if (!ioapic)
- return -EINVAL;
+ struct kvm_ioapic *ioapic = kvm->arch.vioapic;
spin_lock(&ioapic->lock);
memcpy(state, ioapic, sizeof(struct kvm_ioapic_state));
state->irr &= ~ioapic->irr_delivered;
spin_unlock(&ioapic->lock);
- return 0;
}
-int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
+void kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
{
- struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
- if (!ioapic)
- return -EINVAL;
+ struct kvm_ioapic *ioapic = kvm->arch.vioapic;
spin_lock(&ioapic->lock);
memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
ioapic->irr = 0;
ioapic->irr_delivered = 0;
- kvm_vcpu_request_scan_ioapic(kvm);
+ kvm_make_scan_ioapic_request(kvm);
kvm_ioapic_inject_all(ioapic, state->irr);
spin_unlock(&ioapic->lock);
- return 0;
}
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index 1cc6e54436db..29ce19732ccf 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -105,17 +105,13 @@ do { \
#define ASSERT(x) do { } while (0)
#endif
-static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
-{
- return kvm->arch.vioapic;
-}
-
static inline int ioapic_in_kernel(struct kvm *kvm)
{
- int ret;
+ int mode = kvm->arch.irqchip_mode;
- ret = (ioapic_irqchip(kvm) != NULL);
- return ret;
+ /* Matches smp_wmb() when setting irqchip_mode */
+ smp_rmb();
+ return mode == KVM_IRQCHIP_KERNEL;
}
void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu);
@@ -132,8 +128,8 @@ void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id);
int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
struct kvm_lapic_irq *irq,
struct dest_map *dest_map);
-int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
-int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
+void kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
+void kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu,
ulong *ioapic_handled_vectors);
void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 60d91c9d160c..5c24811e8b0b 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -60,7 +60,7 @@ static int kvm_cpu_has_extint(struct kvm_vcpu *v)
if (irqchip_split(v->kvm))
return pending_userspace_extint(v);
else
- return pic_irqchip(v->kvm)->output;
+ return v->kvm->arch.vpic->output;
} else
return 0;
}
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 40d5b2cf6061..0edd22c3344c 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -78,40 +78,42 @@ void kvm_pic_destroy(struct kvm *kvm);
int kvm_pic_read_irq(struct kvm *kvm);
void kvm_pic_update_irq(struct kvm_pic *s);
-static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
-{
- return kvm->arch.vpic;
-}
-
static inline int pic_in_kernel(struct kvm *kvm)
{
- int ret;
+ int mode = kvm->arch.irqchip_mode;
- ret = (pic_irqchip(kvm) != NULL);
- return ret;
+ /* Matches smp_wmb() when setting irqchip_mode */
+ smp_rmb();
+ return mode == KVM_IRQCHIP_KERNEL;
}
static inline int irqchip_split(struct kvm *kvm)
{
- return kvm->arch.irqchip_mode == KVM_IRQCHIP_SPLIT;
+ int mode = kvm->arch.irqchip_mode;
+
+ /* Matches smp_wmb() when setting irqchip_mode */
+ smp_rmb();
+ return mode == KVM_IRQCHIP_SPLIT;
}
static inline int irqchip_kernel(struct kvm *kvm)
{
- return kvm->arch.irqchip_mode == KVM_IRQCHIP_KERNEL;
+ int mode = kvm->arch.irqchip_mode;
+
+ /* Matches smp_wmb() when setting irqchip_mode */
+ smp_rmb();
+ return mode == KVM_IRQCHIP_KERNEL;
}
static inline int irqchip_in_kernel(struct kvm *kvm)
{
- bool ret = kvm->arch.irqchip_mode != KVM_IRQCHIP_NONE;
+ int mode = kvm->arch.irqchip_mode;
- /* Matches with wmb after initializing kvm->irq_routing. */
+ /* Matches smp_wmb() when setting irqchip_mode */
smp_rmb();
- return ret;
+ return mode > KVM_IRQCHIP_INIT_IN_PROGRESS;
}
-void kvm_pic_reset(struct kvm_kpic_state *s);
-
void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 6825cd36d13b..4517a4c2ac3a 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -42,7 +42,7 @@ static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
struct kvm *kvm, int irq_source_id, int level,
bool line_status)
{
- struct kvm_pic *pic = pic_irqchip(kvm);
+ struct kvm_pic *pic = kvm->arch.vpic;
return kvm_pic_set_irq(pic, e->irqchip.pin, irq_source_id, level);
}
@@ -232,11 +232,11 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
goto unlock;
}
clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
- if (!ioapic_in_kernel(kvm))
+ if (!irqchip_kernel(kvm))
goto unlock;
kvm_ioapic_clear_all(kvm->arch.vioapic, irq_source_id);
- kvm_pic_clear_all(pic_irqchip(kvm), irq_source_id);
+ kvm_pic_clear_all(kvm->arch.vpic, irq_source_id);
unlock:
mutex_unlock(&kvm->irq_lock);
}
@@ -278,38 +278,35 @@ int kvm_set_routing_entry(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue)
{
- int r = -EINVAL;
- int delta;
- unsigned max_pin;
+ /* also allow creation of routes during KVM_IRQCHIP_INIT_IN_PROGRESS */
+ if (kvm->arch.irqchip_mode == KVM_IRQCHIP_NONE)
+ return -EINVAL;
+ /* Matches smp_wmb() when setting irqchip_mode */
+ smp_rmb();
switch (ue->type) {
case KVM_IRQ_ROUTING_IRQCHIP:
- delta = 0;
+ if (irqchip_split(kvm))
+ return -EINVAL;
+ e->irqchip.pin = ue->u.irqchip.pin;
switch (ue->u.irqchip.irqchip) {
case KVM_IRQCHIP_PIC_SLAVE:
- delta = 8;
+ e->irqchip.pin += PIC_NUM_PINS / 2;
/* fall through */
case KVM_IRQCHIP_PIC_MASTER:
- if (!pic_in_kernel(kvm))
- goto out;
-
+ if (ue->u.irqchip.pin >= PIC_NUM_PINS / 2)
+ return -EINVAL;
e->set = kvm_set_pic_irq;
- max_pin = PIC_NUM_PINS;
break;
case KVM_IRQCHIP_IOAPIC:
- if (!ioapic_in_kernel(kvm))
- goto out;
-
- max_pin = KVM_IOAPIC_NUM_PINS;
+ if (ue->u.irqchip.pin >= KVM_IOAPIC_NUM_PINS)
+ return -EINVAL;
e->set = kvm_set_ioapic_irq;
break;
default:
- goto out;
+ return -EINVAL;
}
e->irqchip.irqchip = ue->u.irqchip.irqchip;
- e->irqchip.pin = ue->u.irqchip.pin + delta;
- if (e->irqchip.pin >= max_pin)
- goto out;
break;
case KVM_IRQ_ROUTING_MSI:
e->set = kvm_set_msi;
@@ -318,7 +315,7 @@ int kvm_set_routing_entry(struct kvm *kvm,
e->msi.data = ue->u.msi.data;
if (kvm_msi_route_invalid(kvm, e))
- goto out;
+ return -EINVAL;
break;
case KVM_IRQ_ROUTING_HV_SINT:
e->set = kvm_hv_set_sint;
@@ -326,12 +323,10 @@ int kvm_set_routing_entry(struct kvm *kvm,
e->hv_sint.sint = ue->u.hv_sint.sint;
break;
default:
- goto out;
+ return -EINVAL;
}
- r = 0;
-out:
- return r;
+ return 0;
}
bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index cfdb0d9389d1..c1a12b94e1fd 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -84,9 +84,6 @@ module_param_named(eptad, enable_ept_ad_bits, bool, S_IRUGO);
static bool __read_mostly emulate_invalid_guest_state = true;
module_param(emulate_invalid_guest_state, bool, S_IRUGO);
-static bool __read_mostly vmm_exclusive = 1;
-module_param(vmm_exclusive, bool, S_IRUGO);
-
static bool __read_mostly fasteoi = 1;
module_param(fasteoi, bool, S_IRUGO);
@@ -910,8 +907,6 @@ static void nested_release_page_clean(struct page *page)
static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu);
static u64 construct_eptp(unsigned long root_hpa);
-static void kvm_cpu_vmxon(u64 addr);
-static void kvm_cpu_vmxoff(void);
static bool vmx_xsaves_supported(void);
static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
static void vmx_set_segment(struct kvm_vcpu *vcpu,
@@ -2231,15 +2226,10 @@ static void decache_tsc_multiplier(struct vcpu_vmx *vmx)
static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
bool already_loaded = vmx->loaded_vmcs->cpu == cpu;
- if (!vmm_exclusive)
- kvm_cpu_vmxon(phys_addr);
- else if (!already_loaded)
- loaded_vmcs_clear(vmx->loaded_vmcs);
-
if (!already_loaded) {
+ loaded_vmcs_clear(vmx->loaded_vmcs);
local_irq_disable();
crash_disable_local_vmclear(cpu);
@@ -2317,11 +2307,6 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
vmx_vcpu_pi_put(vcpu);
__vmx_load_host_state(to_vmx(vcpu));
- if (!vmm_exclusive) {
- __loaded_vmcs_clear(to_vmx(vcpu)->loaded_vmcs);
- vcpu->cpu = -1;
- kvm_cpu_vmxoff();
- }
}
static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu);
@@ -3416,6 +3401,7 @@ static __init int vmx_disabled_by_bios(void)
static void kvm_cpu_vmxon(u64 addr)
{
+ cr4_set_bits(X86_CR4_VMXE);
intel_pt_handle_vmx(1);
asm volatile (ASM_VMX_VMXON_RAX
@@ -3458,12 +3444,8 @@ static int hardware_enable(void)
/* enable and lock */
wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits);
}
- cr4_set_bits(X86_CR4_VMXE);
-
- if (vmm_exclusive) {
- kvm_cpu_vmxon(phys_addr);
- ept_sync_global();
- }
+ kvm_cpu_vmxon(phys_addr);
+ ept_sync_global();
native_store_gdt(this_cpu_ptr(&host_gdt));
@@ -3489,15 +3471,13 @@ static void kvm_cpu_vmxoff(void)
asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
intel_pt_handle_vmx(0);
+ cr4_clear_bits(X86_CR4_VMXE);
}
static void hardware_disable(void)
{
- if (vmm_exclusive) {
- vmclear_local_loaded_vmcss();
- kvm_cpu_vmxoff();
- }
- cr4_clear_bits(X86_CR4_VMXE);
+ vmclear_local_loaded_vmcss();
+ kvm_cpu_vmxoff();
}
static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
@@ -6221,7 +6201,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
* page table accesses are reads or writes.
*/
u64 eptp = nested_ept_get_cr3(vcpu);
- if (eptp & VMX_EPT_AD_ENABLE_BIT)
+ if (!(eptp & VMX_EPT_AD_ENABLE_BIT))
exit_qualification &= ~EPT_VIOLATION_ACC_WRITE;
}
@@ -9170,11 +9150,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
vmx->loaded_vmcs->shadow_vmcs = NULL;
if (!vmx->loaded_vmcs->vmcs)
goto free_msrs;
- if (!vmm_exclusive)
- kvm_cpu_vmxon(__pa(per_cpu(vmxarea, raw_smp_processor_id())));
loaded_vmcs_init(vmx->loaded_vmcs);
- if (!vmm_exclusive)
- kvm_cpu_vmxoff();
cpu = get_cpu();
vmx_vcpu_load(&vmx->vcpu, cpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6bc47e2712c8..34bf64fb4dea 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1443,10 +1443,10 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
struct kvm *kvm = vcpu->kvm;
u64 offset, ns, elapsed;
unsigned long flags;
- s64 usdiff;
bool matched;
bool already_matched;
u64 data = msr->data;
+ bool synchronizing = false;
raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
offset = kvm_compute_tsc_offset(vcpu, data);
@@ -1454,51 +1454,34 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
elapsed = ns - kvm->arch.last_tsc_nsec;
if (vcpu->arch.virtual_tsc_khz) {
- int faulted = 0;
-
- /* n.b - signed multiplication and division required */
- usdiff = data - kvm->arch.last_tsc_write;
-#ifdef CONFIG_X86_64
- usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz;
-#else
- /* do_div() only does unsigned */
- asm("1: idivl %[divisor]\n"
- "2: xor %%edx, %%edx\n"
- " movl $0, %[faulted]\n"
- "3:\n"
- ".section .fixup,\"ax\"\n"
- "4: movl $1, %[faulted]\n"
- " jmp 3b\n"
- ".previous\n"
-
- _ASM_EXTABLE(1b, 4b)
-
- : "=A"(usdiff), [faulted] "=r" (faulted)
- : "A"(usdiff * 1000), [divisor] "rm"(vcpu->arch.virtual_tsc_khz));
-
-#endif
- do_div(elapsed, 1000);
- usdiff -= elapsed;
- if (usdiff < 0)
- usdiff = -usdiff;
-
- /* idivl overflow => difference is larger than USEC_PER_SEC */
- if (faulted)
- usdiff = USEC_PER_SEC;
- } else
- usdiff = USEC_PER_SEC; /* disable TSC match window below */
+ if (data == 0 && msr->host_initiated) {
+ /*
+ * detection of vcpu initialization -- need to sync
+ * with other vCPUs. This particularly helps to keep
+ * kvm_clock stable after CPU hotplug
+ */
+ synchronizing = true;
+ } else {
+ u64 tsc_exp = kvm->arch.last_tsc_write +
+ nsec_to_cycles(vcpu, elapsed);
+ u64 tsc_hz = vcpu->arch.virtual_tsc_khz * 1000LL;
+ /*
+ * Special case: TSC write with a small delta (1 second)
+ * of virtual cycle time against real time is
+ * interpreted as an attempt to synchronize the CPU.
+ */
+ synchronizing = data < tsc_exp + tsc_hz &&
+ data + tsc_hz > tsc_exp;
+ }
+ }
/*
- * Special case: TSC write with a small delta (1 second) of virtual
- * cycle time against real time is interpreted as an attempt to
- * synchronize the CPU.
- *
* For a reliable TSC, we can match TSC offsets, and for an unstable
* TSC, we add elapsed time in this computation. We could let the
* compensation code attempt to catch up if we fall behind, but
* it's better to try to match offsets from the beginning.
*/
- if (usdiff < USEC_PER_SEC &&
+ if (synchronizing &&
vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) {
if (!check_tsc_unstable()) {
offset = kvm->arch.cur_tsc_offset;
@@ -2154,6 +2137,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_VM_HSAVE_PA:
case MSR_AMD64_PATCH_LOADER:
case MSR_AMD64_BU_CFG2:
+ case MSR_AMD64_DC_CFG:
break;
case MSR_EFER:
@@ -2416,6 +2400,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_FAM10H_MMIO_CONF_BASE:
case MSR_AMD64_BU_CFG2:
case MSR_IA32_PERF_CTL:
+ case MSR_AMD64_DC_CFG:
msr_info->data = 0;
break;
case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
@@ -3715,22 +3700,21 @@ static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
{
+ struct kvm_pic *pic = kvm->arch.vpic;
int r;
r = 0;
switch (chip->chip_id) {
case KVM_IRQCHIP_PIC_MASTER:
- memcpy(&chip->chip.pic,
- &pic_irqchip(kvm)->pics[0],
+ memcpy(&chip->chip.pic, &pic->pics[0],
sizeof(struct kvm_pic_state));
break;
case KVM_IRQCHIP_PIC_SLAVE:
- memcpy(&chip->chip.pic,
- &pic_irqchip(kvm)->pics[1],
+ memcpy(&chip->chip.pic, &pic->pics[1],
sizeof(struct kvm_pic_state));
break;
case KVM_IRQCHIP_IOAPIC:
- r = kvm_get_ioapic(kvm, &chip->chip.ioapic);
+ kvm_get_ioapic(kvm, &chip->chip.ioapic);
break;
default:
r = -EINVAL;
@@ -3741,32 +3725,31 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
{
+ struct kvm_pic *pic = kvm->arch.vpic;
int r;
r = 0;
switch (chip->chip_id) {
case KVM_IRQCHIP_PIC_MASTER:
- spin_lock(&pic_irqchip(kvm)->lock);
- memcpy(&pic_irqchip(kvm)->pics[0],
- &chip->chip.pic,
+ spin_lock(&pic->lock);
+ memcpy(&pic->pics[0], &chip->chip.pic,
sizeof(struct kvm_pic_state));
- spin_unlock(&pic_irqchip(kvm)->lock);
+ spin_unlock(&pic->lock);
break;
case KVM_IRQCHIP_PIC_SLAVE:
- spin_lock(&pic_irqchip(kvm)->lock);
- memcpy(&pic_irqchip(kvm)->pics[1],
- &chip->chip.pic,
+ spin_lock(&pic->lock);
+ memcpy(&pic->pics[1], &chip->chip.pic,
sizeof(struct kvm_pic_state));
- spin_unlock(&pic_irqchip(kvm)->lock);
+ spin_unlock(&pic->lock);
break;
case KVM_IRQCHIP_IOAPIC:
- r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
+ kvm_set_ioapic(kvm, &chip->chip.ioapic);
break;
default:
r = -EINVAL;
break;
}
- kvm_pic_update_irq(pic_irqchip(kvm));
+ kvm_pic_update_irq(pic);
return r;
}
@@ -3928,9 +3911,14 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
goto split_irqchip_unlock;
if (kvm->created_vcpus)
goto split_irqchip_unlock;
+ kvm->arch.irqchip_mode = KVM_IRQCHIP_INIT_IN_PROGRESS;
r = kvm_setup_empty_irq_routing(kvm);
- if (r)
+ if (r) {
+ kvm->arch.irqchip_mode = KVM_IRQCHIP_NONE;
+ /* Pairs with smp_rmb() when reading irqchip_mode */
+ smp_wmb();
goto split_irqchip_unlock;
+ }
/* Pairs with irqchip_in_kernel. */
smp_wmb();
kvm->arch.irqchip_mode = KVM_IRQCHIP_SPLIT;
@@ -4012,20 +4000,18 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = kvm_ioapic_init(kvm);
if (r) {
- mutex_lock(&kvm->slots_lock);
kvm_pic_destroy(kvm);
- mutex_unlock(&kvm->slots_lock);
goto create_irqchip_unlock;
}
+ kvm->arch.irqchip_mode = KVM_IRQCHIP_INIT_IN_PROGRESS;
r = kvm_setup_default_irq_routing(kvm);
if (r) {
- mutex_lock(&kvm->slots_lock);
- mutex_lock(&kvm->irq_lock);
+ kvm->arch.irqchip_mode = KVM_IRQCHIP_NONE;
+ /* Pairs with smp_rmb() when reading irqchip_mode */
+ smp_wmb();
kvm_ioapic_destroy(kvm);
kvm_pic_destroy(kvm);
- mutex_unlock(&kvm->irq_lock);
- mutex_unlock(&kvm->slots_lock);
goto create_irqchip_unlock;
}
/* Write kvm->irq_routing before enabling irqchip_in_kernel. */
diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile
index e7e7055a8658..69f0827d5f53 100644
--- a/arch/x86/um/Makefile
+++ b/arch/x86/um/Makefile
@@ -16,7 +16,7 @@ obj-y = bug.o bugs_$(BITS).o delay.o fault.o ldt.o \
ifeq ($(CONFIG_X86_32),y)
-obj-y += checksum_32.o
+obj-y += checksum_32.o syscalls_32.o
obj-$(CONFIG_ELF_CORE) += elfcore.o
subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o
diff --git a/arch/x86/um/asm/ptrace.h b/arch/x86/um/asm/ptrace.h
index e59eef20647b..b291ca5cf66b 100644
--- a/arch/x86/um/asm/ptrace.h
+++ b/arch/x86/um/asm/ptrace.h
@@ -78,7 +78,7 @@ static inline int ptrace_set_thread_area(struct task_struct *child, int idx,
return -ENOSYS;
}
-extern long arch_prctl(struct task_struct *task, int code,
+extern long arch_prctl(struct task_struct *task, int option,
unsigned long __user *addr);
#endif
diff --git a/arch/x86/um/os-Linux/prctl.c b/arch/x86/um/os-Linux/prctl.c
index 96eb2bd28832..8431e87ac333 100644
--- a/arch/x86/um/os-Linux/prctl.c
+++ b/arch/x86/um/os-Linux/prctl.c
@@ -6,7 +6,7 @@
#include <sys/ptrace.h>
#include <asm/ptrace.h>
-int os_arch_prctl(int pid, int code, unsigned long *addr)
+int os_arch_prctl(int pid, int option, unsigned long *arg2)
{
- return ptrace(PTRACE_ARCH_PRCTL, pid, (unsigned long) addr, code);
+ return ptrace(PTRACE_ARCH_PRCTL, pid, (unsigned long) arg2, option);
}
diff --git a/arch/x86/um/syscalls_32.c b/arch/x86/um/syscalls_32.c
new file mode 100644
index 000000000000..627d68836b16
--- /dev/null
+++ b/arch/x86/um/syscalls_32.c
@@ -0,0 +1,7 @@
+#include <linux/syscalls.h>
+#include <os.h>
+
+SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
+{
+ return -EINVAL;
+}
diff --git a/arch/x86/um/syscalls_64.c b/arch/x86/um/syscalls_64.c
index 10d907098c26..58f51667e2e4 100644
--- a/arch/x86/um/syscalls_64.c
+++ b/arch/x86/um/syscalls_64.c
@@ -7,13 +7,15 @@
#include <linux/sched.h>
#include <linux/sched/mm.h>
+#include <linux/syscalls.h>
#include <linux/uaccess.h>
#include <asm/prctl.h> /* XXX This should get the constants from libc */
#include <os.h>
-long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr)
+long arch_prctl(struct task_struct *task, int option,
+ unsigned long __user *arg2)
{
- unsigned long *ptr = addr, tmp;
+ unsigned long *ptr = arg2, tmp;
long ret;
int pid = task->mm->context.id.u.pid;
@@ -30,7 +32,7 @@ long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr)
* arch_prctl is run on the host, then the registers are read
* back.
*/
- switch (code) {
+ switch (option) {
case ARCH_SET_FS:
case ARCH_SET_GS:
ret = restore_registers(pid, &current->thread.regs.regs);
@@ -50,11 +52,11 @@ long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr)
ptr = &tmp;
}
- ret = os_arch_prctl(pid, code, ptr);
+ ret = os_arch_prctl(pid, option, ptr);
if (ret)
return ret;
- switch (code) {
+ switch (option) {
case ARCH_SET_FS:
current->thread.arch.fs = (unsigned long) ptr;
ret = save_registers(pid, &current->thread.regs.regs);
@@ -63,19 +65,19 @@ long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr)
ret = save_registers(pid, &current->thread.regs.regs);
break;
case ARCH_GET_FS:
- ret = put_user(tmp, addr);
+ ret = put_user(tmp, arg2);
break;
case ARCH_GET_GS:
- ret = put_user(tmp, addr);
+ ret = put_user(tmp, arg2);
break;
}
return ret;
}
-long sys_arch_prctl(int code, unsigned long addr)
+SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
{
- return arch_prctl(current, code, (unsigned long __user *) addr);
+ return arch_prctl(current, option, (unsigned long __user *) arg2);
}
void arch_switch_to(struct task_struct *to)
diff --git a/fs/exec.c b/fs/exec.c
index 65145a3df065..72934df68471 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1320,6 +1320,7 @@ void setup_new_exec(struct linux_binprm * bprm)
else
set_dumpable(current->mm, suid_dumpable);
+ arch_setup_new_exec();
perf_event_exec();
__set_task_comm(current, kbasename(bprm->filename), true);
diff --git a/include/linux/compat.h b/include/linux/compat.h
index aef47be2a5c1..af9dbc44fd92 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -723,6 +723,8 @@ asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid,
asmlinkage long compat_sys_fanotify_mark(int, unsigned int, __u32, __u32,
int, const char __user *);
+asmlinkage long compat_sys_arch_prctl(int option, unsigned long arg2);
+
/*
* For most but not all architectures, "am I in a compat syscall?" and
* "am I a compat task?" are the same question. For architectures on which
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 7e74ae4d99bb..397b7b5b1933 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -502,10 +502,10 @@ int __must_check vcpu_load(struct kvm_vcpu *vcpu);
void vcpu_put(struct kvm_vcpu *vcpu);
#ifdef __KVM_HAVE_IOAPIC
-void kvm_vcpu_request_scan_ioapic(struct kvm *kvm);
+void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm);
void kvm_arch_post_irq_routing_update(struct kvm *kvm);
#else
-static inline void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
+static inline void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm)
{
}
static inline void kvm_arch_post_irq_routing_update(struct kvm *kvm)
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index 58373875e8ee..55125d674338 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -101,6 +101,10 @@ static inline void check_object_size(const void *ptr, unsigned long n,
{ }
#endif /* CONFIG_HARDENED_USERCOPY */
+#ifndef arch_setup_new_exec
+static inline void arch_setup_new_exec(void) { }
+#endif
+
#endif /* __KERNEL__ */
#endif /* _LINUX_THREAD_INFO_H */
diff --git a/scripts/checksyscalls.sh b/scripts/checksyscalls.sh
index 2c9082ba6137..116b7735ee9f 100755
--- a/scripts/checksyscalls.sh
+++ b/scripts/checksyscalls.sh
@@ -148,6 +148,7 @@ cat << EOF
#define __IGNORE_sysfs
#define __IGNORE_uselib
#define __IGNORE__sysctl
+#define __IGNORE_arch_prctl
/* ... including the "new" 32-bit uid syscalls */
#define __IGNORE_lchown32
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 4d28a9ddbee0..a8d540398bbd 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -490,7 +490,7 @@ void kvm_register_irq_ack_notifier(struct kvm *kvm,
mutex_lock(&kvm->irq_lock);
hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
mutex_unlock(&kvm->irq_lock);
- kvm_vcpu_request_scan_ioapic(kvm);
+ kvm_arch_post_irq_ack_notifier_list_update(kvm);
}
void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
@@ -500,7 +500,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
hlist_del_init_rcu(&kian->link);
mutex_unlock(&kvm->irq_lock);
synchronize_srcu(&kvm->irq_srcu);
- kvm_vcpu_request_scan_ioapic(kvm);
+ kvm_arch_post_irq_ack_notifier_list_update(kvm);
}
#endif
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 3bcc9990adf7..cc30d01a56be 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -142,8 +142,8 @@ static int setup_routing_entry(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue)
{
- int r = -EINVAL;
struct kvm_kernel_irq_routing_entry *ei;
+ int r;
/*
* Do not allow GSI to be mapped to the same irqchip more than once.
@@ -153,20 +153,19 @@ static int setup_routing_entry(struct kvm *kvm,
if (ei->type != KVM_IRQ_ROUTING_IRQCHIP ||
ue->type != KVM_IRQ_ROUTING_IRQCHIP ||
ue->u.irqchip.irqchip == ei->irqchip.irqchip)
- return r;
+ return -EINVAL;
e->gsi = ue->gsi;
e->type = ue->type;
r = kvm_set_routing_entry(kvm, e, ue);
if (r)
- goto out;
+ return r;
if (e->type == KVM_IRQ_ROUTING_IRQCHIP)
rt->chip[e->irqchip.irqchip][e->irqchip.pin] = e->gsi;
hlist_add_head(&e->link, &rt->map[e->gsi]);
- r = 0;
-out:
- return r;
+
+ return 0;
}
void __attribute__((weak)) kvm_arch_irq_routing_update(struct kvm *kvm)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f489167839c4..357e67cba32e 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3069,8 +3069,11 @@ static long kvm_vm_ioctl(struct file *filp,
routing.nr * sizeof(*entries)))
goto out_free_irq_routing;
}
+ /* avoid races with KVM_CREATE_IRQCHIP on x86 */
+ mutex_lock(&kvm->lock);
r = kvm_set_irq_routing(kvm, entries, routing.nr,
routing.flags);
+ mutex_unlock(&kvm->lock);
out_free_irq_routing:
vfree(entries);
break;