summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/crypto/aes_ctrby8_avx-x86_64.S7
-rw-r--r--arch/x86/crypto/camellia_glue.c4
-rw-r--r--arch/x86/crypto/glue_helper.c3
-rw-r--r--arch/x86/crypto/serpent_sse2_glue.c4
-rw-r--r--arch/x86/crypto/twofish_glue_3way.c4
-rw-r--r--arch/x86/entry/common.c9
-rw-r--r--arch/x86/entry/syscalls/syscall_32.tbl2
-rw-r--r--arch/x86/hyperv/hv_init.c2
-rw-r--r--arch/x86/include/asm/cacheflush.h85
-rw-r--r--arch/x86/include/asm/crypto/glue_helper.h10
-rw-r--r--arch/x86/include/asm/intel_pmc_ipc.h23
-rw-r--r--arch/x86/include/asm/intel_scu_ipc.h8
-rw-r--r--arch/x86/include/asm/iosf_mbi.h87
-rw-r--r--arch/x86/include/asm/kvm_emulate.h4
-rw-r--r--arch/x86/include/asm/kvm_host.h17
-rw-r--r--arch/x86/include/asm/pci.h7
-rw-r--r--arch/x86/include/asm/pmem.h5
-rw-r--r--arch/x86/include/asm/set_memory.h87
-rw-r--r--arch/x86/include/asm/string_64.h1
-rw-r--r--arch/x86/include/asm/thread_info.h26
-rw-r--r--arch/x86/include/asm/unistd.h1
-rw-r--r--arch/x86/include/asm/unwind.h6
-rw-r--r--arch/x86/include/asm/vmx.h4
-rw-r--r--arch/x86/include/uapi/asm/Kbuild59
-rw-r--r--arch/x86/include/uapi/asm/hyperv.h7
-rw-r--r--arch/x86/include/uapi/asm/kvm.h3
-rw-r--r--arch/x86/include/uapi/asm/vmx.h25
-rw-r--r--arch/x86/kernel/amd_gart_64.c2
-rw-r--r--arch/x86/kernel/cpu/amd.c2
-rw-r--r--arch/x86/kernel/cpu/bugs.c2
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c3
-rw-r--r--arch/x86/kernel/early-quirks.c1
-rw-r--r--arch/x86/kernel/ftrace.c10
-rw-r--r--arch/x86/kernel/i8259.c1
-rw-r--r--arch/x86/kernel/kprobes/core.c1
-rw-r--r--arch/x86/kernel/kprobes/opt.c1
-rw-r--r--arch/x86/kernel/kvm.c4
-rw-r--r--arch/x86/kernel/machine_kexec_32.c2
-rw-r--r--arch/x86/kernel/machine_kexec_64.c1
-rw-r--r--arch/x86/kernel/module.c2
-rw-r--r--arch/x86/kernel/stacktrace.c96
-rw-r--r--arch/x86/kernel/tboot.c3
-rw-r--r--arch/x86/kernel/unwind_frame.c2
-rw-r--r--arch/x86/kvm/Kconfig12
-rw-r--r--arch/x86/kvm/Makefile2
-rw-r--r--arch/x86/kvm/assigned-dev.c1058
-rw-r--r--arch/x86/kvm/assigned-dev.h32
-rw-r--r--arch/x86/kvm/cpuid.c3
-rw-r--r--arch/x86/kvm/cpuid.h11
-rw-r--r--arch/x86/kvm/emulate.c23
-rw-r--r--arch/x86/kvm/i8259.c72
-rw-r--r--arch/x86/kvm/ioapic.c28
-rw-r--r--arch/x86/kvm/ioapic.h16
-rw-r--r--arch/x86/kvm/iommu.c356
-rw-r--r--arch/x86/kvm/irq.c2
-rw-r--r--arch/x86/kvm/irq.h32
-rw-r--r--arch/x86/kvm/irq_comm.c50
-rw-r--r--arch/x86/kvm/lapic.c26
-rw-r--r--arch/x86/kvm/mmu.c19
-rw-r--r--arch/x86/kvm/mmu.h4
-rw-r--r--arch/x86/kvm/page_track.c4
-rw-r--r--arch/x86/kvm/paging_tmpl.h58
-rw-r--r--arch/x86/kvm/svm.c14
-rw-r--r--arch/x86/kvm/vmx.c435
-rw-r--r--arch/x86/kvm/x86.c260
-rw-r--r--arch/x86/kvm/x86.h36
-rw-r--r--arch/x86/mm/init.c2
-rw-r--r--arch/x86/mm/init_32.c2
-rw-r--r--arch/x86/mm/init_64.c2
-rw-r--r--arch/x86/mm/ioremap.c2
-rw-r--r--arch/x86/mm/pageattr.c1
-rw-r--r--arch/x86/mm/testmmiotrace.c2
-rw-r--r--arch/x86/net/bpf_jit_comp.c8
-rw-r--r--arch/x86/pci/i386.c47
-rw-r--r--arch/x86/pci/pcbios.c2
-rw-r--r--arch/x86/platform/efi/efi.c2
-rw-r--r--arch/x86/platform/intel/iosf_mbi.c49
-rw-r--r--arch/x86/realmode/init.c2
79 files changed, 1042 insertions, 2266 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8d4f87e5bba3..cd18994a9555 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -160,6 +160,7 @@ config X86
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
select HAVE_REGS_AND_STACK_ACCESS_API
+ select HAVE_RELIABLE_STACKTRACE if X86_64 && FRAME_POINTER && STACK_VALIDATION
select HAVE_STACK_VALIDATION if X86_64
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_UNSTABLE_SCHED_CLOCK
diff --git a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
index a916c4a61165..5f6a5af9c489 100644
--- a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
+++ b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
@@ -65,7 +65,6 @@
#include <linux/linkage.h>
#include <asm/inst.h>
-#define CONCAT(a,b) a##b
#define VMOVDQ vmovdqu
#define xdata0 %xmm0
@@ -92,8 +91,6 @@
#define num_bytes %r8
#define tmp %r10
-#define DDQ(i) CONCAT(ddq_add_,i)
-#define XMM(i) CONCAT(%xmm, i)
#define DDQ_DATA 0
#define XDATA 1
#define KEY_128 1
@@ -131,12 +128,12 @@ ddq_add_8:
/* generate a unique variable for ddq_add_x */
.macro setddq n
- var_ddq_add = DDQ(\n)
+ var_ddq_add = ddq_add_\n
.endm
/* generate a unique variable for xmm register */
.macro setxdata n
- var_xdata = XMM(\n)
+ var_xdata = %xmm\n
.endm
/* club the numeric 'id' to the symbol 'name' */
diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c
index aa76cad9d262..af4840ab2a3d 100644
--- a/arch/x86/crypto/camellia_glue.c
+++ b/arch/x86/crypto/camellia_glue.c
@@ -1522,7 +1522,7 @@ static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- be128 buf[2 * 4];
+ le128 buf[2 * 4];
struct xts_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),
@@ -1540,7 +1540,7 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- be128 buf[2 * 4];
+ le128 buf[2 * 4];
struct xts_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),
diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c
index 260a060d7275..24ac9fad832d 100644
--- a/arch/x86/crypto/glue_helper.c
+++ b/arch/x86/crypto/glue_helper.c
@@ -27,6 +27,7 @@
#include <linux/module.h>
#include <crypto/b128ops.h>
+#include <crypto/gf128mul.h>
#include <crypto/internal/skcipher.h>
#include <crypto/lrw.h>
#include <crypto/xts.h>
@@ -457,7 +458,7 @@ void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src, le128 *iv,
le128 ivblk = *iv;
/* generate next IV */
- le128_gf128mul_x_ble(iv, &ivblk);
+ gf128mul_x_ble(iv, &ivblk);
/* CC <- T xor C */
u128_xor(dst, src, (u128 *)&ivblk);
diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c
index 644f97ab8cac..ac0e831943f5 100644
--- a/arch/x86/crypto/serpent_sse2_glue.c
+++ b/arch/x86/crypto/serpent_sse2_glue.c
@@ -328,7 +328,7 @@ static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- be128 buf[SERPENT_PARALLEL_BLOCKS];
+ le128 buf[SERPENT_PARALLEL_BLOCKS];
struct crypt_priv crypt_ctx = {
.ctx = &ctx->crypt_ctx,
.fpu_enabled = false,
@@ -355,7 +355,7 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- be128 buf[SERPENT_PARALLEL_BLOCKS];
+ le128 buf[SERPENT_PARALLEL_BLOCKS];
struct crypt_priv crypt_ctx = {
.ctx = &ctx->crypt_ctx,
.fpu_enabled = false,
diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c
index 2ebb5e9789f3..243e90a4b5d9 100644
--- a/arch/x86/crypto/twofish_glue_3way.c
+++ b/arch/x86/crypto/twofish_glue_3way.c
@@ -296,7 +296,7 @@ static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- be128 buf[3];
+ le128 buf[3];
struct xts_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),
@@ -314,7 +314,7 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- be128 buf[3];
+ le128 buf[3];
struct xts_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 370c42c7f046..cdefcfdd9e63 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -22,6 +22,7 @@
#include <linux/context_tracking.h>
#include <linux/user-return-notifier.h>
#include <linux/uprobes.h>
+#include <linux/livepatch.h>
#include <asm/desc.h>
#include <asm/traps.h>
@@ -130,14 +131,13 @@ static long syscall_trace_enter(struct pt_regs *regs)
#define EXIT_TO_USERMODE_LOOP_FLAGS \
(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
- _TIF_NEED_RESCHED | _TIF_USER_RETURN_NOTIFY)
+ _TIF_NEED_RESCHED | _TIF_USER_RETURN_NOTIFY | _TIF_PATCH_PENDING)
static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
{
/*
* In order to return to user mode, we need to have IRQs off with
- * none of _TIF_SIGPENDING, _TIF_NOTIFY_RESUME, _TIF_USER_RETURN_NOTIFY,
- * _TIF_UPROBE, or _TIF_NEED_RESCHED set. Several of these flags
+ * none of EXIT_TO_USERMODE_LOOP_FLAGS set. Several of these flags
* can be set at any time on preemptable kernels if we have IRQs on,
* so we need to loop. Disabling preemption wouldn't help: doing the
* work to clear some of the flags can sleep.
@@ -164,6 +164,9 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
if (cached_flags & _TIF_USER_RETURN_NOTIFY)
fire_user_return_notifiers();
+ if (cached_flags & _TIF_PATCH_PENDING)
+ klp_update_patch_state(current);
+
/* Disable IRQs and retry */
local_irq_disable();
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 0af59fa789ea..448ac2161112 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -226,7 +226,7 @@
217 i386 pivot_root sys_pivot_root
218 i386 mincore sys_mincore
219 i386 madvise sys_madvise
-220 i386 getdents64 sys_getdents64 compat_sys_getdents64
+220 i386 getdents64 sys_getdents64
221 i386 fcntl64 sys_fcntl64 compat_sys_fcntl64
# 222 is unused
# 223 is unused
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 2b01421f7d0f..5b882cc0c0e9 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -25,7 +25,7 @@
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/clockchips.h>
-
+#include <linux/hyperv.h>
#ifdef CONFIG_HYPERV_TSCPAGE
diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h
index e7e1942edff7..8b4140f6724f 100644
--- a/arch/x86/include/asm/cacheflush.h
+++ b/arch/x86/include/asm/cacheflush.h
@@ -5,93 +5,8 @@
#include <asm-generic/cacheflush.h>
#include <asm/special_insns.h>
-/*
- * The set_memory_* API can be used to change various attributes of a virtual
- * address range. The attributes include:
- * Cachability : UnCached, WriteCombining, WriteThrough, WriteBack
- * Executability : eXeutable, NoteXecutable
- * Read/Write : ReadOnly, ReadWrite
- * Presence : NotPresent
- *
- * Within a category, the attributes are mutually exclusive.
- *
- * The implementation of this API will take care of various aspects that
- * are associated with changing such attributes, such as:
- * - Flushing TLBs
- * - Flushing CPU caches
- * - Making sure aliases of the memory behind the mapping don't violate
- * coherency rules as defined by the CPU in the system.
- *
- * What this API does not do:
- * - Provide exclusion between various callers - including callers that
- * operation on other mappings of the same physical page
- * - Restore default attributes when a page is freed
- * - Guarantee that mappings other than the requested one are
- * in any state, other than that these do not violate rules for
- * the CPU you have. Do not depend on any effects on other mappings,
- * CPUs other than the one you have may have more relaxed rules.
- * The caller is required to take care of these.
- */
-
-int _set_memory_uc(unsigned long addr, int numpages);
-int _set_memory_wc(unsigned long addr, int numpages);
-int _set_memory_wt(unsigned long addr, int numpages);
-int _set_memory_wb(unsigned long addr, int numpages);
-int set_memory_uc(unsigned long addr, int numpages);
-int set_memory_wc(unsigned long addr, int numpages);
-int set_memory_wt(unsigned long addr, int numpages);
-int set_memory_wb(unsigned long addr, int numpages);
-int set_memory_x(unsigned long addr, int numpages);
-int set_memory_nx(unsigned long addr, int numpages);
-int set_memory_ro(unsigned long addr, int numpages);
-int set_memory_rw(unsigned long addr, int numpages);
-int set_memory_np(unsigned long addr, int numpages);
-int set_memory_4k(unsigned long addr, int numpages);
-
-int set_memory_array_uc(unsigned long *addr, int addrinarray);
-int set_memory_array_wc(unsigned long *addr, int addrinarray);
-int set_memory_array_wt(unsigned long *addr, int addrinarray);
-int set_memory_array_wb(unsigned long *addr, int addrinarray);
-
-int set_pages_array_uc(struct page **pages, int addrinarray);
-int set_pages_array_wc(struct page **pages, int addrinarray);
-int set_pages_array_wt(struct page **pages, int addrinarray);
-int set_pages_array_wb(struct page **pages, int addrinarray);
-
-/*
- * For legacy compatibility with the old APIs, a few functions
- * are provided that work on a "struct page".
- * These functions operate ONLY on the 1:1 kernel mapping of the
- * memory that the struct page represents, and internally just
- * call the set_memory_* function. See the description of the
- * set_memory_* function for more details on conventions.
- *
- * These APIs should be considered *deprecated* and are likely going to
- * be removed in the future.
- * The reason for this is the implicit operation on the 1:1 mapping only,
- * making this not a generally useful API.
- *
- * Specifically, many users of the old APIs had a virtual address,
- * called virt_to_page() or vmalloc_to_page() on that address to
- * get a struct page* that the old API required.
- * To convert these cases, use set_memory_*() on the original
- * virtual address, do not use these functions.
- */
-
-int set_pages_uc(struct page *page, int numpages);
-int set_pages_wb(struct page *page, int numpages);
-int set_pages_x(struct page *page, int numpages);
-int set_pages_nx(struct page *page, int numpages);
-int set_pages_ro(struct page *page, int numpages);
-int set_pages_rw(struct page *page, int numpages);
-
-
void clflush_cache_range(void *addr, unsigned int size);
#define mmio_flush_range(addr, size) clflush_cache_range(addr, size)
-extern int kernel_set_to_readonly;
-void set_kernel_text_rw(void);
-void set_kernel_text_ro(void);
-
#endif /* _ASM_X86_CACHEFLUSH_H */
diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h
index 29e53ea7d764..ed8b66de541f 100644
--- a/arch/x86/include/asm/crypto/glue_helper.h
+++ b/arch/x86/include/asm/crypto/glue_helper.h
@@ -125,16 +125,6 @@ static inline void le128_inc(le128 *i)
i->b = cpu_to_le64(b);
}
-static inline void le128_gf128mul_x_ble(le128 *dst, const le128 *src)
-{
- u64 a = le64_to_cpu(src->a);
- u64 b = le64_to_cpu(src->b);
- u64 _tt = ((s64)a >> 63) & 0x87;
-
- dst->a = cpu_to_le64((a << 1) ^ (b >> 63));
- dst->b = cpu_to_le64((b << 1) ^ _tt);
-}
-
extern int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
struct blkcipher_desc *desc,
struct scatterlist *dst,
diff --git a/arch/x86/include/asm/intel_pmc_ipc.h b/arch/x86/include/asm/intel_pmc_ipc.h
index 4291b6a5ddf7..fac89eb78a6b 100644
--- a/arch/x86/include/asm/intel_pmc_ipc.h
+++ b/arch/x86/include/asm/intel_pmc_ipc.h
@@ -23,6 +23,11 @@
#define IPC_ERR_EMSECURITY 6
#define IPC_ERR_UNSIGNEDKERNEL 7
+/* GCR reg offsets from gcr base*/
+#define PMC_GCR_PMC_CFG_REG 0x08
+#define PMC_GCR_TELEM_DEEP_S0IX_REG 0x78
+#define PMC_GCR_TELEM_SHLW_S0IX_REG 0x80
+
#if IS_ENABLED(CONFIG_INTEL_PMC_IPC)
int intel_pmc_ipc_simple_command(int cmd, int sub);
@@ -31,6 +36,9 @@ int intel_pmc_ipc_raw_cmd(u32 cmd, u32 sub, u8 *in, u32 inlen,
int intel_pmc_ipc_command(u32 cmd, u32 sub, u8 *in, u32 inlen,
u32 *out, u32 outlen);
int intel_pmc_s0ix_counter_read(u64 *data);
+int intel_pmc_gcr_read(u32 offset, u32 *data);
+int intel_pmc_gcr_write(u32 offset, u32 data);
+int intel_pmc_gcr_update(u32 offset, u32 mask, u32 val);
#else
@@ -56,6 +64,21 @@ static inline int intel_pmc_s0ix_counter_read(u64 *data)
return -EINVAL;
}
+static inline int intel_pmc_gcr_read(u32 offset, u32 *data)
+{
+ return -EINVAL;
+}
+
+static inline int intel_pmc_gcr_write(u32 offset, u32 data)
+{
+ return -EINVAL;
+}
+
+static inline int intel_pmc_gcr_update(u32 offset, u32 mask, u32 val)
+{
+ return -EINVAL;
+}
+
#endif /*CONFIG_INTEL_PMC_IPC*/
#endif
diff --git a/arch/x86/include/asm/intel_scu_ipc.h b/arch/x86/include/asm/intel_scu_ipc.h
index 4fb1d0abef95..81d3d8776fd9 100644
--- a/arch/x86/include/asm/intel_scu_ipc.h
+++ b/arch/x86/include/asm/intel_scu_ipc.h
@@ -3,6 +3,9 @@
#include <linux/notifier.h>
+#define IPCMSG_INDIRECT_READ 0x02
+#define IPCMSG_INDIRECT_WRITE 0x05
+
#define IPCMSG_COLD_OFF 0x80 /* Only for Tangier */
#define IPCMSG_WARM_RESET 0xF0
@@ -45,7 +48,10 @@ int intel_scu_ipc_update_register(u16 addr, u8 data, u8 mask);
/* Issue commands to the SCU with or without data */
int intel_scu_ipc_simple_command(int cmd, int sub);
int intel_scu_ipc_command(int cmd, int sub, u32 *in, int inlen,
- u32 *out, int outlen);
+ u32 *out, int outlen);
+int intel_scu_ipc_raw_command(int cmd, int sub, u8 *in, int inlen,
+ u32 *out, int outlen, u32 dptr, u32 sptr);
+
/* I2C control api */
int intel_scu_ipc_i2c_cntrl(u32 addr, u32 *data);
diff --git a/arch/x86/include/asm/iosf_mbi.h b/arch/x86/include/asm/iosf_mbi.h
index b41ee164930a..c313cac36f56 100644
--- a/arch/x86/include/asm/iosf_mbi.h
+++ b/arch/x86/include/asm/iosf_mbi.h
@@ -5,6 +5,8 @@
#ifndef IOSF_MBI_SYMS_H
#define IOSF_MBI_SYMS_H
+#include <linux/notifier.h>
+
#define MBI_MCR_OFFSET 0xD0
#define MBI_MDR_OFFSET 0xD4
#define MBI_MCRX_OFFSET 0xD8
@@ -47,6 +49,10 @@
#define QRK_MBI_UNIT_MM 0x05
#define QRK_MBI_UNIT_SOC 0x31
+/* Action values for the pmic_bus_access_notifier functions */
+#define MBI_PMIC_BUS_ACCESS_BEGIN 1
+#define MBI_PMIC_BUS_ACCESS_END 2
+
#if IS_ENABLED(CONFIG_IOSF_MBI)
bool iosf_mbi_available(void);
@@ -88,6 +94,65 @@ int iosf_mbi_write(u8 port, u8 opcode, u32 offset, u32 mdr);
*/
int iosf_mbi_modify(u8 port, u8 opcode, u32 offset, u32 mdr, u32 mask);
+/**
+ * iosf_mbi_punit_acquire() - Acquire access to the P-Unit
+ *
+ * One some systems the P-Unit accesses the PMIC to change various voltages
+ * through the same bus as other kernel drivers use for e.g. battery monitoring.
+ *
+ * If a driver sends requests to the P-Unit which require the P-Unit to access
+ * the PMIC bus while another driver is also accessing the PMIC bus various bad
+ * things happen.
+ *
+ * To avoid these problems this function must be called before accessing the
+ * P-Unit or the PMIC, be it through iosf_mbi* functions or through other means.
+ *
+ * Note on these systems the i2c-bus driver will request a sempahore from the
+ * P-Unit for exclusive access to the PMIC bus when i2c drivers are accessing
+ * it, but this does not appear to be sufficient, we still need to avoid making
+ * certain P-Unit requests during the access window to avoid problems.
+ *
+ * This function locks a mutex, as such it may sleep.
+ */
+void iosf_mbi_punit_acquire(void);
+
+/**
+ * iosf_mbi_punit_release() - Release access to the P-Unit
+ */
+void iosf_mbi_punit_release(void);
+
+/**
+ * iosf_mbi_register_pmic_bus_access_notifier - Register PMIC bus notifier
+ *
+ * This function can be used by drivers which may need to acquire P-Unit
+ * managed resources from interrupt context, where iosf_mbi_punit_acquire()
+ * can not be used.
+ *
+ * This function allows a driver to register a notifier to get notified (in a
+ * process context) before other drivers start accessing the PMIC bus.
+ *
+ * This allows the driver to acquire any resources, which it may need during
+ * the window the other driver is accessing the PMIC, before hand.
+ *
+ * @nb: notifier_block to register
+ */
+int iosf_mbi_register_pmic_bus_access_notifier(struct notifier_block *nb);
+
+/**
+ * iosf_mbi_register_pmic_bus_access_notifier - Unregister PMIC bus notifier
+ *
+ * @nb: notifier_block to unregister
+ */
+int iosf_mbi_unregister_pmic_bus_access_notifier(struct notifier_block *nb);
+
+/**
+ * iosf_mbi_call_pmic_bus_access_notifier_chain - Call PMIC bus notifier chain
+ *
+ * @val: action to pass into listener's notifier_call function
+ * @v: data pointer to pass into listener's notifier_call function
+ */
+int iosf_mbi_call_pmic_bus_access_notifier_chain(unsigned long val, void *v);
+
#else /* CONFIG_IOSF_MBI is not enabled */
static inline
bool iosf_mbi_available(void)
@@ -115,6 +180,28 @@ int iosf_mbi_modify(u8 port, u8 opcode, u32 offset, u32 mdr, u32 mask)
WARN(1, "IOSF_MBI driver not available");
return -EPERM;
}
+
+static inline void iosf_mbi_punit_acquire(void) {}
+static inline void iosf_mbi_punit_release(void) {}
+
+static inline
+int iosf_mbi_register_pmic_bus_access_notifier(struct notifier_block *nb)
+{
+ return 0;
+}
+
+static inline
+int iosf_mbi_unregister_pmic_bus_access_notifier(struct notifier_block *nb)
+{
+ return 0;
+}
+
+static inline
+int iosf_mbi_call_pmic_bus_access_notifier_chain(unsigned long val, void *v)
+{
+ return 0;
+}
+
#endif /* CONFIG_IOSF_MBI */
#endif /* IOSF_MBI_SYMS_H */
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 3e8c287090e4..055962615779 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -221,6 +221,9 @@ struct x86_emulate_ops {
void (*get_cpuid)(struct x86_emulate_ctxt *ctxt,
u32 *eax, u32 *ebx, u32 *ecx, u32 *edx);
void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked);
+
+ unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt);
+ void (*set_hflags)(struct x86_emulate_ctxt *ctxt, unsigned hflags);
};
typedef u32 __attribute__((vector_size(16))) sse128_t;
@@ -290,7 +293,6 @@ struct x86_emulate_ctxt {
/* interruptibility state, as a result of execution of STI or MOV SS */
int interruptibility;
- int emul_flags;
bool perm_ok; /* do not check permissions if true */
bool ud; /* inject an #UD if host doesn't support insn */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 74ef58c8ff53..9c761fea0c98 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -43,8 +43,6 @@
#define KVM_PRIVATE_MEM_SLOTS 3
#define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS)
-#define KVM_PIO_PAGE_OFFSET 1
-#define KVM_COALESCED_MMIO_PAGE_OFFSET 2
#define KVM_HALT_POLL_NS_DEFAULT 400000
#define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS
@@ -63,10 +61,10 @@
#define KVM_REQ_PMI 19
#define KVM_REQ_SMI 20
#define KVM_REQ_MASTERCLOCK_UPDATE 21
-#define KVM_REQ_MCLOCK_INPROGRESS 22
-#define KVM_REQ_SCAN_IOAPIC 23
+#define KVM_REQ_MCLOCK_INPROGRESS (22 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_SCAN_IOAPIC (23 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_GLOBAL_CLOCK_UPDATE 24
-#define KVM_REQ_APIC_PAGE_RELOAD 25
+#define KVM_REQ_APIC_PAGE_RELOAD (25 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_HV_CRASH 26
#define KVM_REQ_IOAPIC_EOI_EXIT 27
#define KVM_REQ_HV_RESET 28
@@ -343,9 +341,10 @@ struct kvm_mmu {
void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
u64 *spte, const void *pte);
hpa_t root_hpa;
- int root_level;
- int shadow_root_level;
union kvm_mmu_page_role base_role;
+ u8 root_level;
+ u8 shadow_root_level;
+ u8 ept_ad;
bool direct_map;
/*
@@ -612,6 +611,8 @@ struct kvm_vcpu_arch {
unsigned long dr7;
unsigned long eff_db[KVM_NR_DB_REGS];
unsigned long guest_debug_dr7;
+ u64 msr_platform_info;
+ u64 msr_misc_features_enables;
u64 mcg_cap;
u64 mcg_status;
@@ -1019,6 +1020,8 @@ struct kvm_x86_ops {
void (*enable_log_dirty_pt_masked)(struct kvm *kvm,
struct kvm_memory_slot *slot,
gfn_t offset, unsigned long mask);
+ int (*write_log_dirty)(struct kvm_vcpu *vcpu);
+
/* pmu operations of sub-arch */
const struct kvm_pmu_ops *pmu_ops;
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index 1411dbed5e5e..f513cc231151 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -7,6 +7,7 @@
#include <linux/string.h>
#include <linux/scatterlist.h>
#include <asm/io.h>
+#include <asm/pat.h>
#include <asm/x86_init.h>
#ifdef __KERNEL__
@@ -102,10 +103,8 @@ int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq);
#define HAVE_PCI_MMAP
-extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
- enum pci_mmap_state mmap_state,
- int write_combine);
-
+#define arch_can_pci_mmap_wc() pat_enabled()
+#define ARCH_GENERIC_PCI_MMAP_RESOURCE
#ifdef CONFIG_PCI
extern void early_quirks(void);
diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h
index 529bb4a6487a..d5a22bac9988 100644
--- a/arch/x86/include/asm/pmem.h
+++ b/arch/x86/include/asm/pmem.h
@@ -44,11 +44,6 @@ static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n)
BUG();
}
-static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n)
-{
- return memcpy_mcsafe(dst, src, n);
-}
-
/**
* arch_wb_cache_pmem - write back a cache range with CLWB
* @vaddr: virtual start address
diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
new file mode 100644
index 000000000000..eaec6c364e42
--- /dev/null
+++ b/arch/x86/include/asm/set_memory.h
@@ -0,0 +1,87 @@
+#ifndef _ASM_X86_SET_MEMORY_H
+#define _ASM_X86_SET_MEMORY_H
+
+#include <asm/page.h>
+#include <asm-generic/set_memory.h>
+
+/*
+ * The set_memory_* API can be used to change various attributes of a virtual
+ * address range. The attributes include:
+ * Cachability : UnCached, WriteCombining, WriteThrough, WriteBack
+ * Executability : eXeutable, NoteXecutable
+ * Read/Write : ReadOnly, ReadWrite
+ * Presence : NotPresent
+ *
+ * Within a category, the attributes are mutually exclusive.
+ *
+ * The implementation of this API will take care of various aspects that
+ * are associated with changing such attributes, such as:
+ * - Flushing TLBs
+ * - Flushing CPU caches
+ * - Making sure aliases of the memory behind the mapping don't violate
+ * coherency rules as defined by the CPU in the system.
+ *
+ * What this API does not do:
+ * - Provide exclusion between various callers - including callers that
+ * operation on other mappings of the same physical page
+ * - Restore default attributes when a page is freed
+ * - Guarantee that mappings other than the requested one are
+ * in any state, other than that these do not violate rules for
+ * the CPU you have. Do not depend on any effects on other mappings,
+ * CPUs other than the one you have may have more relaxed rules.
+ * The caller is required to take care of these.
+ */
+
+int _set_memory_uc(unsigned long addr, int numpages);
+int _set_memory_wc(unsigned long addr, int numpages);
+int _set_memory_wt(unsigned long addr, int numpages);
+int _set_memory_wb(unsigned long addr, int numpages);
+int set_memory_uc(unsigned long addr, int numpages);
+int set_memory_wc(unsigned long addr, int numpages);
+int set_memory_wt(unsigned long addr, int numpages);
+int set_memory_wb(unsigned long addr, int numpages);
+int set_memory_np(unsigned long addr, int numpages);
+int set_memory_4k(unsigned long addr, int numpages);
+
+int set_memory_array_uc(unsigned long *addr, int addrinarray);
+int set_memory_array_wc(unsigned long *addr, int addrinarray);
+int set_memory_array_wt(unsigned long *addr, int addrinarray);
+int set_memory_array_wb(unsigned long *addr, int addrinarray);
+
+int set_pages_array_uc(struct page **pages, int addrinarray);
+int set_pages_array_wc(struct page **pages, int addrinarray);
+int set_pages_array_wt(struct page **pages, int addrinarray);
+int set_pages_array_wb(struct page **pages, int addrinarray);
+
+/*
+ * For legacy compatibility with the old APIs, a few functions
+ * are provided that work on a "struct page".
+ * These functions operate ONLY on the 1:1 kernel mapping of the
+ * memory that the struct page represents, and internally just
+ * call the set_memory_* function. See the description of the
+ * set_memory_* function for more details on conventions.
+ *
+ * These APIs should be considered *deprecated* and are likely going to
+ * be removed in the future.
+ * The reason for this is the implicit operation on the 1:1 mapping only,
+ * making this not a generally useful API.
+ *
+ * Specifically, many users of the old APIs had a virtual address,
+ * called virt_to_page() or vmalloc_to_page() on that address to
+ * get a struct page* that the old API required.
+ * To convert these cases, use set_memory_*() on the original
+ * virtual address, do not use these functions.
+ */
+
+int set_pages_uc(struct page *page, int numpages);
+int set_pages_wb(struct page *page, int numpages);
+int set_pages_x(struct page *page, int numpages);
+int set_pages_nx(struct page *page, int numpages);
+int set_pages_ro(struct page *page, int numpages);
+int set_pages_rw(struct page *page, int numpages);
+
+extern int kernel_set_to_readonly;
+void set_kernel_text_rw(void);
+void set_kernel_text_ro(void);
+
+#endif /* _ASM_X86_SET_MEMORY_H */
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index a164862d77e3..733bae07fb29 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -79,6 +79,7 @@ int strcmp(const char *cs, const char *ct);
#define memset(s, c, n) __memset(s, c, n)
#endif
+#define __HAVE_ARCH_MEMCPY_MCSAFE 1
__must_check int memcpy_mcsafe_unrolled(void *dst, const void *src, size_t cnt);
DECLARE_STATIC_KEY_FALSE(mcsafe_key);
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 9fc44b95f7cb..e00e1bd6e7b3 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -73,9 +73,6 @@ struct thread_info {
* thread information flags
* - these are process state flags that various assembly files
* may need to access
- * - pending work-to-be-done flags are in LSW
- * - other flags in MSW
- * Warning: layout of LSW is hardcoded in entry.S
*/
#define TIF_SYSCALL_TRACE 0 /* syscall trace active */
#define TIF_NOTIFY_RESUME 1 /* callback before returning to user */
@@ -87,6 +84,7 @@ struct thread_info {
#define TIF_SECCOMP 8 /* secure computing */
#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
#define TIF_UPROBE 12 /* breakpointed or singlestepping */
+#define TIF_PATCH_PENDING 13 /* pending live patching update */
#define TIF_NOCPUID 15 /* CPUID is not accessible in userland */
#define TIF_NOTSC 16 /* TSC is not accessible in userland */
#define TIF_IA32 17 /* IA32 compatibility process */
@@ -104,13 +102,14 @@ struct thread_info {
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
-#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
+#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
#define _TIF_UPROBE (1 << TIF_UPROBE)
+#define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING)
#define _TIF_NOCPUID (1 << TIF_NOCPUID)
#define _TIF_NOTSC (1 << TIF_NOTSC)
#define _TIF_IA32 (1 << TIF_IA32)
@@ -135,8 +134,10 @@ struct thread_info {
/* work to do on any return to user space */
#define _TIF_ALLWORK_MASK \
- ((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT | \
- _TIF_NOHZ)
+ (_TIF_SYSCALL_TRACE | _TIF_NOTIFY_RESUME | _TIF_SIGPENDING | \
+ _TIF_NEED_RESCHED | _TIF_SINGLESTEP | _TIF_SYSCALL_EMU | \
+ _TIF_SYSCALL_AUDIT | _TIF_USER_RETURN_NOTIFY | _TIF_UPROBE | \
+ _TIF_PATCH_PENDING | _TIF_NOHZ | _TIF_SYSCALL_TRACEPOINT)
/* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW \
@@ -170,9 +171,9 @@ static inline unsigned long current_stack_pointer(void)
* entirely contained by a single stack frame.
*
* Returns:
- * 1 if within a frame
- * -1 if placed across a frame boundary (or outside stack)
- * 0 unable to determine (no frame pointers, etc)
+ * GOOD_FRAME if within a frame
+ * BAD_STACK if placed across a frame boundary (or outside stack)
+ * NOT_STACK unable to determine (no frame pointers, etc)
*/
static inline int arch_within_stack_frames(const void * const stack,
const void * const stackend,
@@ -199,13 +200,14 @@ static inline int arch_within_stack_frames(const void * const stack,
* the copy as invalid.
*/
if (obj + len <= frame)
- return obj >= oldframe + 2 * sizeof(void *) ? 1 : -1;
+ return obj >= oldframe + 2 * sizeof(void *) ?
+ GOOD_FRAME : BAD_STACK;
oldframe = frame;
frame = *(const void * const *)frame;
}
- return -1;
+ return BAD_STACK;
#else
- return 0;
+ return NOT_STACK;
#endif
}
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index 32712a925f26..1ba1536f627e 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -23,7 +23,6 @@
# include <asm/unistd_64.h>
# include <asm/unistd_64_x32.h>
# define __ARCH_WANT_COMPAT_SYS_TIME
-# define __ARCH_WANT_COMPAT_SYS_GETDENTS64
# define __ARCH_WANT_COMPAT_SYS_PREADV64
# define __ARCH_WANT_COMPAT_SYS_PWRITEV64
# define __ARCH_WANT_COMPAT_SYS_PREADV64V2
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
index 9b10dcd51716..e6676495b125 100644
--- a/arch/x86/include/asm/unwind.h
+++ b/arch/x86/include/asm/unwind.h
@@ -11,6 +11,7 @@ struct unwind_state {
unsigned long stack_mask;
struct task_struct *task;
int graph_idx;
+ bool error;
#ifdef CONFIG_FRAME_POINTER
bool got_irq;
unsigned long *bp, *orig_sp;
@@ -42,6 +43,11 @@ void unwind_start(struct unwind_state *state, struct task_struct *task,
__unwind_start(state, task, regs, first_frame);
}
+static inline bool unwind_error(struct unwind_state *state)
+{
+ return state->error;
+}
+
#ifdef CONFIG_FRAME_POINTER
static inline
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index cc54b7026567..35cd06f636ab 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -70,8 +70,10 @@
#define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100
#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200
#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
+#define SECONDARY_EXEC_RDRAND 0x00000800
#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000
+#define SECONDARY_EXEC_RDSEED 0x00010000
#define SECONDARY_EXEC_ENABLE_PML 0x00020000
#define SECONDARY_EXEC_XSAVES 0x00100000
#define SECONDARY_EXEC_TSC_SCALING 0x02000000
@@ -516,12 +518,14 @@ struct vmx_msr_entry {
#define EPT_VIOLATION_READABLE_BIT 3
#define EPT_VIOLATION_WRITABLE_BIT 4
#define EPT_VIOLATION_EXECUTABLE_BIT 5
+#define EPT_VIOLATION_GVA_TRANSLATED_BIT 8
#define EPT_VIOLATION_ACC_READ (1 << EPT_VIOLATION_ACC_READ_BIT)
#define EPT_VIOLATION_ACC_WRITE (1 << EPT_VIOLATION_ACC_WRITE_BIT)
#define EPT_VIOLATION_ACC_INSTR (1 << EPT_VIOLATION_ACC_INSTR_BIT)
#define EPT_VIOLATION_READABLE (1 << EPT_VIOLATION_READABLE_BIT)
#define EPT_VIOLATION_WRITABLE (1 << EPT_VIOLATION_WRITABLE_BIT)
#define EPT_VIOLATION_EXECUTABLE (1 << EPT_VIOLATION_EXECUTABLE_BIT)
+#define EPT_VIOLATION_GVA_TRANSLATED (1 << EPT_VIOLATION_GVA_TRANSLATED_BIT)
/*
* VM-instruction error numbers
diff --git a/arch/x86/include/uapi/asm/Kbuild b/arch/x86/include/uapi/asm/Kbuild
index 3dec769cadf7..83b6e9a0dce4 100644
--- a/arch/x86/include/uapi/asm/Kbuild
+++ b/arch/x86/include/uapi/asm/Kbuild
@@ -4,62 +4,3 @@ include include/uapi/asm-generic/Kbuild.asm
genhdr-y += unistd_32.h
genhdr-y += unistd_64.h
genhdr-y += unistd_x32.h
-header-y += a.out.h
-header-y += auxvec.h
-header-y += bitsperlong.h
-header-y += boot.h
-header-y += bootparam.h
-header-y += byteorder.h
-header-y += debugreg.h
-header-y += e820.h
-header-y += errno.h
-header-y += fcntl.h
-header-y += hw_breakpoint.h
-header-y += hyperv.h
-header-y += ioctl.h
-header-y += ioctls.h
-header-y += ipcbuf.h
-header-y += ist.h
-header-y += kvm.h
-header-y += kvm_para.h
-header-y += kvm_perf.h
-header-y += ldt.h
-header-y += mce.h
-header-y += mman.h
-header-y += msgbuf.h
-header-y += msr-index.h
-header-y += msr.h
-header-y += mtrr.h
-header-y += param.h
-header-y += perf_regs.h
-header-y += poll.h
-header-y += posix_types.h
-header-y += posix_types_32.h
-header-y += posix_types_64.h
-header-y += posix_types_x32.h
-header-y += prctl.h
-header-y += processor-flags.h
-header-y += ptrace-abi.h
-header-y += ptrace.h
-header-y += resource.h
-header-y += sembuf.h
-header-y += setup.h
-header-y += shmbuf.h
-header-y += sigcontext.h
-header-y += sigcontext32.h
-header-y += siginfo.h
-header-y += signal.h
-header-y += socket.h
-header-y += sockios.h
-header-y += stat.h
-header-y += statfs.h
-header-y += svm.h
-header-y += swab.h
-header-y += termbits.h
-header-y += termios.h
-header-y += types.h
-header-y += ucontext.h
-header-y += unistd.h
-header-y += vm86.h
-header-y += vmx.h
-header-y += vsyscall.h
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
index 3a20ccf787b8..432df4b1baec 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -124,7 +124,7 @@
* Recommend using hypercall for address space switches rather
* than MOV to CR3 instruction
*/
-#define HV_X64_MWAIT_RECOMMENDED (1 << 0)
+#define HV_X64_AS_SWITCH_RECOMMENDED (1 << 0)
/* Recommend using hypercall for local TLB flushes rather
* than INVLPG or MOV to CR3 instructions */
#define HV_X64_LOCAL_TLB_FLUSH_RECOMMENDED (1 << 1)
@@ -148,6 +148,11 @@
#define HV_X64_RELAXED_TIMING_RECOMMENDED (1 << 5)
/*
+ * Virtual APIC support
+ */
+#define HV_X64_DEPRECATING_AEOI_RECOMMENDED (1 << 9)
+
+/*
* Crash notification flag.
*/
#define HV_CRASH_CTL_CRASH_NOTIFY (1ULL << 63)
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 739c0c594022..c2824d02ba37 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -9,6 +9,9 @@
#include <linux/types.h>
#include <linux/ioctl.h>
+#define KVM_PIO_PAGE_OFFSET 1
+#define KVM_COALESCED_MMIO_PAGE_OFFSET 2
+
#define DE_VECTOR 0
#define DB_VECTOR 1
#define BP_VECTOR 3
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index 14458658e988..690a2dcf4078 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -76,7 +76,11 @@
#define EXIT_REASON_WBINVD 54
#define EXIT_REASON_XSETBV 55
#define EXIT_REASON_APIC_WRITE 56
+#define EXIT_REASON_RDRAND 57
#define EXIT_REASON_INVPCID 58
+#define EXIT_REASON_VMFUNC 59
+#define EXIT_REASON_ENCLS 60
+#define EXIT_REASON_RDSEED 61
#define EXIT_REASON_PML_FULL 62
#define EXIT_REASON_XSAVES 63
#define EXIT_REASON_XRSTORS 64
@@ -90,6 +94,7 @@
{ EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \
{ EXIT_REASON_CPUID, "CPUID" }, \
{ EXIT_REASON_HLT, "HLT" }, \
+ { EXIT_REASON_INVD, "INVD" }, \
{ EXIT_REASON_INVLPG, "INVLPG" }, \
{ EXIT_REASON_RDPMC, "RDPMC" }, \
{ EXIT_REASON_RDTSC, "RDTSC" }, \
@@ -108,6 +113,8 @@
{ EXIT_REASON_IO_INSTRUCTION, "IO_INSTRUCTION" }, \
{ EXIT_REASON_MSR_READ, "MSR_READ" }, \
{ EXIT_REASON_MSR_WRITE, "MSR_WRITE" }, \
+ { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \
+ { EXIT_REASON_MSR_LOAD_FAIL, "MSR_LOAD_FAIL" }, \
{ EXIT_REASON_MWAIT_INSTRUCTION, "MWAIT_INSTRUCTION" }, \
{ EXIT_REASON_MONITOR_TRAP_FLAG, "MONITOR_TRAP_FLAG" }, \
{ EXIT_REASON_MONITOR_INSTRUCTION, "MONITOR_INSTRUCTION" }, \
@@ -115,20 +122,24 @@
{ EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \
{ EXIT_REASON_TPR_BELOW_THRESHOLD, "TPR_BELOW_THRESHOLD" }, \
{ EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \
- { EXIT_REASON_GDTR_IDTR, "GDTR_IDTR" }, \
- { EXIT_REASON_LDTR_TR, "LDTR_TR" }, \
+ { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \
+ { EXIT_REASON_GDTR_IDTR, "GDTR_IDTR" }, \
+ { EXIT_REASON_LDTR_TR, "LDTR_TR" }, \
{ EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \
{ EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \
{ EXIT_REASON_INVEPT, "INVEPT" }, \
+ { EXIT_REASON_RDTSCP, "RDTSCP" }, \
{ EXIT_REASON_PREEMPTION_TIMER, "PREEMPTION_TIMER" }, \
+ { EXIT_REASON_INVVPID, "INVVPID" }, \
{ EXIT_REASON_WBINVD, "WBINVD" }, \
+ { EXIT_REASON_XSETBV, "XSETBV" }, \
{ EXIT_REASON_APIC_WRITE, "APIC_WRITE" }, \
- { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \
- { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \
- { EXIT_REASON_MSR_LOAD_FAIL, "MSR_LOAD_FAIL" }, \
- { EXIT_REASON_INVD, "INVD" }, \
- { EXIT_REASON_INVVPID, "INVVPID" }, \
+ { EXIT_REASON_RDRAND, "RDRAND" }, \
{ EXIT_REASON_INVPCID, "INVPCID" }, \
+ { EXIT_REASON_VMFUNC, "VMFUNC" }, \
+ { EXIT_REASON_ENCLS, "ENCLS" }, \
+ { EXIT_REASON_RDSEED, "RDSEED" }, \
+ { EXIT_REASON_PML_FULL, "PML_FULL" }, \
{ EXIT_REASON_XSAVES, "XSAVES" }, \
{ EXIT_REASON_XRSTORS, "XRSTORS" }
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index df083efe6ee0..815dd63f49d0 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -36,7 +36,7 @@
#include <asm/proto.h>
#include <asm/iommu.h>
#include <asm/gart.h>
-#include <asm/cacheflush.h>
+#include <asm/set_memory.h>
#include <asm/swiotlb.h>
#include <asm/dma.h>
#include <asm/amd_nb.h>
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index b6da6e75e3a8..bb5abe8f5fd4 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -16,7 +16,7 @@
#ifdef CONFIG_X86_64
# include <asm/mmconfig.h>
-# include <asm/cacheflush.h>
+# include <asm/set_memory.h>
#endif
#include "cpu.h"
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index a44ef52184df..0af86d9242da 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -17,7 +17,7 @@
#include <asm/paravirt.h>
#include <asm/alternative.h>
#include <asm/pgtable.h>
-#include <asm/cacheflush.h>
+#include <asm/set_memory.h>
void __init check_bugs(void)
{
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index b5375b9497b3..04cb8d34ccb8 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -49,6 +49,9 @@ void hyperv_vector_handler(struct pt_regs *regs)
if (vmbus_handler)
vmbus_handler();
+ if (ms_hyperv.hints & HV_X64_DEPRECATING_AEOI_RECOMMENDED)
+ ack_APIC_irq();
+
exiting_irq();
set_irq_regs(old_regs);
}
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index ff7e4b3988ed..d907c3d8633f 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -526,6 +526,7 @@ static const struct pci_device_id intel_early_ids[] __initconst = {
INTEL_SKL_IDS(&gen9_early_ops),
INTEL_BXT_IDS(&gen9_early_ops),
INTEL_KBL_IDS(&gen9_early_ops),
+ INTEL_GLK_IDS(&gen9_early_ops),
};
static void __init
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 5b7153540727..0651e974dcb3 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -24,7 +24,7 @@
#include <trace/syscall.h>
-#include <asm/cacheflush.h>
+#include <asm/set_memory.h>
#include <asm/kprobes.h>
#include <asm/ftrace.h>
#include <asm/nops.h>
@@ -533,7 +533,13 @@ static void do_sync_core(void *data)
static void run_sync(void)
{
- int enable_irqs = irqs_disabled();
+ int enable_irqs;
+
+ /* No need to sync if there's only one CPU */
+ if (num_online_cpus() == 1)
+ return;
+
+ enable_irqs = irqs_disabled();
/* We may be called with interrupts disabled (on bootup). */
if (enable_irqs)
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index be22f5a2192e..4e3b8a587c88 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -418,6 +418,7 @@ struct legacy_pic default_legacy_pic = {
};
struct legacy_pic *legacy_pic = &default_legacy_pic;
+EXPORT_SYMBOL(legacy_pic);
static int __init i8259A_init_ops(void)
{
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 19e1f2a6d7b0..5b2bbfbb3712 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -61,6 +61,7 @@
#include <asm/alternative.h>
#include <asm/insn.h>
#include <asm/debugreg.h>
+#include <asm/set_memory.h>
#include "common.h"
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 9aadff3d0902..901c640d152f 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -37,6 +37,7 @@
#include <asm/alternative.h>
#include <asm/insn.h>
#include <asm/debugreg.h>
+#include <asm/set_memory.h>
#include "common.h"
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 14f65a5f938e..da5c09789984 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -396,9 +396,9 @@ static u64 kvm_steal_clock(int cpu)
src = &per_cpu(steal_time, cpu);
do {
version = src->version;
- rmb();
+ virt_rmb();
steal = src->steal;
- rmb();
+ virt_rmb();
} while ((version & 1) || (version != src->version));
return steal;
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 5f43cec296c5..8c53c5d7a1bc 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -23,7 +23,7 @@
#include <asm/io_apic.h>
#include <asm/cpufeature.h>
#include <asm/desc.h>
-#include <asm/cacheflush.h>
+#include <asm/set_memory.h>
#include <asm/debugreg.h>
static void set_idt(void *newidt, __u16 limit)
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 085c3b300d32..ce640428d6fe 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -27,6 +27,7 @@
#include <asm/debugreg.h>
#include <asm/kexec-bzimage64.h>
#include <asm/setup.h>
+#include <asm/set_memory.h>
#ifdef CONFIG_KEXEC_FILE
static struct kexec_file_ops *kexec_file_loaders[] = {
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 477ae806c2fa..f67bd3205df7 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -85,7 +85,7 @@ void *module_alloc(unsigned long size)
p = __vmalloc_node_range(size, MODULE_ALIGN,
MODULES_VADDR + get_module_load_offset(),
- MODULES_END, GFP_KERNEL | __GFP_HIGHMEM,
+ MODULES_END, GFP_KERNEL,
PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
__builtin_return_address(0));
if (p && (kasan_module_alloc(p, size) < 0)) {
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 8e2b79b88e51..8dabd7bf1673 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -76,6 +76,101 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
}
EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
+#ifdef CONFIG_HAVE_RELIABLE_STACKTRACE
+
+#define STACKTRACE_DUMP_ONCE(task) ({ \
+ static bool __section(.data.unlikely) __dumped; \
+ \
+ if (!__dumped) { \
+ __dumped = true; \
+ WARN_ON(1); \
+ show_stack(task, NULL); \
+ } \
+})
+
+static int __save_stack_trace_reliable(struct stack_trace *trace,
+ struct task_struct *task)
+{
+ struct unwind_state state;
+ struct pt_regs *regs;
+ unsigned long addr;
+
+ for (unwind_start(&state, task, NULL, NULL); !unwind_done(&state);
+ unwind_next_frame(&state)) {
+
+ regs = unwind_get_entry_regs(&state);
+ if (regs) {
+ /*
+ * Kernel mode registers on the stack indicate an
+ * in-kernel interrupt or exception (e.g., preemption
+ * or a page fault), which can make frame pointers
+ * unreliable.
+ */
+ if (!user_mode(regs))
+ return -EINVAL;
+
+ /*
+ * The last frame contains the user mode syscall
+ * pt_regs. Skip it and finish the unwind.
+ */
+ unwind_next_frame(&state);
+ if (!unwind_done(&state)) {
+ STACKTRACE_DUMP_ONCE(task);
+ return -EINVAL;
+ }
+ break;
+ }
+
+ addr = unwind_get_return_address(&state);
+
+ /*
+ * A NULL or invalid return address probably means there's some
+ * generated code which __kernel_text_address() doesn't know
+ * about.
+ */
+ if (!addr) {
+ STACKTRACE_DUMP_ONCE(task);
+ return -EINVAL;
+ }
+
+ if (save_stack_address(trace, addr, false))
+ return -EINVAL;
+ }
+
+ /* Check for stack corruption */
+ if (unwind_error(&state)) {
+ STACKTRACE_DUMP_ONCE(task);
+ return -EINVAL;
+ }
+
+ if (trace->nr_entries < trace->max_entries)
+ trace->entries[trace->nr_entries++] = ULONG_MAX;
+
+ return 0;
+}
+
+/*
+ * This function returns an error if it detects any unreliable features of the
+ * stack. Otherwise it guarantees that the stack trace is reliable.
+ *
+ * If the task is not 'current', the caller *must* ensure the task is inactive.
+ */
+int save_stack_trace_tsk_reliable(struct task_struct *tsk,
+ struct stack_trace *trace)
+{
+ int ret;
+
+ if (!try_get_task_stack(tsk))
+ return -EINVAL;
+
+ ret = __save_stack_trace_reliable(trace, tsk);
+
+ put_task_stack(tsk);
+
+ return ret;
+}
+#endif /* CONFIG_HAVE_RELIABLE_STACKTRACE */
+
/* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */
struct stack_frame_user {
@@ -138,4 +233,3 @@ void save_stack_trace_user(struct stack_trace *trace)
if (trace->nr_entries < trace->max_entries)
trace->entries[trace->nr_entries++] = ULONG_MAX;
}
-
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index d4c8011a2293..4b1724059909 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -514,6 +514,9 @@ int tboot_force_iommu(void)
if (!tboot_enabled())
return 0;
+ if (!intel_iommu_tboot_noforce)
+ return 1;
+
if (no_iommu || swiotlb || dmar_disabled)
pr_warning("Forcing Intel-IOMMU to enabled\n");
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index fec70fe3b1ec..82c6d7f1fd73 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -283,6 +283,8 @@ bool unwind_next_frame(struct unwind_state *state)
return true;
bad_address:
+ state->error = true;
+
/*
* When unwinding a non-current task, the task might actually be
* running on another CPU, in which case it could be modifying its
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index ab8e32f7b9a8..760433b2574a 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -86,18 +86,6 @@ config KVM_MMU_AUDIT
This option adds a R/W kVM module parameter 'mmu_audit', which allows
auditing of KVM MMU events at runtime.
-config KVM_DEVICE_ASSIGNMENT
- bool "KVM legacy PCI device assignment support (DEPRECATED)"
- depends on KVM && PCI && IOMMU_API
- default n
- ---help---
- Provide support for legacy PCI device assignment through KVM. The
- kernel now also supports a full featured userspace device driver
- framework through VFIO, which supersedes this support and provides
- better security.
-
- If unsure, say N.
-
# OK, it's a little counter-intuitive to do this, but it puts it neatly under
# the virtualization menu.
source drivers/vhost/Kconfig
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 3bff20710471..09d4b17be022 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -15,8 +15,6 @@ kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
hyperv.o page_track.o debugfs.o
-kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += assigned-dev.o iommu.o
-
kvm-intel-y += vmx.o pmu_intel.o
kvm-amd-y += svm.o pmu_amd.o
diff --git a/arch/x86/kvm/assigned-dev.c b/arch/x86/kvm/assigned-dev.c
deleted file mode 100644
index 308b8597c691..000000000000
--- a/arch/x86/kvm/assigned-dev.c
+++ /dev/null
@@ -1,1058 +0,0 @@
-/*
- * Kernel-based Virtual Machine - device assignment support
- *
- * Copyright (C) 2010 Red Hat, Inc. and/or its affiliates.
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
- */
-
-#include <linux/kvm_host.h>
-#include <linux/kvm.h>
-#include <linux/uaccess.h>
-#include <linux/vmalloc.h>
-#include <linux/errno.h>
-#include <linux/spinlock.h>
-#include <linux/pci.h>
-#include <linux/interrupt.h>
-#include <linux/slab.h>
-#include <linux/namei.h>
-#include <linux/fs.h>
-#include "irq.h"
-#include "assigned-dev.h"
-#include "trace/events/kvm.h"
-
-struct kvm_assigned_dev_kernel {
- struct kvm_irq_ack_notifier ack_notifier;
- struct list_head list;
- int assigned_dev_id;
- int host_segnr;
- int host_busnr;
- int host_devfn;
- unsigned int entries_nr;
- int host_irq;
- bool host_irq_disabled;
- bool pci_2_3;
- struct msix_entry *host_msix_entries;
- int guest_irq;
- struct msix_entry *guest_msix_entries;
- unsigned long irq_requested_type;
- int irq_source_id;
- int flags;
- struct pci_dev *dev;
- struct kvm *kvm;
- spinlock_t intx_lock;
- spinlock_t intx_mask_lock;
- char irq_name[32];
- struct pci_saved_state *pci_saved_state;
-};
-
-static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
- int assigned_dev_id)
-{
- struct kvm_assigned_dev_kernel *match;
-
- list_for_each_entry(match, head, list) {
- if (match->assigned_dev_id == assigned_dev_id)
- return match;
- }
- return NULL;
-}
-
-static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
- *assigned_dev, int irq)
-{
- int i, index;
- struct msix_entry *host_msix_entries;
-
- host_msix_entries = assigned_dev->host_msix_entries;
-
- index = -1;
- for (i = 0; i < assigned_dev->entries_nr; i++)
- if (irq == host_msix_entries[i].vector) {
- index = i;
- break;
- }
- if (index < 0)
- printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n");
-
- return index;
-}
-
-static irqreturn_t kvm_assigned_dev_intx(int irq, void *dev_id)
-{
- struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
- int ret;
-
- spin_lock(&assigned_dev->intx_lock);
- if (pci_check_and_mask_intx(assigned_dev->dev)) {
- assigned_dev->host_irq_disabled = true;
- ret = IRQ_WAKE_THREAD;
- } else
- ret = IRQ_NONE;
- spin_unlock(&assigned_dev->intx_lock);
-
- return ret;
-}
-
-static void
-kvm_assigned_dev_raise_guest_irq(struct kvm_assigned_dev_kernel *assigned_dev,
- int vector)
-{
- if (unlikely(assigned_dev->irq_requested_type &
- KVM_DEV_IRQ_GUEST_INTX)) {
- spin_lock(&assigned_dev->intx_mask_lock);
- if (!(assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX))
- kvm_set_irq(assigned_dev->kvm,
- assigned_dev->irq_source_id, vector, 1,
- false);
- spin_unlock(&assigned_dev->intx_mask_lock);
- } else
- kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
- vector, 1, false);
-}
-
-static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id)
-{
- struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
-
- if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
- spin_lock_irq(&assigned_dev->intx_lock);
- disable_irq_nosync(irq);
- assigned_dev->host_irq_disabled = true;
- spin_unlock_irq(&assigned_dev->intx_lock);
- }
-
- kvm_assigned_dev_raise_guest_irq(assigned_dev,
- assigned_dev->guest_irq);
-
- return IRQ_HANDLED;
-}
-
-/*
- * Deliver an IRQ in an atomic context if we can, or return a failure,
- * user can retry in a process context.
- * Return value:
- * -EWOULDBLOCK - Can't deliver in atomic context: retry in a process context.
- * Other values - No need to retry.
- */
-static int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq,
- int level)
-{
- struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
- struct kvm_kernel_irq_routing_entry *e;
- int ret = -EINVAL;
- int idx;
-
- trace_kvm_set_irq(irq, level, irq_source_id);
-
- /*
- * Injection into either PIC or IOAPIC might need to scan all CPUs,
- * which would need to be retried from thread context; when same GSI
- * is connected to both PIC and IOAPIC, we'd have to report a
- * partial failure here.
- * Since there's no easy way to do this, we only support injecting MSI
- * which is limited to 1:1 GSI mapping.
- */
- idx = srcu_read_lock(&kvm->irq_srcu);
- if (kvm_irq_map_gsi(kvm, entries, irq) > 0) {
- e = &entries[0];
- ret = kvm_arch_set_irq_inatomic(e, kvm, irq_source_id,
- irq, level);
- }
- srcu_read_unlock(&kvm->irq_srcu, idx);
- return ret;
-}
-
-
-static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id)
-{
- struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
- int ret = kvm_set_irq_inatomic(assigned_dev->kvm,
- assigned_dev->irq_source_id,
- assigned_dev->guest_irq, 1);
- return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
-}
-
-static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id)
-{
- struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
-
- kvm_assigned_dev_raise_guest_irq(assigned_dev,
- assigned_dev->guest_irq);
-
- return IRQ_HANDLED;
-}
-
-static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id)
-{
- struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
- int index = find_index_from_host_irq(assigned_dev, irq);
- u32 vector;
- int ret = 0;
-
- if (index >= 0) {
- vector = assigned_dev->guest_msix_entries[index].vector;
- ret = kvm_set_irq_inatomic(assigned_dev->kvm,
- assigned_dev->irq_source_id,
- vector, 1);
- }
-
- return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
-}
-
-static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id)
-{
- struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
- int index = find_index_from_host_irq(assigned_dev, irq);
- u32 vector;
-
- if (index >= 0) {
- vector = assigned_dev->guest_msix_entries[index].vector;
- kvm_assigned_dev_raise_guest_irq(assigned_dev, vector);
- }
-
- return IRQ_HANDLED;
-}
-
-/* Ack the irq line for an assigned device */
-static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
-{
- struct kvm_assigned_dev_kernel *dev =
- container_of(kian, struct kvm_assigned_dev_kernel,
- ack_notifier);
-
- kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0, false);
-
- spin_lock(&dev->intx_mask_lock);
-
- if (!(dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) {
- bool reassert = false;
-
- spin_lock_irq(&dev->intx_lock);
- /*
- * The guest IRQ may be shared so this ack can come from an
- * IRQ for another guest device.
- */
- if (dev->host_irq_disabled) {
- if (!(dev->flags & KVM_DEV_ASSIGN_PCI_2_3))
- enable_irq(dev->host_irq);
- else if (!pci_check_and_unmask_intx(dev->dev))
- reassert = true;
- dev->host_irq_disabled = reassert;
- }
- spin_unlock_irq(&dev->intx_lock);
-
- if (reassert)
- kvm_set_irq(dev->kvm, dev->irq_source_id,
- dev->guest_irq, 1, false);
- }
-
- spin_unlock(&dev->intx_mask_lock);
-}
-
-static void deassign_guest_irq(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *assigned_dev)
-{
- if (assigned_dev->ack_notifier.gsi != -1)
- kvm_unregister_irq_ack_notifier(kvm,
- &assigned_dev->ack_notifier);
-
- kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
- assigned_dev->guest_irq, 0, false);
-
- if (assigned_dev->irq_source_id != -1)
- kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
- assigned_dev->irq_source_id = -1;
- assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK);
-}
-
-/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */
-static void deassign_host_irq(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *assigned_dev)
-{
- /*
- * We disable irq here to prevent further events.
- *
- * Notice this maybe result in nested disable if the interrupt type is
- * INTx, but it's OK for we are going to free it.
- *
- * If this function is a part of VM destroy, please ensure that till
- * now, the kvm state is still legal for probably we also have to wait
- * on a currently running IRQ handler.
- */
- if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
- int i;
- for (i = 0; i < assigned_dev->entries_nr; i++)
- disable_irq(assigned_dev->host_msix_entries[i].vector);
-
- for (i = 0; i < assigned_dev->entries_nr; i++)
- free_irq(assigned_dev->host_msix_entries[i].vector,
- assigned_dev);
-
- assigned_dev->entries_nr = 0;
- kfree(assigned_dev->host_msix_entries);
- kfree(assigned_dev->guest_msix_entries);
- pci_disable_msix(assigned_dev->dev);
- } else {
- /* Deal with MSI and INTx */
- if ((assigned_dev->irq_requested_type &
- KVM_DEV_IRQ_HOST_INTX) &&
- (assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
- spin_lock_irq(&assigned_dev->intx_lock);
- pci_intx(assigned_dev->dev, false);
- spin_unlock_irq(&assigned_dev->intx_lock);
- synchronize_irq(assigned_dev->host_irq);
- } else
- disable_irq(assigned_dev->host_irq);
-
- free_irq(assigned_dev->host_irq, assigned_dev);
-
- if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI)
- pci_disable_msi(assigned_dev->dev);
- }
-
- assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK);
-}
-
-static int kvm_deassign_irq(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *assigned_dev,
- unsigned long irq_requested_type)
-{
- unsigned long guest_irq_type, host_irq_type;
-
- if (!irqchip_in_kernel(kvm))
- return -EINVAL;
- /* no irq assignment to deassign */
- if (!assigned_dev->irq_requested_type)
- return -ENXIO;
-
- host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK;
- guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK;
-
- if (host_irq_type)
- deassign_host_irq(kvm, assigned_dev);
- if (guest_irq_type)
- deassign_guest_irq(kvm, assigned_dev);
-
- return 0;
-}
-
-static void kvm_free_assigned_irq(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *assigned_dev)
-{
- kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type);
-}
-
-static void kvm_free_assigned_device(struct kvm *kvm,
- struct kvm_assigned_dev_kernel
- *assigned_dev)
-{
- kvm_free_assigned_irq(kvm, assigned_dev);
-
- pci_reset_function(assigned_dev->dev);
- if (pci_load_and_free_saved_state(assigned_dev->dev,
- &assigned_dev->pci_saved_state))
- printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
- __func__, dev_name(&assigned_dev->dev->dev));
- else
- pci_restore_state(assigned_dev->dev);
-
- pci_clear_dev_assigned(assigned_dev->dev);
-
- pci_release_regions(assigned_dev->dev);
- pci_disable_device(assigned_dev->dev);
- pci_dev_put(assigned_dev->dev);
-
- list_del(&assigned_dev->list);
- kfree(assigned_dev);
-}
-
-void kvm_free_all_assigned_devices(struct kvm *kvm)
-{
- struct kvm_assigned_dev_kernel *assigned_dev, *tmp;
-
- list_for_each_entry_safe(assigned_dev, tmp,
- &kvm->arch.assigned_dev_head, list) {
- kvm_free_assigned_device(kvm, assigned_dev);
- }
-}
-
-static int assigned_device_enable_host_intx(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *dev)
-{
- irq_handler_t irq_handler;
- unsigned long flags;
-
- dev->host_irq = dev->dev->irq;
-
- /*
- * We can only share the IRQ line with other host devices if we are
- * able to disable the IRQ source at device-level - independently of
- * the guest driver. Otherwise host devices may suffer from unbounded
- * IRQ latencies when the guest keeps the line asserted.
- */
- if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
- irq_handler = kvm_assigned_dev_intx;
- flags = IRQF_SHARED;
- } else {
- irq_handler = NULL;
- flags = IRQF_ONESHOT;
- }
- if (request_threaded_irq(dev->host_irq, irq_handler,
- kvm_assigned_dev_thread_intx, flags,
- dev->irq_name, dev))
- return -EIO;
-
- if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
- spin_lock_irq(&dev->intx_lock);
- pci_intx(dev->dev, true);
- spin_unlock_irq(&dev->intx_lock);
- }
- return 0;
-}
-
-static int assigned_device_enable_host_msi(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *dev)
-{
- int r;
-
- if (!dev->dev->msi_enabled) {
- r = pci_enable_msi(dev->dev);
- if (r)
- return r;
- }
-
- dev->host_irq = dev->dev->irq;
- if (request_threaded_irq(dev->host_irq, kvm_assigned_dev_msi,
- kvm_assigned_dev_thread_msi, 0,
- dev->irq_name, dev)) {
- pci_disable_msi(dev->dev);
- return -EIO;
- }
-
- return 0;
-}
-
-static int assigned_device_enable_host_msix(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *dev)
-{
- int i, r = -EINVAL;
-
- /* host_msix_entries and guest_msix_entries should have been
- * initialized */
- if (dev->entries_nr == 0)
- return r;
-
- r = pci_enable_msix_exact(dev->dev,
- dev->host_msix_entries, dev->entries_nr);
- if (r)
- return r;
-
- for (i = 0; i < dev->entries_nr; i++) {
- r = request_threaded_irq(dev->host_msix_entries[i].vector,
- kvm_assigned_dev_msix,
- kvm_assigned_dev_thread_msix,
- 0, dev->irq_name, dev);
- if (r)
- goto err;
- }
-
- return 0;
-err:
- for (i -= 1; i >= 0; i--)
- free_irq(dev->host_msix_entries[i].vector, dev);
- pci_disable_msix(dev->dev);
- return r;
-}
-
-static int assigned_device_enable_guest_intx(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *dev,
- struct kvm_assigned_irq *irq)
-{
- dev->guest_irq = irq->guest_irq;
- dev->ack_notifier.gsi = irq->guest_irq;
- return 0;
-}
-
-static int assigned_device_enable_guest_msi(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *dev,
- struct kvm_assigned_irq *irq)
-{
- dev->guest_irq = irq->guest_irq;
- dev->ack_notifier.gsi = -1;
- return 0;
-}
-
-static int assigned_device_enable_guest_msix(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *dev,
- struct kvm_assigned_irq *irq)
-{
- dev->guest_irq = irq->guest_irq;
- dev->ack_notifier.gsi = -1;
- return 0;
-}
-
-static int assign_host_irq(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *dev,
- __u32 host_irq_type)
-{
- int r = -EEXIST;
-
- if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK)
- return r;
-
- snprintf(dev->irq_name, sizeof(dev->irq_name), "kvm:%s",
- pci_name(dev->dev));
-
- switch (host_irq_type) {
- case KVM_DEV_IRQ_HOST_INTX:
- r = assigned_device_enable_host_intx(kvm, dev);
- break;
- case KVM_DEV_IRQ_HOST_MSI:
- r = assigned_device_enable_host_msi(kvm, dev);
- break;
- case KVM_DEV_IRQ_HOST_MSIX:
- r = assigned_device_enable_host_msix(kvm, dev);
- break;
- default:
- r = -EINVAL;
- }
- dev->host_irq_disabled = false;
-
- if (!r)
- dev->irq_requested_type |= host_irq_type;
-
- return r;
-}
-
-static int assign_guest_irq(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *dev,
- struct kvm_assigned_irq *irq,
- unsigned long guest_irq_type)
-{
- int id;
- int r = -EEXIST;
-
- if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK)
- return r;
-
- id = kvm_request_irq_source_id(kvm);
- if (id < 0)
- return id;
-
- dev->irq_source_id = id;
-
- switch (guest_irq_type) {
- case KVM_DEV_IRQ_GUEST_INTX:
- r = assigned_device_enable_guest_intx(kvm, dev, irq);
- break;
- case KVM_DEV_IRQ_GUEST_MSI:
- r = assigned_device_enable_guest_msi(kvm, dev, irq);
- break;
- case KVM_DEV_IRQ_GUEST_MSIX:
- r = assigned_device_enable_guest_msix(kvm, dev, irq);
- break;
- default:
- r = -EINVAL;
- }
-
- if (!r) {
- dev->irq_requested_type |= guest_irq_type;
- if (dev->ack_notifier.gsi != -1)
- kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);
- } else {
- kvm_free_irq_source_id(kvm, dev->irq_source_id);
- dev->irq_source_id = -1;
- }
-
- return r;
-}
-
-/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */
-static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
- struct kvm_assigned_irq *assigned_irq)
-{
- int r = -EINVAL;
- struct kvm_assigned_dev_kernel *match;
- unsigned long host_irq_type, guest_irq_type;
-
- if (!irqchip_in_kernel(kvm))
- return r;
-
- mutex_lock(&kvm->lock);
- r = -ENODEV;
- match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
- assigned_irq->assigned_dev_id);
- if (!match)
- goto out;
-
- host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK);
- guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK);
-
- r = -EINVAL;
- /* can only assign one type at a time */
- if (hweight_long(host_irq_type) > 1)
- goto out;
- if (hweight_long(guest_irq_type) > 1)
- goto out;
- if (host_irq_type == 0 && guest_irq_type == 0)
- goto out;
-
- r = 0;
- if (host_irq_type)
- r = assign_host_irq(kvm, match, host_irq_type);
- if (r)
- goto out;
-
- if (guest_irq_type)
- r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type);
-out:
- mutex_unlock(&kvm->lock);
- return r;
-}
-
-static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm,
- struct kvm_assigned_irq
- *assigned_irq)
-{
- int r = -ENODEV;
- struct kvm_assigned_dev_kernel *match;
- unsigned long irq_type;
-
- mutex_lock(&kvm->lock);
-
- match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
- assigned_irq->assigned_dev_id);
- if (!match)
- goto out;
-
- irq_type = assigned_irq->flags & (KVM_DEV_IRQ_HOST_MASK |
- KVM_DEV_IRQ_GUEST_MASK);
- r = kvm_deassign_irq(kvm, match, irq_type);
-out:
- mutex_unlock(&kvm->lock);
- return r;
-}
-
-/*
- * We want to test whether the caller has been granted permissions to
- * use this device. To be able to configure and control the device,
- * the user needs access to PCI configuration space and BAR resources.
- * These are accessed through PCI sysfs. PCI config space is often
- * passed to the process calling this ioctl via file descriptor, so we
- * can't rely on access to that file. We can check for permissions
- * on each of the BAR resource files, which is a pretty clear
- * indicator that the user has been granted access to the device.
- */
-static int probe_sysfs_permissions(struct pci_dev *dev)
-{
-#ifdef CONFIG_SYSFS
- int i;
- bool bar_found = false;
-
- for (i = PCI_STD_RESOURCES; i <= PCI_STD_RESOURCE_END; i++) {
- char *kpath, *syspath;
- struct path path;
- struct inode *inode;
- int r;
-
- if (!pci_resource_len(dev, i))
- continue;
-
- kpath = kobject_get_path(&dev->dev.kobj, GFP_KERNEL);
- if (!kpath)
- return -ENOMEM;
-
- /* Per sysfs-rules, sysfs is always at /sys */
- syspath = kasprintf(GFP_KERNEL, "/sys%s/resource%d", kpath, i);
- kfree(kpath);
- if (!syspath)
- return -ENOMEM;
-
- r = kern_path(syspath, LOOKUP_FOLLOW, &path);
- kfree(syspath);
- if (r)
- return r;
-
- inode = d_backing_inode(path.dentry);
-
- r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS);
- path_put(&path);
- if (r)
- return r;
-
- bar_found = true;
- }
-
- /* If no resources, probably something special */
- if (!bar_found)
- return -EPERM;
-
- return 0;
-#else
- return -EINVAL; /* No way to control the device without sysfs */
-#endif
-}
-
-static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
- struct kvm_assigned_pci_dev *assigned_dev)
-{
- int r = 0, idx;
- struct kvm_assigned_dev_kernel *match;
- struct pci_dev *dev;
-
- if (!(assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU))
- return -EINVAL;
-
- mutex_lock(&kvm->lock);
- idx = srcu_read_lock(&kvm->srcu);
-
- match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
- assigned_dev->assigned_dev_id);
- if (match) {
- /* device already assigned */
- r = -EEXIST;
- goto out;
- }
-
- match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL);
- if (match == NULL) {
- printk(KERN_INFO "%s: Couldn't allocate memory\n",
- __func__);
- r = -ENOMEM;
- goto out;
- }
- dev = pci_get_domain_bus_and_slot(assigned_dev->segnr,
- assigned_dev->busnr,
- assigned_dev->devfn);
- if (!dev) {
- printk(KERN_INFO "%s: host device not found\n", __func__);
- r = -EINVAL;
- goto out_free;
- }
-
- /* Don't allow bridges to be assigned */
- if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) {
- r = -EPERM;
- goto out_put;
- }
-
- r = probe_sysfs_permissions(dev);
- if (r)
- goto out_put;
-
- if (pci_enable_device(dev)) {
- printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
- r = -EBUSY;
- goto out_put;
- }
- r = pci_request_regions(dev, "kvm_assigned_device");
- if (r) {
- printk(KERN_INFO "%s: Could not get access to device regions\n",
- __func__);
- goto out_disable;
- }
-
- pci_reset_function(dev);
- pci_save_state(dev);
- match->pci_saved_state = pci_store_saved_state(dev);
- if (!match->pci_saved_state)
- printk(KERN_DEBUG "%s: Couldn't store %s saved state\n",
- __func__, dev_name(&dev->dev));
-
- if (!pci_intx_mask_supported(dev))
- assigned_dev->flags &= ~KVM_DEV_ASSIGN_PCI_2_3;
-
- match->assigned_dev_id = assigned_dev->assigned_dev_id;
- match->host_segnr = assigned_dev->segnr;
- match->host_busnr = assigned_dev->busnr;
- match->host_devfn = assigned_dev->devfn;
- match->flags = assigned_dev->flags;
- match->dev = dev;
- spin_lock_init(&match->intx_lock);
- spin_lock_init(&match->intx_mask_lock);
- match->irq_source_id = -1;
- match->kvm = kvm;
- match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
-
- list_add(&match->list, &kvm->arch.assigned_dev_head);
-
- if (!kvm->arch.iommu_domain) {
- r = kvm_iommu_map_guest(kvm);
- if (r)
- goto out_list_del;
- }
- r = kvm_assign_device(kvm, match->dev);
- if (r)
- goto out_list_del;
-
-out:
- srcu_read_unlock(&kvm->srcu, idx);
- mutex_unlock(&kvm->lock);
- return r;
-out_list_del:
- if (pci_load_and_free_saved_state(dev, &match->pci_saved_state))
- printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
- __func__, dev_name(&dev->dev));
- list_del(&match->list);
- pci_release_regions(dev);
-out_disable:
- pci_disable_device(dev);
-out_put:
- pci_dev_put(dev);
-out_free:
- kfree(match);
- srcu_read_unlock(&kvm->srcu, idx);
- mutex_unlock(&kvm->lock);
- return r;
-}
-
-static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
- struct kvm_assigned_pci_dev *assigned_dev)
-{
- int r = 0;
- struct kvm_assigned_dev_kernel *match;
-
- mutex_lock(&kvm->lock);
-
- match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
- assigned_dev->assigned_dev_id);
- if (!match) {
- printk(KERN_INFO "%s: device hasn't been assigned before, "
- "so cannot be deassigned\n", __func__);
- r = -EINVAL;
- goto out;
- }
-
- kvm_deassign_device(kvm, match->dev);
-
- kvm_free_assigned_device(kvm, match);
-
-out:
- mutex_unlock(&kvm->lock);
- return r;
-}
-
-
-static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
- struct kvm_assigned_msix_nr *entry_nr)
-{
- int r = 0;
- struct kvm_assigned_dev_kernel *adev;
-
- mutex_lock(&kvm->lock);
-
- adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
- entry_nr->assigned_dev_id);
- if (!adev) {
- r = -EINVAL;
- goto msix_nr_out;
- }
-
- if (adev->entries_nr == 0) {
- adev->entries_nr = entry_nr->entry_nr;
- if (adev->entries_nr == 0 ||
- adev->entries_nr > KVM_MAX_MSIX_PER_DEV) {
- r = -EINVAL;
- goto msix_nr_out;
- }
-
- adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) *
- entry_nr->entry_nr,
- GFP_KERNEL);
- if (!adev->host_msix_entries) {
- r = -ENOMEM;
- goto msix_nr_out;
- }
- adev->guest_msix_entries =
- kzalloc(sizeof(struct msix_entry) * entry_nr->entry_nr,
- GFP_KERNEL);
- if (!adev->guest_msix_entries) {
- kfree(adev->host_msix_entries);
- r = -ENOMEM;
- goto msix_nr_out;
- }
- } else /* Not allowed set MSI-X number twice */
- r = -EINVAL;
-msix_nr_out:
- mutex_unlock(&kvm->lock);
- return r;
-}
-
-static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
- struct kvm_assigned_msix_entry *entry)
-{
- int r = 0, i;
- struct kvm_assigned_dev_kernel *adev;
-
- mutex_lock(&kvm->lock);
-
- adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
- entry->assigned_dev_id);
-
- if (!adev) {
- r = -EINVAL;
- goto msix_entry_out;
- }
-
- for (i = 0; i < adev->entries_nr; i++)
- if (adev->guest_msix_entries[i].vector == 0 ||
- adev->guest_msix_entries[i].entry == entry->entry) {
- adev->guest_msix_entries[i].entry = entry->entry;
- adev->guest_msix_entries[i].vector = entry->gsi;
- adev->host_msix_entries[i].entry = entry->entry;
- break;
- }
- if (i == adev->entries_nr) {
- r = -ENOSPC;
- goto msix_entry_out;
- }
-
-msix_entry_out:
- mutex_unlock(&kvm->lock);
-
- return r;
-}
-
-static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm,
- struct kvm_assigned_pci_dev *assigned_dev)
-{
- int r = 0;
- struct kvm_assigned_dev_kernel *match;
-
- mutex_lock(&kvm->lock);
-
- match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
- assigned_dev->assigned_dev_id);
- if (!match) {
- r = -ENODEV;
- goto out;
- }
-
- spin_lock(&match->intx_mask_lock);
-
- match->flags &= ~KVM_DEV_ASSIGN_MASK_INTX;
- match->flags |= assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX;
-
- if (match->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) {
- if (assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX) {
- kvm_set_irq(match->kvm, match->irq_source_id,
- match->guest_irq, 0, false);
- /*
- * Masking at hardware-level is performed on demand,
- * i.e. when an IRQ actually arrives at the host.
- */
- } else if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
- /*
- * Unmask the IRQ line if required. Unmasking at
- * device level will be performed by user space.
- */
- spin_lock_irq(&match->intx_lock);
- if (match->host_irq_disabled) {
- enable_irq(match->host_irq);
- match->host_irq_disabled = false;
- }
- spin_unlock_irq(&match->intx_lock);
- }
- }
-
- spin_unlock(&match->intx_mask_lock);
-
-out:
- mutex_unlock(&kvm->lock);
- return r;
-}
-
-long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
- unsigned long arg)
-{
- void __user *argp = (void __user *)arg;
- int r;
-
- switch (ioctl) {
- case KVM_ASSIGN_PCI_DEVICE: {
- struct kvm_assigned_pci_dev assigned_dev;
-
- r = -EFAULT;
- if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
- goto out;
- r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev);
- if (r)
- goto out;
- break;
- }
- case KVM_ASSIGN_IRQ: {
- r = -EOPNOTSUPP;
- break;
- }
- case KVM_ASSIGN_DEV_IRQ: {
- struct kvm_assigned_irq assigned_irq;
-
- r = -EFAULT;
- if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
- goto out;
- r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq);
- if (r)
- goto out;
- break;
- }
- case KVM_DEASSIGN_DEV_IRQ: {
- struct kvm_assigned_irq assigned_irq;
-
- r = -EFAULT;
- if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
- goto out;
- r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq);
- if (r)
- goto out;
- break;
- }
- case KVM_DEASSIGN_PCI_DEVICE: {
- struct kvm_assigned_pci_dev assigned_dev;
-
- r = -EFAULT;
- if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
- goto out;
- r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev);
- if (r)
- goto out;
- break;
- }
- case KVM_ASSIGN_SET_MSIX_NR: {
- struct kvm_assigned_msix_nr entry_nr;
- r = -EFAULT;
- if (copy_from_user(&entry_nr, argp, sizeof entry_nr))
- goto out;
- r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr);
- if (r)
- goto out;
- break;
- }
- case KVM_ASSIGN_SET_MSIX_ENTRY: {
- struct kvm_assigned_msix_entry entry;
- r = -EFAULT;
- if (copy_from_user(&entry, argp, sizeof entry))
- goto out;
- r = kvm_vm_ioctl_set_msix_entry(kvm, &entry);
- if (r)
- goto out;
- break;
- }
- case KVM_ASSIGN_SET_INTX_MASK: {
- struct kvm_assigned_pci_dev assigned_dev;
-
- r = -EFAULT;
- if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
- goto out;
- r = kvm_vm_ioctl_set_pci_irq_mask(kvm, &assigned_dev);
- break;
- }
- default:
- r = -ENOTTY;
- break;
- }
-out:
- return r;
-}
diff --git a/arch/x86/kvm/assigned-dev.h b/arch/x86/kvm/assigned-dev.h
deleted file mode 100644
index a428c1a211b2..000000000000
--- a/arch/x86/kvm/assigned-dev.h
+++ /dev/null
@@ -1,32 +0,0 @@
-#ifndef ARCH_X86_KVM_ASSIGNED_DEV_H
-#define ARCH_X86_KVM_ASSIGNED_DEV_H
-
-#include <linux/kvm_host.h>
-
-#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
-int kvm_assign_device(struct kvm *kvm, struct pci_dev *pdev);
-int kvm_deassign_device(struct kvm *kvm, struct pci_dev *pdev);
-
-int kvm_iommu_map_guest(struct kvm *kvm);
-int kvm_iommu_unmap_guest(struct kvm *kvm);
-
-long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
- unsigned long arg);
-
-void kvm_free_all_assigned_devices(struct kvm *kvm);
-#else
-static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
-{
- return 0;
-}
-
-static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
- unsigned long arg)
-{
- return -ENOTTY;
-}
-
-static inline void kvm_free_all_assigned_devices(struct kvm *kvm) {}
-#endif /* CONFIG_KVM_DEVICE_ASSIGNMENT */
-
-#endif /* ARCH_X86_KVM_ASSIGNED_DEV_H */
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index efde6cc50875..a181ae76c71c 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -876,6 +876,9 @@ int kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
{
u32 eax, ebx, ecx, edx;
+ if (cpuid_fault_enabled(vcpu) && !kvm_require_cpl(vcpu, 0))
+ return 1;
+
eax = kvm_register_read(vcpu, VCPU_REGS_RAX);
ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
kvm_cpuid(vcpu, &eax, &ebx, &ecx, &edx);
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 35058c2c0eea..a6fd40aade7c 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -205,4 +205,15 @@ static inline int guest_cpuid_stepping(struct kvm_vcpu *vcpu)
return x86_stepping(best->eax);
}
+static inline bool supports_cpuid_fault(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.msr_platform_info & MSR_PLATFORM_INFO_CPUID_FAULT;
+}
+
+static inline bool cpuid_fault_enabled(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.msr_misc_features_enables &
+ MSR_MISC_FEATURES_ENABLES_CPUID_FAULT;
+}
+
#endif
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 45c7306c8780..c25cfaf584e7 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2547,7 +2547,7 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
u64 smbase;
int ret;
- if ((ctxt->emul_flags & X86EMUL_SMM_MASK) == 0)
+ if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_MASK) == 0)
return emulate_ud(ctxt);
/*
@@ -2596,11 +2596,11 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
return X86EMUL_UNHANDLEABLE;
}
- if ((ctxt->emul_flags & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
+ if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
ctxt->ops->set_nmi_mask(ctxt, false);
- ctxt->emul_flags &= ~X86EMUL_SMM_INSIDE_NMI_MASK;
- ctxt->emul_flags &= ~X86EMUL_SMM_MASK;
+ ctxt->ops->set_hflags(ctxt, ctxt->ops->get_hflags(ctxt) &
+ ~(X86EMUL_SMM_INSIDE_NMI_MASK | X86EMUL_SMM_MASK));
return X86EMUL_CONTINUE;
}
@@ -3854,6 +3854,13 @@ static int em_sti(struct x86_emulate_ctxt *ctxt)
static int em_cpuid(struct x86_emulate_ctxt *ctxt)
{
u32 eax, ebx, ecx, edx;
+ u64 msr = 0;
+
+ ctxt->ops->get_msr(ctxt, MSR_MISC_FEATURES_ENABLES, &msr);
+ if (msr & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT &&
+ ctxt->ops->cpl(ctxt)) {
+ return emulate_gp(ctxt, 0);
+ }
eax = reg_read(ctxt, VCPU_REGS_RAX);
ecx = reg_read(ctxt, VCPU_REGS_RCX);
@@ -5316,6 +5323,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
const struct x86_emulate_ops *ops = ctxt->ops;
int rc = X86EMUL_CONTINUE;
int saved_dst_type = ctxt->dst.type;
+ unsigned emul_flags;
ctxt->mem_read.pos = 0;
@@ -5330,6 +5338,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
goto done;
}
+ emul_flags = ctxt->ops->get_hflags(ctxt);
if (unlikely(ctxt->d &
(No64|Undefined|Sse|Mmx|Intercept|CheckPerm|Priv|Prot|String))) {
if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) ||
@@ -5363,7 +5372,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
fetch_possible_mmx_operand(ctxt, &ctxt->dst);
}
- if (unlikely(ctxt->emul_flags & X86EMUL_GUEST_MASK) && ctxt->intercept) {
+ if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && ctxt->intercept) {
rc = emulator_check_intercept(ctxt, ctxt->intercept,
X86_ICPT_PRE_EXCEPT);
if (rc != X86EMUL_CONTINUE)
@@ -5392,7 +5401,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
goto done;
}
- if (unlikely(ctxt->emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) {
+ if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) {
rc = emulator_check_intercept(ctxt, ctxt->intercept,
X86_ICPT_POST_EXCEPT);
if (rc != X86EMUL_CONTINUE)
@@ -5446,7 +5455,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
special_insn:
- if (unlikely(ctxt->emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) {
+ if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) {
rc = emulator_check_intercept(ctxt, ctxt->intercept,
X86_ICPT_POST_MEMACCESS);
if (rc != X86EMUL_CONTINUE)
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 047b17a26269..bdcd4139eca9 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -49,7 +49,7 @@ static void pic_unlock(struct kvm_pic *s)
__releases(&s->lock)
{
bool wakeup = s->wakeup_needed;
- struct kvm_vcpu *vcpu, *found = NULL;
+ struct kvm_vcpu *vcpu;
int i;
s->wakeup_needed = false;
@@ -59,16 +59,11 @@ static void pic_unlock(struct kvm_pic *s)
if (wakeup) {
kvm_for_each_vcpu(i, vcpu, s->kvm) {
if (kvm_apic_accept_pic_intr(vcpu)) {
- found = vcpu;
- break;
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+ kvm_vcpu_kick(vcpu);
+ return;
}
}
-
- if (!found)
- return;
-
- kvm_make_request(KVM_REQ_EVENT, found);
- kvm_vcpu_kick(found);
}
}
@@ -239,7 +234,7 @@ static inline void pic_intack(struct kvm_kpic_state *s, int irq)
int kvm_pic_read_irq(struct kvm *kvm)
{
int irq, irq2, intno;
- struct kvm_pic *s = pic_irqchip(kvm);
+ struct kvm_pic *s = kvm->arch.vpic;
s->output = 0;
@@ -273,7 +268,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
return intno;
}
-void kvm_pic_reset(struct kvm_kpic_state *s)
+static void kvm_pic_reset(struct kvm_kpic_state *s)
{
int irq, i;
struct kvm_vcpu *vcpu;
@@ -422,19 +417,16 @@ static u32 pic_poll_read(struct kvm_kpic_state *s, u32 addr1)
return ret;
}
-static u32 pic_ioport_read(void *opaque, u32 addr1)
+static u32 pic_ioport_read(void *opaque, u32 addr)
{
struct kvm_kpic_state *s = opaque;
- unsigned int addr;
int ret;
- addr = addr1;
- addr &= 1;
if (s->poll) {
- ret = pic_poll_read(s, addr1);
+ ret = pic_poll_read(s, addr);
s->poll = 0;
} else
- if (addr == 0)
+ if ((addr & 1) == 0)
if (s->read_reg_select)
ret = s->isr;
else
@@ -456,76 +448,64 @@ static u32 elcr_ioport_read(void *opaque, u32 addr1)
return s->elcr;
}
-static int picdev_in_range(gpa_t addr)
-{
- switch (addr) {
- case 0x20:
- case 0x21:
- case 0xa0:
- case 0xa1:
- case 0x4d0:
- case 0x4d1:
- return 1;
- default:
- return 0;
- }
-}
-
static int picdev_write(struct kvm_pic *s,
gpa_t addr, int len, const void *val)
{
unsigned char data = *(unsigned char *)val;
- if (!picdev_in_range(addr))
- return -EOPNOTSUPP;
if (len != 1) {
pr_pic_unimpl("non byte write\n");
return 0;
}
- pic_lock(s);
switch (addr) {
case 0x20:
case 0x21:
case 0xa0:
case 0xa1:
+ pic_lock(s);
pic_ioport_write(&s->pics[addr >> 7], addr, data);
+ pic_unlock(s);
break;
case 0x4d0:
case 0x4d1:
+ pic_lock(s);
elcr_ioport_write(&s->pics[addr & 1], addr, data);
+ pic_unlock(s);
break;
+ default:
+ return -EOPNOTSUPP;
}
- pic_unlock(s);
return 0;
}
static int picdev_read(struct kvm_pic *s,
gpa_t addr, int len, void *val)
{
- unsigned char data = 0;
- if (!picdev_in_range(addr))
- return -EOPNOTSUPP;
+ unsigned char *data = (unsigned char *)val;
if (len != 1) {
memset(val, 0, len);
pr_pic_unimpl("non byte read\n");
return 0;
}
- pic_lock(s);
switch (addr) {
case 0x20:
case 0x21:
case 0xa0:
case 0xa1:
- data = pic_ioport_read(&s->pics[addr >> 7], addr);
+ pic_lock(s);
+ *data = pic_ioport_read(&s->pics[addr >> 7], addr);
+ pic_unlock(s);
break;
case 0x4d0:
case 0x4d1:
- data = elcr_ioport_read(&s->pics[addr & 1], addr);
+ pic_lock(s);
+ *data = elcr_ioport_read(&s->pics[addr & 1], addr);
+ pic_unlock(s);
break;
+ default:
+ return -EOPNOTSUPP;
}
- *(unsigned char *)val = data;
- pic_unlock(s);
return 0;
}
@@ -576,7 +556,7 @@ static int picdev_eclr_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
*/
static void pic_irq_request(struct kvm *kvm, int level)
{
- struct kvm_pic *s = pic_irqchip(kvm);
+ struct kvm_pic *s = kvm->arch.vpic;
if (!s->output)
s->wakeup_needed = true;
@@ -660,9 +640,11 @@ void kvm_pic_destroy(struct kvm *kvm)
if (!vpic)
return;
+ mutex_lock(&kvm->slots_lock);
kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_master);
kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_slave);
kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_eclr);
+ mutex_unlock(&kvm->slots_lock);
kvm->arch.vpic = NULL;
kfree(vpic);
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 289270a6aecb..bdff437acbcb 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -266,11 +266,9 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors)
spin_unlock(&ioapic->lock);
}
-void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
+void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm)
{
- struct kvm_ioapic *ioapic = kvm->arch.vioapic;
-
- if (!ioapic)
+ if (!ioapic_in_kernel(kvm))
return;
kvm_make_scan_ioapic_request(kvm);
}
@@ -315,7 +313,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG
&& ioapic->irr & (1 << index))
ioapic_service(ioapic, index, false);
- kvm_vcpu_request_scan_ioapic(ioapic->kvm);
+ kvm_make_scan_ioapic_request(ioapic->kvm);
break;
}
}
@@ -624,10 +622,8 @@ int kvm_ioapic_init(struct kvm *kvm)
if (ret < 0) {
kvm->arch.vioapic = NULL;
kfree(ioapic);
- return ret;
}
- kvm_vcpu_request_scan_ioapic(kvm);
return ret;
}
@@ -639,36 +635,32 @@ void kvm_ioapic_destroy(struct kvm *kvm)
return;
cancel_delayed_work_sync(&ioapic->eoi_inject);
+ mutex_lock(&kvm->slots_lock);
kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev);
+ mutex_unlock(&kvm->slots_lock);
kvm->arch.vioapic = NULL;
kfree(ioapic);
}
-int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
+void kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
{
- struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
- if (!ioapic)
- return -EINVAL;
+ struct kvm_ioapic *ioapic = kvm->arch.vioapic;
spin_lock(&ioapic->lock);
memcpy(state, ioapic, sizeof(struct kvm_ioapic_state));
state->irr &= ~ioapic->irr_delivered;
spin_unlock(&ioapic->lock);
- return 0;
}
-int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
+void kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
{
- struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
- if (!ioapic)
- return -EINVAL;
+ struct kvm_ioapic *ioapic = kvm->arch.vioapic;
spin_lock(&ioapic->lock);
memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
ioapic->irr = 0;
ioapic->irr_delivered = 0;
- kvm_vcpu_request_scan_ioapic(kvm);
+ kvm_make_scan_ioapic_request(kvm);
kvm_ioapic_inject_all(ioapic, state->irr);
spin_unlock(&ioapic->lock);
- return 0;
}
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index 1cc6e54436db..29ce19732ccf 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -105,17 +105,13 @@ do { \
#define ASSERT(x) do { } while (0)
#endif
-static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
-{
- return kvm->arch.vioapic;
-}
-
static inline int ioapic_in_kernel(struct kvm *kvm)
{
- int ret;
+ int mode = kvm->arch.irqchip_mode;
- ret = (ioapic_irqchip(kvm) != NULL);
- return ret;
+ /* Matches smp_wmb() when setting irqchip_mode */
+ smp_rmb();
+ return mode == KVM_IRQCHIP_KERNEL;
}
void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu);
@@ -132,8 +128,8 @@ void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id);
int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
struct kvm_lapic_irq *irq,
struct dest_map *dest_map);
-int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
-int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
+void kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
+void kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu,
ulong *ioapic_handled_vectors);
void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/iommu.c b/arch/x86/kvm/iommu.c
deleted file mode 100644
index b181426f67b4..000000000000
--- a/arch/x86/kvm/iommu.c
+++ /dev/null
@@ -1,356 +0,0 @@
-/*
- * Copyright (c) 2006, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- * Copyright (C) 2006-2008 Intel Corporation
- * Copyright IBM Corporation, 2008
- * Copyright 2010 Red Hat, Inc. and/or its affiliates.
- *
- * Author: Allen M. Kay <allen.m.kay@intel.com>
- * Author: Weidong Han <weidong.han@intel.com>
- * Author: Ben-Ami Yassour <benami@il.ibm.com>
- */
-
-#include <linux/list.h>
-#include <linux/kvm_host.h>
-#include <linux/moduleparam.h>
-#include <linux/pci.h>
-#include <linux/stat.h>
-#include <linux/iommu.h>
-#include "assigned-dev.h"
-
-static bool allow_unsafe_assigned_interrupts;
-module_param_named(allow_unsafe_assigned_interrupts,
- allow_unsafe_assigned_interrupts, bool, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(allow_unsafe_assigned_interrupts,
- "Enable device assignment on platforms without interrupt remapping support.");
-
-static int kvm_iommu_unmap_memslots(struct kvm *kvm);
-static void kvm_iommu_put_pages(struct kvm *kvm,
- gfn_t base_gfn, unsigned long npages);
-
-static kvm_pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn,
- unsigned long npages)
-{
- gfn_t end_gfn;
- kvm_pfn_t pfn;
-
- pfn = gfn_to_pfn_memslot(slot, gfn);
- end_gfn = gfn + npages;
- gfn += 1;
-
- if (is_error_noslot_pfn(pfn))
- return pfn;
-
- while (gfn < end_gfn)
- gfn_to_pfn_memslot(slot, gfn++);
-
- return pfn;
-}
-
-static void kvm_unpin_pages(struct kvm *kvm, kvm_pfn_t pfn,
- unsigned long npages)
-{
- unsigned long i;
-
- for (i = 0; i < npages; ++i)
- kvm_release_pfn_clean(pfn + i);
-}
-
-int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
-{
- gfn_t gfn, end_gfn;
- kvm_pfn_t pfn;
- int r = 0;
- struct iommu_domain *domain = kvm->arch.iommu_domain;
- int flags;
-
- /* check if iommu exists and in use */
- if (!domain)
- return 0;
-
- gfn = slot->base_gfn;
- end_gfn = gfn + slot->npages;
-
- flags = IOMMU_READ;
- if (!(slot->flags & KVM_MEM_READONLY))
- flags |= IOMMU_WRITE;
- if (!kvm->arch.iommu_noncoherent)
- flags |= IOMMU_CACHE;
-
-
- while (gfn < end_gfn) {
- unsigned long page_size;
-
- /* Check if already mapped */
- if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn))) {
- gfn += 1;
- continue;
- }
-
- /* Get the page size we could use to map */
- page_size = kvm_host_page_size(kvm, gfn);
-
- /* Make sure the page_size does not exceed the memslot */
- while ((gfn + (page_size >> PAGE_SHIFT)) > end_gfn)
- page_size >>= 1;
-
- /* Make sure gfn is aligned to the page size we want to map */
- while ((gfn << PAGE_SHIFT) & (page_size - 1))
- page_size >>= 1;
-
- /* Make sure hva is aligned to the page size we want to map */
- while (__gfn_to_hva_memslot(slot, gfn) & (page_size - 1))
- page_size >>= 1;
-
- /*
- * Pin all pages we are about to map in memory. This is
- * important because we unmap and unpin in 4kb steps later.
- */
- pfn = kvm_pin_pages(slot, gfn, page_size >> PAGE_SHIFT);
- if (is_error_noslot_pfn(pfn)) {
- gfn += 1;
- continue;
- }
-
- /* Map into IO address space */
- r = iommu_map(domain, gfn_to_gpa(gfn), pfn_to_hpa(pfn),
- page_size, flags);
- if (r) {
- printk(KERN_ERR "kvm_iommu_map_address:"
- "iommu failed to map pfn=%llx\n", pfn);
- kvm_unpin_pages(kvm, pfn, page_size >> PAGE_SHIFT);
- goto unmap_pages;
- }
-
- gfn += page_size >> PAGE_SHIFT;
-
- cond_resched();
- }
-
- return 0;
-
-unmap_pages:
- kvm_iommu_put_pages(kvm, slot->base_gfn, gfn - slot->base_gfn);
- return r;
-}
-
-static int kvm_iommu_map_memslots(struct kvm *kvm)
-{
- int idx, r = 0;
- struct kvm_memslots *slots;
- struct kvm_memory_slot *memslot;
-
- if (kvm->arch.iommu_noncoherent)
- kvm_arch_register_noncoherent_dma(kvm);
-
- idx = srcu_read_lock(&kvm->srcu);
- slots = kvm_memslots(kvm);
-
- kvm_for_each_memslot(memslot, slots) {
- r = kvm_iommu_map_pages(kvm, memslot);
- if (r)
- break;
- }
- srcu_read_unlock(&kvm->srcu, idx);
-
- return r;
-}
-
-int kvm_assign_device(struct kvm *kvm, struct pci_dev *pdev)
-{
- struct iommu_domain *domain = kvm->arch.iommu_domain;
- int r;
- bool noncoherent;
-
- /* check if iommu exists and in use */
- if (!domain)
- return 0;
-
- if (pdev == NULL)
- return -ENODEV;
-
- r = iommu_attach_device(domain, &pdev->dev);
- if (r) {
- dev_err(&pdev->dev, "kvm assign device failed ret %d", r);
- return r;
- }
-
- noncoherent = !iommu_capable(&pci_bus_type, IOMMU_CAP_CACHE_COHERENCY);
-
- /* Check if need to update IOMMU page table for guest memory */
- if (noncoherent != kvm->arch.iommu_noncoherent) {
- kvm_iommu_unmap_memslots(kvm);
- kvm->arch.iommu_noncoherent = noncoherent;
- r = kvm_iommu_map_memslots(kvm);
- if (r)
- goto out_unmap;
- }
-
- kvm_arch_start_assignment(kvm);
- pci_set_dev_assigned(pdev);
-
- dev_info(&pdev->dev, "kvm assign device\n");
-
- return 0;
-out_unmap:
- kvm_iommu_unmap_memslots(kvm);
- return r;
-}
-
-int kvm_deassign_device(struct kvm *kvm, struct pci_dev *pdev)
-{
- struct iommu_domain *domain = kvm->arch.iommu_domain;
-
- /* check if iommu exists and in use */
- if (!domain)
- return 0;
-
- if (pdev == NULL)
- return -ENODEV;
-
- iommu_detach_device(domain, &pdev->dev);
-
- pci_clear_dev_assigned(pdev);
- kvm_arch_end_assignment(kvm);
-
- dev_info(&pdev->dev, "kvm deassign device\n");
-
- return 0;
-}
-
-int kvm_iommu_map_guest(struct kvm *kvm)
-{
- int r;
-
- if (!iommu_present(&pci_bus_type)) {
- printk(KERN_ERR "%s: iommu not found\n", __func__);
- return -ENODEV;
- }
-
- mutex_lock(&kvm->slots_lock);
-
- kvm->arch.iommu_domain = iommu_domain_alloc(&pci_bus_type);
- if (!kvm->arch.iommu_domain) {
- r = -ENOMEM;
- goto out_unlock;
- }
-
- if (!allow_unsafe_assigned_interrupts &&
- !iommu_capable(&pci_bus_type, IOMMU_CAP_INTR_REMAP)) {
- printk(KERN_WARNING "%s: No interrupt remapping support,"
- " disallowing device assignment."
- " Re-enable with \"allow_unsafe_assigned_interrupts=1\""
- " module option.\n", __func__);
- iommu_domain_free(kvm->arch.iommu_domain);
- kvm->arch.iommu_domain = NULL;
- r = -EPERM;
- goto out_unlock;
- }
-
- r = kvm_iommu_map_memslots(kvm);
- if (r)
- kvm_iommu_unmap_memslots(kvm);
-
-out_unlock:
- mutex_unlock(&kvm->slots_lock);
- return r;
-}
-
-static void kvm_iommu_put_pages(struct kvm *kvm,
- gfn_t base_gfn, unsigned long npages)
-{
- struct iommu_domain *domain;
- gfn_t end_gfn, gfn;
- kvm_pfn_t pfn;
- u64 phys;
-
- domain = kvm->arch.iommu_domain;
- end_gfn = base_gfn + npages;
- gfn = base_gfn;
-
- /* check if iommu exists and in use */
- if (!domain)
- return;
-
- while (gfn < end_gfn) {
- unsigned long unmap_pages;
- size_t size;
-
- /* Get physical address */
- phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn));
-
- if (!phys) {
- gfn++;
- continue;
- }
-
- pfn = phys >> PAGE_SHIFT;
-
- /* Unmap address from IO address space */
- size = iommu_unmap(domain, gfn_to_gpa(gfn), PAGE_SIZE);
- unmap_pages = 1ULL << get_order(size);
-
- /* Unpin all pages we just unmapped to not leak any memory */
- kvm_unpin_pages(kvm, pfn, unmap_pages);
-
- gfn += unmap_pages;
-
- cond_resched();
- }
-}
-
-void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
-{
- kvm_iommu_put_pages(kvm, slot->base_gfn, slot->npages);
-}
-
-static int kvm_iommu_unmap_memslots(struct kvm *kvm)
-{
- int idx;
- struct kvm_memslots *slots;
- struct kvm_memory_slot *memslot;
-
- idx = srcu_read_lock(&kvm->srcu);
- slots = kvm_memslots(kvm);
-
- kvm_for_each_memslot(memslot, slots)
- kvm_iommu_unmap_pages(kvm, memslot);
-
- srcu_read_unlock(&kvm->srcu, idx);
-
- if (kvm->arch.iommu_noncoherent)
- kvm_arch_unregister_noncoherent_dma(kvm);
-
- return 0;
-}
-
-int kvm_iommu_unmap_guest(struct kvm *kvm)
-{
- struct iommu_domain *domain = kvm->arch.iommu_domain;
-
- /* check if iommu exists and in use */
- if (!domain)
- return 0;
-
- mutex_lock(&kvm->slots_lock);
- kvm_iommu_unmap_memslots(kvm);
- kvm->arch.iommu_domain = NULL;
- kvm->arch.iommu_noncoherent = false;
- mutex_unlock(&kvm->slots_lock);
-
- iommu_domain_free(domain);
- return 0;
-}
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 60d91c9d160c..5c24811e8b0b 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -60,7 +60,7 @@ static int kvm_cpu_has_extint(struct kvm_vcpu *v)
if (irqchip_split(v->kvm))
return pending_userspace_extint(v);
else
- return pic_irqchip(v->kvm)->output;
+ return v->kvm->arch.vpic->output;
} else
return 0;
}
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 40d5b2cf6061..d5005cc26521 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -78,40 +78,42 @@ void kvm_pic_destroy(struct kvm *kvm);
int kvm_pic_read_irq(struct kvm *kvm);
void kvm_pic_update_irq(struct kvm_pic *s);
-static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
-{
- return kvm->arch.vpic;
-}
-
static inline int pic_in_kernel(struct kvm *kvm)
{
- int ret;
+ int mode = kvm->arch.irqchip_mode;
- ret = (pic_irqchip(kvm) != NULL);
- return ret;
+ /* Matches smp_wmb() when setting irqchip_mode */
+ smp_rmb();
+ return mode == KVM_IRQCHIP_KERNEL;
}
static inline int irqchip_split(struct kvm *kvm)
{
- return kvm->arch.irqchip_mode == KVM_IRQCHIP_SPLIT;
+ int mode = kvm->arch.irqchip_mode;
+
+ /* Matches smp_wmb() when setting irqchip_mode */
+ smp_rmb();
+ return mode == KVM_IRQCHIP_SPLIT;
}
static inline int irqchip_kernel(struct kvm *kvm)
{
- return kvm->arch.irqchip_mode == KVM_IRQCHIP_KERNEL;
+ int mode = kvm->arch.irqchip_mode;
+
+ /* Matches smp_wmb() when setting irqchip_mode */
+ smp_rmb();
+ return mode == KVM_IRQCHIP_KERNEL;
}
static inline int irqchip_in_kernel(struct kvm *kvm)
{
- bool ret = kvm->arch.irqchip_mode != KVM_IRQCHIP_NONE;
+ int mode = kvm->arch.irqchip_mode;
- /* Matches with wmb after initializing kvm->irq_routing. */
+ /* Matches smp_wmb() when setting irqchip_mode */
smp_rmb();
- return ret;
+ return mode != KVM_IRQCHIP_NONE;
}
-void kvm_pic_reset(struct kvm_kpic_state *s);
-
void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 6825cd36d13b..3cc3b2d130a0 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -42,7 +42,7 @@ static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
struct kvm *kvm, int irq_source_id, int level,
bool line_status)
{
- struct kvm_pic *pic = pic_irqchip(kvm);
+ struct kvm_pic *pic = kvm->arch.vpic;
return kvm_pic_set_irq(pic, e->irqchip.pin, irq_source_id, level);
}
@@ -232,11 +232,11 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
goto unlock;
}
clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
- if (!ioapic_in_kernel(kvm))
+ if (!irqchip_kernel(kvm))
goto unlock;
kvm_ioapic_clear_all(kvm->arch.vioapic, irq_source_id);
- kvm_pic_clear_all(pic_irqchip(kvm), irq_source_id);
+ kvm_pic_clear_all(kvm->arch.vpic, irq_source_id);
unlock:
mutex_unlock(&kvm->irq_lock);
}
@@ -274,42 +274,42 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
srcu_read_unlock(&kvm->irq_srcu, idx);
}
+bool kvm_arch_can_set_irq_routing(struct kvm *kvm)
+{
+ return irqchip_in_kernel(kvm);
+}
+
int kvm_set_routing_entry(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue)
{
- int r = -EINVAL;
- int delta;
- unsigned max_pin;
-
+ /* We can't check irqchip_in_kernel() here as some callers are
+ * currently inititalizing the irqchip. Other callers should therefore
+ * check kvm_arch_can_set_irq_routing() before calling this function.
+ */
switch (ue->type) {
case KVM_IRQ_ROUTING_IRQCHIP:
- delta = 0;
+ if (irqchip_split(kvm))
+ return -EINVAL;
+ e->irqchip.pin = ue->u.irqchip.pin;
switch (ue->u.irqchip.irqchip) {
case KVM_IRQCHIP_PIC_SLAVE:
- delta = 8;
+ e->irqchip.pin += PIC_NUM_PINS / 2;
/* fall through */
case KVM_IRQCHIP_PIC_MASTER:
- if (!pic_in_kernel(kvm))
- goto out;
-
+ if (ue->u.irqchip.pin >= PIC_NUM_PINS / 2)
+ return -EINVAL;
e->set = kvm_set_pic_irq;
- max_pin = PIC_NUM_PINS;
break;
case KVM_IRQCHIP_IOAPIC:
- if (!ioapic_in_kernel(kvm))
- goto out;
-
- max_pin = KVM_IOAPIC_NUM_PINS;
+ if (ue->u.irqchip.pin >= KVM_IOAPIC_NUM_PINS)
+ return -EINVAL;
e->set = kvm_set_ioapic_irq;
break;
default:
- goto out;
+ return -EINVAL;
}
e->irqchip.irqchip = ue->u.irqchip.irqchip;
- e->irqchip.pin = ue->u.irqchip.pin + delta;
- if (e->irqchip.pin >= max_pin)
- goto out;
break;
case KVM_IRQ_ROUTING_MSI:
e->set = kvm_set_msi;
@@ -318,7 +318,7 @@ int kvm_set_routing_entry(struct kvm *kvm,
e->msi.data = ue->u.msi.data;
if (kvm_msi_route_invalid(kvm, e))
- goto out;
+ return -EINVAL;
break;
case KVM_IRQ_ROUTING_HV_SINT:
e->set = kvm_hv_set_sint;
@@ -326,12 +326,10 @@ int kvm_set_routing_entry(struct kvm *kvm,
e->hv_sint.sint = ue->u.hv_sint.sint;
break;
default:
- goto out;
+ return -EINVAL;
}
- r = 0;
-out:
- return r;
+ return 0;
}
bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index bad6a25067bc..c329d2894905 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -177,8 +177,8 @@ static void recalculate_apic_map(struct kvm *kvm)
if (kvm_apic_present(vcpu))
max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic));
- new = kvm_kvzalloc(sizeof(struct kvm_apic_map) +
- sizeof(struct kvm_lapic *) * ((u64)max_id + 1));
+ new = kvzalloc(sizeof(struct kvm_apic_map) +
+ sizeof(struct kvm_lapic *) * ((u64)max_id + 1), GFP_KERNEL);
if (!new)
goto out;
@@ -529,14 +529,16 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
{
- return kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.pv_eoi.data, &val,
- sizeof(val));
+
+ return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val,
+ sizeof(val));
}
static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val)
{
- return kvm_vcpu_read_guest_cached(vcpu, &vcpu->arch.pv_eoi.data, val,
- sizeof(*val));
+
+ return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val,
+ sizeof(*val));
}
static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
@@ -2285,8 +2287,8 @@ void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
return;
- if (kvm_vcpu_read_guest_cached(vcpu, &vcpu->arch.apic->vapic_cache, &data,
- sizeof(u32)))
+ if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
+ sizeof(u32)))
return;
apic_set_tpr(vcpu->arch.apic, data & 0xff);
@@ -2338,14 +2340,14 @@ void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
max_isr = 0;
data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24);
- kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.apic->vapic_cache, &data,
- sizeof(u32));
+ kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
+ sizeof(u32));
}
int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
{
if (vapic_addr) {
- if (kvm_vcpu_gfn_to_hva_cache_init(vcpu,
+ if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
&vcpu->arch.apic->vapic_cache,
vapic_addr, sizeof(u32)))
return -EINVAL;
@@ -2439,7 +2441,7 @@ int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data)
vcpu->arch.pv_eoi.msr_val = data;
if (!pv_eoi_enabled(vcpu))
return 0;
- return kvm_vcpu_gfn_to_hva_cache_init(vcpu, &vcpu->arch.pv_eoi.data,
+ return kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_eoi.data,
addr, sizeof(u8));
}
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ac7810513d0e..5d3376f67794 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1498,6 +1498,21 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
}
+/**
+ * kvm_arch_write_log_dirty - emulate dirty page logging
+ * @vcpu: Guest mode vcpu
+ *
+ * Emulate arch specific page modification logging for the
+ * nested hypervisor
+ */
+int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu)
+{
+ if (kvm_x86_ops->write_log_dirty)
+ return kvm_x86_ops->write_log_dirty(vcpu);
+
+ return 0;
+}
+
bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
struct kvm_memory_slot *slot, u64 gfn)
{
@@ -4340,7 +4355,8 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
-void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly)
+void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
+ bool accessed_dirty)
{
struct kvm_mmu *context = &vcpu->arch.mmu;
@@ -4349,6 +4365,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly)
context->shadow_root_level = kvm_x86_ops->get_tdp_level();
context->nx = true;
+ context->ept_ad = accessed_dirty;
context->page_fault = ept_page_fault;
context->gva_to_gpa = ept_gva_to_gpa;
context->sync_page = ept_sync_page;
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index ddc56e91f2e4..27975807cc64 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -74,7 +74,8 @@ enum {
int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct);
void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu);
-void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly);
+void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
+ bool accessed_dirty);
static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
{
@@ -201,4 +202,5 @@ void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
struct kvm_memory_slot *slot, u64 gfn);
+int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu);
#endif
diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/page_track.c
index 60168cdd0546..ea67dc876316 100644
--- a/arch/x86/kvm/page_track.c
+++ b/arch/x86/kvm/page_track.c
@@ -40,8 +40,8 @@ int kvm_page_track_create_memslot(struct kvm_memory_slot *slot,
int i;
for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) {
- slot->arch.gfn_track[i] = kvm_kvzalloc(npages *
- sizeof(*slot->arch.gfn_track[i]));
+ slot->arch.gfn_track[i] = kvzalloc(npages *
+ sizeof(*slot->arch.gfn_track[i]), GFP_KERNEL);
if (!slot->arch.gfn_track[i])
goto track_free;
}
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index a01105485315..56241746abbd 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -23,13 +23,6 @@
* so the code in this file is compiled twice, once per pte size.
*/
-/*
- * This is used to catch non optimized PT_GUEST_(DIRTY|ACCESS)_SHIFT macro
- * uses for EPT without A/D paging type.
- */
-extern u64 __pure __using_nonexistent_pte_bit(void)
- __compiletime_error("wrong use of PT_GUEST_(DIRTY|ACCESS)_SHIFT");
-
#if PTTYPE == 64
#define pt_element_t u64
#define guest_walker guest_walker64
@@ -39,10 +32,9 @@ extern u64 __pure __using_nonexistent_pte_bit(void)
#define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl)
#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
#define PT_LEVEL_BITS PT64_LEVEL_BITS
- #define PT_GUEST_ACCESSED_MASK PT_ACCESSED_MASK
- #define PT_GUEST_DIRTY_MASK PT_DIRTY_MASK
#define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT
#define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT
+ #define PT_HAVE_ACCESSED_DIRTY(mmu) true
#ifdef CONFIG_X86_64
#define PT_MAX_FULL_LEVELS 4
#define CMPXCHG cmpxchg
@@ -60,10 +52,9 @@ extern u64 __pure __using_nonexistent_pte_bit(void)
#define PT_INDEX(addr, level) PT32_INDEX(addr, level)
#define PT_LEVEL_BITS PT32_LEVEL_BITS
#define PT_MAX_FULL_LEVELS 2
- #define PT_GUEST_ACCESSED_MASK PT_ACCESSED_MASK
- #define PT_GUEST_DIRTY_MASK PT_DIRTY_MASK
#define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT
#define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT
+ #define PT_HAVE_ACCESSED_DIRTY(mmu) true
#define CMPXCHG cmpxchg
#elif PTTYPE == PTTYPE_EPT
#define pt_element_t u64
@@ -74,16 +65,18 @@ extern u64 __pure __using_nonexistent_pte_bit(void)
#define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl)
#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
#define PT_LEVEL_BITS PT64_LEVEL_BITS
- #define PT_GUEST_ACCESSED_MASK 0
- #define PT_GUEST_DIRTY_MASK 0
- #define PT_GUEST_DIRTY_SHIFT __using_nonexistent_pte_bit()
- #define PT_GUEST_ACCESSED_SHIFT __using_nonexistent_pte_bit()
+ #define PT_GUEST_DIRTY_SHIFT 9
+ #define PT_GUEST_ACCESSED_SHIFT 8
+ #define PT_HAVE_ACCESSED_DIRTY(mmu) ((mmu)->ept_ad)
#define CMPXCHG cmpxchg64
#define PT_MAX_FULL_LEVELS 4
#else
#error Invalid PTTYPE value
#endif
+#define PT_GUEST_DIRTY_MASK (1 << PT_GUEST_DIRTY_SHIFT)
+#define PT_GUEST_ACCESSED_MASK (1 << PT_GUEST_ACCESSED_SHIFT)
+
#define gpte_to_gfn_lvl FNAME(gpte_to_gfn_lvl)
#define gpte_to_gfn(pte) gpte_to_gfn_lvl((pte), PT_PAGE_TABLE_LEVEL)
@@ -111,12 +104,13 @@ static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl)
return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT;
}
-static inline void FNAME(protect_clean_gpte)(unsigned *access, unsigned gpte)
+static inline void FNAME(protect_clean_gpte)(struct kvm_mmu *mmu, unsigned *access,
+ unsigned gpte)
{
unsigned mask;
/* dirty bit is not supported, so no need to track it */
- if (!PT_GUEST_DIRTY_MASK)
+ if (!PT_HAVE_ACCESSED_DIRTY(mmu))
return;
BUILD_BUG_ON(PT_WRITABLE_MASK != ACC_WRITE_MASK);
@@ -171,7 +165,7 @@ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
goto no_present;
/* if accessed bit is not supported prefetch non accessed gpte */
- if (PT_GUEST_ACCESSED_MASK && !(gpte & PT_GUEST_ACCESSED_MASK))
+ if (PT_HAVE_ACCESSED_DIRTY(&vcpu->arch.mmu) && !(gpte & PT_GUEST_ACCESSED_MASK))
goto no_present;
return false;
@@ -217,7 +211,7 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
int ret;
/* dirty/accessed bits are not supported, so no need to update them */
- if (!PT_GUEST_DIRTY_MASK)
+ if (!PT_HAVE_ACCESSED_DIRTY(mmu))
return 0;
for (level = walker->max_level; level >= walker->level; --level) {
@@ -232,6 +226,10 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
if (level == walker->level && write_fault &&
!(pte & PT_GUEST_DIRTY_MASK)) {
trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
+#if PTTYPE == PTTYPE_EPT
+ if (kvm_arch_write_log_dirty(vcpu))
+ return -EINVAL;
+#endif
pte |= PT_GUEST_DIRTY_MASK;
}
if (pte == orig_pte)
@@ -286,7 +284,9 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
pt_element_t __user *uninitialized_var(ptep_user);
gfn_t table_gfn;
unsigned index, pt_access, pte_access, accessed_dirty, pte_pkey;
+ unsigned nested_access;
gpa_t pte_gpa;
+ bool have_ad;
int offset;
const int write_fault = access & PFERR_WRITE_MASK;
const int user_fault = access & PFERR_USER_MASK;
@@ -299,6 +299,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
retry_walk:
walker->level = mmu->root_level;
pte = mmu->get_cr3(vcpu);
+ have_ad = PT_HAVE_ACCESSED_DIRTY(mmu);
#if PTTYPE == 64
if (walker->level == PT32E_ROOT_LEVEL) {
@@ -312,7 +313,15 @@ retry_walk:
walker->max_level = walker->level;
ASSERT(!(is_long_mode(vcpu) && !is_pae(vcpu)));
- accessed_dirty = PT_GUEST_ACCESSED_MASK;
+ accessed_dirty = have_ad ? PT_GUEST_ACCESSED_MASK : 0;
+
+ /*
+ * FIXME: on Intel processors, loads of the PDPTE registers for PAE paging
+ * by the MOV to CR instruction are treated as reads and do not cause the
+ * processor to set the dirty flag in any EPT paging-structure entry.
+ */
+ nested_access = (have_ad ? PFERR_WRITE_MASK : 0) | PFERR_USER_MASK;
+
pt_access = pte_access = ACC_ALL;
++walker->level;
@@ -332,7 +341,7 @@ retry_walk:
walker->pte_gpa[walker->level - 1] = pte_gpa;
real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn),
- PFERR_USER_MASK|PFERR_WRITE_MASK,
+ nested_access,
&walker->fault);
/*
@@ -394,7 +403,7 @@ retry_walk:
walker->gfn = real_gpa >> PAGE_SHIFT;
if (!write_fault)
- FNAME(protect_clean_gpte)(&pte_access, pte);
+ FNAME(protect_clean_gpte)(mmu, &pte_access, pte);
else
/*
* On a write fault, fold the dirty bit into accessed_dirty.
@@ -485,7 +494,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
gfn = gpte_to_gfn(gpte);
pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
- FNAME(protect_clean_gpte)(&pte_access, gpte);
+ FNAME(protect_clean_gpte)(&vcpu->arch.mmu, &pte_access, gpte);
pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
no_dirty_log && (pte_access & ACC_WRITE_MASK));
if (is_error_pfn(pfn))
@@ -979,7 +988,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
gfn = gpte_to_gfn(gpte);
pte_access = sp->role.access;
pte_access &= FNAME(gpte_access)(vcpu, gpte);
- FNAME(protect_clean_gpte)(&pte_access, gpte);
+ FNAME(protect_clean_gpte)(&vcpu->arch.mmu, &pte_access, gpte);
if (sync_mmio_spte(vcpu, &sp->spt[i], gfn, pte_access,
&nr_present))
@@ -1025,3 +1034,4 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
#undef PT_GUEST_DIRTY_MASK
#undef PT_GUEST_DIRTY_SHIFT
#undef PT_GUEST_ACCESSED_SHIFT
+#undef PT_HAVE_ACCESSED_DIRTY
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 5f48f62b8dc2..c27ac6923a18 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1196,10 +1196,13 @@ static void init_vmcb(struct vcpu_svm *svm)
set_intercept(svm, INTERCEPT_CLGI);
set_intercept(svm, INTERCEPT_SKINIT);
set_intercept(svm, INTERCEPT_WBINVD);
- set_intercept(svm, INTERCEPT_MONITOR);
- set_intercept(svm, INTERCEPT_MWAIT);
set_intercept(svm, INTERCEPT_XSETBV);
+ if (!kvm_mwait_in_guest()) {
+ set_intercept(svm, INTERCEPT_MONITOR);
+ set_intercept(svm, INTERCEPT_MWAIT);
+ }
+
control->iopm_base_pa = iopm_base;
control->msrpm_base_pa = __pa(svm->msrpm);
control->int_ctl = V_INTR_MASKING_MASK;
@@ -5254,6 +5257,12 @@ static inline void avic_post_state_restore(struct kvm_vcpu *vcpu)
avic_handle_ldr_update(vcpu);
}
+static void svm_setup_mce(struct kvm_vcpu *vcpu)
+{
+ /* [63:9] are reserved. */
+ vcpu->arch.mcg_cap &= 0x1ff;
+}
+
static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled,
@@ -5365,6 +5374,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.pmu_ops = &amd_pmu_ops,
.deliver_posted_interrupt = svm_deliver_avic_intr,
.update_pi_irte = svm_update_pi_irte,
+ .setup_mce = svm_setup_mce,
};
static int __init svm_init(void)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1a471e5f963f..c6f4ad44aa95 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -84,9 +84,6 @@ module_param_named(eptad, enable_ept_ad_bits, bool, S_IRUGO);
static bool __read_mostly emulate_invalid_guest_state = true;
module_param(emulate_invalid_guest_state, bool, S_IRUGO);
-static bool __read_mostly vmm_exclusive = 1;
-module_param(vmm_exclusive, bool, S_IRUGO);
-
static bool __read_mostly fasteoi = 1;
module_param(fasteoi, bool, S_IRUGO);
@@ -251,6 +248,7 @@ struct __packed vmcs12 {
u64 xss_exit_bitmap;
u64 guest_physical_address;
u64 vmcs_link_pointer;
+ u64 pml_address;
u64 guest_ia32_debugctl;
u64 guest_ia32_pat;
u64 guest_ia32_efer;
@@ -372,6 +370,7 @@ struct __packed vmcs12 {
u16 guest_ldtr_selector;
u16 guest_tr_selector;
u16 guest_intr_status;
+ u16 guest_pml_index;
u16 host_es_selector;
u16 host_cs_selector;
u16 host_ss_selector;
@@ -410,6 +409,7 @@ struct nested_vmx {
/* Has the level1 guest done vmxon? */
bool vmxon;
gpa_t vmxon_ptr;
+ bool pml_full;
/* The guest-physical address of the current VMCS L1 keeps for L2 */
gpa_t current_vmptr;
@@ -615,10 +615,6 @@ struct vcpu_vmx {
int vpid;
bool emulation_required;
- /* Support for vnmi-less CPUs */
- int soft_vnmi_blocked;
- ktime_t entry_time;
- s64 vnmi_blocked_time;
u32 exit_reason;
/* Posted interrupt descriptor */
@@ -749,6 +745,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
FIELD(GUEST_LDTR_SELECTOR, guest_ldtr_selector),
FIELD(GUEST_TR_SELECTOR, guest_tr_selector),
FIELD(GUEST_INTR_STATUS, guest_intr_status),
+ FIELD(GUEST_PML_INDEX, guest_pml_index),
FIELD(HOST_ES_SELECTOR, host_es_selector),
FIELD(HOST_CS_SELECTOR, host_cs_selector),
FIELD(HOST_SS_SELECTOR, host_ss_selector),
@@ -774,6 +771,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap),
FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address),
FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer),
+ FIELD64(PML_ADDRESS, pml_address),
FIELD64(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl),
FIELD64(GUEST_IA32_PAT, guest_ia32_pat),
FIELD64(GUEST_IA32_EFER, guest_ia32_efer),
@@ -914,8 +912,6 @@ static void nested_release_page_clean(struct page *page)
static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu);
static u64 construct_eptp(unsigned long root_hpa);
-static void kvm_cpu_vmxon(u64 addr);
-static void kvm_cpu_vmxoff(void);
static bool vmx_xsaves_supported(void);
static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
static void vmx_set_segment(struct kvm_vcpu *vcpu,
@@ -1289,11 +1285,6 @@ static inline bool cpu_has_vmx_invpcid(void)
SECONDARY_EXEC_ENABLE_INVPCID;
}
-static inline bool cpu_has_virtual_nmis(void)
-{
- return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
-}
-
static inline bool cpu_has_vmx_wbinvd_exit(void)
{
return vmcs_config.cpu_based_2nd_exec_ctrl &
@@ -1328,6 +1319,11 @@ static inline bool report_flexpriority(void)
return flexpriority_enabled;
}
+static inline unsigned nested_cpu_vmx_misc_cr3_count(struct kvm_vcpu *vcpu)
+{
+ return vmx_misc_cr3_count(to_vmx(vcpu)->nested.nested_vmx_misc_low);
+}
+
static inline bool nested_cpu_has(struct vmcs12 *vmcs12, u32 bit)
{
return vmcs12->cpu_based_vm_exec_control & bit;
@@ -1362,6 +1358,11 @@ static inline bool nested_cpu_has_xsaves(struct vmcs12 *vmcs12)
vmx_xsaves_supported();
}
+static inline bool nested_cpu_has_pml(struct vmcs12 *vmcs12)
+{
+ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_PML);
+}
+
static inline bool nested_cpu_has_virt_x2apic_mode(struct vmcs12 *vmcs12)
{
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
@@ -2238,15 +2239,10 @@ static void decache_tsc_multiplier(struct vcpu_vmx *vmx)
static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
bool already_loaded = vmx->loaded_vmcs->cpu == cpu;
- if (!vmm_exclusive)
- kvm_cpu_vmxon(phys_addr);
- else if (!already_loaded)
- loaded_vmcs_clear(vmx->loaded_vmcs);
-
if (!already_loaded) {
+ loaded_vmcs_clear(vmx->loaded_vmcs);
local_irq_disable();
crash_disable_local_vmclear(cpu);
@@ -2324,11 +2320,6 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
vmx_vcpu_pi_put(vcpu);
__vmx_load_host_state(to_vmx(vcpu));
- if (!vmm_exclusive) {
- __loaded_vmcs_clear(to_vmx(vcpu)->loaded_vmcs);
- vcpu->cpu = -1;
- kvm_cpu_vmxoff();
- }
}
static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu);
@@ -2752,6 +2743,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
vmx->nested.nested_vmx_secondary_ctls_high);
vmx->nested.nested_vmx_secondary_ctls_low = 0;
vmx->nested.nested_vmx_secondary_ctls_high &=
+ SECONDARY_EXEC_RDRAND | SECONDARY_EXEC_RDSEED |
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
SECONDARY_EXEC_RDTSCP |
SECONDARY_EXEC_DESC |
@@ -2766,14 +2758,19 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
vmx->nested.nested_vmx_secondary_ctls_high |=
SECONDARY_EXEC_ENABLE_EPT;
vmx->nested.nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
- VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT |
- VMX_EPT_INVEPT_BIT;
+ VMX_EPTP_WB_BIT | VMX_EPT_INVEPT_BIT;
if (cpu_has_vmx_ept_execute_only())
vmx->nested.nested_vmx_ept_caps |=
VMX_EPT_EXECUTE_ONLY_BIT;
vmx->nested.nested_vmx_ept_caps &= vmx_capability.ept;
vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT |
- VMX_EPT_EXTENT_CONTEXT_BIT;
+ VMX_EPT_EXTENT_CONTEXT_BIT | VMX_EPT_2MB_PAGE_BIT |
+ VMX_EPT_1GB_PAGE_BIT;
+ if (enable_ept_ad_bits) {
+ vmx->nested.nested_vmx_secondary_ctls_high |=
+ SECONDARY_EXEC_ENABLE_PML;
+ vmx->nested.nested_vmx_ept_caps |= VMX_EPT_AD_BIT;
+ }
} else
vmx->nested.nested_vmx_ept_caps = 0;
@@ -3420,6 +3417,7 @@ static __init int vmx_disabled_by_bios(void)
static void kvm_cpu_vmxon(u64 addr)
{
+ cr4_set_bits(X86_CR4_VMXE);
intel_pt_handle_vmx(1);
asm volatile (ASM_VMX_VMXON_RAX
@@ -3462,12 +3460,8 @@ static int hardware_enable(void)
/* enable and lock */
wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits);
}
- cr4_set_bits(X86_CR4_VMXE);
-
- if (vmm_exclusive) {
- kvm_cpu_vmxon(phys_addr);
- ept_sync_global();
- }
+ kvm_cpu_vmxon(phys_addr);
+ ept_sync_global();
return 0;
}
@@ -3491,15 +3485,13 @@ static void kvm_cpu_vmxoff(void)
asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
intel_pt_handle_vmx(0);
+ cr4_clear_bits(X86_CR4_VMXE);
}
static void hardware_disable(void)
{
- if (vmm_exclusive) {
- vmclear_local_loaded_vmcss();
- kvm_cpu_vmxoff();
- }
- cr4_clear_bits(X86_CR4_VMXE);
+ vmclear_local_loaded_vmcss();
+ kvm_cpu_vmxoff();
}
static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
@@ -3549,11 +3541,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
CPU_BASED_USE_IO_BITMAPS |
CPU_BASED_MOV_DR_EXITING |
CPU_BASED_USE_TSC_OFFSETING |
- CPU_BASED_MWAIT_EXITING |
- CPU_BASED_MONITOR_EXITING |
CPU_BASED_INVLPG_EXITING |
CPU_BASED_RDPMC_EXITING;
+ if (!kvm_mwait_in_guest())
+ min |= CPU_BASED_MWAIT_EXITING |
+ CPU_BASED_MONITOR_EXITING;
+
opt = CPU_BASED_TPR_SHADOW |
CPU_BASED_USE_MSR_BITMAPS |
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
@@ -3619,9 +3613,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
&_vmexit_control) < 0)
return -EIO;
- min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
- opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR |
- PIN_BASED_VMX_PREEMPTION_TIMER;
+ min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING |
+ PIN_BASED_VIRTUAL_NMIS;
+ opt = PIN_BASED_POSTED_INTR | PIN_BASED_VMX_PREEMPTION_TIMER;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
&_pin_based_exec_control) < 0)
return -EIO;
@@ -4013,11 +4007,12 @@ static void exit_lmode(struct kvm_vcpu *vcpu)
static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid)
{
- vpid_sync_context(vpid);
if (enable_ept) {
if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
return;
ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa));
+ } else {
+ vpid_sync_context(vpid);
}
}
@@ -5293,8 +5288,6 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vmx->rmode.vm86_active = 0;
- vmx->soft_vnmi_blocked = 0;
-
vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
kvm_set_cr8(vcpu, 0);
@@ -5414,8 +5407,7 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
static void enable_nmi_window(struct kvm_vcpu *vcpu)
{
- if (!cpu_has_virtual_nmis() ||
- vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
+ if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
enable_irq_window(vcpu);
return;
}
@@ -5456,19 +5448,6 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
if (!is_guest_mode(vcpu)) {
- if (!cpu_has_virtual_nmis()) {
- /*
- * Tracking the NMI-blocked state in software is built upon
- * finding the next open IRQ window. This, in turn, depends on
- * well-behaving guests: They have to keep IRQs disabled at
- * least as long as the NMI handler runs. Otherwise we may
- * cause NMI nesting, maybe breaking the guest. But as this is
- * highly unlikely, we can live with the residual risk.
- */
- vmx->soft_vnmi_blocked = 1;
- vmx->vnmi_blocked_time = 0;
- }
-
++vcpu->stat.nmi_injections;
vmx->nmi_known_unmasked = false;
}
@@ -5485,8 +5464,6 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
{
- if (!cpu_has_virtual_nmis())
- return to_vmx(vcpu)->soft_vnmi_blocked;
if (to_vmx(vcpu)->nmi_known_unmasked)
return false;
return vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
@@ -5496,20 +5473,13 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- if (!cpu_has_virtual_nmis()) {
- if (vmx->soft_vnmi_blocked != masked) {
- vmx->soft_vnmi_blocked = masked;
- vmx->vnmi_blocked_time = 0;
- }
- } else {
- vmx->nmi_known_unmasked = !masked;
- if (masked)
- vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
- GUEST_INTR_STATE_NMI);
- else
- vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
- GUEST_INTR_STATE_NMI);
- }
+ vmx->nmi_known_unmasked = !masked;
+ if (masked)
+ vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+ GUEST_INTR_STATE_NMI);
+ else
+ vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
+ GUEST_INTR_STATE_NMI);
}
static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
@@ -5517,9 +5487,6 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
if (to_vmx(vcpu)->nested.nested_run_pending)
return 0;
- if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
- return 0;
-
return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
(GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI
| GUEST_INTR_STATE_NMI));
@@ -6240,21 +6207,18 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
unsigned long exit_qualification;
gpa_t gpa;
u32 error_code;
- int gla_validity;
exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
- gla_validity = (exit_qualification >> 7) & 0x3;
- if (gla_validity == 0x2) {
- printk(KERN_ERR "EPT: Handling EPT violation failed!\n");
- printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n",
- (long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS),
- vmcs_readl(GUEST_LINEAR_ADDRESS));
- printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n",
- (long unsigned int)exit_qualification);
- vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
- vcpu->run->hw.hardware_exit_reason = EXIT_REASON_EPT_VIOLATION;
- return 0;
+ if (is_guest_mode(vcpu)
+ && !(exit_qualification & EPT_VIOLATION_GVA_TRANSLATED)) {
+ /*
+ * Fix up exit_qualification according to whether guest
+ * page table accesses are reads or writes.
+ */
+ u64 eptp = nested_ept_get_cr3(vcpu);
+ if (!(eptp & VMX_EPT_AD_ENABLE_BIT))
+ exit_qualification &= ~EPT_VIOLATION_ACC_WRITE;
}
/*
@@ -6264,7 +6228,6 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
* AAK134, BY25.
*/
if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
- cpu_has_virtual_nmis() &&
(exit_qualification & INTR_INFO_UNBLOCK_NMI))
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI);
@@ -6350,7 +6313,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
if (intr_window_requested && vmx_interrupt_allowed(vcpu))
return handle_interrupt_window(&vmx->vcpu);
- if (test_bit(KVM_REQ_EVENT, &vcpu->requests))
+ if (kvm_test_request(KVM_REQ_EVENT, vcpu))
return 1;
err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE);
@@ -7103,34 +7066,24 @@ out_msr_bitmap:
static int handle_vmon(struct kvm_vcpu *vcpu)
{
int ret;
- struct kvm_segment cs;
struct vcpu_vmx *vmx = to_vmx(vcpu);
const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED
| FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
- /* The Intel VMX Instruction Reference lists a bunch of bits that
- * are prerequisite to running VMXON, most notably cr4.VMXE must be
- * set to 1 (see vmx_set_cr4() for when we allow the guest to set this).
- * Otherwise, we should fail with #UD. We test these now:
+ /*
+ * The Intel VMX Instruction Reference lists a bunch of bits that are
+ * prerequisite to running VMXON, most notably cr4.VMXE must be set to
+ * 1 (see vmx_set_cr4() for when we allow the guest to set this).
+ * Otherwise, we should fail with #UD. But most faulting conditions
+ * have already been checked by hardware, prior to the VM-exit for
+ * VMXON. We do test guest cr4.VMXE because processor CR4 always has
+ * that bit set to 1 in non-root mode.
*/
- if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE) ||
- !kvm_read_cr0_bits(vcpu, X86_CR0_PE) ||
- (vmx_get_rflags(vcpu) & X86_EFLAGS_VM)) {
+ if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE)) {
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}
- vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
- if (is_long_mode(vcpu) && !cs.l) {
- kvm_queue_exception(vcpu, UD_VECTOR);
- return 1;
- }
-
- if (vmx_get_cpl(vcpu)) {
- kvm_inject_gp(vcpu, 0);
- return 1;
- }
-
if (vmx->nested.vmxon) {
nested_vmx_failValid(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
return kvm_skip_emulated_instruction(vcpu);
@@ -7157,29 +7110,15 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
* Intel's VMX Instruction Reference specifies a common set of prerequisites
* for running VMX instructions (except VMXON, whose prerequisites are
* slightly different). It also specifies what exception to inject otherwise.
+ * Note that many of these exceptions have priority over VM exits, so they
+ * don't have to be checked again here.
*/
static int nested_vmx_check_permission(struct kvm_vcpu *vcpu)
{
- struct kvm_segment cs;
- struct vcpu_vmx *vmx = to_vmx(vcpu);
-
- if (!vmx->nested.vmxon) {
+ if (!to_vmx(vcpu)->nested.vmxon) {
kvm_queue_exception(vcpu, UD_VECTOR);
return 0;
}
-
- vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
- if ((vmx_get_rflags(vcpu) & X86_EFLAGS_VM) ||
- (is_long_mode(vcpu) && !cs.l)) {
- kvm_queue_exception(vcpu, UD_VECTOR);
- return 0;
- }
-
- if (vmx_get_cpl(vcpu)) {
- kvm_inject_gp(vcpu, 0);
- return 0;
- }
-
return 1;
}
@@ -7523,7 +7462,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
if (get_vmx_mem_address(vcpu, exit_qualification,
vmx_instruction_info, true, &gva))
return 1;
- /* _system ok, as nested_vmx_check_permission verified cpl=0 */
+ /* _system ok, as hardware has verified cpl=0 */
kvm_write_guest_virt_system(&vcpu->arch.emulate_ctxt, gva,
&field_value, (is_long_mode(vcpu) ? 8 : 4), NULL);
}
@@ -7656,7 +7595,7 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
if (get_vmx_mem_address(vcpu, exit_qualification,
vmx_instruction_info, true, &vmcs_gva))
return 1;
- /* ok to use *_system, as nested_vmx_check_permission verified cpl=0 */
+ /* ok to use *_system, as hardware has verified cpl=0 */
if (kvm_write_guest_virt_system(&vcpu->arch.emulate_ctxt, vmcs_gva,
(void *)&to_vmx(vcpu)->nested.current_vmptr,
sizeof(u64), &e)) {
@@ -7689,11 +7628,6 @@ static int handle_invept(struct kvm_vcpu *vcpu)
if (!nested_vmx_check_permission(vcpu))
return 1;
- if (!kvm_read_cr0_bits(vcpu, X86_CR0_PE)) {
- kvm_queue_exception(vcpu, UD_VECTOR);
- return 1;
- }
-
vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
@@ -7815,7 +7749,6 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
* "blocked by NMI" bit has to be set before next VM entry.
*/
if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
- cpu_has_virtual_nmis() &&
(exit_qualification & INTR_INFO_UNBLOCK_NMI))
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
GUEST_INTR_STATE_NMI);
@@ -8117,6 +8050,10 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
case EXIT_REASON_RDPMC:
return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING);
+ case EXIT_REASON_RDRAND:
+ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDRAND);
+ case EXIT_REASON_RDSEED:
+ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDSEED);
case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP:
return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING);
case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR:
@@ -8195,7 +8132,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
case EXIT_REASON_PREEMPTION_TIMER:
return false;
case EXIT_REASON_PML_FULL:
- /* We don't expose PML support to L1. */
+ /* We emulate PML support to L1. */
return false;
default:
return true;
@@ -8490,26 +8427,6 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
return 0;
}
- if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked &&
- !(is_guest_mode(vcpu) && nested_cpu_has_virtual_nmis(
- get_vmcs12(vcpu))))) {
- if (vmx_interrupt_allowed(vcpu)) {
- vmx->soft_vnmi_blocked = 0;
- } else if (vmx->vnmi_blocked_time > 1000000000LL &&
- vcpu->arch.nmi_pending) {
- /*
- * This CPU don't support us in finding the end of an
- * NMI-blocked window if the guest runs with IRQs
- * disabled. So we pull the trigger after 1 s of
- * futile waiting, but inform the user about this.
- */
- printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
- "state on VCPU %d after 1 s timeout\n",
- __func__, vcpu->vcpu_id);
- vmx->soft_vnmi_blocked = 0;
- }
- }
-
if (exit_reason < kvm_vmx_max_exit_handlers
&& kvm_vmx_exit_handlers[exit_reason])
return kvm_vmx_exit_handlers[exit_reason](vcpu);
@@ -8785,37 +8702,33 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;
- if (cpu_has_virtual_nmis()) {
- if (vmx->nmi_known_unmasked)
- return;
- /*
- * Can't use vmx->exit_intr_info since we're not sure what
- * the exit reason is.
- */
- exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
- unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
- vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
- /*
- * SDM 3: 27.7.1.2 (September 2008)
- * Re-set bit "block by NMI" before VM entry if vmexit caused by
- * a guest IRET fault.
- * SDM 3: 23.2.2 (September 2008)
- * Bit 12 is undefined in any of the following cases:
- * If the VM exit sets the valid bit in the IDT-vectoring
- * information field.
- * If the VM exit is due to a double fault.
- */
- if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
- vector != DF_VECTOR && !idtv_info_valid)
- vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
- GUEST_INTR_STATE_NMI);
- else
- vmx->nmi_known_unmasked =
- !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
- & GUEST_INTR_STATE_NMI);
- } else if (unlikely(vmx->soft_vnmi_blocked))
- vmx->vnmi_blocked_time +=
- ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time));
+ if (vmx->nmi_known_unmasked)
+ return;
+ /*
+ * Can't use vmx->exit_intr_info since we're not sure what
+ * the exit reason is.
+ */
+ exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+ unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
+ vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
+ /*
+ * SDM 3: 27.7.1.2 (September 2008)
+ * Re-set bit "block by NMI" before VM entry if vmexit caused by
+ * a guest IRET fault.
+ * SDM 3: 23.2.2 (September 2008)
+ * Bit 12 is undefined in any of the following cases:
+ * If the VM exit sets the valid bit in the IDT-vectoring
+ * information field.
+ * If the VM exit is due to a double fault.
+ */
+ if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
+ vector != DF_VECTOR && !idtv_info_valid)
+ vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+ GUEST_INTR_STATE_NMI);
+ else
+ vmx->nmi_known_unmasked =
+ !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
+ & GUEST_INTR_STATE_NMI);
}
static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
@@ -8932,10 +8845,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long debugctlmsr, cr4;
- /* Record the guest's net vcpu time for enforced NMI injections. */
- if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
- vmx->entry_time = ktime_get();
-
/* Don't enter VMX if guest state is invalid, let the exit handler
start emulation until we arrive back to a valid state */
if (vmx->emulation_required)
@@ -9143,16 +9052,16 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmx_complete_interrupts(vmx);
}
-static void vmx_load_vmcs01(struct kvm_vcpu *vcpu)
+static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
int cpu;
- if (vmx->loaded_vmcs == &vmx->vmcs01)
+ if (vmx->loaded_vmcs == vmcs)
return;
cpu = get_cpu();
- vmx->loaded_vmcs = &vmx->vmcs01;
+ vmx->loaded_vmcs = vmcs;
vmx_vcpu_put(vcpu);
vmx_vcpu_load(vcpu, cpu);
vcpu->cpu = cpu;
@@ -9170,7 +9079,7 @@ static void vmx_free_vcpu_nested(struct kvm_vcpu *vcpu)
r = vcpu_load(vcpu);
BUG_ON(r);
- vmx_load_vmcs01(vcpu);
+ vmx_switch_vmcs(vcpu, &vmx->vmcs01);
free_nested(vmx);
vcpu_put(vcpu);
}
@@ -9231,11 +9140,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
vmx->loaded_vmcs->shadow_vmcs = NULL;
if (!vmx->loaded_vmcs->vmcs)
goto free_msrs;
- if (!vmm_exclusive)
- kvm_cpu_vmxon(__pa(per_cpu(vmxarea, raw_smp_processor_id())));
loaded_vmcs_init(vmx->loaded_vmcs);
- if (!vmm_exclusive)
- kvm_cpu_vmxoff();
cpu = get_cpu();
vmx_vcpu_load(&vmx->vcpu, cpu);
@@ -9477,13 +9382,20 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
struct x86_exception *fault)
{
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 exit_reason;
+ unsigned long exit_qualification = vcpu->arch.exit_qualification;
- if (fault->error_code & PFERR_RSVD_MASK)
+ if (vmx->nested.pml_full) {
+ exit_reason = EXIT_REASON_PML_FULL;
+ vmx->nested.pml_full = false;
+ exit_qualification &= INTR_INFO_UNBLOCK_NMI;
+ } else if (fault->error_code & PFERR_RSVD_MASK)
exit_reason = EXIT_REASON_EPT_MISCONFIG;
else
exit_reason = EXIT_REASON_EPT_VIOLATION;
- nested_vmx_vmexit(vcpu, exit_reason, 0, vcpu->arch.exit_qualification);
+
+ nested_vmx_vmexit(vcpu, exit_reason, 0, exit_qualification);
vmcs12->guest_physical_address = fault->address;
}
@@ -9495,17 +9407,26 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu)
return get_vmcs12(vcpu)->ept_pointer;
}
-static void nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
+static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
{
+ u64 eptp;
+
WARN_ON(mmu_is_nested(vcpu));
+ eptp = nested_ept_get_cr3(vcpu);
+ if ((eptp & VMX_EPT_AD_ENABLE_BIT) && !enable_ept_ad_bits)
+ return 1;
+
+ kvm_mmu_unload(vcpu);
kvm_init_shadow_ept_mmu(vcpu,
to_vmx(vcpu)->nested.nested_vmx_ept_caps &
- VMX_EPT_EXECUTE_ONLY_BIT);
+ VMX_EPT_EXECUTE_ONLY_BIT,
+ eptp & VMX_EPT_AD_ENABLE_BIT);
vcpu->arch.mmu.set_cr3 = vmx_set_cr3;
vcpu->arch.mmu.get_cr3 = nested_ept_get_cr3;
vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault;
vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
+ return 0;
}
static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
@@ -9817,6 +9738,22 @@ static int nested_vmx_check_msr_switch_controls(struct kvm_vcpu *vcpu,
return 0;
}
+static int nested_vmx_check_pml_controls(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12)
+{
+ u64 address = vmcs12->pml_address;
+ int maxphyaddr = cpuid_maxphyaddr(vcpu);
+
+ if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_PML)) {
+ if (!nested_cpu_has_ept(vmcs12) ||
+ !IS_ALIGNED(address, 4096) ||
+ address >> maxphyaddr)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu,
struct vmx_msr_entry *e)
{
@@ -9990,7 +9927,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
bool from_vmentry, u32 *entry_failure_code)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- u32 exec_control;
+ u32 exec_control, vmcs12_exec_ctrl;
vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
@@ -10121,8 +10058,11 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
SECONDARY_EXEC_APIC_REGISTER_VIRT);
if (nested_cpu_has(vmcs12,
- CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
- exec_control |= vmcs12->secondary_vm_exec_control;
+ CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) {
+ vmcs12_exec_ctrl = vmcs12->secondary_vm_exec_control &
+ ~SECONDARY_EXEC_ENABLE_PML;
+ exec_control |= vmcs12_exec_ctrl;
+ }
if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) {
vmcs_write64(EOI_EXIT_BITMAP0,
@@ -10279,8 +10219,10 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
}
if (nested_cpu_has_ept(vmcs12)) {
- kvm_mmu_unload(vcpu);
- nested_ept_init_mmu_context(vcpu);
+ if (nested_ept_init_mmu_context(vcpu)) {
+ *entry_failure_code = ENTRY_FAIL_DEFAULT;
+ return 1;
+ }
} else if (nested_cpu_has2(vmcs12,
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
vmx_flush_tlb_ept_only(vcpu);
@@ -10350,12 +10292,16 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
if (nested_vmx_check_msr_switch_controls(vcpu, vmcs12))
return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+ if (nested_vmx_check_pml_controls(vcpu, vmcs12))
+ return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
vmx->nested.nested_vmx_procbased_ctls_low,
vmx->nested.nested_vmx_procbased_ctls_high) ||
- !vmx_control_verify(vmcs12->secondary_vm_exec_control,
- vmx->nested.nested_vmx_secondary_ctls_low,
- vmx->nested.nested_vmx_secondary_ctls_high) ||
+ (nested_cpu_has(vmcs12, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) &&
+ !vmx_control_verify(vmcs12->secondary_vm_exec_control,
+ vmx->nested.nested_vmx_secondary_ctls_low,
+ vmx->nested.nested_vmx_secondary_ctls_high)) ||
!vmx_control_verify(vmcs12->pin_based_vm_exec_control,
vmx->nested.nested_vmx_pinbased_ctls_low,
vmx->nested.nested_vmx_pinbased_ctls_high) ||
@@ -10367,6 +10313,9 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmx->nested.nested_vmx_entry_ctls_high))
return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+ if (vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu))
+ return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
if (!nested_host_cr0_valid(vcpu, vmcs12->host_cr0) ||
!nested_host_cr4_valid(vcpu, vmcs12->host_cr4) ||
!nested_cr3_valid(vcpu, vmcs12->host_cr3))
@@ -10434,7 +10383,6 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
struct loaded_vmcs *vmcs02;
- int cpu;
u32 msr_entry_idx;
u32 exit_qual;
@@ -10447,18 +10395,12 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
- cpu = get_cpu();
- vmx->loaded_vmcs = vmcs02;
- vmx_vcpu_put(vcpu);
- vmx_vcpu_load(vcpu, cpu);
- vcpu->cpu = cpu;
- put_cpu();
-
+ vmx_switch_vmcs(vcpu, vmcs02);
vmx_segment_cache_clear(vmx);
if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual)) {
leave_guest_mode(vcpu);
- vmx_load_vmcs01(vcpu);
+ vmx_switch_vmcs(vcpu, &vmx->vmcs01);
nested_vmx_entry_failure(vcpu, vmcs12,
EXIT_REASON_INVALID_STATE, exit_qual);
return 1;
@@ -10471,7 +10413,7 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
vmcs12->vm_entry_msr_load_count);
if (msr_entry_idx) {
leave_guest_mode(vcpu);
- vmx_load_vmcs01(vcpu);
+ vmx_switch_vmcs(vcpu, &vmx->vmcs01);
nested_vmx_entry_failure(vcpu, vmcs12,
EXIT_REASON_MSR_LOAD_FAIL, msr_entry_idx);
return 1;
@@ -11039,7 +10981,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
if (unlikely(vmx->fail))
vm_inst_error = vmcs_read32(VM_INSTRUCTION_ERROR);
- vmx_load_vmcs01(vcpu);
+ vmx_switch_vmcs(vcpu, &vmx->vmcs01);
if ((exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)
&& nested_exit_intr_ack_set(vcpu)) {
@@ -11251,6 +11193,46 @@ static void vmx_flush_log_dirty(struct kvm *kvm)
kvm_flush_pml_buffers(kvm);
}
+static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu)
+{
+ struct vmcs12 *vmcs12;
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ gpa_t gpa;
+ struct page *page = NULL;
+ u64 *pml_address;
+
+ if (is_guest_mode(vcpu)) {
+ WARN_ON_ONCE(vmx->nested.pml_full);
+
+ /*
+ * Check if PML is enabled for the nested guest.
+ * Whether eptp bit 6 is set is already checked
+ * as part of A/D emulation.
+ */
+ vmcs12 = get_vmcs12(vcpu);
+ if (!nested_cpu_has_pml(vmcs12))
+ return 0;
+
+ if (vmcs12->guest_pml_index > PML_ENTITY_NUM) {
+ vmx->nested.pml_full = true;
+ return 1;
+ }
+
+ gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS) & ~0xFFFull;
+
+ page = nested_get_page(vcpu, vmcs12->pml_address);
+ if (!page)
+ return 0;
+
+ pml_address = kmap(page);
+ pml_address[vmcs12->guest_pml_index--] = gpa;
+ kunmap(page);
+ nested_release_page_clean(page);
+ }
+
+ return 0;
+}
+
static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
struct kvm_memory_slot *memslot,
gfn_t offset, unsigned long mask)
@@ -11610,6 +11592,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.slot_disable_log_dirty = vmx_slot_disable_log_dirty,
.flush_log_dirty = vmx_flush_log_dirty,
.enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
+ .write_log_dirty = vmx_write_pml_buffer,
.pre_block = vmx_pre_block,
.post_block = vmx_post_block,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ccbd45ecd41a..464da936c53d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -27,7 +27,6 @@
#include "kvm_cache_regs.h"
#include "x86.h"
#include "cpuid.h"
-#include "assigned-dev.h"
#include "pmu.h"
#include "hyperv.h"
@@ -1008,6 +1007,8 @@ static u32 emulated_msrs[] = {
MSR_IA32_MCG_CTL,
MSR_IA32_MCG_EXT_CTL,
MSR_IA32_SMBASE,
+ MSR_PLATFORM_INFO,
+ MSR_MISC_FEATURES_ENABLES,
};
static unsigned num_emulated_msrs;
@@ -1444,10 +1445,10 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
struct kvm *kvm = vcpu->kvm;
u64 offset, ns, elapsed;
unsigned long flags;
- s64 usdiff;
bool matched;
bool already_matched;
u64 data = msr->data;
+ bool synchronizing = false;
raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
offset = kvm_compute_tsc_offset(vcpu, data);
@@ -1455,51 +1456,34 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
elapsed = ns - kvm->arch.last_tsc_nsec;
if (vcpu->arch.virtual_tsc_khz) {
- int faulted = 0;
-
- /* n.b - signed multiplication and division required */
- usdiff = data - kvm->arch.last_tsc_write;
-#ifdef CONFIG_X86_64
- usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz;
-#else
- /* do_div() only does unsigned */
- asm("1: idivl %[divisor]\n"
- "2: xor %%edx, %%edx\n"
- " movl $0, %[faulted]\n"
- "3:\n"
- ".section .fixup,\"ax\"\n"
- "4: movl $1, %[faulted]\n"
- " jmp 3b\n"
- ".previous\n"
-
- _ASM_EXTABLE(1b, 4b)
-
- : "=A"(usdiff), [faulted] "=r" (faulted)
- : "A"(usdiff * 1000), [divisor] "rm"(vcpu->arch.virtual_tsc_khz));
-
-#endif
- do_div(elapsed, 1000);
- usdiff -= elapsed;
- if (usdiff < 0)
- usdiff = -usdiff;
-
- /* idivl overflow => difference is larger than USEC_PER_SEC */
- if (faulted)
- usdiff = USEC_PER_SEC;
- } else
- usdiff = USEC_PER_SEC; /* disable TSC match window below */
+ if (data == 0 && msr->host_initiated) {
+ /*
+ * detection of vcpu initialization -- need to sync
+ * with other vCPUs. This particularly helps to keep
+ * kvm_clock stable after CPU hotplug
+ */
+ synchronizing = true;
+ } else {
+ u64 tsc_exp = kvm->arch.last_tsc_write +
+ nsec_to_cycles(vcpu, elapsed);
+ u64 tsc_hz = vcpu->arch.virtual_tsc_khz * 1000LL;
+ /*
+ * Special case: TSC write with a small delta (1 second)
+ * of virtual cycle time against real time is
+ * interpreted as an attempt to synchronize the CPU.
+ */
+ synchronizing = data < tsc_exp + tsc_hz &&
+ data + tsc_hz > tsc_exp;
+ }
+ }
/*
- * Special case: TSC write with a small delta (1 second) of virtual
- * cycle time against real time is interpreted as an attempt to
- * synchronize the CPU.
- *
* For a reliable TSC, we can match TSC offsets, and for an unstable
* TSC, we add elapsed time in this computation. We could let the
* compensation code attempt to catch up if we fall behind, but
* it's better to try to match offsets from the beginning.
*/
- if (usdiff < USEC_PER_SEC &&
+ if (synchronizing &&
vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) {
if (!check_tsc_unstable()) {
offset = kvm->arch.cur_tsc_offset;
@@ -1769,13 +1753,13 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
/* guest entries allowed */
kvm_for_each_vcpu(i, vcpu, kvm)
- clear_bit(KVM_REQ_MCLOCK_INPROGRESS, &vcpu->requests);
+ kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu);
spin_unlock(&ka->pvclock_gtod_sync_lock);
#endif
}
-static u64 __get_kvmclock_ns(struct kvm *kvm)
+u64 get_kvmclock_ns(struct kvm *kvm)
{
struct kvm_arch *ka = &kvm->arch;
struct pvclock_vcpu_time_info hv_clock;
@@ -1796,24 +1780,12 @@ static u64 __get_kvmclock_ns(struct kvm *kvm)
return __pvclock_read_cycles(&hv_clock, rdtsc());
}
-u64 get_kvmclock_ns(struct kvm *kvm)
-{
- unsigned long flags;
- s64 ns;
-
- local_irq_save(flags);
- ns = __get_kvmclock_ns(kvm);
- local_irq_restore(flags);
-
- return ns;
-}
-
static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
{
struct kvm_vcpu_arch *vcpu = &v->arch;
struct pvclock_vcpu_time_info guest_hv_clock;
- if (unlikely(kvm_vcpu_read_guest_cached(v, &vcpu->pv_time,
+ if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
&guest_hv_clock, sizeof(guest_hv_clock))))
return;
@@ -1834,9 +1806,9 @@ static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
vcpu->hv_clock.version = guest_hv_clock.version + 1;
- kvm_vcpu_write_guest_cached(v, &vcpu->pv_time,
- &vcpu->hv_clock,
- sizeof(vcpu->hv_clock.version));
+ kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
+ &vcpu->hv_clock,
+ sizeof(vcpu->hv_clock.version));
smp_wmb();
@@ -1850,16 +1822,16 @@ static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
- kvm_vcpu_write_guest_cached(v, &vcpu->pv_time,
- &vcpu->hv_clock,
- sizeof(vcpu->hv_clock));
+ kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
+ &vcpu->hv_clock,
+ sizeof(vcpu->hv_clock));
smp_wmb();
vcpu->hv_clock.version++;
- kvm_vcpu_write_guest_cached(v, &vcpu->pv_time,
- &vcpu->hv_clock,
- sizeof(vcpu->hv_clock.version));
+ kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
+ &vcpu->hv_clock,
+ sizeof(vcpu->hv_clock.version));
}
static int kvm_guest_time_update(struct kvm_vcpu *v)
@@ -2092,7 +2064,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
return 0;
}
- if (kvm_vcpu_gfn_to_hva_cache_init(vcpu, &vcpu->arch.apf.data, gpa,
+ if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa,
sizeof(u32)))
return 1;
@@ -2111,7 +2083,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
return;
- if (unlikely(kvm_vcpu_read_guest_cached(vcpu, &vcpu->arch.st.stime,
+ if (unlikely(kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
&vcpu->arch.st.steal, sizeof(struct kvm_steal_time))))
return;
@@ -2122,7 +2094,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
vcpu->arch.st.steal.version += 1;
- kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.st.stime,
+ kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
&vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
smp_wmb();
@@ -2131,14 +2103,14 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
vcpu->arch.st.last_steal;
vcpu->arch.st.last_steal = current->sched_info.run_delay;
- kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.st.stime,
+ kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
&vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
smp_wmb();
vcpu->arch.st.steal.version += 1;
- kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.st.stime,
+ kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
&vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
}
@@ -2155,6 +2127,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_VM_HSAVE_PA:
case MSR_AMD64_PATCH_LOADER:
case MSR_AMD64_BU_CFG2:
+ case MSR_AMD64_DC_CFG:
break;
case MSR_EFER:
@@ -2230,8 +2203,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
bool tmp = (msr == MSR_KVM_SYSTEM_TIME);
if (ka->boot_vcpu_runs_old_kvmclock != tmp)
- set_bit(KVM_REQ_MASTERCLOCK_UPDATE,
- &vcpu->requests);
+ kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
ka->boot_vcpu_runs_old_kvmclock = tmp;
}
@@ -2243,7 +2215,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (!(data & 1))
break;
- if (kvm_vcpu_gfn_to_hva_cache_init(vcpu,
+ if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
&vcpu->arch.pv_time, data & ~1ULL,
sizeof(struct pvclock_vcpu_time_info)))
vcpu->arch.pv_time_enabled = false;
@@ -2264,7 +2236,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (data & KVM_STEAL_RESERVED_MASK)
return 1;
- if (kvm_vcpu_gfn_to_hva_cache_init(vcpu, &vcpu->arch.st.stime,
+ if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.st.stime,
data & KVM_STEAL_VALID_BITS,
sizeof(struct kvm_steal_time)))
return 1;
@@ -2331,6 +2303,21 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
vcpu->arch.osvw.status = data;
break;
+ case MSR_PLATFORM_INFO:
+ if (!msr_info->host_initiated ||
+ data & ~MSR_PLATFORM_INFO_CPUID_FAULT ||
+ (!(data & MSR_PLATFORM_INFO_CPUID_FAULT) &&
+ cpuid_fault_enabled(vcpu)))
+ return 1;
+ vcpu->arch.msr_platform_info = data;
+ break;
+ case MSR_MISC_FEATURES_ENABLES:
+ if (data & ~MSR_MISC_FEATURES_ENABLES_CPUID_FAULT ||
+ (data & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT &&
+ !supports_cpuid_fault(vcpu)))
+ return 1;
+ vcpu->arch.msr_misc_features_enables = data;
+ break;
default:
if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
return xen_hvm_config(vcpu, data);
@@ -2417,6 +2404,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_FAM10H_MMIO_CONF_BASE:
case MSR_AMD64_BU_CFG2:
case MSR_IA32_PERF_CTL:
+ case MSR_AMD64_DC_CFG:
msr_info->data = 0;
break;
case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
@@ -2545,6 +2533,12 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
msr_info->data = vcpu->arch.osvw.status;
break;
+ case MSR_PLATFORM_INFO:
+ msr_info->data = vcpu->arch.msr_platform_info;
+ break;
+ case MSR_MISC_FEATURES_ENABLES:
+ msr_info->data = vcpu->arch.msr_misc_features_enables;
+ break;
default:
if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data);
@@ -2675,15 +2669,14 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_SET_BOOT_CPU_ID:
case KVM_CAP_SPLIT_IRQCHIP:
case KVM_CAP_IMMEDIATE_EXIT:
-#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
- case KVM_CAP_ASSIGN_DEV_IRQ:
- case KVM_CAP_PCI_2_3:
-#endif
r = 1;
break;
case KVM_CAP_ADJUST_CLOCK:
r = KVM_CLOCK_TSC_STABLE;
break;
+ case KVM_CAP_X86_GUEST_MWAIT:
+ r = kvm_mwait_in_guest();
+ break;
case KVM_CAP_X86_SMM:
/* SMBASE is usually relocated above 1M on modern chipsets,
* and SMM handlers might indeed rely on 4G segment limits,
@@ -2695,9 +2688,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
*/
r = kvm_x86_ops->cpu_has_high_real_mode_segbase();
break;
- case KVM_CAP_COALESCED_MMIO:
- r = KVM_COALESCED_MMIO_PAGE_OFFSET;
- break;
case KVM_CAP_VAPIC:
r = !kvm_x86_ops->cpu_has_accelerated_tpr();
break;
@@ -2713,11 +2703,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_PV_MMU: /* obsolete */
r = 0;
break;
-#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
- case KVM_CAP_IOMMU:
- r = iommu_present(&pci_bus_type);
- break;
-#endif
case KVM_CAP_MCE:
r = KVM_MAX_MCE_BANKS;
break;
@@ -2816,11 +2801,6 @@ static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
return kvm_arch_has_noncoherent_dma(vcpu->kvm);
}
-static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)
-{
- set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
-}
-
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
/* Address WBINVD may be executed by guest */
@@ -2864,7 +2844,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1)
kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
if (vcpu->cpu != cpu)
- kvm_migrate_timers(vcpu);
+ kvm_make_request(KVM_REQ_MIGRATE_TIMER, vcpu);
vcpu->cpu = cpu;
}
@@ -2878,7 +2858,7 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
vcpu->arch.st.steal.preempted = 1;
- kvm_vcpu_write_guest_offset_cached(vcpu, &vcpu->arch.st.stime,
+ kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.st.stime,
&vcpu->arch.st.steal.preempted,
offsetof(struct kvm_steal_time, preempted),
sizeof(vcpu->arch.st.steal.preempted));
@@ -3124,7 +3104,14 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
return -EINVAL;
if (events->exception.injected &&
- (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR))
+ (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR ||
+ is_guest_mode(vcpu)))
+ return -EINVAL;
+
+ /* INITs are latched while in SMM */
+ if (events->flags & KVM_VCPUEVENT_VALID_SMM &&
+ (events->smi.smm || events->smi.pending) &&
+ vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED)
return -EINVAL;
process_nmi(vcpu);
@@ -3721,22 +3708,21 @@ static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
{
+ struct kvm_pic *pic = kvm->arch.vpic;
int r;
r = 0;
switch (chip->chip_id) {
case KVM_IRQCHIP_PIC_MASTER:
- memcpy(&chip->chip.pic,
- &pic_irqchip(kvm)->pics[0],
+ memcpy(&chip->chip.pic, &pic->pics[0],
sizeof(struct kvm_pic_state));
break;
case KVM_IRQCHIP_PIC_SLAVE:
- memcpy(&chip->chip.pic,
- &pic_irqchip(kvm)->pics[1],
+ memcpy(&chip->chip.pic, &pic->pics[1],
sizeof(struct kvm_pic_state));
break;
case KVM_IRQCHIP_IOAPIC:
- r = kvm_get_ioapic(kvm, &chip->chip.ioapic);
+ kvm_get_ioapic(kvm, &chip->chip.ioapic);
break;
default:
r = -EINVAL;
@@ -3747,32 +3733,31 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
{
+ struct kvm_pic *pic = kvm->arch.vpic;
int r;
r = 0;
switch (chip->chip_id) {
case KVM_IRQCHIP_PIC_MASTER:
- spin_lock(&pic_irqchip(kvm)->lock);
- memcpy(&pic_irqchip(kvm)->pics[0],
- &chip->chip.pic,
+ spin_lock(&pic->lock);
+ memcpy(&pic->pics[0], &chip->chip.pic,
sizeof(struct kvm_pic_state));
- spin_unlock(&pic_irqchip(kvm)->lock);
+ spin_unlock(&pic->lock);
break;
case KVM_IRQCHIP_PIC_SLAVE:
- spin_lock(&pic_irqchip(kvm)->lock);
- memcpy(&pic_irqchip(kvm)->pics[1],
- &chip->chip.pic,
+ spin_lock(&pic->lock);
+ memcpy(&pic->pics[1], &chip->chip.pic,
sizeof(struct kvm_pic_state));
- spin_unlock(&pic_irqchip(kvm)->lock);
+ spin_unlock(&pic->lock);
break;
case KVM_IRQCHIP_IOAPIC:
- r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
+ kvm_set_ioapic(kvm, &chip->chip.ioapic);
break;
default:
r = -EINVAL;
break;
}
- kvm_pic_update_irq(pic_irqchip(kvm));
+ kvm_pic_update_irq(pic);
return r;
}
@@ -4018,20 +4003,14 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = kvm_ioapic_init(kvm);
if (r) {
- mutex_lock(&kvm->slots_lock);
kvm_pic_destroy(kvm);
- mutex_unlock(&kvm->slots_lock);
goto create_irqchip_unlock;
}
r = kvm_setup_default_irq_routing(kvm);
if (r) {
- mutex_lock(&kvm->slots_lock);
- mutex_lock(&kvm->irq_lock);
kvm_ioapic_destroy(kvm);
kvm_pic_destroy(kvm);
- mutex_unlock(&kvm->irq_lock);
- mutex_unlock(&kvm->slots_lock);
goto create_irqchip_unlock;
}
/* Write kvm->irq_routing before enabling irqchip_in_kernel. */
@@ -4196,10 +4175,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
goto out;
r = 0;
- local_irq_disable();
- now_ns = __get_kvmclock_ns(kvm);
+ now_ns = get_kvmclock_ns(kvm);
kvm->arch.kvmclock_offset += user_ns.clock - now_ns;
- local_irq_enable();
kvm_gen_update_masterclock(kvm);
break;
}
@@ -4207,11 +4184,9 @@ long kvm_arch_vm_ioctl(struct file *filp,
struct kvm_clock_data user_ns;
u64 now_ns;
- local_irq_disable();
- now_ns = __get_kvmclock_ns(kvm);
+ now_ns = get_kvmclock_ns(kvm);
user_ns.clock = now_ns;
user_ns.flags = kvm->arch.use_master_clock ? KVM_CLOCK_TSC_STABLE : 0;
- local_irq_enable();
memset(&user_ns.pad, 0, sizeof(user_ns.pad));
r = -EFAULT;
@@ -4230,7 +4205,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
break;
}
default:
- r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg);
+ r = -ENOTTY;
}
out:
return r;
@@ -5223,6 +5198,16 @@ static void emulator_set_nmi_mask(struct x86_emulate_ctxt *ctxt, bool masked)
kvm_x86_ops->set_nmi_mask(emul_to_vcpu(ctxt), masked);
}
+static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt)
+{
+ return emul_to_vcpu(ctxt)->arch.hflags;
+}
+
+static void emulator_set_hflags(struct x86_emulate_ctxt *ctxt, unsigned emul_flags)
+{
+ kvm_set_hflags(emul_to_vcpu(ctxt), emul_flags);
+}
+
static const struct x86_emulate_ops emulate_ops = {
.read_gpr = emulator_read_gpr,
.write_gpr = emulator_write_gpr,
@@ -5262,6 +5247,8 @@ static const struct x86_emulate_ops emulate_ops = {
.intercept = emulator_intercept,
.get_cpuid = emulator_get_cpuid,
.set_nmi_mask = emulator_set_nmi_mask,
+ .get_hflags = emulator_get_hflags,
+ .set_hflags = emulator_set_hflags,
};
static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
@@ -5314,7 +5301,6 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
BUILD_BUG_ON(HF_GUEST_MASK != X86EMUL_GUEST_MASK);
BUILD_BUG_ON(HF_SMM_MASK != X86EMUL_SMM_MASK);
BUILD_BUG_ON(HF_SMM_INSIDE_NMI_MASK != X86EMUL_SMM_INSIDE_NMI_MASK);
- ctxt->emul_flags = vcpu->arch.hflags;
init_decode_cache(ctxt);
vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
@@ -5718,8 +5704,6 @@ restart:
unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
toggle_interruptibility(vcpu, ctxt->interruptibility);
vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
- if (vcpu->arch.hflags != ctxt->emul_flags)
- kvm_set_hflags(vcpu, ctxt->emul_flags);
kvm_rip_write(vcpu, ctxt->eip);
if (r == EMULATE_DONE)
kvm_vcpu_check_singlestep(vcpu, rflags, &r);
@@ -6869,7 +6853,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
/*
* 1) We should set ->mode before checking ->requests. Please see
- * the comment in kvm_make_all_cpus_request.
+ * the comment in kvm_vcpu_exiting_guest_mode().
*
* 2) For APICv, we should set ->mode before checking PIR.ON. This
* pairs with the memory barrier implicit in pi_test_and_set_on
@@ -7051,7 +7035,7 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
if (r <= 0)
break;
- clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
+ kvm_clear_request(KVM_REQ_PENDING_TIMER, vcpu);
if (kvm_cpu_has_pending_timer(vcpu))
kvm_inject_pending_timer_irqs(vcpu);
@@ -7179,7 +7163,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
kvm_vcpu_block(vcpu);
kvm_apic_accept_events(vcpu);
- clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
+ kvm_clear_request(KVM_REQ_UNHALT, vcpu);
r = -EAGAIN;
goto out;
}
@@ -7355,6 +7339,12 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
mp_state->mp_state != KVM_MP_STATE_RUNNABLE)
return -EINVAL;
+ /* INITs are latched while in SMM */
+ if ((is_smm(vcpu) || vcpu->arch.smi_pending) &&
+ (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED ||
+ mp_state->mp_state == KVM_MP_STATE_INIT_RECEIVED))
+ return -EINVAL;
+
if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) {
vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
set_bit(KVM_APIC_SIPI, &vcpu->arch.apic->pending_events);
@@ -7724,6 +7714,9 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
if (!init_event) {
kvm_pmu_reset(vcpu);
vcpu->arch.smbase = 0x30000;
+
+ vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT;
+ vcpu->arch.msr_misc_features_enables = 0;
}
memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
@@ -8068,7 +8061,6 @@ void kvm_arch_sync_events(struct kvm *kvm)
{
cancel_delayed_work_sync(&kvm->arch.kvmclock_sync_work);
cancel_delayed_work_sync(&kvm->arch.kvmclock_update_work);
- kvm_free_all_assigned_devices(kvm);
kvm_free_pit(kvm);
}
@@ -8152,7 +8144,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
}
if (kvm_x86_ops->vm_destroy)
kvm_x86_ops->vm_destroy(kvm);
- kvm_iommu_unmap_guest(kvm);
kvm_pic_destroy(kvm);
kvm_ioapic_destroy(kvm);
kvm_free_vcpus(kvm);
@@ -8199,13 +8190,13 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
slot->base_gfn, level) + 1;
slot->arch.rmap[i] =
- kvm_kvzalloc(lpages * sizeof(*slot->arch.rmap[i]));
+ kvzalloc(lpages * sizeof(*slot->arch.rmap[i]), GFP_KERNEL);
if (!slot->arch.rmap[i])
goto out_free;
if (i == 0)
continue;
- linfo = kvm_kvzalloc(lpages * sizeof(*linfo));
+ linfo = kvzalloc(lpages * sizeof(*linfo), GFP_KERNEL);
if (!linfo)
goto out_free;
@@ -8385,7 +8376,7 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
if (atomic_read(&vcpu->arch.nmi_queued))
return true;
- if (test_bit(KVM_REQ_SMI, &vcpu->requests))
+ if (kvm_test_request(KVM_REQ_SMI, vcpu))
return true;
if (kvm_arch_interrupt_allowed(vcpu) &&
@@ -8536,8 +8527,9 @@ static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
static int apf_put_user(struct kvm_vcpu *vcpu, u32 val)
{
- return kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.apf.data, &val,
- sizeof(val));
+
+ return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &val,
+ sizeof(val));
}
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index e8ff3e4ce38a..612067074905 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -1,6 +1,8 @@
#ifndef ARCH_X86_KVM_X86_H
#define ARCH_X86_KVM_X86_H
+#include <asm/processor.h>
+#include <asm/mwait.h>
#include <linux/kvm_host.h>
#include <asm/pvclock.h>
#include "kvm_cache_regs.h"
@@ -212,4 +214,38 @@ static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
__rem; \
})
+static inline bool kvm_mwait_in_guest(void)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ if (!cpu_has(&boot_cpu_data, X86_FEATURE_MWAIT))
+ return false;
+
+ switch (boot_cpu_data.x86_vendor) {
+ case X86_VENDOR_AMD:
+ /* All AMD CPUs have a working MWAIT implementation */
+ return true;
+ case X86_VENDOR_INTEL:
+ /* Handle Intel below */
+ break;
+ default:
+ return false;
+ }
+
+ /*
+ * Intel CPUs without CPUID5_ECX_INTERRUPT_BREAK are problematic as
+ * they would allow guest to stop the CPU completely by disabling
+ * interrupts then invoking MWAIT.
+ */
+ if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
+ return false;
+
+ cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);
+
+ if (!(ecx & CPUID5_ECX_INTERRUPT_BREAK))
+ return false;
+
+ return true;
+}
+
#endif
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 138bad2fb6bc..cbc87ea98751 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -5,7 +5,7 @@
#include <linux/memblock.h>
#include <linux/bootmem.h> /* for max_low_pfn */
-#include <asm/cacheflush.h>
+#include <asm/set_memory.h>
#include <asm/e820/api.h>
#include <asm/init.h>
#include <asm/page.h>
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index f34d275ee201..99fb83819a5f 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -48,7 +48,7 @@
#include <asm/sections.h>
#include <asm/paravirt.h>
#include <asm/setup.h>
-#include <asm/cacheflush.h>
+#include <asm/set_memory.h>
#include <asm/page_types.h>
#include <asm/init.h>
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 745e5e183169..41270b96403d 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -50,7 +50,7 @@
#include <asm/sections.h>
#include <asm/kdebug.h>
#include <asm/numa.h>
-#include <asm/cacheflush.h>
+#include <asm/set_memory.h>
#include <asm/init.h>
#include <asm/uv/uv.h>
#include <asm/setup.h>
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index e4f7b25df18e..bbc558b88a88 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -14,7 +14,7 @@
#include <linux/vmalloc.h>
#include <linux/mmiotrace.h>
-#include <asm/cacheflush.h>
+#include <asm/set_memory.h>
#include <asm/e820/api.h>
#include <asm/fixmap.h>
#include <asm/pgtable.h>
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 56b22fa504df..1dcd2be4cce4 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -24,6 +24,7 @@
#include <asm/pgalloc.h>
#include <asm/proto.h>
#include <asm/pat.h>
+#include <asm/set_memory.h>
/*
* The current flushing context - we pass it instead of 5 arguments:
diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c
index 38868adf07ea..f6ae6830b341 100644
--- a/arch/x86/mm/testmmiotrace.c
+++ b/arch/x86/mm/testmmiotrace.c
@@ -9,7 +9,7 @@
#include <linux/mmiotrace.h>
static unsigned long mmio_address;
-module_param(mmio_address, ulong, 0);
+module_param_hw(mmio_address, ulong, iomem, 0);
MODULE_PARM_DESC(mmio_address, " Start address of the mapping of 16 kB "
"(or 8 MB if read_far is non-zero).");
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 32322ce9b405..f58939393eef 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -12,6 +12,7 @@
#include <linux/filter.h>
#include <linux/if_vlan.h>
#include <asm/cacheflush.h>
+#include <asm/set_memory.h>
#include <linux/bpf.h>
int bpf_jit_enable __read_mostly;
@@ -490,13 +491,6 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
break;
case BPF_LD | BPF_IMM | BPF_DW:
- if (insn[1].code != 0 || insn[1].src_reg != 0 ||
- insn[1].dst_reg != 0 || insn[1].off != 0) {
- /* verifier must catch invalid insns */
- pr_err("invalid BPF_LD_IMM64 insn\n");
- return -EINVAL;
- }
-
/* optimization: if imm64 is zero, use 'xor <dst>,<dst>'
* to save 7 bytes.
*/
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 6fa84d531f4f..7b4307163eac 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -406,50 +406,3 @@ void __init pcibios_resource_survey(void)
*/
ioapic_insert_resources();
}
-
-static const struct vm_operations_struct pci_mmap_ops = {
- .access = generic_access_phys,
-};
-
-int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
- enum pci_mmap_state mmap_state, int write_combine)
-{
- unsigned long prot;
-
- /* I/O space cannot be accessed via normal processor loads and
- * stores on this platform.
- */
- if (mmap_state == pci_mmap_io)
- return -EINVAL;
-
- prot = pgprot_val(vma->vm_page_prot);
-
- /*
- * Return error if pat is not enabled and write_combine is requested.
- * Caller can followup with UC MINUS request and add a WC mtrr if there
- * is a free mtrr slot.
- */
- if (!pat_enabled() && write_combine)
- return -EINVAL;
-
- if (pat_enabled() && write_combine)
- prot |= cachemode2protval(_PAGE_CACHE_MODE_WC);
- else if (pat_enabled() || boot_cpu_data.x86 > 3)
- /*
- * ioremap() and ioremap_nocache() defaults to UC MINUS for now.
- * To avoid attribute conflicts, request UC MINUS here
- * as well.
- */
- prot |= cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS);
-
- vma->vm_page_prot = __pgprot(prot);
-
- if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
- vma->vm_end - vma->vm_start,
- vma->vm_page_prot))
- return -EAGAIN;
-
- vma->vm_ops = &pci_mmap_ops;
-
- return 0;
-}
diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c
index 29e9ba6ace9d..c1bdb9edcae7 100644
--- a/arch/x86/pci/pcbios.c
+++ b/arch/x86/pci/pcbios.c
@@ -11,7 +11,7 @@
#include <asm/pci_x86.h>
#include <asm/e820/types.h>
#include <asm/pci-functions.h>
-#include <asm/cacheflush.h>
+#include <asm/set_memory.h>
/* BIOS32 signature: "_32_" */
#define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24))
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index a15cf815ac4e..7e76a4d8304b 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -49,7 +49,7 @@
#include <asm/efi.h>
#include <asm/e820/api.h>
#include <asm/time.h>
-#include <asm/cacheflush.h>
+#include <asm/set_memory.h>
#include <asm/tlbflush.h>
#include <asm/x86_init.h>
#include <asm/uv/uv.h>
diff --git a/arch/x86/platform/intel/iosf_mbi.c b/arch/x86/platform/intel/iosf_mbi.c
index edf2c54bf131..a952ac199741 100644
--- a/arch/x86/platform/intel/iosf_mbi.c
+++ b/arch/x86/platform/intel/iosf_mbi.c
@@ -34,6 +34,8 @@
static struct pci_dev *mbi_pdev;
static DEFINE_SPINLOCK(iosf_mbi_lock);
+static DEFINE_MUTEX(iosf_mbi_punit_mutex);
+static BLOCKING_NOTIFIER_HEAD(iosf_mbi_pmic_bus_access_notifier);
static inline u32 iosf_mbi_form_mcr(u8 op, u8 port, u8 offset)
{
@@ -190,6 +192,53 @@ bool iosf_mbi_available(void)
}
EXPORT_SYMBOL(iosf_mbi_available);
+void iosf_mbi_punit_acquire(void)
+{
+ mutex_lock(&iosf_mbi_punit_mutex);
+}
+EXPORT_SYMBOL(iosf_mbi_punit_acquire);
+
+void iosf_mbi_punit_release(void)
+{
+ mutex_unlock(&iosf_mbi_punit_mutex);
+}
+EXPORT_SYMBOL(iosf_mbi_punit_release);
+
+int iosf_mbi_register_pmic_bus_access_notifier(struct notifier_block *nb)
+{
+ int ret;
+
+ /* Wait for the bus to go inactive before registering */
+ mutex_lock(&iosf_mbi_punit_mutex);
+ ret = blocking_notifier_chain_register(
+ &iosf_mbi_pmic_bus_access_notifier, nb);
+ mutex_unlock(&iosf_mbi_punit_mutex);
+
+ return ret;
+}
+EXPORT_SYMBOL(iosf_mbi_register_pmic_bus_access_notifier);
+
+int iosf_mbi_unregister_pmic_bus_access_notifier(struct notifier_block *nb)
+{
+ int ret;
+
+ /* Wait for the bus to go inactive before unregistering */
+ mutex_lock(&iosf_mbi_punit_mutex);
+ ret = blocking_notifier_chain_unregister(
+ &iosf_mbi_pmic_bus_access_notifier, nb);
+ mutex_unlock(&iosf_mbi_punit_mutex);
+
+ return ret;
+}
+EXPORT_SYMBOL(iosf_mbi_unregister_pmic_bus_access_notifier);
+
+int iosf_mbi_call_pmic_bus_access_notifier_chain(unsigned long val, void *v)
+{
+ return blocking_notifier_call_chain(
+ &iosf_mbi_pmic_bus_access_notifier, val, v);
+}
+EXPORT_SYMBOL(iosf_mbi_call_pmic_bus_access_notifier_chain);
+
#ifdef CONFIG_IOSF_MBI_DEBUG
static u32 dbg_mdr;
static u32 dbg_mcr;
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index 5db706f14111..a163a90af4aa 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -2,7 +2,7 @@
#include <linux/slab.h>
#include <linux/memblock.h>
-#include <asm/cacheflush.h>
+#include <asm/set_memory.h>
#include <asm/pgtable.h>
#include <asm/realmode.h>
#include <asm/tlbflush.h>