-rw-r--r--  Documentation/virt/kvm/api.rst | 10
-rw-r--r--  arch/x86/include/asm/kvm-x86-ops.h | 1
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 65
-rw-r--r--  arch/x86/kvm/cpuid.c | 971
-rw-r--r--  arch/x86/kvm/cpuid.h | 128
-rw-r--r--  arch/x86/kvm/governed_features.h | 22
-rw-r--r--  arch/x86/kvm/hyperv.c | 2
-rw-r--r--  arch/x86/kvm/kvm_emulate.h | 2
-rw-r--r--  arch/x86/kvm/lapic.c | 6
-rw-r--r--  arch/x86/kvm/mmu.h | 2
-rw-r--r--  arch/x86/kvm/mmu/mmu.c | 4
-rw-r--r--  arch/x86/kvm/pmu.c | 1
-rw-r--r--  arch/x86/kvm/reverse_cpuid.h | 23
-rw-r--r--  arch/x86/kvm/smm.c | 10
-rw-r--r--  arch/x86/kvm/svm/nested.c | 22
-rw-r--r--  arch/x86/kvm/svm/pmu.c | 8
-rw-r--r--  arch/x86/kvm/svm/sev.c | 21
-rw-r--r--  arch/x86/kvm/svm/svm.c | 70
-rw-r--r--  arch/x86/kvm/svm/svm.h | 4
-rw-r--r--  arch/x86/kvm/trace.h | 17
-rw-r--r--  arch/x86/kvm/vmx/hyperv.h | 2
-rw-r--r--  arch/x86/kvm/vmx/main.c | 1
-rw-r--r--  arch/x86/kvm/vmx/nested.c | 18
-rw-r--r--  arch/x86/kvm/vmx/pmu_intel.c | 4
-rw-r--r--  arch/x86/kvm/vmx/sgx.c | 14
-rw-r--r--  arch/x86/kvm/vmx/vmx.c | 100
-rw-r--r--  arch/x86/kvm/vmx/x86_ops.h | 3
-rw-r--r--  arch/x86/kvm/x86.c | 188
-rw-r--r--  arch/x86/kvm/x86.h | 6
-rw-r--r--  include/uapi/linux/kvm.h | 4
-rw-r--r--  tools/testing/selftests/kvm/include/x86/processor.h | 25
-rw-r--r--  tools/testing/selftests/kvm/set_memory_region_test.c | 53
-rw-r--r--  tools/testing/selftests/kvm/x86/kvm_pv_test.c | 38
-rw-r--r--  tools/testing/selftests/kvm/x86/set_sregs_test.c | 63
-rw-r--r--  tools/testing/selftests/kvm/x86/sev_smoke_test.c | 2
35 files changed, 1189 insertions, 721 deletions
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index f15b61317aad..2ba70c1fad51 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -1825,15 +1825,18 @@ emulate them efficiently. The fields in each entry are defined as follows:
the values returned by the cpuid instruction for
this function/index combination
-The TSC deadline timer feature (CPUID leaf 1, ecx[24]) is always returned
-as false, since the feature depends on KVM_CREATE_IRQCHIP for local APIC
-support. Instead it is reported via::
+x2APIC (CPUID leaf 1, ecx[21]) and TSC deadline timer (CPUID leaf 1, ecx[24])
+may be returned as true, but they depend on KVM_CREATE_IRQCHIP for in-kernel
+emulation of the local APIC. TSC deadline timer support is also reported via::
ioctl(KVM_CHECK_EXTENSION, KVM_CAP_TSC_DEADLINE_TIMER)
if that returns true and you use KVM_CREATE_IRQCHIP, or if you emulate the
feature in userspace, then you can enable the feature for KVM_SET_CPUID2.
+Enabling x2APIC in KVM_SET_CPUID2 requires KVM_CREATE_IRQCHIP as KVM doesn't
+support forwarding x2APIC MSR accesses to userspace, i.e. KVM does not support
+emulating x2APIC in userspace.
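For illustration only, a minimal userspace sketch of the required ordering
(kvm_fd/vm_fd/vcpu_fd and the cpuid2 payload are assumed to be set up
elsewhere)::
  /* Create the in-kernel irqchip before KVM_SET_CPUID2 enumerates the
   * bits that depend on it (x2APIC, TSC deadline timer).
   */
  if (ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_TSC_DEADLINE_TIMER) > 0) {
          ioctl(vm_fd, KVM_CREATE_IRQCHIP, 0);
          ioctl(vcpu_fd, KVM_SET_CPUID2, &cpuid2);
  }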
4.47 KVM_PPC_GET_PVINFO
-----------------------
@@ -7673,6 +7676,7 @@ branch to guests' 0x200 interrupt vector.
:Architectures: x86
:Parameters: args[0] defines which exits are disabled
:Returns: 0 on success, -EINVAL when args[0] contains invalid exits
+ or if any vCPUs have already been created
Valid bits in args[0] are::
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 7342af00e319..7b4536ff3834 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -99,6 +99,7 @@ KVM_X86_OP(get_l2_tsc_multiplier)
KVM_X86_OP(write_tsc_offset)
KVM_X86_OP(write_tsc_multiplier)
KVM_X86_OP(get_exit_info)
+KVM_X86_OP(get_entry_info)
KVM_X86_OP(check_intercept)
KVM_X86_OP(handle_exit_irqoff)
KVM_X86_OP_OPTIONAL(update_cpu_dirty_logging)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7bee06c14742..5193c3dfbce1 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -739,6 +739,23 @@ struct kvm_queued_exception {
bool has_payload;
};
+/*
+ * Hardware-defined CPUID leafs that are either scattered by the kernel or are
+ * unknown to the kernel, but need to be directly used by KVM. Note, these
+ * word values conflict with the kernel's "bug" caps, but KVM doesn't use those.
+ */
+enum kvm_only_cpuid_leafs {
+ CPUID_12_EAX = NCAPINTS,
+ CPUID_7_1_EDX,
+ CPUID_8000_0007_EDX,
+ CPUID_8000_0022_EAX,
+ CPUID_7_2_EDX,
+ CPUID_24_0_EBX,
+ NR_KVM_CPU_CAPS,
+
+ NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS,
+};
+
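A minimal sketch of what this layout buys (the local variable is illustrative
only; kvm_cpu_caps itself is declared in cpuid.h):
  /* KVM-only words are appended after the kernel's NCAPINTS words, so
   * kernel-defined and KVM-only features index the same flat array.
   */
  u32 sgx_caps = kvm_cpu_caps[CPUID_12_EAX]; /* SGX1/SGX2/EDECCSSA bits */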
struct kvm_vcpu_arch {
/*
* rip and regs accesses must go through
@@ -854,27 +871,23 @@ struct kvm_vcpu_arch {
int cpuid_nent;
struct kvm_cpuid_entry2 *cpuid_entries;
- struct kvm_hypervisor_cpuid kvm_cpuid;
bool is_amd_compatible;
/*
- * FIXME: Drop this macro and use KVM_NR_GOVERNED_FEATURES directly
- * when "struct kvm_vcpu_arch" is no longer defined in an
- * arch/x86/include/asm header. The max is mostly arbitrary, i.e.
- * can be increased as necessary.
- */
-#define KVM_MAX_NR_GOVERNED_FEATURES BITS_PER_LONG
-
- /*
- * Track whether or not the guest is allowed to use features that are
- * governed by KVM, where "governed" means KVM needs to manage state
- * and/or explicitly enable the feature in hardware. Typically, but
- * not always, governed features can be used by the guest if and only
- * if both KVM and userspace want to expose the feature to the guest.
+ * cpu_caps holds the effective guest capabilities, i.e. the features
+ * the vCPU is allowed to use. Typically, but not always, features can
+ * be used by the guest if and only if both KVM and userspace want to
+ * expose the feature to the guest.
+ *
+ * A common exception is for virtualization holes, i.e. when KVM can't
+ * prevent the guest from using a feature, in which case the vCPU "has"
+ * the feature regardless of what KVM or userspace desires.
+ *
+ * Note, features that don't require KVM involvement in any way are
+ * NOT enforced/sanitized by KVM, i.e. are taken verbatim from the
+ * guest CPUID provided by userspace.
*/
- struct {
- DECLARE_BITMAP(enabled, KVM_MAX_NR_GOVERNED_FEATURES);
- } governed_features;
+ u32 cpu_caps[NR_KVM_CPU_CAPS];
u64 reserved_gpa_bits;
int maxphyaddr;
@@ -1756,12 +1769,15 @@ struct kvm_x86_ops {
void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu);
/*
- * Retrieve somewhat arbitrary exit information. Intended to
+ * Retrieve somewhat arbitrary exit/entry information. Intended to
* be used only from within tracepoints or error paths.
*/
void (*get_exit_info)(struct kvm_vcpu *vcpu, u32 *reason,
u64 *info1, u64 *info2,
- u32 *exit_int_info, u32 *exit_int_info_err_code);
+ u32 *intr_info, u32 *error_code);
+
+ void (*get_entry_info)(struct kvm_vcpu *vcpu,
+ u32 *intr_info, u32 *error_code);
int (*check_intercept)(struct kvm_vcpu *vcpu,
struct x86_instruction_info *info,
@@ -2018,8 +2034,8 @@ u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
* VMware backdoor emulation handles select instructions
* and reinjects the #GP for all other cases.
*
- * EMULTYPE_PF - Set when emulating MMIO by way of an intercepted #PF, in which
- * case the CR2/GPA value pass on the stack is valid.
+ * EMULTYPE_PF - Set when an intercepted #PF triggers the emulation, in which case
+ * the CR2/GPA value passed on the stack is valid.
*
* EMULTYPE_COMPLETE_USER_EXIT - Set when the emulator should update interruptibility
* state and inject single-step #DBs after skipping
@@ -2054,6 +2070,11 @@ u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
#define EMULTYPE_COMPLETE_USER_EXIT (1 << 7)
#define EMULTYPE_WRITE_PF_TO_SP (1 << 8)
+static inline bool kvm_can_emulate_event_vectoring(int emul_type)
+{
+ return !(emul_type & EMULTYPE_PF);
+}
+
int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type);
int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
void *insn, int insn_len);
@@ -2061,6 +2082,8 @@ void __kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu,
u64 *data, u8 ndata);
void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu);
+void kvm_prepare_event_vectoring_exit(struct kvm_vcpu *vcpu, gpa_t gpa);
+
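A hypothetical caller sketch; "vectoring" stands in for a check of the
VMCS/VMCB IDT-vectoring info and is not a real KVM symbol:
  /* Emulating (e.g. MMIO) while an event is mid-delivery can't be
   * handled faithfully, so punt the access to userspace instead.
   */
  if (vectoring && !kvm_can_emulate_event_vectoring(emulation_type)) {
          kvm_prepare_event_vectoring_exit(vcpu, gpa);
          return 0; /* KVM_EXIT_INTERNAL_ERROR is delivered to userspace */
  }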
void kvm_enable_efer_bits(u64);
bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer);
int kvm_get_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 *data);
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index ae0b438a2c99..edef30359c19 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -82,15 +82,6 @@ u32 xstate_required_size(u64 xstate_bv, bool compacted)
return ret;
}
-#define F feature_bit
-
-/* Scattered Flag - For features that are scattered by cpufeatures.h. */
-#define SF(name) \
-({ \
- BUILD_BUG_ON(X86_FEATURE_##name >= MAX_CPU_FEATURES); \
- (boot_cpu_has(X86_FEATURE_##name) ? F(name) : 0); \
-})
-
/*
* Magic value used by KVM when querying userspace-provided CPUID entries and
* doesn't care about the CPUID index because the index of the function in
@@ -100,8 +91,8 @@ u32 xstate_required_size(u64 xstate_bv, bool compacted)
*/
#define KVM_CPUID_INDEX_NOT_SIGNIFICANT -1ull
-static inline struct kvm_cpuid_entry2 *cpuid_entry2_find(
- struct kvm_cpuid_entry2 *entries, int nent, u32 function, u64 index)
+static struct kvm_cpuid_entry2 *cpuid_entry2_find(struct kvm_vcpu *vcpu,
+ u32 function, u64 index)
{
struct kvm_cpuid_entry2 *e;
int i;
@@ -118,8 +109,8 @@ static inline struct kvm_cpuid_entry2 *cpuid_entry2_find(
*/
lockdep_assert_irqs_enabled();
- for (i = 0; i < nent; i++) {
- e = &entries[i];
+ for (i = 0; i < vcpu->arch.cpuid_nent; i++) {
+ e = &vcpu->arch.cpuid_entries[i];
if (e->function != function)
continue;
@@ -151,9 +142,27 @@ static inline struct kvm_cpuid_entry2 *cpuid_entry2_find(
return NULL;
}
-static int kvm_check_cpuid(struct kvm_vcpu *vcpu,
- struct kvm_cpuid_entry2 *entries,
- int nent)
+struct kvm_cpuid_entry2 *kvm_find_cpuid_entry_index(struct kvm_vcpu *vcpu,
+ u32 function, u32 index)
+{
+ return cpuid_entry2_find(vcpu, function, index);
+}
+EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry_index);
+
+struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
+ u32 function)
+{
+ return cpuid_entry2_find(vcpu, function, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
+}
+EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry);
+
+/*
+ * cpuid_entry2_find() and KVM_CPUID_INDEX_NOT_SIGNIFICANT should never be used
+ * directly outside of kvm_find_cpuid_entry() and kvm_find_cpuid_entry_index().
+ */
+#undef KVM_CPUID_INDEX_NOT_SIGNIFICANT
+
+static int kvm_check_cpuid(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
u64 xfeatures;
@@ -162,8 +171,7 @@ static int kvm_check_cpuid(struct kvm_vcpu *vcpu,
* The existing code assumes virtual address is 48-bit or 57-bit in the
* canonical address checks; exit if it is ever changed.
*/
- best = cpuid_entry2_find(entries, nent, 0x80000008,
- KVM_CPUID_INDEX_NOT_SIGNIFICANT);
+ best = kvm_find_cpuid_entry(vcpu, 0x80000008);
if (best) {
int vaddr_bits = (best->eax & 0xff00) >> 8;
@@ -175,7 +183,7 @@ static int kvm_check_cpuid(struct kvm_vcpu *vcpu,
* Exposing dynamic xfeatures to the guest requires additional
* enabling in the FPU, e.g. to expand the guest XSAVE state size.
*/
- best = cpuid_entry2_find(entries, nent, 0xd, 0);
+ best = kvm_find_cpuid_entry_index(vcpu, 0xd, 0);
if (!best)
return 0;
@@ -187,6 +195,8 @@ static int kvm_check_cpuid(struct kvm_vcpu *vcpu,
return fpu_enable_guest_xfd_features(&vcpu->arch.guest_fpu, xfeatures);
}
+static u32 kvm_apply_cpuid_pv_features_quirk(struct kvm_vcpu *vcpu);
+
/* Check whether the supplied CPUID data is equal to what is already set for the vCPU. */
static int kvm_cpuid_check_equal(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
int nent)
@@ -194,6 +204,15 @@ static int kvm_cpuid_check_equal(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2
struct kvm_cpuid_entry2 *orig;
int i;
+ /*
+ * Apply runtime CPUID updates to the incoming CPUID entries to avoid
+ * false positives due to mismatches on KVM-owned feature flags.
+ *
+ * Note! @e2 and @nent track the _old_ CPUID entries!
+ */
+ kvm_update_cpuid_runtime(vcpu);
+ kvm_apply_cpuid_pv_features_quirk(vcpu);
+
if (nent != vcpu->arch.cpuid_nent)
return -EINVAL;
@@ -210,15 +229,15 @@ static int kvm_cpuid_check_equal(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2
return 0;
}
-static struct kvm_hypervisor_cpuid __kvm_get_hypervisor_cpuid(struct kvm_cpuid_entry2 *entries,
- int nent, const char *sig)
+static struct kvm_hypervisor_cpuid kvm_get_hypervisor_cpuid(struct kvm_vcpu *vcpu,
+ const char *sig)
{
struct kvm_hypervisor_cpuid cpuid = {};
struct kvm_cpuid_entry2 *entry;
u32 base;
for_each_possible_hypervisor_cpuid_base(base) {
- entry = cpuid_entry2_find(entries, nent, base, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
+ entry = kvm_find_cpuid_entry(vcpu, base);
if (entry) {
u32 signature[3];
@@ -238,118 +257,90 @@ static struct kvm_hypervisor_cpuid __kvm_get_hypervisor_cpuid(struct kvm_cpuid_e
return cpuid;
}
-static struct kvm_hypervisor_cpuid kvm_get_hypervisor_cpuid(struct kvm_vcpu *vcpu,
- const char *sig)
+static u32 kvm_apply_cpuid_pv_features_quirk(struct kvm_vcpu *vcpu)
{
- return __kvm_get_hypervisor_cpuid(vcpu->arch.cpuid_entries,
- vcpu->arch.cpuid_nent, sig);
-}
-
-static struct kvm_cpuid_entry2 *__kvm_find_kvm_cpuid_features(struct kvm_cpuid_entry2 *entries,
- int nent, u32 kvm_cpuid_base)
-{
- return cpuid_entry2_find(entries, nent, kvm_cpuid_base | KVM_CPUID_FEATURES,
- KVM_CPUID_INDEX_NOT_SIGNIFICANT);
-}
-
-static struct kvm_cpuid_entry2 *kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu)
-{
- u32 base = vcpu->arch.kvm_cpuid.base;
+ struct kvm_hypervisor_cpuid kvm_cpuid;
+ struct kvm_cpuid_entry2 *best;
- if (!base)
- return NULL;
+ kvm_cpuid = kvm_get_hypervisor_cpuid(vcpu, KVM_SIGNATURE);
+ if (!kvm_cpuid.base)
+ return 0;
- return __kvm_find_kvm_cpuid_features(vcpu->arch.cpuid_entries,
- vcpu->arch.cpuid_nent, base);
-}
+ best = kvm_find_cpuid_entry(vcpu, kvm_cpuid.base | KVM_CPUID_FEATURES);
+ if (!best)
+ return 0;
-void kvm_update_pv_runtime(struct kvm_vcpu *vcpu)
-{
- struct kvm_cpuid_entry2 *best = kvm_find_kvm_cpuid_features(vcpu);
+ if (kvm_hlt_in_guest(vcpu->kvm))
+ best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);
- /*
- * save the feature bitmap to avoid cpuid lookup for every PV
- * operation
- */
- if (best)
- vcpu->arch.pv_cpuid.features = best->eax;
+ return best->eax;
}
/*
* Calculate guest's supported XCR0 taking into account guest CPUID data and
* KVM's supported XCR0 (comprised of host's XCR0 and KVM_SUPPORTED_XCR0).
*/
-static u64 cpuid_get_supported_xcr0(struct kvm_cpuid_entry2 *entries, int nent)
+static u64 cpuid_get_supported_xcr0(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
- best = cpuid_entry2_find(entries, nent, 0xd, 0);
+ best = kvm_find_cpuid_entry_index(vcpu, 0xd, 0);
if (!best)
return 0;
return (best->eax | ((u64)best->edx << 32)) & kvm_caps.supported_xcr0;
}
-static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries,
- int nent)
+static __always_inline void kvm_update_feature_runtime(struct kvm_vcpu *vcpu,
+ struct kvm_cpuid_entry2 *entry,
+ unsigned int x86_feature,
+ bool has_feature)
+{
+ cpuid_entry_change(entry, x86_feature, has_feature);
+ guest_cpu_cap_change(vcpu, x86_feature, has_feature);
+}
+
+void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
- struct kvm_hypervisor_cpuid kvm_cpuid;
- best = cpuid_entry2_find(entries, nent, 1, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
+ best = kvm_find_cpuid_entry(vcpu, 1);
if (best) {
- /* Update OSXSAVE bit */
- if (boot_cpu_has(X86_FEATURE_XSAVE))
- cpuid_entry_change(best, X86_FEATURE_OSXSAVE,
+ kvm_update_feature_runtime(vcpu, best, X86_FEATURE_OSXSAVE,
kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE));
- cpuid_entry_change(best, X86_FEATURE_APIC,
- vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
+ kvm_update_feature_runtime(vcpu, best, X86_FEATURE_APIC,
+ vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
+
+ if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT))
+ kvm_update_feature_runtime(vcpu, best, X86_FEATURE_MWAIT,
+ vcpu->arch.ia32_misc_enable_msr &
+ MSR_IA32_MISC_ENABLE_MWAIT);
}
- best = cpuid_entry2_find(entries, nent, 7, 0);
- if (best && boot_cpu_has(X86_FEATURE_PKU) && best->function == 0x7)
- cpuid_entry_change(best, X86_FEATURE_OSPKE,
- kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE));
+ best = kvm_find_cpuid_entry_index(vcpu, 7, 0);
+ if (best)
+ kvm_update_feature_runtime(vcpu, best, X86_FEATURE_OSPKE,
+ kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE));
+
- best = cpuid_entry2_find(entries, nent, 0xD, 0);
+ best = kvm_find_cpuid_entry_index(vcpu, 0xD, 0);
if (best)
best->ebx = xstate_required_size(vcpu->arch.xcr0, false);
- best = cpuid_entry2_find(entries, nent, 0xD, 1);
+ best = kvm_find_cpuid_entry_index(vcpu, 0xD, 1);
if (best && (cpuid_entry_has(best, X86_FEATURE_XSAVES) ||
cpuid_entry_has(best, X86_FEATURE_XSAVEC)))
best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
-
- kvm_cpuid = __kvm_get_hypervisor_cpuid(entries, nent, KVM_SIGNATURE);
- if (kvm_cpuid.base) {
- best = __kvm_find_kvm_cpuid_features(entries, nent, kvm_cpuid.base);
- if (kvm_hlt_in_guest(vcpu->kvm) && best)
- best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);
- }
-
- if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) {
- best = cpuid_entry2_find(entries, nent, 0x1, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
- if (best)
- cpuid_entry_change(best, X86_FEATURE_MWAIT,
- vcpu->arch.ia32_misc_enable_msr &
- MSR_IA32_MISC_ENABLE_MWAIT);
- }
-}
-
-void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
-{
- __kvm_update_cpuid_runtime(vcpu, vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent);
}
EXPORT_SYMBOL_GPL(kvm_update_cpuid_runtime);
-static bool kvm_cpuid_has_hyperv(struct kvm_cpuid_entry2 *entries, int nent)
+static bool kvm_cpuid_has_hyperv(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_KVM_HYPERV
struct kvm_cpuid_entry2 *entry;
- entry = cpuid_entry2_find(entries, nent, HYPERV_CPUID_INTERFACE,
- KVM_CPUID_INDEX_NOT_SIGNIFICANT);
+ entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_INTERFACE);
return entry && entry->eax == HYPERV_CPUID_SIGNATURE_EAX;
#else
return false;
@@ -368,15 +359,71 @@ static bool guest_cpuid_is_amd_or_hygon(struct kvm_vcpu *vcpu)
is_guest_vendor_hygon(entry->ebx, entry->ecx, entry->edx);
}
-static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
+/*
+ * This isn't truly "unsafe", but except for the cpu_caps initialization code,
+ * all register lookups should use __cpuid_entry_get_reg(), which provides
+ * compile-time validation of the input.
+ */
+static u32 cpuid_get_reg_unsafe(struct kvm_cpuid_entry2 *entry, u32 reg)
+{
+ switch (reg) {
+ case CPUID_EAX:
+ return entry->eax;
+ case CPUID_EBX:
+ return entry->ebx;
+ case CPUID_ECX:
+ return entry->ecx;
+ case CPUID_EDX:
+ return entry->edx;
+ default:
+ WARN_ON_ONCE(1);
+ return 0;
+ }
+}
+
+static int cpuid_func_emulated(struct kvm_cpuid_entry2 *entry, u32 func,
+ bool include_partially_emulated);
+
+void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
struct kvm_cpuid_entry2 *best;
+ struct kvm_cpuid_entry2 *entry;
bool allow_gbpages;
+ int i;
+
+ memset(vcpu->arch.cpu_caps, 0, sizeof(vcpu->arch.cpu_caps));
+ BUILD_BUG_ON(ARRAY_SIZE(reverse_cpuid) != NR_KVM_CPU_CAPS);
- BUILD_BUG_ON(KVM_NR_GOVERNED_FEATURES > KVM_MAX_NR_GOVERNED_FEATURES);
- bitmap_zero(vcpu->arch.governed_features.enabled,
- KVM_MAX_NR_GOVERNED_FEATURES);
+ /*
+ * Reset guest capabilities to userspace's guest CPUID definition, i.e.
+ * honor userspace's definition for features that don't require KVM or
+ * hardware management/support (or that KVM simply doesn't care about).
+ */
+ for (i = 0; i < NR_KVM_CPU_CAPS; i++) {
+ const struct cpuid_reg cpuid = reverse_cpuid[i];
+ struct kvm_cpuid_entry2 emulated;
+
+ if (!cpuid.function)
+ continue;
+
+ entry = kvm_find_cpuid_entry_index(vcpu, cpuid.function, cpuid.index);
+ if (!entry)
+ continue;
+
+ cpuid_func_emulated(&emulated, cpuid.function, true);
+
+ /*
+ * A vCPU has a feature if it's supported by KVM and is enabled
+ * in guest CPUID. Note, this includes features that are
+ * supported by KVM but aren't advertised to userspace!
+ */
+ vcpu->arch.cpu_caps[i] = kvm_cpu_caps[i] |
+ cpuid_get_reg_unsafe(&emulated, cpuid.reg);
+ vcpu->arch.cpu_caps[i] &= cpuid_get_reg_unsafe(entry, cpuid.reg);
+ }
+
+ kvm_update_cpuid_runtime(vcpu);
/*
* If TDP is enabled, let the guest use GBPAGES if they're supported in
@@ -390,9 +437,8 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
* and can install smaller shadow pages if the host lacks 1GiB support.
*/
allow_gbpages = tdp_enabled ? boot_cpu_has(X86_FEATURE_GBPAGES) :
- guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES);
- if (allow_gbpages)
- kvm_governed_feature_set(vcpu, X86_FEATURE_GBPAGES);
+ guest_cpu_cap_has(vcpu, X86_FEATURE_GBPAGES);
+ guest_cpu_cap_change(vcpu, X86_FEATURE_GBPAGES, allow_gbpages);
best = kvm_find_cpuid_entry(vcpu, 1);
if (best && apic) {
@@ -404,21 +450,22 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
kvm_apic_set_version(vcpu);
}
- vcpu->arch.guest_supported_xcr0 =
- cpuid_get_supported_xcr0(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent);
+ vcpu->arch.guest_supported_xcr0 = cpuid_get_supported_xcr0(vcpu);
- kvm_update_pv_runtime(vcpu);
+ vcpu->arch.pv_cpuid.features = kvm_apply_cpuid_pv_features_quirk(vcpu);
vcpu->arch.is_amd_compatible = guest_cpuid_is_amd_or_hygon(vcpu);
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
vcpu->arch.reserved_gpa_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu);
kvm_pmu_refresh(vcpu);
- vcpu->arch.cr4_guest_rsvd_bits =
- __cr4_reserved_bits(guest_cpuid_has, vcpu);
- kvm_hv_set_cpuid(vcpu, kvm_cpuid_has_hyperv(vcpu->arch.cpuid_entries,
- vcpu->arch.cpuid_nent));
+#define __kvm_cpu_cap_has(UNUSED_, f) kvm_cpu_cap_has(f)
+ vcpu->arch.cr4_guest_rsvd_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_) |
+ __cr4_reserved_bits(guest_cpu_cap_has, vcpu);
+#undef __kvm_cpu_cap_has
+
+ kvm_hv_set_cpuid(vcpu, kvm_cpuid_has_hyperv(vcpu));
/* Invoke the vendor callback only after the above state is updated. */
kvm_x86_call(vcpu_after_set_cpuid)(vcpu);
@@ -457,9 +504,25 @@ u64 kvm_vcpu_reserved_gpa_bits_raw(struct kvm_vcpu *vcpu)
static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
int nent)
{
+ u32 vcpu_caps[NR_KVM_CPU_CAPS];
int r;
- __kvm_update_cpuid_runtime(vcpu, e2, nent);
+ /*
+ * Swap the existing (old) entries with the incoming (new) entries in
+ * order to massage the new entries, e.g. to account for dynamic bits
+ * that KVM controls, without clobbering the current guest CPUID, which
+ * KVM needs to preserve in order to unwind on failure.
+ *
+ * Similarly, save the vCPU's current cpu_caps so that the capabilities
+ * can be updated alongside the CPUID entries when performing runtime
+ * updates. Full initialization is done if and only if the vCPU hasn't
+ * run, i.e. only if userspace is potentially changing CPUID features.
+ */
+ swap(vcpu->arch.cpuid_entries, e2);
+ swap(vcpu->arch.cpuid_nent, nent);
+
+ memcpy(vcpu_caps, vcpu->arch.cpu_caps, sizeof(vcpu_caps));
+ BUILD_BUG_ON(sizeof(vcpu_caps) != sizeof(vcpu->arch.cpu_caps));
/*
* KVM does not correctly handle changing guest CPUID after KVM_RUN, as
@@ -475,35 +538,36 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
if (kvm_vcpu_has_run(vcpu)) {
r = kvm_cpuid_check_equal(vcpu, e2, nent);
if (r)
- return r;
-
- kvfree(e2);
- return 0;
+ goto err;
+ goto success;
}
#ifdef CONFIG_KVM_HYPERV
- if (kvm_cpuid_has_hyperv(e2, nent)) {
+ if (kvm_cpuid_has_hyperv(vcpu)) {
r = kvm_hv_vcpu_init(vcpu);
if (r)
- return r;
+ goto err;
}
#endif
- r = kvm_check_cpuid(vcpu, e2, nent);
+ r = kvm_check_cpuid(vcpu);
if (r)
- return r;
-
- kvfree(vcpu->arch.cpuid_entries);
- vcpu->arch.cpuid_entries = e2;
- vcpu->arch.cpuid_nent = nent;
+ goto err;
- vcpu->arch.kvm_cpuid = kvm_get_hypervisor_cpuid(vcpu, KVM_SIGNATURE);
#ifdef CONFIG_KVM_XEN
vcpu->arch.xen.cpuid = kvm_get_hypervisor_cpuid(vcpu, XEN_SIGNATURE);
#endif
kvm_vcpu_after_set_cpuid(vcpu);
+success:
+ kvfree(e2);
return 0;
+
+err:
+ memcpy(vcpu->arch.cpu_caps, vcpu_caps, sizeof(vcpu_caps));
+ swap(vcpu->arch.cpuid_entries, e2);
+ swap(vcpu->arch.cpuid_nent, nent);
+ return r;
}
/* when an old userspace process fills a new kernel module */
@@ -590,107 +654,294 @@ int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
return 0;
}
-/* Mask kvm_cpu_caps for @leaf with the raw CPUID capabilities of this CPU. */
-static __always_inline void __kvm_cpu_cap_mask(unsigned int leaf)
+static __always_inline u32 raw_cpuid_get(struct cpuid_reg cpuid)
{
- const struct cpuid_reg cpuid = x86_feature_cpuid(leaf * 32);
struct kvm_cpuid_entry2 entry;
+ u32 base;
- reverse_cpuid_check(leaf);
+ /*
+ * KVM only supports features defined by Intel (0x0), AMD (0x80000000),
+ * and Centaur (0xc0000000). WARN if a feature for a new vendor base is
+ * defined, as this and other code would need to be updated.
+ */
+ base = cpuid.function & 0xffff0000;
+ if (WARN_ON_ONCE(base && base != 0x80000000 && base != 0xc0000000))
+ return 0;
+
+ if (cpuid_eax(base) < cpuid.function)
+ return 0;
cpuid_count(cpuid.function, cpuid.index,
&entry.eax, &entry.ebx, &entry.ecx, &entry.edx);
- kvm_cpu_caps[leaf] &= *__cpuid_entry_get_reg(&entry, cpuid.reg);
+ return *__cpuid_entry_get_reg(&entry, cpuid.reg);
}
-static __always_inline
-void kvm_cpu_cap_init_kvm_defined(enum kvm_only_cpuid_leafs leaf, u32 mask)
-{
- /* Use kvm_cpu_cap_mask for leafs that aren't KVM-only. */
- BUILD_BUG_ON(leaf < NCAPINTS);
+/*
+ * For kernel-defined leafs, mask KVM's supported feature set with the kernel's
+ * capabilities as well as raw CPUID. For KVM-defined leafs, consult only raw
+ * CPUID, as KVM is the one and only authority (in the kernel).
+ */
+#define kvm_cpu_cap_init(leaf, feature_initializers...) \
+do { \
+ const struct cpuid_reg cpuid = x86_feature_cpuid(leaf * 32); \
+ const u32 __maybe_unused kvm_cpu_cap_init_in_progress = leaf; \
+ const u32 *kernel_cpu_caps = boot_cpu_data.x86_capability; \
+ u32 kvm_cpu_cap_passthrough = 0; \
+ u32 kvm_cpu_cap_synthesized = 0; \
+ u32 kvm_cpu_cap_emulated = 0; \
+ u32 kvm_cpu_cap_features = 0; \
+ \
+ feature_initializers \
+ \
+ kvm_cpu_caps[leaf] = kvm_cpu_cap_features; \
+ \
+ if (leaf < NCAPINTS) \
+ kvm_cpu_caps[leaf] &= kernel_cpu_caps[leaf]; \
+ \
+ kvm_cpu_caps[leaf] |= kvm_cpu_cap_passthrough; \
+ kvm_cpu_caps[leaf] &= (raw_cpuid_get(cpuid) | \
+ kvm_cpu_cap_synthesized); \
+ kvm_cpu_caps[leaf] |= kvm_cpu_cap_emulated; \
+} while (0)
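For a kernel-defined leaf, the net effect of the machinery above is roughly
the following (an illustrative paraphrase, not the literal expansion):
  kvm_cpu_caps[leaf] = features;                  /* declared via F() etc. */
  kvm_cpu_caps[leaf] &= boot_cpu_data.x86_capability[leaf];
  kvm_cpu_caps[leaf] |= passthrough;              /* PASSTHROUGH_F() bits */
  kvm_cpu_caps[leaf] &= raw_cpuid_get(cpuid) | synthesized;
  kvm_cpu_caps[leaf] |= emulated;                 /* EMULATED_F() bits */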
- kvm_cpu_caps[leaf] = mask;
+/*
+ * Assert that the feature bit being declared, e.g. via F(), is in the CPUID
+ * word that's being initialized. Exempt 0x8000_0001.EDX usage of 0x1.EDX
+ * features, as AMD duplicated many 0x1.EDX features into 0x8000_0001.EDX.
+ */
+#define KVM_VALIDATE_CPU_CAP_USAGE(name) \
+do { \
+ u32 __leaf = __feature_leaf(X86_FEATURE_##name); \
+ \
+ BUILD_BUG_ON(__leaf != kvm_cpu_cap_init_in_progress); \
+} while (0)
+
+#define F(name) \
+({ \
+ KVM_VALIDATE_CPU_CAP_USAGE(name); \
+ kvm_cpu_cap_features |= feature_bit(name); \
+})
- __kvm_cpu_cap_mask(leaf);
-}
+/* Scattered Flag - For features that are scattered by cpufeatures.h. */
+#define SCATTERED_F(name) \
+({ \
+ BUILD_BUG_ON(X86_FEATURE_##name >= MAX_CPU_FEATURES); \
+ KVM_VALIDATE_CPU_CAP_USAGE(name); \
+ if (boot_cpu_has(X86_FEATURE_##name)) \
+ F(name); \
+})
-static __always_inline void kvm_cpu_cap_mask(enum cpuid_leafs leaf, u32 mask)
-{
- /* Use kvm_cpu_cap_init_kvm_defined for KVM-only leafs. */
- BUILD_BUG_ON(leaf >= NCAPINTS);
+/* Features that KVM supports only on 64-bit kernels. */
+#define X86_64_F(name) \
+({ \
+ KVM_VALIDATE_CPU_CAP_USAGE(name); \
+ if (IS_ENABLED(CONFIG_X86_64)) \
+ F(name); \
+})
- kvm_cpu_caps[leaf] &= mask;
+/*
+ * Emulated Feature - For features that KVM emulates in software irrespective
+ * of host CPU/kernel support.
+ */
+#define EMULATED_F(name) \
+({ \
+ kvm_cpu_cap_emulated |= feature_bit(name); \
+ F(name); \
+})
- __kvm_cpu_cap_mask(leaf);
-}
+/*
+ * Synthesized Feature - For features that are synthesized into boot_cpu_data,
+ * i.e. may not be present in the raw CPUID, but can still be advertised to
+ * userspace. Primarily used for mitigation related feature flags.
+ */
+#define SYNTHESIZED_F(name) \
+({ \
+ kvm_cpu_cap_synthesized |= feature_bit(name); \
+ F(name); \
+})
+
+/*
+ * Passthrough Feature - For features that KVM supports based purely on raw
+ * hardware CPUID, i.e. that KVM virtualizes even if the host kernel doesn't
+ * use the feature. Simply force set the feature in KVM's capabilities; raw
+ * CPUID support is factored in by kvm_cpu_cap_init().
+ */
+#define PASSTHROUGH_F(name) \
+({ \
+ kvm_cpu_cap_passthrough |= feature_bit(name); \
+ F(name); \
+})
+
+/*
+ * Aliased Features - For features in 0x8000_0001.EDX that are duplicates of
+ * identical 0x1.EDX features, and thus are aliased from 0x1 to 0x8000_0001.
+ */
+#define ALIASED_1_EDX_F(name) \
+({ \
+ BUILD_BUG_ON(__feature_leaf(X86_FEATURE_##name) != CPUID_1_EDX); \
+ BUILD_BUG_ON(kvm_cpu_cap_init_in_progress != CPUID_8000_0001_EDX); \
+ kvm_cpu_cap_features |= feature_bit(name); \
+})
+
+/*
+ * Vendor Features - For features that KVM supports, but that are only set
+ * later, by vendor code, because they require additional vendor enabling.
+ */
+#define VENDOR_F(name) \
+({ \
+ KVM_VALIDATE_CPU_CAP_USAGE(name); \
+})
+
+/*
+ * Runtime Features - For features that KVM dynamically sets/clears at runtime,
+ * e.g. when CR4 changes, but which are never advertised to userspace.
+ */
+#define RUNTIME_F(name) \
+({ \
+ KVM_VALIDATE_CPU_CAP_USAGE(name); \
+})
+
+/*
+ * Undefine the MSR bit macro to avoid token concatenation issues when
+ * processing X86_FEATURE_SPEC_CTRL_SSBD.
+ */
+#undef SPEC_CTRL_SSBD
+
+/* DS is defined by ptrace-abi.h on 32-bit builds. */
+#undef DS
void kvm_set_cpu_caps(void)
{
-#ifdef CONFIG_X86_64
- unsigned int f_gbpages = F(GBPAGES);
- unsigned int f_lm = F(LM);
- unsigned int f_xfd = F(XFD);
-#else
- unsigned int f_gbpages = 0;
- unsigned int f_lm = 0;
- unsigned int f_xfd = 0;
-#endif
memset(kvm_cpu_caps, 0, sizeof(kvm_cpu_caps));
BUILD_BUG_ON(sizeof(kvm_cpu_caps) - (NKVMCAPINTS * sizeof(*kvm_cpu_caps)) >
sizeof(boot_cpu_data.x86_capability));
- memcpy(&kvm_cpu_caps, &boot_cpu_data.x86_capability,
- sizeof(kvm_cpu_caps) - (NKVMCAPINTS * sizeof(*kvm_cpu_caps)));
-
- kvm_cpu_cap_mask(CPUID_1_ECX,
+ kvm_cpu_cap_init(CPUID_1_ECX,
+ F(XMM3),
+ F(PCLMULQDQ),
+ VENDOR_F(DTES64),
/*
* NOTE: MONITOR (and MWAIT) are emulated as NOP, but *not*
- * advertised to guests via CPUID!
+ * advertised to guests via CPUID! MWAIT is also technically a
+ * runtime flag thanks to IA32_MISC_ENABLE; mark it as such so
+ * that KVM is aware that it's a known, unadvertised flag.
*/
- F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ |
- 0 /* DS-CPL, VMX, SMX, EST */ |
- 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
- F(FMA) | F(CX16) | 0 /* xTPR Update */ | F(PDCM) |
- F(PCID) | 0 /* Reserved, DCA */ | F(XMM4_1) |
- F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
- 0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) |
- F(F16C) | F(RDRAND)
+ RUNTIME_F(MWAIT),
+ /* DS-CPL */
+ VENDOR_F(VMX),
+ /* SMX, EST */
+ /* TM2 */
+ F(SSSE3),
+ /* CNXT-ID */
+ /* Reserved */
+ F(FMA),
+ F(CX16),
+ /* xTPR Update */
+ F(PDCM),
+ F(PCID),
+ /* Reserved, DCA */
+ F(XMM4_1),
+ F(XMM4_2),
+ EMULATED_F(X2APIC),
+ F(MOVBE),
+ F(POPCNT),
+ EMULATED_F(TSC_DEADLINE_TIMER),
+ F(AES),
+ F(XSAVE),
+ RUNTIME_F(OSXSAVE),
+ F(AVX),
+ F(F16C),
+ F(RDRAND),
+ EMULATED_F(HYPERVISOR),
);
- /* KVM emulates x2apic in software irrespective of host support. */
- kvm_cpu_cap_set(X86_FEATURE_X2APIC);
-
- kvm_cpu_cap_mask(CPUID_1_EDX,
- F(FPU) | F(VME) | F(DE) | F(PSE) |
- F(TSC) | F(MSR) | F(PAE) | F(MCE) |
- F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) |
- F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
- F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLUSH) |
- 0 /* Reserved, DS, ACPI */ | F(MMX) |
- F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) |
- 0 /* HTT, TM, Reserved, PBE */
+
+ kvm_cpu_cap_init(CPUID_1_EDX,
+ F(FPU),
+ F(VME),
+ F(DE),
+ F(PSE),
+ F(TSC),
+ F(MSR),
+ F(PAE),
+ F(MCE),
+ F(CX8),
+ F(APIC),
+ /* Reserved */
+ F(SEP),
+ F(MTRR),
+ F(PGE),
+ F(MCA),
+ F(CMOV),
+ F(PAT),
+ F(PSE36),
+ /* PSN */
+ F(CLFLUSH),
+ /* Reserved */
+ VENDOR_F(DS),
+ /* ACPI */
+ F(MMX),
+ F(FXSR),
+ F(XMM),
+ F(XMM2),
+ F(SELFSNOOP),
+ /* HTT, TM, Reserved, PBE */
);
- kvm_cpu_cap_mask(CPUID_7_0_EBX,
- F(FSGSBASE) | F(SGX) | F(BMI1) | F(HLE) | F(AVX2) |
- F(FDP_EXCPTN_ONLY) | F(SMEP) | F(BMI2) | F(ERMS) | F(INVPCID) |
- F(RTM) | F(ZERO_FCS_FDS) | 0 /*MPX*/ | F(AVX512F) |
- F(AVX512DQ) | F(RDSEED) | F(ADX) | F(SMAP) | F(AVX512IFMA) |
- F(CLFLUSHOPT) | F(CLWB) | 0 /*INTEL_PT*/ | F(AVX512PF) |
- F(AVX512ER) | F(AVX512CD) | F(SHA_NI) | F(AVX512BW) |
- F(AVX512VL));
-
- kvm_cpu_cap_mask(CPUID_7_ECX,
- F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | F(RDPID) |
- F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
- F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
- F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/ |
- F(SGX_LC) | F(BUS_LOCK_DETECT)
+ kvm_cpu_cap_init(CPUID_7_0_EBX,
+ F(FSGSBASE),
+ EMULATED_F(TSC_ADJUST),
+ F(SGX),
+ F(BMI1),
+ F(HLE),
+ F(AVX2),
+ F(FDP_EXCPTN_ONLY),
+ F(SMEP),
+ F(BMI2),
+ F(ERMS),
+ F(INVPCID),
+ F(RTM),
+ F(ZERO_FCS_FDS),
+ VENDOR_F(MPX),
+ F(AVX512F),
+ F(AVX512DQ),
+ F(RDSEED),
+ F(ADX),
+ F(SMAP),
+ F(AVX512IFMA),
+ F(CLFLUSHOPT),
+ F(CLWB),
+ VENDOR_F(INTEL_PT),
+ F(AVX512PF),
+ F(AVX512ER),
+ F(AVX512CD),
+ F(SHA_NI),
+ F(AVX512BW),
+ F(AVX512VL),
+ );
+
+ kvm_cpu_cap_init(CPUID_7_ECX,
+ F(AVX512VBMI),
+ PASSTHROUGH_F(LA57),
+ F(PKU),
+ RUNTIME_F(OSPKE),
+ F(RDPID),
+ F(AVX512_VPOPCNTDQ),
+ F(UMIP),
+ F(AVX512_VBMI2),
+ F(GFNI),
+ F(VAES),
+ F(VPCLMULQDQ),
+ F(AVX512_VNNI),
+ F(AVX512_BITALG),
+ F(CLDEMOTE),
+ F(MOVDIRI),
+ F(MOVDIR64B),
+ VENDOR_F(WAITPKG),
+ F(SGX_LC),
+ F(BUS_LOCK_DETECT),
);
- /* Set LA57 based on hardware capability. */
- if (cpuid_ecx(7) & F(LA57))
- kvm_cpu_cap_set(X86_FEATURE_LA57);
/*
* PKU not yet implemented for shadow paging and requires OSPKE
@@ -699,18 +950,25 @@ void kvm_set_cpu_caps(void)
if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))
kvm_cpu_cap_clear(X86_FEATURE_PKU);
- kvm_cpu_cap_mask(CPUID_7_EDX,
- F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
- F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
- F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM) |
- F(SERIALIZE) | F(TSXLDTRK) | F(AVX512_FP16) |
- F(AMX_TILE) | F(AMX_INT8) | F(AMX_BF16) | F(FLUSH_L1D)
+ kvm_cpu_cap_init(CPUID_7_EDX,
+ F(AVX512_4VNNIW),
+ F(AVX512_4FMAPS),
+ F(SPEC_CTRL),
+ F(SPEC_CTRL_SSBD),
+ EMULATED_F(ARCH_CAPABILITIES),
+ F(INTEL_STIBP),
+ F(MD_CLEAR),
+ F(AVX512_VP2INTERSECT),
+ F(FSRM),
+ F(SERIALIZE),
+ F(TSXLDTRK),
+ F(AVX512_FP16),
+ F(AMX_TILE),
+ F(AMX_INT8),
+ F(AMX_BF16),
+ F(FLUSH_L1D),
);
- /* TSC_ADJUST and ARCH_CAPABILITIES are emulated in software. */
- kvm_cpu_cap_set(X86_FEATURE_TSC_ADJUST);
- kvm_cpu_cap_set(X86_FEATURE_ARCH_CAPABILITIES);
-
if (boot_cpu_has(X86_FEATURE_AMD_IBPB_RET) &&
boot_cpu_has(X86_FEATURE_AMD_IBPB) &&
boot_cpu_has(X86_FEATURE_AMD_IBRS))
@@ -720,65 +978,133 @@ void kvm_set_cpu_caps(void)
if (boot_cpu_has(X86_FEATURE_AMD_SSBD))
kvm_cpu_cap_set(X86_FEATURE_SPEC_CTRL_SSBD);
- kvm_cpu_cap_mask(CPUID_7_1_EAX,
- F(SHA512) | F(SM3) | F(SM4) | F(AVX_VNNI) | F(AVX512_BF16) |
- F(CMPCCXADD) | F(FZRM) | F(FSRS) | F(FSRC) | F(AMX_FP16) |
- F(AVX_IFMA) | F(LAM)
+ kvm_cpu_cap_init(CPUID_7_1_EAX,
+ F(SHA512),
+ F(SM3),
+ F(SM4),
+ F(AVX_VNNI),
+ F(AVX512_BF16),
+ F(CMPCCXADD),
+ F(FZRM),
+ F(FSRS),
+ F(FSRC),
+ F(AMX_FP16),
+ F(AVX_IFMA),
+ F(LAM),
);
- kvm_cpu_cap_init_kvm_defined(CPUID_7_1_EDX,
- F(AVX_VNNI_INT8) | F(AVX_NE_CONVERT) | F(AMX_COMPLEX) |
- F(AVX_VNNI_INT16) | F(PREFETCHITI) | F(AVX10)
+ kvm_cpu_cap_init(CPUID_7_1_EDX,
+ F(AVX_VNNI_INT8),
+ F(AVX_NE_CONVERT),
+ F(AMX_COMPLEX),
+ F(AVX_VNNI_INT16),
+ F(PREFETCHITI),
+ F(AVX10),
);
- kvm_cpu_cap_init_kvm_defined(CPUID_7_2_EDX,
- F(INTEL_PSFD) | F(IPRED_CTRL) | F(RRSBA_CTRL) | F(DDPD_U) |
- F(BHI_CTRL) | F(MCDT_NO)
+ kvm_cpu_cap_init(CPUID_7_2_EDX,
+ F(INTEL_PSFD),
+ F(IPRED_CTRL),
+ F(RRSBA_CTRL),
+ F(DDPD_U),
+ F(BHI_CTRL),
+ F(MCDT_NO),
);
- kvm_cpu_cap_mask(CPUID_D_1_EAX,
- F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | F(XSAVES) | f_xfd
+ kvm_cpu_cap_init(CPUID_D_1_EAX,
+ F(XSAVEOPT),
+ F(XSAVEC),
+ F(XGETBV1),
+ F(XSAVES),
+ X86_64_F(XFD),
);
- kvm_cpu_cap_init_kvm_defined(CPUID_12_EAX,
- SF(SGX1) | SF(SGX2) | SF(SGX_EDECCSSA)
+ kvm_cpu_cap_init(CPUID_12_EAX,
+ SCATTERED_F(SGX1),
+ SCATTERED_F(SGX2),
+ SCATTERED_F(SGX_EDECCSSA),
);
- kvm_cpu_cap_init_kvm_defined(CPUID_24_0_EBX,
- F(AVX10_128) | F(AVX10_256) | F(AVX10_512)
+ kvm_cpu_cap_init(CPUID_24_0_EBX,
+ F(AVX10_128),
+ F(AVX10_256),
+ F(AVX10_512),
);
- kvm_cpu_cap_mask(CPUID_8000_0001_ECX,
- F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ |
- F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
- F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
- 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM) |
- F(TOPOEXT) | 0 /* PERFCTR_CORE */
+ kvm_cpu_cap_init(CPUID_8000_0001_ECX,
+ F(LAHF_LM),
+ F(CMP_LEGACY),
+ VENDOR_F(SVM),
+ /* ExtApicSpace */
+ F(CR8_LEGACY),
+ F(ABM),
+ F(SSE4A),
+ F(MISALIGNSSE),
+ F(3DNOWPREFETCH),
+ F(OSVW),
+ /* IBS */
+ F(XOP),
+ /* SKINIT, WDT, LWP */
+ F(FMA4),
+ F(TBM),
+ F(TOPOEXT),
+ VENDOR_F(PERFCTR_CORE),
);
- kvm_cpu_cap_mask(CPUID_8000_0001_EDX,
- F(FPU) | F(VME) | F(DE) | F(PSE) |
- F(TSC) | F(MSR) | F(PAE) | F(MCE) |
- F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) |
- F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
- F(PAT) | F(PSE36) | 0 /* Reserved */ |
- F(NX) | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
- F(FXSR) | F(FXSR_OPT) | f_gbpages | F(RDTSCP) |
- 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW)
+ kvm_cpu_cap_init(CPUID_8000_0001_EDX,
+ ALIASED_1_EDX_F(FPU),
+ ALIASED_1_EDX_F(VME),
+ ALIASED_1_EDX_F(DE),
+ ALIASED_1_EDX_F(PSE),
+ ALIASED_1_EDX_F(TSC),
+ ALIASED_1_EDX_F(MSR),
+ ALIASED_1_EDX_F(PAE),
+ ALIASED_1_EDX_F(MCE),
+ ALIASED_1_EDX_F(CX8),
+ ALIASED_1_EDX_F(APIC),
+ /* Reserved */
+ F(SYSCALL),
+ ALIASED_1_EDX_F(MTRR),
+ ALIASED_1_EDX_F(PGE),
+ ALIASED_1_EDX_F(MCA),
+ ALIASED_1_EDX_F(CMOV),
+ ALIASED_1_EDX_F(PAT),
+ ALIASED_1_EDX_F(PSE36),
+ /* Reserved */
+ F(NX),
+ /* Reserved */
+ F(MMXEXT),
+ ALIASED_1_EDX_F(MMX),
+ ALIASED_1_EDX_F(FXSR),
+ F(FXSR_OPT),
+ X86_64_F(GBPAGES),
+ F(RDTSCP),
+ /* Reserved */
+ X86_64_F(LM),
+ F(3DNOWEXT),
+ F(3DNOW),
);
if (!tdp_enabled && IS_ENABLED(CONFIG_X86_64))
kvm_cpu_cap_set(X86_FEATURE_GBPAGES);
- kvm_cpu_cap_init_kvm_defined(CPUID_8000_0007_EDX,
- SF(CONSTANT_TSC)
+ kvm_cpu_cap_init(CPUID_8000_0007_EDX,
+ SCATTERED_F(CONSTANT_TSC),
);
- kvm_cpu_cap_mask(CPUID_8000_0008_EBX,
- F(CLZERO) | F(XSAVEERPTR) |
- F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) |
- F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON) |
- F(AMD_PSFD) | F(AMD_IBPB_RET)
+ kvm_cpu_cap_init(CPUID_8000_0008_EBX,
+ F(CLZERO),
+ F(XSAVEERPTR),
+ F(WBNOINVD),
+ F(AMD_IBPB),
+ F(AMD_IBRS),
+ F(AMD_SSBD),
+ F(VIRT_SSBD),
+ F(AMD_SSB_NO),
+ F(AMD_STIBP),
+ F(AMD_STIBP_ALWAYS_ON),
+ F(AMD_PSFD),
+ F(AMD_IBPB_RET),
);
/*
@@ -808,50 +1134,72 @@ void kvm_set_cpu_caps(void)
!boot_cpu_has(X86_FEATURE_AMD_SSBD))
kvm_cpu_cap_set(X86_FEATURE_VIRT_SSBD);
- /*
- * Hide all SVM features by default, SVM will set the cap bits for
- * features it emulates and/or exposes for L1.
- */
- kvm_cpu_cap_mask(CPUID_8000_000A_EDX, 0);
-
- kvm_cpu_cap_mask(CPUID_8000_001F_EAX,
- 0 /* SME */ | 0 /* SEV */ | 0 /* VM_PAGE_FLUSH */ | 0 /* SEV_ES */ |
- F(SME_COHERENT));
+ /* All SVM features require additional vendor module enabling. */
+ kvm_cpu_cap_init(CPUID_8000_000A_EDX,
+ VENDOR_F(NPT),
+ VENDOR_F(VMCBCLEAN),
+ VENDOR_F(FLUSHBYASID),
+ VENDOR_F(NRIPS),
+ VENDOR_F(TSCRATEMSR),
+ VENDOR_F(V_VMSAVE_VMLOAD),
+ VENDOR_F(LBRV),
+ VENDOR_F(PAUSEFILTER),
+ VENDOR_F(PFTHRESHOLD),
+ VENDOR_F(VGIF),
+ VENDOR_F(VNMI),
+ VENDOR_F(SVME_ADDR_CHK),
+ );
- kvm_cpu_cap_mask(CPUID_8000_0021_EAX,
- F(NO_NESTED_DATA_BP) | F(LFENCE_RDTSC) | 0 /* SmmPgCfgLock */ |
- F(NULL_SEL_CLR_BASE) | F(AUTOIBRS) | 0 /* PrefetchCtlMsr */ |
- F(WRMSR_XX_BASE_NS)
+ kvm_cpu_cap_init(CPUID_8000_001F_EAX,
+ VENDOR_F(SME),
+ VENDOR_F(SEV),
+ /* VM_PAGE_FLUSH */
+ VENDOR_F(SEV_ES),
+ F(SME_COHERENT),
);
- kvm_cpu_cap_check_and_set(X86_FEATURE_SBPB);
- kvm_cpu_cap_check_and_set(X86_FEATURE_IBPB_BRTYPE);
- kvm_cpu_cap_check_and_set(X86_FEATURE_SRSO_NO);
+ kvm_cpu_cap_init(CPUID_8000_0021_EAX,
+ F(NO_NESTED_DATA_BP),
+ /*
+ * Synthesize "LFENCE is serializing" into the AMD-defined entry
+ * in KVM's supported CPUID, i.e. if the feature is reported as
+ * supported by the kernel. LFENCE_RDTSC was a Linux-defined
+ * synthetic feature long before AMD joined the bandwagon, e.g.
+ * LFENCE is serializing on most CPUs that support SSE2. On
+ * CPUs that don't support AMD's leaf, ANDing with the raw host
+ * CPUID will drop the flags, and reporting support in AMD's
+ * leaf can make it easier for userspace to detect the feature.
+ */
+ SYNTHESIZED_F(LFENCE_RDTSC),
+ /* SmmPgCfgLock */
+ F(NULL_SEL_CLR_BASE),
+ F(AUTOIBRS),
+ EMULATED_F(NO_SMM_CTL_MSR),
+ /* PrefetchCtlMsr */
+ F(WRMSR_XX_BASE_NS),
+ SYNTHESIZED_F(SBPB),
+ SYNTHESIZED_F(IBPB_BRTYPE),
+ SYNTHESIZED_F(SRSO_NO),
+ );
- kvm_cpu_cap_init_kvm_defined(CPUID_8000_0022_EAX,
- F(PERFMON_V2)
+ kvm_cpu_cap_init(CPUID_8000_0022_EAX,
+ F(PERFMON_V2),
);
- /*
- * Synthesize "LFENCE is serializing" into the AMD-defined entry in
- * KVM's supported CPUID if the feature is reported as supported by the
- * kernel. LFENCE_RDTSC was a Linux-defined synthetic feature long
- * before AMD joined the bandwagon, e.g. LFENCE is serializing on most
- * CPUs that support SSE2. On CPUs that don't support AMD's leaf,
- * kvm_cpu_cap_mask() will unfortunately drop the flag due to ANDing
- * the mask with the raw host CPUID, and reporting support in AMD's
- * leaf can make it easier for userspace to detect the feature.
- */
- if (cpu_feature_enabled(X86_FEATURE_LFENCE_RDTSC))
- kvm_cpu_cap_set(X86_FEATURE_LFENCE_RDTSC);
if (!static_cpu_has_bug(X86_BUG_NULL_SEG))
kvm_cpu_cap_set(X86_FEATURE_NULL_SEL_CLR_BASE);
- kvm_cpu_cap_set(X86_FEATURE_NO_SMM_CTL_MSR);
- kvm_cpu_cap_mask(CPUID_C000_0001_EDX,
- F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) |
- F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
- F(PMM) | F(PMM_EN)
+ kvm_cpu_cap_init(CPUID_C000_0001_EDX,
+ F(XSTORE),
+ F(XSTORE_EN),
+ F(XCRYPT),
+ F(XCRYPT_EN),
+ F(ACE2),
+ F(ACE2_EN),
+ F(PHE),
+ F(PHE_EN),
+ F(PMM),
+ F(PMM_EN),
);
/*
@@ -871,6 +1219,16 @@ void kvm_set_cpu_caps(void)
}
EXPORT_SYMBOL_GPL(kvm_set_cpu_caps);
+#undef F
+#undef SCATTERED_F
+#undef X86_64_F
+#undef EMULATED_F
+#undef SYNTHESIZED_F
+#undef PASSTHROUGH_F
+#undef ALIASED_1_EDX_F
+#undef VENDOR_F
+#undef RUNTIME_F
+
struct kvm_cpuid_array {
struct kvm_cpuid_entry2 *entries;
int maxnent;
@@ -928,14 +1286,11 @@ static struct kvm_cpuid_entry2 *do_host_cpuid(struct kvm_cpuid_array *array,
return entry;
}
-static int __do_cpuid_func_emulated(struct kvm_cpuid_array *array, u32 func)
+static int cpuid_func_emulated(struct kvm_cpuid_entry2 *entry, u32 func,
+ bool include_partially_emulated)
{
- struct kvm_cpuid_entry2 *entry;
-
- if (array->nent >= array->maxnent)
- return -E2BIG;
+ memset(entry, 0, sizeof(*entry));
- entry = &array->entries[array->nent];
entry->function = func;
entry->index = 0;
entry->flags = 0;
@@ -943,23 +1298,37 @@ static int __do_cpuid_func_emulated(struct kvm_cpuid_array *array, u32 func)
switch (func) {
case 0:
entry->eax = 7;
- ++array->nent;
- break;
+ return 1;
case 1:
- entry->ecx = F(MOVBE);
- ++array->nent;
- break;
+ entry->ecx = feature_bit(MOVBE);
+ /*
+ * KVM allows userspace to enumerate MONITOR+MWAIT support to
+ * the guest, but the MWAIT feature flag is never advertised
+ * to userspace because MONITOR+MWAIT aren't virtualized by
+ * hardware, can't be faithfully emulated in software (KVM
+ * emulates them as NOPs), and allowing the guest to execute
+ * them natively requires enabling a per-VM capability.
+ */
+ if (include_partially_emulated)
+ entry->ecx |= feature_bit(MWAIT);
+ return 1;
case 7:
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
entry->eax = 0;
if (kvm_cpu_cap_has(X86_FEATURE_RDTSCP))
- entry->ecx = F(RDPID);
- ++array->nent;
- break;
+ entry->ecx = feature_bit(RDPID);
+ return 1;
default:
- break;
+ return 0;
}
+}
+
+static int __do_cpuid_func_emulated(struct kvm_cpuid_array *array, u32 func)
+{
+ if (array->nent >= array->maxnent)
+ return -E2BIG;
+ array->nent += cpuid_func_emulated(&array->entries[array->nent], func, false);
return 0;
}
@@ -1103,7 +1472,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
goto out;
cpuid_entry_override(entry, CPUID_D_1_EAX);
- if (entry->eax & (F(XSAVES)|F(XSAVEC)))
+ if (entry->eax & (feature_bit(XSAVES) | feature_bit(XSAVEC)))
entry->ebx = xstate_required_size(permitted_xcr0 | permitted_xss,
true);
else {
@@ -1540,22 +1909,6 @@ out_free:
return r;
}
-struct kvm_cpuid_entry2 *kvm_find_cpuid_entry_index(struct kvm_vcpu *vcpu,
- u32 function, u32 index)
-{
- return cpuid_entry2_find(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent,
- function, index);
-}
-EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry_index);
-
-struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
- u32 function)
-{
- return cpuid_entry2_find(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent,
- function, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
-}
-EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry);
-
/*
* Intel CPUID semantics treats any query for an out-of-range leaf as if the
* highest basic leaf (i.e. CPUID.0H:EAX) were requested. AMD CPUID semantics
@@ -1648,10 +2001,10 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
u64 data;
if (!__kvm_get_msr(vcpu, MSR_IA32_TSX_CTRL, &data, true) &&
(data & TSX_CTRL_CPUID_CLEAR))
- *ebx &= ~(F(RTM) | F(HLE));
+ *ebx &= ~(feature_bit(RTM) | feature_bit(HLE));
} else if (function == 0x80000007) {
if (kvm_hv_invtsc_suppressed(vcpu))
- *edx &= ~SF(CONSTANT_TSC);
+ *edx &= ~feature_bit(CONSTANT_TSC);
}
} else {
*eax = *ebx = *ecx = *edx = 0;
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index f16a7b2c2adc..67d80aa72d50 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -10,8 +10,8 @@
extern u32 kvm_cpu_caps[NR_KVM_CPU_CAPS] __read_mostly;
void kvm_set_cpu_caps(void);
+void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu);
void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu);
-void kvm_update_pv_runtime(struct kvm_vcpu *vcpu);
struct kvm_cpuid_entry2 *kvm_find_cpuid_entry_index(struct kvm_vcpu *vcpu,
u32 function, u32 index);
struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
@@ -67,41 +67,40 @@ static __always_inline void cpuid_entry_override(struct kvm_cpuid_entry2 *entry,
*reg = kvm_cpu_caps[leaf];
}
-static __always_inline u32 *guest_cpuid_get_register(struct kvm_vcpu *vcpu,
- unsigned int x86_feature)
+static __always_inline bool guest_cpuid_has(struct kvm_vcpu *vcpu,
+ unsigned int x86_feature)
{
const struct cpuid_reg cpuid = x86_feature_cpuid(x86_feature);
struct kvm_cpuid_entry2 *entry;
+ u32 *reg;
+
+ /*
+ * XSAVES is a special snowflake. Due to lack of a dedicated intercept
+ * on SVM, KVM must assume that XSAVES (and thus XRSTORS) is usable by
+ * the guest if the host supports XSAVES and *XSAVE* is exposed to the
+ * guest. Because the guest can execute XSAVES and XRSTORS, i.e. can
+ * indirectly consume XSS, KVM must ensure XSS is zeroed when running
+ * the guest, i.e. must set XSAVES in vCPU capabilities. But to reject
+ * direct XSS reads and writes (to minimize the virtualization hole and
+ * honor userspace's CPUID), KVM needs to check the raw guest CPUID,
+ * not KVM's view of guest capabilities.
+ *
+ * For all other features, guest capabilities are accurate. Expand
+ * this allowlist with extreme vigilance.
+ */
+ BUILD_BUG_ON(x86_feature != X86_FEATURE_XSAVES);
entry = kvm_find_cpuid_entry_index(vcpu, cpuid.function, cpuid.index);
if (!entry)
- return NULL;
+ return false;
- return __cpuid_entry_get_reg(entry, cpuid.reg);
-}
-
-static __always_inline bool guest_cpuid_has(struct kvm_vcpu *vcpu,
- unsigned int x86_feature)
-{
- u32 *reg;
-
- reg = guest_cpuid_get_register(vcpu, x86_feature);
+ reg = __cpuid_entry_get_reg(entry, cpuid.reg);
if (!reg)
return false;
return *reg & __feature_bit(x86_feature);
}
-static __always_inline void guest_cpuid_clear(struct kvm_vcpu *vcpu,
- unsigned int x86_feature)
-{
- u32 *reg;
-
- reg = guest_cpuid_get_register(vcpu, x86_feature);
- if (reg)
- *reg &= ~__feature_bit(x86_feature);
-}
-
static inline bool guest_cpuid_is_amd_compatible(struct kvm_vcpu *vcpu)
{
return vcpu->arch.is_amd_compatible;
@@ -150,21 +149,6 @@ static inline int guest_cpuid_stepping(struct kvm_vcpu *vcpu)
return x86_stepping(best->eax);
}
-static inline bool guest_has_spec_ctrl_msr(struct kvm_vcpu *vcpu)
-{
- return (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) ||
- guest_cpuid_has(vcpu, X86_FEATURE_AMD_STIBP) ||
- guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) ||
- guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD));
-}
-
-static inline bool guest_has_pred_cmd_msr(struct kvm_vcpu *vcpu)
-{
- return (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) ||
- guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBPB) ||
- guest_cpuid_has(vcpu, X86_FEATURE_SBPB));
-}
-
static inline bool supports_cpuid_fault(struct kvm_vcpu *vcpu)
{
return vcpu->arch.msr_platform_info & MSR_PLATFORM_INFO_CPUID_FAULT;
@@ -180,7 +164,6 @@ static __always_inline void kvm_cpu_cap_clear(unsigned int x86_feature)
{
unsigned int x86_leaf = __feature_leaf(x86_feature);
- reverse_cpuid_check(x86_leaf);
kvm_cpu_caps[x86_leaf] &= ~__feature_bit(x86_feature);
}
@@ -188,7 +171,6 @@ static __always_inline void kvm_cpu_cap_set(unsigned int x86_feature)
{
unsigned int x86_leaf = __feature_leaf(x86_feature);
- reverse_cpuid_check(x86_leaf);
kvm_cpu_caps[x86_leaf] |= __feature_bit(x86_feature);
}
@@ -196,7 +178,6 @@ static __always_inline u32 kvm_cpu_cap_get(unsigned int x86_feature)
{
unsigned int x86_leaf = __feature_leaf(x86_feature);
- reverse_cpuid_check(x86_leaf);
return kvm_cpu_caps[x86_leaf] & __feature_bit(x86_feature);
}
@@ -220,58 +201,61 @@ static __always_inline bool guest_pv_has(struct kvm_vcpu *vcpu,
return vcpu->arch.pv_cpuid.features & (1u << kvm_feature);
}
-enum kvm_governed_features {
-#define KVM_GOVERNED_FEATURE(x) KVM_GOVERNED_##x,
-#include "governed_features.h"
- KVM_NR_GOVERNED_FEATURES
-};
-
-static __always_inline int kvm_governed_feature_index(unsigned int x86_feature)
+static __always_inline void guest_cpu_cap_set(struct kvm_vcpu *vcpu,
+ unsigned int x86_feature)
{
- switch (x86_feature) {
-#define KVM_GOVERNED_FEATURE(x) case x: return KVM_GOVERNED_##x;
-#include "governed_features.h"
- default:
- return -1;
- }
-}
+ unsigned int x86_leaf = __feature_leaf(x86_feature);
-static __always_inline bool kvm_is_governed_feature(unsigned int x86_feature)
-{
- return kvm_governed_feature_index(x86_feature) >= 0;
+ vcpu->arch.cpu_caps[x86_leaf] |= __feature_bit(x86_feature);
}
-static __always_inline void kvm_governed_feature_set(struct kvm_vcpu *vcpu,
- unsigned int x86_feature)
+static __always_inline void guest_cpu_cap_clear(struct kvm_vcpu *vcpu,
+ unsigned int x86_feature)
{
- BUILD_BUG_ON(!kvm_is_governed_feature(x86_feature));
+ unsigned int x86_leaf = __feature_leaf(x86_feature);
- __set_bit(kvm_governed_feature_index(x86_feature),
- vcpu->arch.governed_features.enabled);
+ vcpu->arch.cpu_caps[x86_leaf] &= ~__feature_bit(x86_feature);
}
-static __always_inline void kvm_governed_feature_check_and_set(struct kvm_vcpu *vcpu,
- unsigned int x86_feature)
+static __always_inline void guest_cpu_cap_change(struct kvm_vcpu *vcpu,
+ unsigned int x86_feature,
+ bool guest_has_cap)
{
- if (kvm_cpu_cap_has(x86_feature) && guest_cpuid_has(vcpu, x86_feature))
- kvm_governed_feature_set(vcpu, x86_feature);
+ if (guest_has_cap)
+ guest_cpu_cap_set(vcpu, x86_feature);
+ else
+ guest_cpu_cap_clear(vcpu, x86_feature);
}
-static __always_inline bool guest_can_use(struct kvm_vcpu *vcpu,
- unsigned int x86_feature)
+static __always_inline bool guest_cpu_cap_has(struct kvm_vcpu *vcpu,
+ unsigned int x86_feature)
{
- BUILD_BUG_ON(!kvm_is_governed_feature(x86_feature));
+ unsigned int x86_leaf = __feature_leaf(x86_feature);
- return test_bit(kvm_governed_feature_index(x86_feature),
- vcpu->arch.governed_features.enabled);
+ return vcpu->arch.cpu_caps[x86_leaf] & __feature_bit(x86_feature);
}
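For illustration, the check reduces to a single word load and mask, resolved
at compile time; e.g. for LAM (word CPUID_7_1_EAX, bit 26 per the SDM):
  bool has_lam = guest_cpu_cap_has(vcpu, X86_FEATURE_LAM);
  /* ...is equivalent to: vcpu->arch.cpu_caps[CPUID_7_1_EAX] & BIT(26) */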
static inline bool kvm_vcpu_is_legal_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
{
- if (guest_can_use(vcpu, X86_FEATURE_LAM))
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_LAM))
cr3 &= ~(X86_CR3_LAM_U48 | X86_CR3_LAM_U57);
return kvm_vcpu_is_legal_gpa(vcpu, cr3);
}
+static inline bool guest_has_spec_ctrl_msr(struct kvm_vcpu *vcpu)
+{
+ return (guest_cpu_cap_has(vcpu, X86_FEATURE_SPEC_CTRL) ||
+ guest_cpu_cap_has(vcpu, X86_FEATURE_AMD_STIBP) ||
+ guest_cpu_cap_has(vcpu, X86_FEATURE_AMD_IBRS) ||
+ guest_cpu_cap_has(vcpu, X86_FEATURE_AMD_SSBD));
+}
+
+static inline bool guest_has_pred_cmd_msr(struct kvm_vcpu *vcpu)
+{
+ return (guest_cpu_cap_has(vcpu, X86_FEATURE_SPEC_CTRL) ||
+ guest_cpu_cap_has(vcpu, X86_FEATURE_AMD_IBPB) ||
+ guest_cpu_cap_has(vcpu, X86_FEATURE_SBPB));
+}
+
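A sketch of a typical consumer, mirroring the MSR_IA32_SPEC_CTRL checks in
the vendor get/set_msr paths:
  case MSR_IA32_SPEC_CTRL:
          if (!msr_info->host_initiated && !guest_has_spec_ctrl_msr(vcpu))
                  return 1; /* MSR doesn't exist for this guest: #GP */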
#endif
diff --git a/arch/x86/kvm/governed_features.h b/arch/x86/kvm/governed_features.h
deleted file mode 100644
index ad463b1ed4e4..000000000000
--- a/arch/x86/kvm/governed_features.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#if !defined(KVM_GOVERNED_FEATURE) || defined(KVM_GOVERNED_X86_FEATURE)
-BUILD_BUG()
-#endif
-
-#define KVM_GOVERNED_X86_FEATURE(x) KVM_GOVERNED_FEATURE(X86_FEATURE_##x)
-
-KVM_GOVERNED_X86_FEATURE(GBPAGES)
-KVM_GOVERNED_X86_FEATURE(XSAVES)
-KVM_GOVERNED_X86_FEATURE(VMX)
-KVM_GOVERNED_X86_FEATURE(NRIPS)
-KVM_GOVERNED_X86_FEATURE(TSCRATEMSR)
-KVM_GOVERNED_X86_FEATURE(V_VMSAVE_VMLOAD)
-KVM_GOVERNED_X86_FEATURE(LBRV)
-KVM_GOVERNED_X86_FEATURE(PAUSEFILTER)
-KVM_GOVERNED_X86_FEATURE(PFTHRESHOLD)
-KVM_GOVERNED_X86_FEATURE(VGIF)
-KVM_GOVERNED_X86_FEATURE(VNMI)
-KVM_GOVERNED_X86_FEATURE(LAM)
-
-#undef KVM_GOVERNED_X86_FEATURE
-#undef KVM_GOVERNED_FEATURE
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 4f0a94346d00..6a6dd5a84f22 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1352,7 +1352,7 @@ static void __kvm_hv_xsaves_xsavec_maybe_warn(struct kvm_vcpu *vcpu)
return;
if (guest_cpuid_has(vcpu, X86_FEATURE_XSAVES) ||
- !guest_cpuid_has(vcpu, X86_FEATURE_XSAVEC))
+ !guest_cpu_cap_has(vcpu, X86_FEATURE_XSAVEC))
return;
pr_notice_ratelimited("Booting SMP Windows KVM VM with !XSAVES && XSAVEC. "
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index 10495fffb890..73072585e164 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -88,6 +88,8 @@ struct x86_instruction_info {
#define X86EMUL_CMPXCHG_FAILED 4 /* cmpxchg did not see expected value */
#define X86EMUL_IO_NEEDED 5 /* IO is needed to complete emulation */
#define X86EMUL_INTERCEPTED 6 /* Intercepted by nested VMCB/VMCS */
+/* Emulation during event vectoring is unhandleable. */
+#define X86EMUL_UNHANDLEABLE_VECTORING 7
/* x86-specific emulation flags */
#define X86EMUL_F_WRITE BIT(0)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 1c89d20bbc1e..a009c94c26c2 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -598,7 +598,7 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu)
* version first and level-triggered interrupts never get EOIed in
* IOAPIC.
*/
- if (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) &&
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_X2APIC) &&
!ioapic_in_kernel(vcpu->kvm))
v |= APIC_LVR_DIRECTED_EOI;
kvm_lapic_set_reg(apic, APIC_LVR, v);
@@ -2365,7 +2365,7 @@ static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
case APIC_LVTT:
if (!kvm_apic_sw_enabled(apic))
val |= APIC_LVT_MASKED;
- val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
+ val &= (apic_lvt_mask[LVT_TIMER] | apic->lapic_timer.timer_mode_mask);
kvm_lapic_set_reg(apic, APIC_LVTT, val);
apic_update_lvtt(apic);
break;
@@ -2642,7 +2642,7 @@ int kvm_apic_set_base(struct kvm_vcpu *vcpu, u64 value, bool host_initiated)
return 0;
u64 reserved_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu) | 0x2ff |
- (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) ? 0 : X2APIC_ENABLE);
+ (guest_cpu_cap_has(vcpu, X86_FEATURE_X2APIC) ? 0 : X2APIC_ENABLE);
if ((value & reserved_bits) != 0 || new_mode == LAPIC_MODE_INVALID)
return 1;
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index e9322358678b..caec3d11638d 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -126,7 +126,7 @@ static inline unsigned long kvm_get_active_pcid(struct kvm_vcpu *vcpu)
static inline unsigned long kvm_get_active_cr3_lam_bits(struct kvm_vcpu *vcpu)
{
- if (!guest_can_use(vcpu, X86_FEATURE_LAM))
+ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_LAM))
return 0;
return kvm_read_cr3(vcpu) & (X86_CR3_LAM_U48 | X86_CR3_LAM_U57);
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 2401606db260..74fa38ebddbf 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5022,7 +5022,7 @@ static void reset_guest_rsvds_bits_mask(struct kvm_vcpu *vcpu,
__reset_rsvds_bits_mask(&context->guest_rsvd_check,
vcpu->arch.reserved_gpa_bits,
context->cpu_role.base.level, is_efer_nx(context),
- guest_can_use(vcpu, X86_FEATURE_GBPAGES),
+ guest_cpu_cap_has(vcpu, X86_FEATURE_GBPAGES),
is_cr4_pse(context),
guest_cpuid_is_amd_compatible(vcpu));
}
@@ -5099,7 +5099,7 @@ static void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
__reset_rsvds_bits_mask(shadow_zero_check, reserved_hpa_bits(),
context->root_role.level,
context->root_role.efer_nx,
- guest_can_use(vcpu, X86_FEATURE_GBPAGES),
+ guest_cpu_cap_has(vcpu, X86_FEATURE_GBPAGES),
is_pse, is_amd);
if (!shadow_me_mask)
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 47a46283c866..75e9cfc689f8 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -797,7 +797,6 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu)
memset(pmu, 0, sizeof(*pmu));
kvm_pmu_call(init)(vcpu);
- kvm_pmu_refresh(vcpu);
}
/* Release perf_events for vPMCs that have been unused for a full time slice. */
diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h
index e46220ece83c..fde0ae986003 100644
--- a/arch/x86/kvm/reverse_cpuid.h
+++ b/arch/x86/kvm/reverse_cpuid.h
@@ -7,23 +7,6 @@
#include <asm/cpufeatures.h>
/*
- * Hardware-defined CPUID leafs that are either scattered by the kernel or are
- * unknown to the kernel, but need to be directly used by KVM. Note, these
- * word values conflict with the kernel's "bug" caps, but KVM doesn't use those.
- */
-enum kvm_only_cpuid_leafs {
- CPUID_12_EAX = NCAPINTS,
- CPUID_7_1_EDX,
- CPUID_8000_0007_EDX,
- CPUID_8000_0022_EAX,
- CPUID_7_2_EDX,
- CPUID_24_0_EBX,
- NR_KVM_CPU_CAPS,
-
- NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS,
-};
-
-/*
* Define a KVM-only feature flag.
*
* For features that are scattered by cpufeatures.h, __feature_translate() also
@@ -145,7 +128,10 @@ static __always_inline u32 __feature_translate(int x86_feature)
static __always_inline u32 __feature_leaf(int x86_feature)
{
- return __feature_translate(x86_feature) / 32;
+ u32 x86_leaf = __feature_translate(x86_feature) / 32;
+
+ reverse_cpuid_check(x86_leaf);
+ return x86_leaf;
}
/*
@@ -168,7 +154,6 @@ static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned int x86_featu
{
unsigned int x86_leaf = __feature_leaf(x86_feature);
- reverse_cpuid_check(x86_leaf);
return reverse_cpuid[x86_leaf];
}
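
Folding reverse_cpuid_check() into __feature_leaf() means every feature-to-word translation is validated, not just the x86_feature_cpuid() path, which matters now that the guest capability accessors index per-vCPU arrays by leaf directly. The check itself is purely compile time; abridged, it looks roughly like this (the real version carries a few more BUILD_BUG_ON lines):

static __always_inline void reverse_cpuid_check(unsigned int x86_leaf)
{
	/* Both checks resolve at build time for constant features. */
	BUILD_BUG_ON(x86_leaf >= ARRAY_SIZE(reverse_cpuid));
	BUILD_BUG_ON(reverse_cpuid[x86_leaf].function == 0);
}
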
diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c
index 85241c0c7f56..e0ab7df27b66 100644
--- a/arch/x86/kvm/smm.c
+++ b/arch/x86/kvm/smm.c
@@ -283,7 +283,7 @@ void enter_smm(struct kvm_vcpu *vcpu)
memset(smram.bytes, 0, sizeof(smram.bytes));
#ifdef CONFIG_X86_64
- if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_LM))
enter_smm_save_state_64(vcpu, &smram.smram64);
else
#endif
@@ -353,7 +353,7 @@ void enter_smm(struct kvm_vcpu *vcpu)
kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);
#ifdef CONFIG_X86_64
- if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_LM))
if (kvm_x86_call(set_efer)(vcpu, 0))
goto error;
#endif
@@ -586,7 +586,7 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
* supports long mode.
*/
#ifdef CONFIG_X86_64
- if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_LM)) {
struct kvm_segment cs_desc;
unsigned long cr4;
@@ -609,7 +609,7 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
kvm_set_cr0(vcpu, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
#ifdef CONFIG_X86_64
- if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_LM)) {
unsigned long cr4, efer;
/* Clear CR4.PAE before clearing EFER.LME. */
@@ -634,7 +634,7 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
return X86EMUL_UNHANDLEABLE;
#ifdef CONFIG_X86_64
- if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_LM))
ret = rsm_load_state_64(ctxt, &smram.smram64);
else
#endif
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index b708bdf7eaff..d77b094d9a4d 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -111,7 +111,7 @@ static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
static bool nested_vmcb_needs_vls_intercept(struct vcpu_svm *svm)
{
- if (!guest_can_use(&svm->vcpu, X86_FEATURE_V_VMSAVE_VMLOAD))
+ if (!guest_cpu_cap_has(&svm->vcpu, X86_FEATURE_V_VMSAVE_VMLOAD))
return true;
if (!nested_npt_enabled(svm))
@@ -594,7 +594,7 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
vmcb_mark_dirty(vmcb02, VMCB_DR);
}
- if (unlikely(guest_can_use(vcpu, X86_FEATURE_LBRV) &&
+ if (unlikely(guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) &&
(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
/*
* Reserved bits of DEBUGCTL are ignored. Be consistent with
@@ -651,7 +651,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
* exit_int_info, exit_int_info_err, next_rip, insn_len, insn_bytes.
*/
- if (guest_can_use(vcpu, X86_FEATURE_VGIF) &&
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_VGIF) &&
(svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK))
int_ctl_vmcb12_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK);
else
@@ -689,7 +689,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
vmcb02->control.tsc_offset = vcpu->arch.tsc_offset;
- if (guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR) &&
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_TSCRATEMSR) &&
svm->tsc_ratio_msr != kvm_caps.default_tsc_scaling_ratio)
nested_svm_update_tsc_ratio_msr(vcpu);
@@ -710,7 +710,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
* what a nrips=0 CPU would do (L1 is responsible for advancing RIP
* prior to injecting the event).
*/
- if (guest_can_use(vcpu, X86_FEATURE_NRIPS))
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS))
vmcb02->control.next_rip = svm->nested.ctl.next_rip;
else if (boot_cpu_has(X86_FEATURE_NRIPS))
vmcb02->control.next_rip = vmcb12_rip;
@@ -720,7 +720,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
svm->soft_int_injected = true;
svm->soft_int_csbase = vmcb12_csbase;
svm->soft_int_old_rip = vmcb12_rip;
- if (guest_can_use(vcpu, X86_FEATURE_NRIPS))
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS))
svm->soft_int_next_rip = svm->nested.ctl.next_rip;
else
svm->soft_int_next_rip = vmcb12_rip;
@@ -728,18 +728,18 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
vmcb02->control.virt_ext = vmcb01->control.virt_ext &
LBR_CTL_ENABLE_MASK;
- if (guest_can_use(vcpu, X86_FEATURE_LBRV))
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV))
vmcb02->control.virt_ext |=
(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK);
if (!nested_vmcb_needs_vls_intercept(svm))
vmcb02->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
- if (guest_can_use(vcpu, X86_FEATURE_PAUSEFILTER))
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_PAUSEFILTER))
pause_count12 = svm->nested.ctl.pause_filter_count;
else
pause_count12 = 0;
- if (guest_can_use(vcpu, X86_FEATURE_PFTHRESHOLD))
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_PFTHRESHOLD))
pause_thresh12 = svm->nested.ctl.pause_filter_thresh;
else
pause_thresh12 = 0;
@@ -1026,7 +1026,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
if (vmcb12->control.exit_code != SVM_EXIT_ERR)
nested_save_pending_event_to_vmcb12(svm, vmcb12);
- if (guest_can_use(vcpu, X86_FEATURE_NRIPS))
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS))
vmcb12->control.next_rip = vmcb02->control.next_rip;
vmcb12->control.int_ctl = svm->nested.ctl.int_ctl;
@@ -1065,7 +1065,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
if (!nested_exit_on_intr(svm))
kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
- if (unlikely(guest_can_use(vcpu, X86_FEATURE_LBRV) &&
+ if (unlikely(guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) &&
(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
svm_copy_lbrs(vmcb12, vmcb02);
svm_update_lbrv(vcpu);
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index 22d5a65b410c..288f7f2a46f2 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -46,7 +46,7 @@ static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr,
switch (msr) {
case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:
- if (!guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE))
+ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_PERFCTR_CORE))
return NULL;
/*
* Each PMU counter has a pair of CTL and CTR MSRs. CTLn
@@ -109,7 +109,7 @@ static bool amd_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
case MSR_K7_EVNTSEL0 ... MSR_K7_PERFCTR3:
return pmu->version > 0;
case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:
- return guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE);
+ return guest_cpu_cap_has(vcpu, X86_FEATURE_PERFCTR_CORE);
case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS:
case MSR_AMD64_PERF_CNTR_GLOBAL_CTL:
case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR:
@@ -179,7 +179,7 @@ static void amd_pmu_refresh(struct kvm_vcpu *vcpu)
union cpuid_0x80000022_ebx ebx;
pmu->version = 1;
- if (guest_cpuid_has(vcpu, X86_FEATURE_PERFMON_V2)) {
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_PERFMON_V2)) {
pmu->version = 2;
/*
* Note, PERFMON_V2 is also in 0x80000022.0x0, i.e. the guest
@@ -189,7 +189,7 @@ static void amd_pmu_refresh(struct kvm_vcpu *vcpu)
x86_feature_cpuid(X86_FEATURE_PERFMON_V2).index);
ebx.full = kvm_find_cpuid_entry_index(vcpu, 0x80000022, 0)->ebx;
pmu->nr_arch_gp_counters = ebx.split.num_core_pmc;
- } else if (guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE)) {
+ } else if (guest_cpu_cap_has(vcpu, X86_FEATURE_PERFCTR_CORE)) {
pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS_CORE;
} else {
pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS;
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 87ed8cde68a7..799f8494b599 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -4435,8 +4435,8 @@ static void sev_es_vcpu_after_set_cpuid(struct vcpu_svm *svm)
struct kvm_vcpu *vcpu = &svm->vcpu;
if (boot_cpu_has(X86_FEATURE_V_TSC_AUX)) {
- bool v_tsc_aux = guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) ||
- guest_cpuid_has(vcpu, X86_FEATURE_RDPID);
+ bool v_tsc_aux = guest_cpu_cap_has(vcpu, X86_FEATURE_RDTSCP) ||
+ guest_cpu_cap_has(vcpu, X86_FEATURE_RDPID);
set_msr_interception(vcpu, svm->msrpm, MSR_TSC_AUX, v_tsc_aux, v_tsc_aux);
}
@@ -4445,16 +4445,15 @@ static void sev_es_vcpu_after_set_cpuid(struct vcpu_svm *svm)
* For SEV-ES, accesses to MSR_IA32_XSS should not be intercepted if
* the host/guest supports its use.
*
- * guest_can_use() checks a number of requirements on the host/guest to
- * ensure that MSR_IA32_XSS is available, but it might report true even
- * if X86_FEATURE_XSAVES isn't configured in the guest to ensure host
- * MSR_IA32_XSS is always properly restored. For SEV-ES, it is better
- * to further check that the guest CPUID actually supports
- * X86_FEATURE_XSAVES so that accesses to MSR_IA32_XSS by misbehaved
- * guests will still get intercepted and caught in the normal
- * kvm_emulate_rdmsr()/kvm_emulated_wrmsr() paths.
+ * KVM treats the guest as being capable of using XSAVES even if XSAVES
+ * isn't enabled in guest CPUID as there is no intercept for XSAVES,
+ * i.e. the guest can use XSAVES/XRSTOR to read/write XSS if XSAVE is
+ * exposed to the guest and XSAVES is supported in hardware. Condition
+ * full XSS passthrough on the guest being able to use XSAVES *and*
+ * XSAVES being exposed to the guest so that KVM can at least honor
+ * guest CPUID for RDMSR and WRMSR.
*/
- if (guest_can_use(vcpu, X86_FEATURE_XSAVES) &&
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_XSAVES) &&
guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_XSS, 1, 1);
else
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 9a741e657fe0..7640a84e554a 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1048,7 +1048,7 @@ void svm_update_lbrv(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
bool current_enable_lbrv = svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK;
bool enable_lbrv = (svm_get_lbr_vmcb(svm)->save.dbgctl & DEBUGCTLMSR_LBR) ||
- (is_guest_mode(vcpu) && guest_can_use(vcpu, X86_FEATURE_LBRV) &&
+ (is_guest_mode(vcpu) && guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) &&
(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK));
if (enable_lbrv == current_enable_lbrv)
@@ -1186,14 +1186,14 @@ static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu,
*/
if (kvm_cpu_cap_has(X86_FEATURE_INVPCID)) {
if (!npt_enabled ||
- !guest_cpuid_has(&svm->vcpu, X86_FEATURE_INVPCID))
+ !guest_cpu_cap_has(&svm->vcpu, X86_FEATURE_INVPCID))
svm_set_intercept(svm, INTERCEPT_INVPCID);
else
svm_clr_intercept(svm, INTERCEPT_INVPCID);
}
if (kvm_cpu_cap_has(X86_FEATURE_RDTSCP)) {
- if (guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_RDTSCP))
svm_clr_intercept(svm, INTERCEPT_RDTSCP);
else
svm_set_intercept(svm, INTERCEPT_RDTSCP);
@@ -2860,7 +2860,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
switch (msr_info->index) {
case MSR_AMD64_TSC_RATIO:
if (!msr_info->host_initiated &&
- !guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR))
+ !guest_cpu_cap_has(vcpu, X86_FEATURE_TSCRATEMSR))
return 1;
msr_info->data = svm->tsc_ratio_msr;
break;
@@ -2936,7 +2936,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case MSR_AMD64_VIRT_SPEC_CTRL:
if (!msr_info->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD))
+ !guest_cpu_cap_has(vcpu, X86_FEATURE_VIRT_SSBD))
return 1;
msr_info->data = svm->virt_spec_ctrl;
@@ -3020,7 +3020,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
switch (ecx) {
case MSR_AMD64_TSC_RATIO:
- if (!guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR)) {
+ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_TSCRATEMSR)) {
if (!msr->host_initiated)
return 1;
@@ -3042,7 +3042,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
svm->tsc_ratio_msr = data;
- if (guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR) &&
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_TSCRATEMSR) &&
is_guest_mode(vcpu))
nested_svm_update_tsc_ratio_msr(vcpu);
@@ -3087,7 +3087,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
break;
case MSR_AMD64_VIRT_SPEC_CTRL:
if (!msr->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD))
+ !guest_cpu_cap_has(vcpu, X86_FEATURE_VIRT_SSBD))
return 1;
if (data & ~SPEC_CTRL_SSBD)
@@ -3259,7 +3259,7 @@ static int invpcid_interception(struct kvm_vcpu *vcpu)
unsigned long type;
gva_t gva;
- if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) {
+ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_INVPCID)) {
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}
@@ -3529,6 +3529,20 @@ static void svm_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason,
*error_code = 0;
}
+static void svm_get_entry_info(struct kvm_vcpu *vcpu, u32 *intr_info,
+ u32 *error_code)
+{
+ struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control;
+
+ *intr_info = control->event_inj;
+
+ if ((*intr_info & SVM_EXITINTINFO_VALID) &&
+ (*intr_info & SVM_EXITINTINFO_VALID_ERR))
+ *error_code = control->event_inj_err;
+ else
+ *error_code = 0;
+}
+
static int svm_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -4388,27 +4403,17 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
* XSS on VM-Enter/VM-Exit. Failure to do so would effectively give
* the guest read/write access to the host's XSS.
*/
- if (boot_cpu_has(X86_FEATURE_XSAVE) &&
- boot_cpu_has(X86_FEATURE_XSAVES) &&
- guest_cpuid_has(vcpu, X86_FEATURE_XSAVE))
- kvm_governed_feature_set(vcpu, X86_FEATURE_XSAVES);
-
- kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_NRIPS);
- kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_TSCRATEMSR);
- kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_LBRV);
+ guest_cpu_cap_change(vcpu, X86_FEATURE_XSAVES,
+ boot_cpu_has(X86_FEATURE_XSAVES) &&
+ guest_cpu_cap_has(vcpu, X86_FEATURE_XSAVE));
/*
* Intercept VMLOAD if the vCPU model is Intel in order to emulate that
* VMLOAD drops bits 63:32 of SYSENTER (ignoring the fact that exposing
* SVM on Intel is bonkers and extremely unlikely to work).
*/
- if (!guest_cpuid_is_intel_compatible(vcpu))
- kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_V_VMSAVE_VMLOAD);
-
- kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_PAUSEFILTER);
- kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_PFTHRESHOLD);
- kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_VGIF);
- kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_VNMI);
+ if (guest_cpuid_is_intel_compatible(vcpu))
+ guest_cpu_cap_clear(vcpu, X86_FEATURE_V_VMSAVE_VMLOAD);
svm_recalc_instruction_intercepts(vcpu, svm);
@@ -4418,7 +4423,7 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
if (boot_cpu_has(X86_FEATURE_FLUSH_L1D))
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_FLUSH_CMD, 0,
- !!guest_cpuid_has(vcpu, X86_FEATURE_FLUSH_L1D));
+ !!guest_cpu_cap_has(vcpu, X86_FEATURE_FLUSH_L1D));
if (sev_guest(vcpu->kvm))
sev_vcpu_after_set_cpuid(svm);
@@ -4669,7 +4674,7 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram)
* responsible for ensuring nested SVM and SMIs are mutually exclusive.
*/
- if (!guest_cpuid_has(vcpu, X86_FEATURE_LM))
+ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_LM))
return 1;
smram->smram64.svm_guest_flag = 1;
@@ -4716,14 +4721,14 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram)
const struct kvm_smram_state_64 *smram64 = &smram->smram64;
- if (!guest_cpuid_has(vcpu, X86_FEATURE_LM))
+ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_LM))
return 0;
/* Non-zero if SMI arrived while vCPU was in guest mode. */
if (!smram64->svm_guest_flag)
return 0;
- if (!guest_cpuid_has(vcpu, X86_FEATURE_SVM))
+ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_SVM))
return 1;
if (!(smram64->efer & EFER_SVME))
@@ -4786,9 +4791,15 @@ static void svm_enable_smi_window(struct kvm_vcpu *vcpu)
static int svm_check_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
void *insn, int insn_len)
{
+ struct vcpu_svm *svm = to_svm(vcpu);
bool smep, smap, is_user;
u64 error_code;
+ /* Check that emulation is possible during event vectoring */
+ if ((svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK) &&
+ !kvm_can_emulate_event_vectoring(emul_type))
+ return X86EMUL_UNHANDLEABLE_VECTORING;
+
/* Emulation is always possible when KVM has access to all guest state. */
if (!sev_guest(vcpu->kvm))
return X86EMUL_CONTINUE;
@@ -4885,7 +4896,7 @@ static int svm_check_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
* In addition, don't apply the erratum workaround if the #NPF occurred
* while translating guest page tables (see below).
*/
- error_code = to_svm(vcpu)->vmcb->control.exit_info_1;
+ error_code = svm->vmcb->control.exit_info_1;
if (error_code & (PFERR_GUEST_PAGE_MASK | PFERR_FETCH_MASK))
goto resume_guest;
@@ -5073,6 +5084,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.required_apicv_inhibits = AVIC_REQUIRED_APICV_INHIBITS,
.get_exit_info = svm_get_exit_info,
+ .get_entry_info = svm_get_entry_info,
.vcpu_after_set_cpuid = svm_vcpu_after_set_cpuid,
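
Note how svm_vcpu_after_set_cpuid() now edits the vCPU's capability words directly instead of setting governed features: XSAVES is recomputed with guest_cpu_cap_change(), and V_VMSAVE_VMLOAD is stripped with guest_cpu_cap_clear() for Intel-compatible models. Plausible shapes for those helpers, mirroring the accessor sketched earlier (the exact definitions are in the cpuid.h changes):

static __always_inline void guest_cpu_cap_clear(struct kvm_vcpu *vcpu,
						unsigned int x86_feature)
{
	unsigned int x86_leaf = __feature_leaf(x86_feature);

	vcpu->arch.cpu_caps[x86_leaf] &= ~__feature_bit(x86_feature);
}

static __always_inline void guest_cpu_cap_change(struct kvm_vcpu *vcpu,
						 unsigned int x86_feature,
						 bool guest_has_cap)
{
	if (guest_has_cap)
		guest_cpu_cap_set(vcpu, x86_feature);
	else
		guest_cpu_cap_clear(vcpu, x86_feature);
}
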
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 2f5d8b105eb0..9d7cdb8fbf87 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -495,7 +495,7 @@ static inline bool svm_is_intercept(struct vcpu_svm *svm, int bit)
static inline bool nested_vgif_enabled(struct vcpu_svm *svm)
{
- return guest_can_use(&svm->vcpu, X86_FEATURE_VGIF) &&
+ return guest_cpu_cap_has(&svm->vcpu, X86_FEATURE_VGIF) &&
(svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK);
}
@@ -547,7 +547,7 @@ static inline bool nested_npt_enabled(struct vcpu_svm *svm)
static inline bool nested_vnmi_enabled(struct vcpu_svm *svm)
{
- return guest_can_use(&svm->vcpu, X86_FEATURE_VNMI) &&
+ return guest_cpu_cap_has(&svm->vcpu, X86_FEATURE_VNMI) &&
(svm->nested.ctl.int_ctl & V_NMI_ENABLE_MASK);
}
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index d3aeffd6ae75..0b844cb97978 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -22,15 +22,22 @@ TRACE_EVENT(kvm_entry,
__field( unsigned int, vcpu_id )
__field( unsigned long, rip )
__field( bool, immediate_exit )
+ __field( u32, intr_info )
+ __field( u32, error_code )
),
TP_fast_assign(
__entry->vcpu_id = vcpu->vcpu_id;
__entry->rip = kvm_rip_read(vcpu);
__entry->immediate_exit = force_immediate_exit;
+
+ kvm_x86_call(get_entry_info)(vcpu, &__entry->intr_info,
+ &__entry->error_code);
),
- TP_printk("vcpu %u, rip 0x%lx%s", __entry->vcpu_id, __entry->rip,
+ TP_printk("vcpu %u, rip 0x%lx intr_info 0x%08x error_code 0x%08x%s",
+ __entry->vcpu_id, __entry->rip,
+ __entry->intr_info, __entry->error_code,
__entry->immediate_exit ? "[immediate exit]" : "")
);
@@ -308,12 +315,14 @@ TRACE_EVENT(name, \
__field( u32, intr_info ) \
__field( u32, error_code ) \
__field( unsigned int, vcpu_id ) \
+ __field( u64, requests ) \
), \
\
TP_fast_assign( \
__entry->guest_rip = kvm_rip_read(vcpu); \
__entry->isa = isa; \
__entry->vcpu_id = vcpu->vcpu_id; \
+ __entry->requests = READ_ONCE(vcpu->requests); \
kvm_x86_call(get_exit_info)(vcpu, \
&__entry->exit_reason, \
&__entry->info1, \
@@ -323,11 +332,13 @@ TRACE_EVENT(name, \
), \
\
TP_printk("vcpu %u reason %s%s%s rip 0x%lx info1 0x%016llx " \
- "info2 0x%016llx intr_info 0x%08x error_code 0x%08x", \
+ "info2 0x%016llx intr_info 0x%08x error_code 0x%08x " \
+ "requests 0x%016llx", \
__entry->vcpu_id, \
kvm_print_exit_reason(__entry->exit_reason, __entry->isa), \
__entry->guest_rip, __entry->info1, __entry->info2, \
- __entry->intr_info, __entry->error_code) \
+ __entry->intr_info, __entry->error_code, \
+ __entry->requests) \
)
/*
diff --git a/arch/x86/kvm/vmx/hyperv.h b/arch/x86/kvm/vmx/hyperv.h
index a87407412615..11a339009781 100644
--- a/arch/x86/kvm/vmx/hyperv.h
+++ b/arch/x86/kvm/vmx/hyperv.h
@@ -42,7 +42,7 @@ static inline struct hv_enlightened_vmcs *nested_vmx_evmcs(struct vcpu_vmx *vmx)
return vmx->nested.hv_evmcs;
}
-static inline bool guest_cpuid_has_evmcs(struct kvm_vcpu *vcpu)
+static inline bool guest_cpu_cap_has_evmcs(struct kvm_vcpu *vcpu)
{
/*
* eVMCS is exposed to the guest if Hyper-V is enabled in CPUID and
diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index 28360c350454..2427f918e763 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -110,6 +110,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
.get_mt_mask = vmx_get_mt_mask,
.get_exit_info = vmx_get_exit_info,
+ .get_entry_info = vmx_get_entry_info,
.vcpu_after_set_cpuid = vmx_vcpu_after_set_cpuid,
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index a0ea5317cec8..8a7af02d466e 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -257,7 +257,7 @@ static bool nested_evmcs_handle_vmclear(struct kvm_vcpu *vcpu, gpa_t vmptr)
* state. It is possible that the area will stay mapped as
* vmx->nested.hv_evmcs but this shouldn't be a problem.
*/
- if (!guest_cpuid_has_evmcs(vcpu) ||
+ if (!guest_cpu_cap_has_evmcs(vcpu) ||
!evmptr_is_valid(nested_get_evmptr(vcpu)))
return false;
@@ -2089,7 +2089,7 @@ static enum nested_evmptrld_status nested_vmx_handle_enlightened_vmptrld(
bool evmcs_gpa_changed = false;
u64 evmcs_gpa;
- if (likely(!guest_cpuid_has_evmcs(vcpu)))
+ if (likely(!guest_cpu_cap_has_evmcs(vcpu)))
return EVMPTRLD_DISABLED;
evmcs_gpa = nested_get_evmptr(vcpu);
@@ -2992,7 +2992,7 @@ static int nested_vmx_check_controls(struct kvm_vcpu *vcpu,
return -EINVAL;
#ifdef CONFIG_KVM_HYPERV
- if (guest_cpuid_has_evmcs(vcpu))
+ if (guest_cpu_cap_has_evmcs(vcpu))
return nested_evmcs_check_controls(vmcs12);
#endif
@@ -3287,7 +3287,7 @@ static bool nested_get_evmcs_page(struct kvm_vcpu *vcpu)
* L2 was running), map it here to make sure vmcs12 changes are
* properly reflected.
*/
- if (guest_cpuid_has_evmcs(vcpu) &&
+ if (guest_cpu_cap_has_evmcs(vcpu) &&
vmx->nested.hv_evmcs_vmptr == EVMPTR_MAP_PENDING) {
enum nested_evmptrld_status evmptrld_status =
nested_vmx_handle_enlightened_vmptrld(vcpu, false);
@@ -5026,7 +5026,7 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
* doesn't isolate different VMCSs, i.e. in this case, doesn't provide
* separate modes for L2 vs L1.
*/
- if (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_SPEC_CTRL))
indirect_branch_prediction_barrier();
/* Update any VMCS fields that might have changed while L2 ran */
@@ -6295,7 +6295,7 @@ static bool nested_vmx_exit_handled_encls(struct kvm_vcpu *vcpu,
{
u32 encls_leaf;
- if (!guest_cpuid_has(vcpu, X86_FEATURE_SGX) ||
+ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_SGX) ||
!nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENCLS_EXITING))
return false;
@@ -6633,7 +6633,7 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
vmx = to_vmx(vcpu);
vmcs12 = get_vmcs12(vcpu);
- if (guest_can_use(vcpu, X86_FEATURE_VMX) &&
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_VMX) &&
(vmx->nested.vmxon || vmx->nested.smm.vmxon)) {
kvm_state.hdr.vmx.vmxon_pa = vmx->nested.vmxon_ptr;
kvm_state.hdr.vmx.vmcs12_pa = vmx->nested.current_vmptr;
@@ -6774,7 +6774,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
if (kvm_state->flags & ~KVM_STATE_NESTED_EVMCS)
return -EINVAL;
} else {
- if (!guest_can_use(vcpu, X86_FEATURE_VMX))
+ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_VMX))
return -EINVAL;
if (!page_address_valid(vcpu, kvm_state->hdr.vmx.vmxon_pa))
@@ -6808,7 +6808,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
return -EINVAL;
if ((kvm_state->flags & KVM_STATE_NESTED_EVMCS) &&
- (!guest_can_use(vcpu, X86_FEATURE_VMX) ||
+ (!guest_cpu_cap_has(vcpu, X86_FEATURE_VMX) ||
!vmx->nested.enlightened_vmcs_enabled))
return -EINVAL;
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 9c9d4a336166..77012b2eca0e 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -110,7 +110,7 @@ static struct kvm_pmc *intel_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
static inline u64 vcpu_get_perf_capabilities(struct kvm_vcpu *vcpu)
{
- if (!guest_cpuid_has(vcpu, X86_FEATURE_PDCM))
+ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_PDCM))
return 0;
return vcpu->arch.perf_capabilities;
@@ -160,7 +160,7 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
ret = vcpu_get_perf_capabilities(vcpu) & PERF_CAP_PEBS_FORMAT;
break;
case MSR_IA32_DS_AREA:
- ret = guest_cpuid_has(vcpu, X86_FEATURE_DS);
+ ret = guest_cpu_cap_has(vcpu, X86_FEATURE_DS);
break;
case MSR_PEBS_DATA_CFG:
perf_capabilities = vcpu_get_perf_capabilities(vcpu);
diff --git a/arch/x86/kvm/vmx/sgx.c b/arch/x86/kvm/vmx/sgx.c
index b352a3ba7354..9961e07cf071 100644
--- a/arch/x86/kvm/vmx/sgx.c
+++ b/arch/x86/kvm/vmx/sgx.c
@@ -122,7 +122,7 @@ static int sgx_inject_fault(struct kvm_vcpu *vcpu, gva_t gva, int trapnr)
* likely than a bad userspace address.
*/
if ((trapnr == PF_VECTOR || !boot_cpu_has(X86_FEATURE_SGX2)) &&
- guest_cpuid_has(vcpu, X86_FEATURE_SGX2)) {
+ guest_cpu_cap_has(vcpu, X86_FEATURE_SGX2)) {
memset(&ex, 0, sizeof(ex));
ex.vector = PF_VECTOR;
ex.error_code = PFERR_PRESENT_MASK | PFERR_WRITE_MASK |
@@ -365,7 +365,7 @@ static inline bool encls_leaf_enabled_in_guest(struct kvm_vcpu *vcpu, u32 leaf)
return true;
if (leaf >= EAUG && leaf <= EMODT)
- return guest_cpuid_has(vcpu, X86_FEATURE_SGX2);
+ return guest_cpu_cap_has(vcpu, X86_FEATURE_SGX2);
return false;
}
@@ -381,8 +381,8 @@ int handle_encls(struct kvm_vcpu *vcpu)
{
u32 leaf = (u32)kvm_rax_read(vcpu);
- if (!enable_sgx || !guest_cpuid_has(vcpu, X86_FEATURE_SGX) ||
- !guest_cpuid_has(vcpu, X86_FEATURE_SGX1)) {
+ if (!enable_sgx || !guest_cpu_cap_has(vcpu, X86_FEATURE_SGX) ||
+ !guest_cpu_cap_has(vcpu, X86_FEATURE_SGX1)) {
kvm_queue_exception(vcpu, UD_VECTOR);
} else if (!encls_leaf_enabled_in_guest(vcpu, leaf) ||
!sgx_enabled_in_guest_bios(vcpu) || !is_paging(vcpu)) {
@@ -479,15 +479,15 @@ void vmx_write_encls_bitmap(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
if (!cpu_has_vmx_encls_vmexit())
return;
- if (guest_cpuid_has(vcpu, X86_FEATURE_SGX) &&
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_SGX) &&
sgx_enabled_in_guest_bios(vcpu)) {
- if (guest_cpuid_has(vcpu, X86_FEATURE_SGX1)) {
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_SGX1)) {
bitmap &= ~GENMASK_ULL(ETRACK, ECREATE);
if (sgx_intercept_encls_ecreate(vcpu))
bitmap |= (1 << ECREATE);
}
- if (guest_cpuid_has(vcpu, X86_FEATURE_SGX2))
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_SGX2))
bitmap &= ~GENMASK_ULL(EMODT, EAUG);
/*
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 72435b1769f6..f72835e85b6d 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1706,6 +1706,12 @@ int vmx_check_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
kvm_queue_exception(vcpu, UD_VECTOR);
return X86EMUL_PROPAGATE_FAULT;
}
+
+ /* Check that emulation is possible during event vectoring */
+ if ((to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
+ !kvm_can_emulate_event_vectoring(emul_type))
+ return X86EMUL_UNHANDLEABLE_VECTORING;
+
return X86EMUL_CONTINUE;
}
@@ -1909,8 +1915,8 @@ static void vmx_setup_uret_msrs(struct vcpu_vmx *vmx)
vmx_setup_uret_msr(vmx, MSR_EFER, update_transition_efer(vmx));
vmx_setup_uret_msr(vmx, MSR_TSC_AUX,
- guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP) ||
- guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDPID));
+ guest_cpu_cap_has(&vmx->vcpu, X86_FEATURE_RDTSCP) ||
+ guest_cpu_cap_has(&vmx->vcpu, X86_FEATURE_RDPID));
/*
* hle=0, rtm=0, tsx_ctrl=1 can be found with some combinations of new
@@ -2063,7 +2069,7 @@ int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_BNDCFGS:
if (!kvm_mpx_supported() ||
(!msr_info->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_MPX)))
+ !guest_cpu_cap_has(vcpu, X86_FEATURE_MPX)))
return 1;
msr_info->data = vmcs_read64(GUEST_BNDCFGS);
break;
@@ -2079,13 +2085,13 @@ int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case MSR_IA32_SGXLEPUBKEYHASH0 ... MSR_IA32_SGXLEPUBKEYHASH3:
if (!msr_info->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_SGX_LC))
+ !guest_cpu_cap_has(vcpu, X86_FEATURE_SGX_LC))
return 1;
msr_info->data = to_vmx(vcpu)->msr_ia32_sgxlepubkeyhash
[msr_info->index - MSR_IA32_SGXLEPUBKEYHASH0];
break;
case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
- if (!guest_can_use(vcpu, X86_FEATURE_VMX))
+ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_VMX))
return 1;
if (vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index,
&msr_info->data))
@@ -2098,7 +2104,7 @@ int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
* sanity checking and refuse to boot. Filter all unsupported
* features out.
*/
- if (!msr_info->host_initiated && guest_cpuid_has_evmcs(vcpu))
+ if (!msr_info->host_initiated && guest_cpu_cap_has_evmcs(vcpu))
nested_evmcs_filter_control_msr(vcpu, msr_info->index,
&msr_info->data);
#endif
@@ -2168,7 +2174,7 @@ static u64 nested_vmx_truncate_sysenter_addr(struct kvm_vcpu *vcpu,
u64 data)
{
#ifdef CONFIG_X86_64
- if (!guest_cpuid_has(vcpu, X86_FEATURE_LM))
+ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_LM))
return (u32)data;
#endif
return (unsigned long)data;
@@ -2179,7 +2185,7 @@ static u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated
u64 debugctl = 0;
if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) &&
- (host_initiated || guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT)))
+ (host_initiated || guest_cpu_cap_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT)))
debugctl |= DEBUGCTLMSR_BUS_LOCK_DETECT;
if ((kvm_caps.supported_perf_cap & PMU_CAP_LBR_FMT) &&
@@ -2283,7 +2289,7 @@ int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_BNDCFGS:
if (!kvm_mpx_supported() ||
(!msr_info->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_MPX)))
+ !guest_cpu_cap_has(vcpu, X86_FEATURE_MPX)))
return 1;
if (is_noncanonical_msr_address(data & PAGE_MASK, vcpu) ||
(data & MSR_IA32_BNDCFGS_RSVD))
@@ -2385,7 +2391,7 @@ int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
* behavior, but it's close enough.
*/
if (!msr_info->host_initiated &&
- (!guest_cpuid_has(vcpu, X86_FEATURE_SGX_LC) ||
+ (!guest_cpu_cap_has(vcpu, X86_FEATURE_SGX_LC) ||
((vmx->msr_ia32_feature_control & FEAT_CTL_LOCKED) &&
!(vmx->msr_ia32_feature_control & FEAT_CTL_SGX_LC_ENABLED))))
return 1;
@@ -2395,7 +2401,7 @@ int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
if (!msr_info->host_initiated)
return 1; /* they are read-only */
- if (!guest_can_use(vcpu, X86_FEATURE_VMX))
+ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_VMX))
return 1;
return vmx_set_vmx_msr(vcpu, msr_index, data);
case MSR_IA32_RTIT_CTL:
@@ -2469,9 +2475,9 @@ int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if ((data & PERF_CAP_PEBS_MASK) !=
(kvm_caps.supported_perf_cap & PERF_CAP_PEBS_MASK))
return 1;
- if (!guest_cpuid_has(vcpu, X86_FEATURE_DS))
+ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_DS))
return 1;
- if (!guest_cpuid_has(vcpu, X86_FEATURE_DTES64))
+ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_DTES64))
return 1;
if (!cpuid_model_is_consistent(vcpu))
return 1;
@@ -4591,10 +4597,7 @@ vmx_adjust_secondary_exec_control(struct vcpu_vmx *vmx, u32 *exec_control,
bool __enabled; \
\
if (cpu_has_vmx_##name()) { \
- if (kvm_is_governed_feature(X86_FEATURE_##feat_name)) \
- __enabled = guest_can_use(__vcpu, X86_FEATURE_##feat_name); \
- else \
- __enabled = guest_cpuid_has(__vcpu, X86_FEATURE_##feat_name); \
+ __enabled = guest_cpu_cap_has(__vcpu, X86_FEATURE_##feat_name); \
vmx_adjust_secondary_exec_control(vmx, exec_control, SECONDARY_EXEC_##ctrl_name,\
__enabled, exiting); \
} \
@@ -4670,8 +4673,8 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
*/
if (cpu_has_vmx_rdtscp()) {
bool rdpid_or_rdtscp_enabled =
- guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) ||
- guest_cpuid_has(vcpu, X86_FEATURE_RDPID);
+ guest_cpu_cap_has(vcpu, X86_FEATURE_RDTSCP) ||
+ guest_cpu_cap_has(vcpu, X86_FEATURE_RDPID);
vmx_adjust_secondary_exec_control(vmx, &exec_control,
SECONDARY_EXEC_ENABLE_RDTSCP,
@@ -5960,7 +5963,7 @@ static int handle_invpcid(struct kvm_vcpu *vcpu)
} operand;
int gpr_index;
- if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) {
+ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_INVPCID)) {
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}
@@ -6192,6 +6195,15 @@ void vmx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason,
}
}
+void vmx_get_entry_info(struct kvm_vcpu *vcpu, u32 *intr_info, u32 *error_code)
+{
+ *intr_info = vmcs_read32(VM_ENTRY_INTR_INFO_FIELD);
+ if (is_exception_with_error_code(*intr_info))
+ *error_code = vmcs_read32(VM_ENTRY_EXCEPTION_ERROR_CODE);
+ else
+ *error_code = 0;
+}
+
static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx)
{
if (vmx->pml_pg) {
@@ -6552,33 +6564,15 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
return 0;
}
- /*
- * Note:
- * Do not try to fix EXIT_REASON_EPT_MISCONFIG if it caused by
- * delivery event since it indicates guest is accessing MMIO.
- * The vm-exit can be triggered again after return to guest that
- * will cause infinite loop.
- */
if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
(exit_reason.basic != EXIT_REASON_EXCEPTION_NMI &&
exit_reason.basic != EXIT_REASON_EPT_VIOLATION &&
exit_reason.basic != EXIT_REASON_PML_FULL &&
exit_reason.basic != EXIT_REASON_APIC_ACCESS &&
exit_reason.basic != EXIT_REASON_TASK_SWITCH &&
- exit_reason.basic != EXIT_REASON_NOTIFY)) {
- int ndata = 3;
-
- vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
- vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
- vcpu->run->internal.data[0] = vectoring_info;
- vcpu->run->internal.data[1] = exit_reason.full;
- vcpu->run->internal.data[2] = vmx_get_exit_qual(vcpu);
- if (exit_reason.basic == EXIT_REASON_EPT_MISCONFIG) {
- vcpu->run->internal.data[ndata++] =
- vmcs_read64(GUEST_PHYSICAL_ADDRESS);
- }
- vcpu->run->internal.data[ndata++] = vcpu->arch.last_vmentry_cpu;
- vcpu->run->internal.ndata = ndata;
+ exit_reason.basic != EXIT_REASON_NOTIFY &&
+ exit_reason.basic != EXIT_REASON_EPT_MISCONFIG)) {
+ kvm_prepare_event_vectoring_exit(vcpu, INVALID_GPA);
return 0;
}
@@ -7844,12 +7838,8 @@ void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
* to the guest. XSAVES depends on CR4.OSXSAVE, and CR4.OSXSAVE can be
* set if and only if XSAVE is supported.
*/
- if (boot_cpu_has(X86_FEATURE_XSAVE) &&
- guest_cpuid_has(vcpu, X86_FEATURE_XSAVE))
- kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_XSAVES);
-
- kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_VMX);
- kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_LAM);
+ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_XSAVE))
+ guest_cpu_cap_clear(vcpu, X86_FEATURE_XSAVES);
vmx_setup_uret_msrs(vmx);
@@ -7857,7 +7847,7 @@ void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
vmcs_set_secondary_exec_control(vmx,
vmx_secondary_exec_control(vmx));
- if (guest_can_use(vcpu, X86_FEATURE_VMX))
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_VMX))
vmx->msr_ia32_feature_control_valid_bits |=
FEAT_CTL_VMX_ENABLED_INSIDE_SMX |
FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;
@@ -7866,25 +7856,25 @@ void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
~(FEAT_CTL_VMX_ENABLED_INSIDE_SMX |
FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX);
- if (guest_can_use(vcpu, X86_FEATURE_VMX))
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_VMX))
nested_vmx_cr_fixed1_bits_update(vcpu);
if (boot_cpu_has(X86_FEATURE_INTEL_PT) &&
- guest_cpuid_has(vcpu, X86_FEATURE_INTEL_PT))
+ guest_cpu_cap_has(vcpu, X86_FEATURE_INTEL_PT))
update_intel_pt_cfg(vcpu);
if (boot_cpu_has(X86_FEATURE_RTM)) {
struct vmx_uret_msr *msr;
msr = vmx_find_uret_msr(vmx, MSR_IA32_TSX_CTRL);
if (msr) {
- bool enabled = guest_cpuid_has(vcpu, X86_FEATURE_RTM);
+ bool enabled = guest_cpu_cap_has(vcpu, X86_FEATURE_RTM);
vmx_set_guest_uret_msr(vmx, msr, enabled ? 0 : TSX_CTRL_RTM_DISABLE);
}
}
if (kvm_cpu_cap_has(X86_FEATURE_XFD))
vmx_set_intercept_for_msr(vcpu, MSR_IA32_XFD_ERR, MSR_TYPE_R,
- !guest_cpuid_has(vcpu, X86_FEATURE_XFD));
+ !guest_cpu_cap_has(vcpu, X86_FEATURE_XFD));
if (boot_cpu_has(X86_FEATURE_IBPB))
vmx_set_intercept_for_msr(vcpu, MSR_IA32_PRED_CMD, MSR_TYPE_W,
@@ -7892,17 +7882,17 @@ void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
if (boot_cpu_has(X86_FEATURE_FLUSH_L1D))
vmx_set_intercept_for_msr(vcpu, MSR_IA32_FLUSH_CMD, MSR_TYPE_W,
- !guest_cpuid_has(vcpu, X86_FEATURE_FLUSH_L1D));
+ !guest_cpu_cap_has(vcpu, X86_FEATURE_FLUSH_L1D));
set_cr4_guest_host_mask(vmx);
vmx_write_encls_bitmap(vcpu, NULL);
- if (guest_cpuid_has(vcpu, X86_FEATURE_SGX))
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_SGX))
vmx->msr_ia32_feature_control_valid_bits |= FEAT_CTL_SGX_ENABLED;
else
vmx->msr_ia32_feature_control_valid_bits &= ~FEAT_CTL_SGX_ENABLED;
- if (guest_cpuid_has(vcpu, X86_FEATURE_SGX_LC))
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_SGX_LC))
vmx->msr_ia32_feature_control_valid_bits |=
FEAT_CTL_SGX_LC_ENABLED;
else
diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
index 600c3bc89213..ce3295a67c04 100644
--- a/arch/x86/kvm/vmx/x86_ops.h
+++ b/arch/x86/kvm/vmx/x86_ops.h
@@ -103,8 +103,11 @@ void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr);
u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
+
void vmx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason,
u64 *info1, u64 *info2, u32 *intr_info, u32 *error_code);
+void vmx_get_entry_info(struct kvm_vcpu *vcpu, u32 *intr_info, u32 *error_code);
+
u64 vmx_get_l2_tsc_offset(struct kvm_vcpu *vcpu);
u64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu);
void vmx_write_tsc_offset(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3af0f58f6f06..b1beb59af6cc 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -119,8 +119,6 @@ u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA));
static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
#endif
-static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS;
-
#define KVM_EXIT_HYPERCALL_VALID_MASK (1 << KVM_HC_MAP_GPA_RANGE)
#define KVM_CAP_PMU_VALID_MASK KVM_PMU_CAP_DISABLE
@@ -1179,7 +1177,7 @@ void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu)
if (vcpu->arch.xcr0 != kvm_host.xcr0)
xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
- if (guest_can_use(vcpu, X86_FEATURE_XSAVES) &&
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_XSAVES) &&
vcpu->arch.ia32_xss != kvm_host.xss)
wrmsrl(MSR_IA32_XSS, vcpu->arch.ia32_xss);
}
@@ -1188,7 +1186,7 @@ void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu)
vcpu->arch.pkru != vcpu->arch.host_pkru &&
((vcpu->arch.xcr0 & XFEATURE_MASK_PKRU) ||
kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE)))
- write_pkru(vcpu->arch.pkru);
+ wrpkru(vcpu->arch.pkru);
}
EXPORT_SYMBOL_GPL(kvm_load_guest_xsave_state);
@@ -1202,7 +1200,7 @@ void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu)
kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE))) {
vcpu->arch.pkru = rdpkru();
if (vcpu->arch.pkru != vcpu->arch.host_pkru)
- write_pkru(vcpu->arch.host_pkru);
+ wrpkru(vcpu->arch.host_pkru);
}
if (kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE)) {
@@ -1210,7 +1208,7 @@ void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu)
if (vcpu->arch.xcr0 != kvm_host.xcr0)
xsetbv(XCR_XFEATURE_ENABLED_MASK, kvm_host.xcr0);
- if (guest_can_use(vcpu, X86_FEATURE_XSAVES) &&
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_XSAVES) &&
vcpu->arch.ia32_xss != kvm_host.xss)
wrmsrl(MSR_IA32_XSS, kvm_host.xss);
}
@@ -1283,18 +1281,6 @@ int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_emulate_xsetbv);
-bool __kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
-{
- if (cr4 & cr4_reserved_bits)
- return false;
-
- if (cr4 & vcpu->arch.cr4_guest_rsvd_bits)
- return false;
-
- return true;
-}
-EXPORT_SYMBOL_GPL(__kvm_is_valid_cr4);
-
static bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
return __kvm_is_valid_cr4(vcpu, cr4) &&
@@ -1516,10 +1502,10 @@ static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu)
{
u64 fixed = DR6_FIXED_1;
- if (!guest_cpuid_has(vcpu, X86_FEATURE_RTM))
+ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_RTM))
fixed |= DR6_RTM;
- if (!guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT))
+ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT))
fixed |= DR6_BUS_LOCK;
return fixed;
}
@@ -1695,20 +1681,20 @@ static int do_get_feature_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
static bool __kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
{
- if (efer & EFER_AUTOIBRS && !guest_cpuid_has(vcpu, X86_FEATURE_AUTOIBRS))
+ if (efer & EFER_AUTOIBRS && !guest_cpu_cap_has(vcpu, X86_FEATURE_AUTOIBRS))
return false;
- if (efer & EFER_FFXSR && !guest_cpuid_has(vcpu, X86_FEATURE_FXSR_OPT))
+ if (efer & EFER_FFXSR && !guest_cpu_cap_has(vcpu, X86_FEATURE_FXSR_OPT))
return false;
- if (efer & EFER_SVME && !guest_cpuid_has(vcpu, X86_FEATURE_SVM))
+ if (efer & EFER_SVME && !guest_cpu_cap_has(vcpu, X86_FEATURE_SVM))
return false;
if (efer & (EFER_LME | EFER_LMA) &&
- !guest_cpuid_has(vcpu, X86_FEATURE_LM))
+ !guest_cpu_cap_has(vcpu, X86_FEATURE_LM))
return false;
- if (efer & EFER_NX && !guest_cpuid_has(vcpu, X86_FEATURE_NX))
+ if (efer & EFER_NX && !guest_cpu_cap_has(vcpu, X86_FEATURE_NX))
return false;
return true;
@@ -1850,8 +1836,8 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
return 1;
if (!host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) &&
- !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
+ !guest_cpu_cap_has(vcpu, X86_FEATURE_RDTSCP) &&
+ !guest_cpu_cap_has(vcpu, X86_FEATURE_RDPID))
return 1;
/*
@@ -1908,8 +1894,8 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
return 1;
if (!host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) &&
- !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
+ !guest_cpu_cap_has(vcpu, X86_FEATURE_RDTSCP) &&
+ !guest_cpu_cap_has(vcpu, X86_FEATURE_RDPID))
return 1;
break;
}
@@ -2095,7 +2081,7 @@ EXPORT_SYMBOL_GPL(kvm_handle_invalid_op);
static int kvm_emulate_monitor_mwait(struct kvm_vcpu *vcpu, const char *insn)
{
if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS) &&
- !guest_cpuid_has(vcpu, X86_FEATURE_MWAIT))
+ !guest_cpu_cap_has(vcpu, X86_FEATURE_MWAIT))
return kvm_handle_invalid_op(vcpu);
pr_warn_once("%s instruction emulated as NOP!\n", insn);
@@ -3767,13 +3753,13 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case MSR_IA32_ARCH_CAPABILITIES:
if (!msr_info->host_initiated ||
- !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES))
+ !guest_cpu_cap_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES))
return KVM_MSR_RET_UNSUPPORTED;
vcpu->arch.arch_capabilities = data;
break;
case MSR_IA32_PERF_CAPABILITIES:
if (!msr_info->host_initiated ||
- !guest_cpuid_has(vcpu, X86_FEATURE_PDCM))
+ !guest_cpu_cap_has(vcpu, X86_FEATURE_PDCM))
return KVM_MSR_RET_UNSUPPORTED;
if (data & ~kvm_caps.supported_perf_cap)
@@ -3797,11 +3783,11 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if ((!guest_has_pred_cmd_msr(vcpu)))
return 1;
- if (!guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) &&
- !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBPB))
+ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_SPEC_CTRL) &&
+ !guest_cpu_cap_has(vcpu, X86_FEATURE_AMD_IBPB))
reserved_bits |= PRED_CMD_IBPB;
- if (!guest_cpuid_has(vcpu, X86_FEATURE_SBPB))
+ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_SBPB))
reserved_bits |= PRED_CMD_SBPB;
}
@@ -3822,7 +3808,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
}
case MSR_IA32_FLUSH_CMD:
if (!msr_info->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_FLUSH_L1D))
+ !guest_cpu_cap_has(vcpu, X86_FEATURE_FLUSH_L1D))
return 1;
if (!boot_cpu_has(X86_FEATURE_FLUSH_L1D) || (data & ~L1D_FLUSH))
@@ -3873,7 +3859,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
kvm_set_lapic_tscdeadline_msr(vcpu, data);
break;
case MSR_IA32_TSC_ADJUST:
- if (guest_cpuid_has(vcpu, X86_FEATURE_TSC_ADJUST)) {
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_TSC_ADJUST)) {
if (!msr_info->host_initiated) {
s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
adjust_tsc_offset_guest(vcpu, adj);
@@ -3900,7 +3886,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT) &&
((old_val ^ data) & MSR_IA32_MISC_ENABLE_MWAIT)) {
- if (!guest_cpuid_has(vcpu, X86_FEATURE_XMM3))
+ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_XMM3))
return 1;
vcpu->arch.ia32_misc_enable_msr = data;
kvm_update_cpuid_runtime(vcpu);
@@ -4077,12 +4063,12 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
kvm_pr_unimpl_wrmsr(vcpu, msr, data);
break;
case MSR_AMD64_OSVW_ID_LENGTH:
- if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
+ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_OSVW))
return 1;
vcpu->arch.osvw.length = data;
break;
case MSR_AMD64_OSVW_STATUS:
- if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
+ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_OSVW))
return 1;
vcpu->arch.osvw.status = data;
break;
@@ -4101,7 +4087,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
#ifdef CONFIG_X86_64
case MSR_IA32_XFD:
if (!msr_info->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_XFD))
+ !guest_cpu_cap_has(vcpu, X86_FEATURE_XFD))
return 1;
if (data & ~kvm_guest_supported_xfd(vcpu))
@@ -4111,7 +4097,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case MSR_IA32_XFD_ERR:
if (!msr_info->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_XFD))
+ !guest_cpu_cap_has(vcpu, X86_FEATURE_XFD))
return 1;
if (data & ~kvm_guest_supported_xfd(vcpu))
@@ -4226,12 +4212,12 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = vcpu->arch.microcode_version;
break;
case MSR_IA32_ARCH_CAPABILITIES:
- if (!guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES))
+ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES))
return KVM_MSR_RET_UNSUPPORTED;
msr_info->data = vcpu->arch.arch_capabilities;
break;
case MSR_IA32_PERF_CAPABILITIES:
- if (!guest_cpuid_has(vcpu, X86_FEATURE_PDCM))
+ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_PDCM))
return KVM_MSR_RET_UNSUPPORTED;
msr_info->data = vcpu->arch.perf_capabilities;
break;
@@ -4432,12 +4418,12 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = 0xbe702111;
break;
case MSR_AMD64_OSVW_ID_LENGTH:
- if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
+ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_OSVW))
return 1;
msr_info->data = vcpu->arch.osvw.length;
break;
case MSR_AMD64_OSVW_STATUS:
- if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
+ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_OSVW))
return 1;
msr_info->data = vcpu->arch.osvw.status;
break;
@@ -4456,14 +4442,14 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
#ifdef CONFIG_X86_64
case MSR_IA32_XFD:
if (!msr_info->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_XFD))
+ !guest_cpu_cap_has(vcpu, X86_FEATURE_XFD))
return 1;
msr_info->data = vcpu->arch.guest_fpu.fpstate->xfd;
break;
case MSR_IA32_XFD_ERR:
if (!msr_info->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_XFD))
+ !guest_cpu_cap_has(vcpu, X86_FEATURE_XFD))
return 1;
msr_info->data = vcpu->arch.guest_fpu.xfd_err;
@@ -4545,6 +4531,20 @@ static inline bool kvm_can_mwait_in_guest(void)
boot_cpu_has(X86_FEATURE_ARAT);
}
+static u64 kvm_get_allowed_disable_exits(void)
+{
+ u64 r = KVM_X86_DISABLE_EXITS_PAUSE;
+
+ if (!mitigate_smt_rsb) {
+ r |= KVM_X86_DISABLE_EXITS_HLT |
+ KVM_X86_DISABLE_EXITS_CSTATE;
+
+ if (kvm_can_mwait_in_guest())
+ r |= KVM_X86_DISABLE_EXITS_MWAIT;
+ }
+ return r;
+}
+
#ifdef CONFIG_KVM_HYPERV
static int kvm_ioctl_get_supported_hv_cpuid(struct kvm_vcpu *vcpu,
struct kvm_cpuid2 __user *cpuid_arg)
@@ -4687,15 +4687,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = KVM_CLOCK_VALID_FLAGS;
break;
case KVM_CAP_X86_DISABLE_EXITS:
- r = KVM_X86_DISABLE_EXITS_PAUSE;
-
- if (!mitigate_smt_rsb) {
- r |= KVM_X86_DISABLE_EXITS_HLT |
- KVM_X86_DISABLE_EXITS_CSTATE;
-
- if (kvm_can_mwait_in_guest())
- r |= KVM_X86_DISABLE_EXITS_MWAIT;
- }
+ r = kvm_get_allowed_disable_exits();
break;
case KVM_CAP_X86_SMM:
if (!IS_ENABLED(CONFIG_KVM_SMM))
@@ -5822,9 +5814,6 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:
vcpu->arch.pv_cpuid.enforce = cap->args[0];
- if (vcpu->arch.pv_cpuid.enforce)
- kvm_update_pv_runtime(vcpu);
-
return 0;
default:
return -EINVAL;
@@ -6542,30 +6531,32 @@ split_irqchip_unlock:
break;
case KVM_CAP_X86_DISABLE_EXITS:
r = -EINVAL;
- if (cap->args[0] & ~KVM_X86_DISABLE_VALID_EXITS)
+ if (cap->args[0] & ~kvm_get_allowed_disable_exits())
break;
- if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE)
- kvm->arch.pause_in_guest = true;
+ mutex_lock(&kvm->lock);
+ if (kvm->created_vcpus)
+ goto disable_exits_unlock;
#define SMT_RSB_MSG "This processor is affected by the Cross-Thread Return Predictions vulnerability. " \
"KVM_CAP_X86_DISABLE_EXITS should only be used with SMT disabled or trusted guests."
- if (!mitigate_smt_rsb) {
- if (boot_cpu_has_bug(X86_BUG_SMT_RSB) && cpu_smt_possible() &&
- (cap->args[0] & ~KVM_X86_DISABLE_EXITS_PAUSE))
- pr_warn_once(SMT_RSB_MSG);
-
- if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) &&
- kvm_can_mwait_in_guest())
- kvm->arch.mwait_in_guest = true;
- if (cap->args[0] & KVM_X86_DISABLE_EXITS_HLT)
- kvm->arch.hlt_in_guest = true;
- if (cap->args[0] & KVM_X86_DISABLE_EXITS_CSTATE)
- kvm->arch.cstate_in_guest = true;
- }
+ if (!mitigate_smt_rsb && boot_cpu_has_bug(X86_BUG_SMT_RSB) &&
+ cpu_smt_possible() &&
+ (cap->args[0] & ~KVM_X86_DISABLE_EXITS_PAUSE))
+ pr_warn_once(SMT_RSB_MSG);
+ if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE)
+ kvm->arch.pause_in_guest = true;
+ if (cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT)
+ kvm->arch.mwait_in_guest = true;
+ if (cap->args[0] & KVM_X86_DISABLE_EXITS_HLT)
+ kvm->arch.hlt_in_guest = true;
+ if (cap->args[0] & KVM_X86_DISABLE_EXITS_CSTATE)
+ kvm->arch.cstate_in_guest = true;
r = 0;
+disable_exits_unlock:
+ mutex_unlock(&kvm->lock);
break;
case KVM_CAP_MSR_PLATFORM_INFO:
kvm->arch.guest_can_read_msr_platform_info = cap->args[0];
@@ -8511,17 +8502,17 @@ static bool emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
static bool emulator_guest_has_movbe(struct x86_emulate_ctxt *ctxt)
{
- return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_MOVBE);
+ return guest_cpu_cap_has(emul_to_vcpu(ctxt), X86_FEATURE_MOVBE);
}
static bool emulator_guest_has_fxsr(struct x86_emulate_ctxt *ctxt)
{
- return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_FXSR);
+ return guest_cpu_cap_has(emul_to_vcpu(ctxt), X86_FEATURE_FXSR);
}
static bool emulator_guest_has_rdpid(struct x86_emulate_ctxt *ctxt)
{
- return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_RDPID);
+ return guest_cpu_cap_has(emul_to_vcpu(ctxt), X86_FEATURE_RDPID);
}
static bool emulator_guest_cpuid_is_intel_compatible(struct x86_emulate_ctxt *ctxt)
@@ -8813,6 +8804,28 @@ void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_prepare_emulation_failure_exit);
+void kvm_prepare_event_vectoring_exit(struct kvm_vcpu *vcpu, gpa_t gpa)
+{
+ u32 reason, intr_info, error_code;
+ struct kvm_run *run = vcpu->run;
+ u64 info1, info2;
+ int ndata = 0;
+
+ kvm_x86_call(get_exit_info)(vcpu, &reason, &info1, &info2,
+ &intr_info, &error_code);
+
+ run->internal.data[ndata++] = info2;
+ run->internal.data[ndata++] = reason;
+ run->internal.data[ndata++] = info1;
+ run->internal.data[ndata++] = gpa;
+ run->internal.data[ndata++] = vcpu->arch.last_vmentry_cpu;
+
+ run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
+ run->internal.ndata = ndata;
+}
+EXPORT_SYMBOL_GPL(kvm_prepare_event_vectoring_exit);
+
static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
{
struct kvm *kvm = vcpu->kvm;
@@ -9085,6 +9098,15 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
if (r == X86EMUL_RETRY_INSTR || r == X86EMUL_PROPAGATE_FAULT)
return 1;
+ if (kvm_unprotect_and_retry_on_failure(vcpu, cr2_or_gpa,
+ emulation_type))
+ return 1;
+
+ if (r == X86EMUL_UNHANDLEABLE_VECTORING) {
+ kvm_prepare_event_vectoring_exit(vcpu, cr2_or_gpa);
+ return 0;
+ }
+
WARN_ON_ONCE(r != X86EMUL_UNHANDLEABLE);
return handle_emulation_failure(vcpu, emulation_type);
}
@@ -9773,10 +9795,6 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
kvm_caps.supported_xss = 0;
-#define __kvm_cpu_cap_has(UNUSED_, f) kvm_cpu_cap_has(f)
- cr4_reserved_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_);
-#undef __kvm_cpu_cap_has
-
if (kvm_caps.has_tsc_control) {
/*
* Make sure the user can only configure tsc_khz values that
@@ -12276,9 +12294,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
goto free_emulate_ctxt;
}
- vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
- vcpu->arch.reserved_gpa_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu);
-
kvm_async_pf_hash_reset(vcpu);
if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_STUFF_FEATURE_MSRS)) {
@@ -12301,6 +12316,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
kvm_xen_init_vcpu(vcpu);
vcpu_load(vcpu);
+ kvm_vcpu_after_set_cpuid(vcpu);
kvm_set_tsc_khz(vcpu, vcpu->kvm->arch.default_tsc_khz);
kvm_vcpu_reset(vcpu, false);
kvm_init_mmu(vcpu);
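
For userspace, kvm_prepare_event_vectoring_exit() turns an unhandleable-vectoring condition into a KVM_EXIT_INTERNAL_ERROR with suberror KVM_INTERNAL_ERROR_DELIVERY_EV and five data words, filled in the order shown above (exit info2, exit reason, exit info1, gpa, last vmentry CPU). A minimal VMM-side decoder, assuming only the uapi header, might look like:

#include <stdio.h>
#include <linux/kvm.h>

static void report_vectoring_error(const struct kvm_run *run)
{
	if (run->exit_reason != KVM_EXIT_INTERNAL_ERROR ||
	    run->internal.suberror != KVM_INTERNAL_ERROR_DELIVERY_EV)
		return;

	/* data[3] is the faulting GPA, or INVALID_GPA when unknown. */
	for (unsigned int i = 0; i < run->internal.ndata; i++)
		fprintf(stderr, "delivery_ev data[%u] = 0x%llx\n", i,
			(unsigned long long)run->internal.data[i]);
}
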
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index ec623d23d13d..7a87c5fc57f1 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -550,7 +550,6 @@ static inline void kvm_machine_check(void)
void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu);
void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu);
int kvm_spec_ctrl_test_value(u64 value);
-bool __kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r,
struct x86_exception *e);
int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva);
@@ -577,6 +576,11 @@ enum kvm_msr_access {
#define KVM_MSR_RET_UNSUPPORTED 2
#define KVM_MSR_RET_FILTERED 3
+static inline bool __kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+{
+ return !(cr4 & vcpu->arch.cr4_guest_rsvd_bits);
+}
+
#define __cr4_reserved_bits(__cpu_has, __c) \
({ \
u64 __reserved_bits = CR4_RESERVED_BITS; \
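
With the module-wide cr4_reserved_bits gone, __kvm_is_valid_cr4() relies entirely on the per-vCPU mask, so cr4_guest_rsvd_bits must now fold in everything the removed global mask used to cover. Presumably that happens when guest CPUID is set, by reusing the macro above with the per-vCPU capability accessor (an assumption; the computation is not shown in this hunk):

/* e.g. from kvm_vcpu_after_set_cpuid(): */
vcpu->arch.cr4_guest_rsvd_bits = __cr4_reserved_bits(guest_cpu_cap_has, vcpu);
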
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 343de0a51797..45e6d8fca9b9 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -617,10 +617,6 @@ struct kvm_ioeventfd {
#define KVM_X86_DISABLE_EXITS_HLT (1 << 1)
#define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2)
#define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3)
-#define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \
- KVM_X86_DISABLE_EXITS_HLT | \
- KVM_X86_DISABLE_EXITS_PAUSE | \
- KVM_X86_DISABLE_EXITS_CSTATE)
/* for KVM_ENABLE_CAP */
struct kvm_enable_cap {
diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h
index 9ec984cf8674..d60da8966772 100644
--- a/tools/testing/selftests/kvm/include/x86/processor.h
+++ b/tools/testing/selftests/kvm/include/x86/processor.h
@@ -27,6 +27,8 @@ extern uint64_t guest_tsc_khz;
#define MAX_NR_CPUID_ENTRIES 100
#endif
+#define NONCANONICAL 0xaaaaaaaaaaaaaaaaull
+
/* Forced emulation prefix, used to invoke the emulator unconditionally. */
#define KVM_FEP "ud2; .byte 'k', 'v', 'm';"
@@ -569,6 +571,11 @@ static inline void set_cr4(uint64_t val)
__asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory");
}
+static inline void set_idt(const struct desc_ptr *idt_desc)
+{
+ __asm__ __volatile__("lidt %0"::"m"(*idt_desc));
+}
+
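
A usage sketch for the new helper (the wrapper name is illustrative), mirroring how the tests below load a zeroed IDT so that no event can be delivered:

	/* Guest-side sketch: a zero-limit IDT guarantees event delivery fails. */
	static void demo_clobber_idt(void)
	{
		struct desc_ptr idt = {0};	/* .size = 0, .address = 0 */

		set_idt(&idt);
	}
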
static inline u64 xgetbv(u32 index)
{
u32 eax, edx;
@@ -1010,10 +1017,19 @@ static inline struct kvm_cpuid2 *allocate_kvm_cpuid2(int nr_entries)
void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid);
+static inline void vcpu_get_cpuid(struct kvm_vcpu *vcpu)
+{
+ vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
+}
+
static inline struct kvm_cpuid_entry2 *__vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu,
uint32_t function,
uint32_t index)
{
+ TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first (or equivalent)");
+
+ vcpu_get_cpuid(vcpu);
+
return (struct kvm_cpuid_entry2 *)get_cpuid_entry(vcpu->cpuid,
function, index);
}
@@ -1034,7 +1050,7 @@ static inline int __vcpu_set_cpuid(struct kvm_vcpu *vcpu)
return r;
/* On success, refresh the cache to pick up adjustments made by KVM. */
- vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
+ vcpu_get_cpuid(vcpu);
return 0;
}
@@ -1044,12 +1060,7 @@ static inline void vcpu_set_cpuid(struct kvm_vcpu *vcpu)
vcpu_ioctl(vcpu, KVM_SET_CPUID2, vcpu->cpuid);
/* Refresh the cache to pick up adjustments made by KVM. */
- vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
-}
-
-static inline void vcpu_get_cpuid(struct kvm_vcpu *vcpu)
-{
- vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
+ vcpu_get_cpuid(vcpu);
}
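
With the refresh folded into vcpu_set_cpuid(), the common pattern is: edit the cached entry, set, then observe KVM's adjustments in the refreshed cache. A hypothetical sketch (the helper name and feature choice are illustrative):

	/* Sketch: try to advertise x2APIC (CPUID.1:ECX[21]); after
	 * vcpu_set_cpuid(), vcpu->cpuid holds KVM's adjusted view. */
	static void demo_try_enable_x2apic(struct kvm_vcpu *vcpu)
	{
		struct kvm_cpuid_entry2 *ent = vcpu_get_cpuid_entry(vcpu, 0x1);

		ent->ecx |= (1u << 21);
		vcpu_set_cpuid(vcpu);
	}
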
void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu,
diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c
index 86ee3385e860..bc440d5aba57 100644
--- a/tools/testing/selftests/kvm/set_memory_region_test.c
+++ b/tools/testing/selftests/kvm/set_memory_region_test.c
@@ -235,7 +235,7 @@ static void guest_code_delete_memory_region(void)
* in the guest will never succeed, and so isn't an option.
*/
memset(&idt, 0, sizeof(idt));
- __asm__ __volatile__("lidt %0" :: "m"(idt));
+ set_idt(&idt);
GUEST_SYNC(0);
@@ -553,6 +553,56 @@ static void test_add_overlapping_private_memory_regions(void)
close(memfd);
kvm_vm_free(vm);
}
+
+static void guest_code_mmio_during_vectoring(void)
+{
+ const struct desc_ptr idt_desc = {
+ .address = MEM_REGION_GPA,
+ .size = 0xFFF,
+ };
+
+ set_idt(&idt_desc);
+
+ /* Generate a #GP by dereferencing a non-canonical address */
+ *((uint8_t *)NONCANONICAL) = 0x1;
+
+ GUEST_ASSERT(0);
+}
+
+/*
+ * This test points the IDT descriptor base at an MMIO address. Delivering
+ * any event in the guest then requires reading the IDT gate from MMIO, which
+ * KVM cannot emulate, so the result should be a KVM internal error.
+ */
+static void test_mmio_during_vectoring(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+ u64 expected_gpa;
+
+ pr_info("Testing MMIO during vectoring error handling\n");
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code_mmio_during_vectoring);
+ virt_map(vm, MEM_REGION_GPA, MEM_REGION_GPA, 1);
+
+ run = vcpu->run;
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR);
+ TEST_ASSERT(run->internal.suberror == KVM_INTERNAL_ERROR_DELIVERY_EV,
+ "Unexpected suberror = %d", run->internal.suberror);
+ TEST_ASSERT(run->internal.ndata >= 4, "Unexpected internal error data array size = %d",
+ run->internal.ndata);
+
+ /* The reported GPA should be IDT base + offset of the #GP vector's gate */
+ expected_gpa = MEM_REGION_GPA + GP_VECTOR * sizeof(struct idt_entry);
+
+ TEST_ASSERT(run->internal.data[3] == expected_gpa,
+ "Unexpected GPA = %llx (expected %llx)",
+ run->internal.data[3], expected_gpa);
+
+ kvm_vm_free(vm);
+}
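
For reference, the expected GPA works out concretely: an x86-64 IDT gate (struct idt_entry) is 16 bytes and GP_VECTOR is 13, so the faulting access during #GP delivery lands at MEM_REGION_GPA + 13 * 16 = MEM_REGION_GPA + 0xd0.
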
#endif
int main(int argc, char *argv[])
@@ -568,6 +618,7 @@ int main(int argc, char *argv[])
* KVM_RUN fails with ENOEXEC or EFAULT.
*/
test_zero_memory_regions();
+ test_mmio_during_vectoring();
#endif
test_invalid_memory_region_flags();
diff --git a/tools/testing/selftests/kvm/x86/kvm_pv_test.c b/tools/testing/selftests/kvm/x86/kvm_pv_test.c
index 78878b3a2725..1b805cbdb47b 100644
--- a/tools/testing/selftests/kvm/x86/kvm_pv_test.c
+++ b/tools/testing/selftests/kvm/x86/kvm_pv_test.c
@@ -139,10 +139,12 @@ static void test_pv_unhalt(void)
struct kvm_vm *vm;
struct kvm_cpuid_entry2 *ent;
u32 kvm_sig_old;
+ int r;
- pr_info("testing KVM_FEATURE_PV_UNHALT\n");
+ if (!(kvm_check_cap(KVM_CAP_X86_DISABLE_EXITS) & KVM_X86_DISABLE_EXITS_HLT))
+ return;
- TEST_REQUIRE(KVM_CAP_X86_DISABLE_EXITS);
+ pr_info("testing KVM_FEATURE_PV_UNHALT\n");
/* KVM_PV_UNHALT test */
vm = vm_create_with_one_vcpu(&vcpu, guest_main);
@@ -151,19 +153,45 @@ static void test_pv_unhalt(void)
TEST_ASSERT(vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
"Enabling X86_FEATURE_KVM_PV_UNHALT had no effect");
- /* Make sure KVM clears vcpu->arch.kvm_cpuid */
+ /* Verify KVM disallows disabling exits after vCPU creation. */
+ r = __vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT);
+ TEST_ASSERT(r && errno == EINVAL,
+ "Disabling exits after vCPU creation didn't fail as expected");
+
+ kvm_vm_free(vm);
+
+ /* Verify that KVM clears PV_UNHALT from guest CPUID. */
+ vm = vm_create(1);
+ vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT);
+
+ vcpu = vm_vcpu_add(vm, 0, NULL);
+ TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
+ "vCPU created with PV_UNHALT set by default");
+
+ vcpu_set_cpuid_feature(vcpu, X86_FEATURE_KVM_PV_UNHALT);
+ TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
+ "PV_UNHALT set in guest CPUID when HLT-exiting is disabled");
+
+ /*
+ * Clobber the KVM PV signature and verify KVM does NOT clear PV_UNHALT
+ * when KVM PV is not present, and DOES clear PV_UNHALT when switching
+ * back to the correct signature.
+ */
ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE);
kvm_sig_old = ent->ebx;
ent->ebx = 0xdeadbeef;
vcpu_set_cpuid(vcpu);
- vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT);
+ vcpu_set_cpuid_feature(vcpu, X86_FEATURE_KVM_PV_UNHALT);
+ TEST_ASSERT(vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
+ "PV_UNHALT cleared when using bogus KVM PV signature");
+
ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE);
ent->ebx = kvm_sig_old;
vcpu_set_cpuid(vcpu);
TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
- "KVM_FEATURE_PV_UNHALT is set with KVM_CAP_X86_DISABLE_EXITS");
+ "PV_UNHALT set in guest CPUID when HLT-exiting is disabled");
/* FIXME: actually test KVM_FEATURE_PV_UNHALT feature */
diff --git a/tools/testing/selftests/kvm/x86/set_sregs_test.c b/tools/testing/selftests/kvm/x86/set_sregs_test.c
index c021c0795a96..f4095a3d1278 100644
--- a/tools/testing/selftests/kvm/x86/set_sregs_test.c
+++ b/tools/testing/selftests/kvm/x86/set_sregs_test.c
@@ -41,13 +41,15 @@ do { \
TEST_ASSERT(!memcmp(&new, &orig, sizeof(new)), "KVM modified sregs"); \
} while (0)
+#define KVM_ALWAYS_ALLOWED_CR4 (X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | \
+ X86_CR4_DE | X86_CR4_PSE | X86_CR4_PAE | \
+ X86_CR4_MCE | X86_CR4_PGE | X86_CR4_PCE | \
+ X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT)
+
static uint64_t calc_supported_cr4_feature_bits(void)
{
- uint64_t cr4;
+ uint64_t cr4 = KVM_ALWAYS_ALLOWED_CR4;
- cr4 = X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE |
- X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE | X86_CR4_PGE |
- X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT;
if (kvm_cpu_has(X86_FEATURE_UMIP))
cr4 |= X86_CR4_UMIP;
if (kvm_cpu_has(X86_FEATURE_LA57))
@@ -72,36 +74,31 @@ static uint64_t calc_supported_cr4_feature_bits(void)
return cr4;
}
-int main(int argc, char *argv[])
+static void test_cr_bits(struct kvm_vcpu *vcpu, uint64_t cr4)
{
struct kvm_sregs sregs;
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
- uint64_t cr4;
int rc, i;
- /*
- * Create a dummy VM, specifically to avoid doing KVM_SET_CPUID2, and
- * use it to verify all supported CR4 bits can be set prior to defining
- * the vCPU model, i.e. without doing KVM_SET_CPUID2.
- */
- vm = vm_create_barebones();
- vcpu = __vm_vcpu_add(vm, 0);
-
vcpu_sregs_get(vcpu, &sregs);
-
- sregs.cr0 = 0;
- sregs.cr4 |= calc_supported_cr4_feature_bits();
- cr4 = sregs.cr4;
-
+ sregs.cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
+ sregs.cr4 |= cr4;
rc = _vcpu_sregs_set(vcpu, &sregs);
TEST_ASSERT(!rc, "Failed to set supported CR4 bits (0x%lx)", cr4);
+ TEST_ASSERT(!!(sregs.cr4 & X86_CR4_OSXSAVE) ==
+ (vcpu->cpuid && vcpu_cpuid_has(vcpu, X86_FEATURE_OSXSAVE)),
+ "KVM didn't %s OSXSAVE in CPUID as expected",
+ (sregs.cr4 & X86_CR4_OSXSAVE) ? "set" : "clear");
+
+ TEST_ASSERT(!!(sregs.cr4 & X86_CR4_PKE) ==
+ (vcpu->cpuid && vcpu_cpuid_has(vcpu, X86_FEATURE_OSPKE)),
+ "KVM didn't %s OSPKE in CPUID as expected",
+ (sregs.cr4 & X86_CR4_PKE) ? "set" : "clear");
+
vcpu_sregs_get(vcpu, &sregs);
TEST_ASSERT(sregs.cr4 == cr4, "sregs.CR4 (0x%llx) != CR4 (0x%lx)",
sregs.cr4, cr4);
- /* Verify all unsupported features are rejected by KVM. */
TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_UMIP);
TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_LA57);
TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_VMXE);
@@ -119,10 +116,28 @@ int main(int argc, char *argv[])
/* NW without CD is illegal, as is PG without PE. */
TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_NW);
TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_PG);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_sregs sregs;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int rc;
+
+ /*
+ * Create a dummy VM, specifically to avoid doing KVM_SET_CPUID2, and
+ * use it to verify KVM enforces guest CPUID even if *userspace* never
+ * sets CPUID.
+ */
+ vm = vm_create_barebones();
+ vcpu = __vm_vcpu_add(vm, 0);
+ test_cr_bits(vcpu, KVM_ALWAYS_ALLOWED_CR4);
kvm_vm_free(vm);
- /* Create a "real" VM and verify APIC_BASE can be set. */
+ /* Create a "real" VM with a fully populated guest CPUID and verify
+ * APIC_BASE and all supported CR4 can be set.
+ */
vm = vm_create_with_one_vcpu(&vcpu, NULL);
vcpu_sregs_get(vcpu, &sregs);
@@ -135,6 +150,8 @@ int main(int argc, char *argv[])
TEST_ASSERT(!rc, "Couldn't set IA32_APIC_BASE to %llx (valid)",
sregs.apic_base);
+ test_cr_bits(vcpu, calc_supported_cr4_feature_bits());
+
kvm_vm_free(vm);
return 0;
diff --git a/tools/testing/selftests/kvm/x86/sev_smoke_test.c b/tools/testing/selftests/kvm/x86/sev_smoke_test.c
index ae77698e6e97..a1a688e75266 100644
--- a/tools/testing/selftests/kvm/x86/sev_smoke_test.c
+++ b/tools/testing/selftests/kvm/x86/sev_smoke_test.c
@@ -155,7 +155,7 @@ static void guest_shutdown_code(void)
/* Clobber the IDT so that #UD is guaranteed to trigger SHUTDOWN. */
memset(&idt, 0, sizeof(idt));
- __asm__ __volatile__("lidt %0" :: "m"(idt));
+ set_idt(&idt);
__asm__ __volatile__("ud2");
}