Diffstat (limited to 'arch/x86/kernel/kvm.c')
-rw-r--r--  arch/x86/kernel/kvm.c  100
1 file changed, 7 insertions(+), 93 deletions(-)
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index c6a82f9f537f..b3d9b0d7a37d 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -75,7 +75,6 @@ struct kvm_task_sleep_node {
struct swait_queue_head wq;
u32 token;
int cpu;
- bool use_halt;
};
static struct kvm_task_sleep_head {
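Note: the first hunk drops the use_halt flag from the per-token sleep node. Reconstructed from the hunks in this diff (the link member is referenced by hlist_add_head()/hlist_del_init() below), a minimal sketch of the resulting layout:

struct kvm_task_sleep_node {
	struct hlist_node link;		/* entry in the async_pf_sleepers hash bucket */
	struct swait_queue_head wq;	/* waiters blocked until the page arrives */
	u32 token;			/* async #PF token handed out by the host */
	int cpu;			/* CPU that queued this wait */
};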
@@ -98,8 +97,7 @@ static struct kvm_task_sleep_node *_find_apf_task(struct kvm_task_sleep_head *b,
return NULL;
}
-static bool kvm_async_pf_queue_task(u32 token, bool use_halt,
- struct kvm_task_sleep_node *n)
+static bool kvm_async_pf_queue_task(u32 token, struct kvm_task_sleep_node *n)
{
u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
@@ -117,7 +115,6 @@ static bool kvm_async_pf_queue_task(u32 token, bool use_halt,
n->token = token;
n->cpu = smp_processor_id();
- n->use_halt = use_halt;
init_swait_queue_head(&n->wq);
hlist_add_head(&n->link, &b->list);
raw_spin_unlock(&b->lock);
@@ -138,7 +135,7 @@ void kvm_async_pf_task_wait_schedule(u32 token)
lockdep_assert_irqs_disabled();
- if (!kvm_async_pf_queue_task(token, false, &n))
+ if (!kvm_async_pf_queue_task(token, &n))
return;
for (;;) {
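Note: the wait loop itself is unchanged and lies outside the hunk context. Roughly, it sleeps on the node's swait queue with interrupts re-enabled until apf_task_wake_one() unhashes the node; a sketch under that assumption (the helper name is illustrative, not from the file):

/* Illustrative only: the schedule-based wait once the node is queued. */
static void async_pf_wait_sketch(struct kvm_task_sleep_node *n)
{
	DECLARE_SWAITQUEUE(wait);

	for (;;) {
		prepare_to_swait_exclusive(&n->wq, &wait, TASK_UNINTERRUPTIBLE);
		if (hlist_unhashed(&n->link))
			break;			/* dequeued by apf_task_wake_one() */

		local_irq_enable();		/* fault was injected with IRQs enabled */
		schedule();			/* block until the host reports the page */
		local_irq_disable();
	}
	finish_swait(&n->wq, &wait);
}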
@@ -154,91 +151,10 @@ void kvm_async_pf_task_wait_schedule(u32 token)
}
EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait_schedule);
-/*
- * Invoked from the async page fault handler.
- */
-static void kvm_async_pf_task_wait_halt(u32 token)
-{
- struct kvm_task_sleep_node n;
-
- if (!kvm_async_pf_queue_task(token, true, &n))
- return;
-
- for (;;) {
- if (hlist_unhashed(&n.link))
- break;
- /*
- * No point in doing anything about RCU here. Any RCU read
- * side critical section or RCU watching section can be
- * interrupted by VMEXITs and the host is free to keep the
- * vCPU scheduled out as long as it sees fit. This is not
- * any different just because of the halt induced voluntary
- * VMEXIT.
- *
- * Also the async page fault could have interrupted any RCU
- * watching context, so invoking rcu_irq_exit()/enter()
- * around this is not gaining anything.
- */
- native_safe_halt();
- local_irq_disable();
- }
-}
-
-/* Invoked from the async page fault handler */
-static void kvm_async_pf_task_wait(u32 token, bool usermode)
-{
- bool can_schedule;
-
- /*
- * No need to check whether interrupts were disabled because the
- * host will (hopefully) only inject an async page fault into
- * interrupt enabled regions.
- *
- * If CONFIG_PREEMPTION is enabled then check whether the code
- * which triggered the page fault is preemptible. This covers user
- * mode as well because preempt_count() is obviously 0 there.
- *
- * The check for rcu_preempt_depth() is also required because
- * voluntary scheduling inside a rcu read locked section is not
- * allowed.
- *
- * The idle task is already covered by this because idle always
- * has a preempt count > 0.
- *
- * If CONFIG_PREEMPTION is disabled only allow scheduling when
- * coming from user mode as there is no indication whether the
- * context which triggered the page fault could schedule or not.
- */
- if (IS_ENABLED(CONFIG_PREEMPTION))
- can_schedule = preempt_count() + rcu_preempt_depth() == 0;
- else
- can_schedule = usermode;
-
- /*
- * If the kernel context is allowed to schedule then RCU is
- * watching because no preemptible code in the kernel is inside RCU
- * idle state. So it can be treated like user mode. User mode is
- * safe because the #PF entry invoked enter_from_user_mode().
- *
- * For the non schedulable case invoke rcu_irq_enter() for
- * now. This will be moved out to the pagefault entry code later
- * and only invoked when really needed.
- */
- if (can_schedule) {
- kvm_async_pf_task_wait_schedule(token);
- } else {
- rcu_irq_enter();
- kvm_async_pf_task_wait_halt(token);
- rcu_irq_exit();
- }
-}
-
static void apf_task_wake_one(struct kvm_task_sleep_node *n)
{
hlist_del_init(&n->link);
- if (n->use_halt)
- smp_send_reschedule(n->cpu);
- else if (swq_has_sleeper(&n->wq))
+ if (swq_has_sleeper(&n->wq))
swake_up_one(&n->wq);
}
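Note: the large removal above deletes both the halt-based wait and the kvm_async_pf_task_wait() front end that chose between halting and scheduling. Condensed from the deleted comment and code, the old decision was essentially the following; after this patch the kernel-mode case no longer exists, so the whole branch goes away:

/* Condensed form of the deleted decision logic, for illustration only. */
static bool old_can_schedule(struct pt_regs *regs)
{
	if (IS_ENABLED(CONFIG_PREEMPTION))
		return preempt_count() + rcu_preempt_depth() == 0;
	return user_mode(regs);		/* !PREEMPTION: only trust user mode */
}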
@@ -337,8 +253,10 @@ bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
panic("Host injected async #PF in interrupt disabled region\n");
if (reason == KVM_PV_REASON_PAGE_NOT_PRESENT) {
- /* page is swapped out by the host. */
- kvm_async_pf_task_wait(token, user_mode(regs));
+ if (unlikely(!(user_mode(regs))))
+ panic("Host injected async #PF in kernel mode\n");
+ /* Page is swapped out by the host. */
+ kvm_async_pf_task_wait_schedule(token);
} else {
rcu_irq_enter();
kvm_async_pf_task_wake(token);
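Note: putting this hunk together, the not-present path now refuses kernel-mode injection outright and always takes the schedulable wait, while the page-ready path keeps its rcu_irq_enter()/rcu_irq_exit() bracket (the closing call sits just past the end of the hunk). A sketch of the resulting flow:

	if (reason == KVM_PV_REASON_PAGE_NOT_PRESENT) {
		/* Only legitimate while user mode was running. */
		if (unlikely(!user_mode(regs)))
			panic("Host injected async #PF in kernel mode\n");
		/* Page is swapped out by the host: sleep until it is back. */
		kvm_async_pf_task_wait_schedule(token);
	} else {
		/* 'Page ready' can interrupt any context; tell RCU about the IRQ. */
		rcu_irq_enter();
		kvm_async_pf_task_wake(token);
		rcu_irq_exit();		/* assumed; not visible in this excerpt */
	}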
@@ -397,10 +315,6 @@ static void kvm_guest_cpu_init(void)
WARN_ON_ONCE(!static_branch_likely(&kvm_async_pf_enabled));
pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason));
-
-#ifdef CONFIG_PREEMPTION
- pa |= KVM_ASYNC_PF_SEND_ALWAYS;
-#endif
pa |= KVM_ASYNC_PF_ENABLED;
if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT))
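Note: the last hunk stops setting KVM_ASYNC_PF_SEND_ALWAYS, so the host no longer delivers 'page not present' faults while the vCPU runs kernel code, which is what makes the panic above a pure sanity check. A minimal sketch of the resulting enable sequence, assuming the usual tail of kvm_guest_cpu_init() (the wrmsrl() and the VMEXIT delivery bit are not shown in this excerpt):

	pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason));
	pa |= KVM_ASYNC_PF_ENABLED;
	if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT))
		pa |= KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;	/* assumed from context */
	wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);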