Diffstat (limited to 'arch/x86/kernel/kvm.c')
-rw-r--r--  arch/x86/kernel/kvm.c  100
1 file changed, 7 insertions(+), 93 deletions(-)
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index c6a82f9f537f..b3d9b0d7a37d 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -75,7 +75,6 @@ struct kvm_task_sleep_node {
struct swait_queue_head wq;
u32 token;
int cpu;
- bool use_halt;
};
static struct kvm_task_sleep_head {
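Note: the first hunk drops the use_halt flag from the per-token sleep node. Reconstructed from the hunks in this diff (the link member is referenced by hlist_add_head()/hlist_del_init() below), a minimal sketch of the resulting layout:

struct kvm_task_sleep_node {
	struct hlist_node link;		/* entry in the async_pf_sleepers hash bucket */
	struct swait_queue_head wq;	/* waiters blocked until the page arrives */
	u32 token;			/* async #PF token handed out by the host */
	int cpu;			/* CPU that queued this wait */
};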
@@ -98,8 +97,7 @@ static struct kvm_task_sleep_node *_find_apf_task(struct kvm_task_sleep_head *b,
return NULL;
}
-static bool kvm_async_pf_queue_task(u32 token, bool use_halt,
- struct kvm_task_sleep_node *n)
+static bool kvm_async_pf_queue_task(u32 token, struct kvm_task_sleep_node *n)
{
u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
@@ -117,7 +115,6 @@ static bool kvm_async_pf_queue_task(u32 token, bool use_halt,
n->token = token;
n->cpu = smp_processor_id();
- n->use_halt = use_halt;
init_swait_queue_head(&n->wq);
hlist_add_head(&n->link, &b->list);
raw_spin_unlock(&b->lock);
@@ -138,7 +135,7 @@ void kvm_async_pf_task_wait_schedule(u32 token)
lockdep_assert_irqs_disabled();
- if (!kvm_async_pf_queue_task(token, false, &n))
+ if (!kvm_async_pf_queue_task(token, &n))
return;
for (;;) {
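Note: the wait loop itself is unchanged and lies outside the hunk context. Roughly, it sleeps on the node's swait queue with interrupts re-enabled until apf_task_wake_one() unhashes the node; a sketch under that assumption (the helper name is illustrative, not from the file):

/* Illustrative only: the schedule-based wait once the node is queued. */
static void async_pf_wait_sketch(struct kvm_task_sleep_node *n)
{
	DECLARE_SWAITQUEUE(wait);

	for (;;) {
		prepare_to_swait_exclusive(&n->wq, &wait, TASK_UNINTERRUPTIBLE);
		if (hlist_unhashed(&n->link))
			break;			/* dequeued by apf_task_wake_one() */

		local_irq_enable();		/* fault was injected with IRQs enabled */
		schedule();			/* block until the host reports the page */
		local_irq_disable();
	}
	finish_swait(&n->wq, &wait);
}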
@@ -154,91 +151,10 @@ void kvm_async_pf_task_wait_schedule(u32 token)
}
EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait_schedule);
-/*
- * Invoked from the async page fault handler.
- */
-static void kvm_async_pf_task_wait_halt(u32 token)
-{
- struct kvm_task_sleep_node n;
-
- if (!kvm_async_pf_queue_task(token, true, &n))
- return;
-
- for (;;) {
- if (hlist_unhashed(&n.link))
- break;
- /*
- * No point in doing anything about RCU here. Any RCU read
- * side critical section or RCU watching section can be
- * interrupted by VMEXITs and the host is free to keep the
- * vCPU scheduled out as long as it sees fit. This is not
- * any different just because of the halt induced voluntary
- * VMEXIT.
- *
- * Also the async page fault could have interrupted any RCU
- * watching context, so invoking rcu_irq_exit()/enter()
- * around this is not gaining anything.
- */
- native_safe_halt();
- local_irq_disable();
- }
-}
-
-/* Invoked from the async page fault handler */
-static void kvm_async_pf_task_wait(u32 token, bool usermode)
-{
- bool can_schedule;
-
- /*
- * No need to check whether interrupts were disabled because the
- * host will (hopefully) only inject an async page fault into
- * interrupt enabled regions.
- *
- * If CONFIG_PREEMPTION is enabled then check whether the code
- * which triggered the page fault is preemptible. This covers user
- * mode as well because preempt_count() is obviously 0 there.
- *
- * The check for rcu_preempt_depth() is also required because
- * voluntary scheduling inside a rcu read locked section is not
- * allowed.
- *
- * The idle task is already covered by this because idle always
- * has a preempt count > 0.
- *
- * If CONFIG_PREEMPTION is disabled only allow scheduling when
- * coming from user mode as there is no indication whether the
- * context which triggered the page fault could schedule or not.
- */
- if (IS_ENABLED(CONFIG_PREEMPTION))
- can_schedule = preempt_count() + rcu_preempt_depth() == 0;
- else
- can_schedule = usermode;
-
- /*
- * If the kernel context is allowed to schedule then RCU is
- * watching because no preemptible code in the kernel is inside RCU
- * idle state. So it can be treated like user mode. User mode is
- * safe because the #PF entry invoked enter_from_user_mode().
- *
- * For the non schedulable case invoke rcu_irq_enter() for
- * now. This will be moved out to the pagefault entry code later
- * and only invoked when really needed.
- */
- if (can_schedule) {
- kvm_async_pf_task_wait_schedule(token);
- } else {
- rcu_irq_enter();
- kvm_async_pf_task_wait_halt(token);
- rcu_irq_exit();
- }
-}
-
static void apf_task_wake_one(struct kvm_task_sleep_node *n)
{
hlist_del_init(&n->link);
- if (n->use_halt)
- smp_send_reschedule(n->cpu);
- else if (swq_has_sleeper(&n->wq))
+ if (swq_has_sleeper(&n->wq))
swake_up_one(&n->wq);
}
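Note: the large removal above deletes both the halt-based wait and the kvm_async_pf_task_wait() front end that chose between halting and scheduling. Condensed from the deleted comment and code, the old decision was essentially the following; after this patch the kernel-mode case no longer exists, so the whole branch goes away:

/* Condensed form of the deleted decision logic, for illustration only. */
static bool old_can_schedule(struct pt_regs *regs)
{
	if (IS_ENABLED(CONFIG_PREEMPTION))
		return preempt_count() + rcu_preempt_depth() == 0;
	return user_mode(regs);		/* !PREEMPTION: only trust user mode */
}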
@@ -337,8 +253,10 @@ bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
panic("Host injected async #PF in interrupt disabled region\n");
if (reason == KVM_PV_REASON_PAGE_NOT_PRESENT) {
- /* page is swapped out by the host. */
- kvm_async_pf_task_wait(token, user_mode(regs));
+ if (unlikely(!(user_mode(regs))))
+ panic("Host injected async #PF in kernel mode\n");
+ /* Page is swapped out by the host. */
+ kvm_async_pf_task_wait_schedule(token);
} else {
rcu_irq_enter();
kvm_async_pf_task_wake(token);
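Note: putting this hunk together, the not-present path now refuses kernel-mode injection outright and always takes the schedulable wait, while the page-ready path keeps its rcu_irq_enter()/rcu_irq_exit() bracket (the closing call sits just past the end of the hunk). A sketch of the resulting flow:

	if (reason == KVM_PV_REASON_PAGE_NOT_PRESENT) {
		/* Only legitimate while user mode was running. */
		if (unlikely(!user_mode(regs)))
			panic("Host injected async #PF in kernel mode\n");
		/* Page is swapped out by the host: sleep until it is back. */
		kvm_async_pf_task_wait_schedule(token);
	} else {
		/* 'Page ready' can interrupt any context; tell RCU about the IRQ. */
		rcu_irq_enter();
		kvm_async_pf_task_wake(token);
		rcu_irq_exit();		/* assumed; not visible in this excerpt */
	}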
@@ -397,10 +315,6 @@ static void kvm_guest_cpu_init(void)
WARN_ON_ONCE(!static_branch_likely(&kvm_async_pf_enabled));
pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason));
-
-#ifdef CONFIG_PREEMPTION
- pa |= KVM_ASYNC_PF_SEND_ALWAYS;
-#endif
pa |= KVM_ASYNC_PF_ENABLED;
if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT))
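Note: the last hunk stops setting KVM_ASYNC_PF_SEND_ALWAYS, so the host no longer delivers 'page not present' faults while the vCPU runs kernel code, which is what makes the panic above a pure sanity check. A minimal sketch of the resulting enable sequence, assuming the usual tail of kvm_guest_cpu_init() (the wrmsrl() and the VMEXIT delivery bit are not shown in this excerpt):

	pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason));
	pa |= KVM_ASYNC_PF_ENABLED;
	if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT))
		pa |= KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;	/* assumed from context */
	wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);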