summaryrefslogtreecommitdiff
path: root/arch/s390/mm/fault.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390/mm/fault.c')
-rw-r--r--arch/s390/mm/fault.c80
1 files changed, 51 insertions, 29 deletions
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index cce577feab1e..661d9fe63c43 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -24,7 +24,7 @@
#include <linux/kdebug.h>
#include <linux/init.h>
#include <linux/console.h>
-#include <linux/module.h>
+#include <linux/extable.h>
#include <linux/hardirq.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
@@ -250,6 +250,7 @@ static noinline void do_sigsegv(struct pt_regs *regs, int si_code)
report_user_fault(regs, SIGSEGV, 1);
si.si_signo = SIGSEGV;
+ si.si_errno = 0;
si.si_code = si_code;
si.si_addr = (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK);
force_sig_info(SIGSEGV, &si, current);
@@ -417,6 +418,8 @@ static inline int do_exception(struct pt_regs *regs, int access)
(struct gmap *) S390_lowcore.gmap : NULL;
if (gmap) {
current->thread.gmap_addr = address;
+ current->thread.gmap_write_flag = !!(flags & FAULT_FLAG_WRITE);
+ current->thread.gmap_int_code = regs->int_code & 0xffff;
address = __gmap_translate(gmap, address);
if (address == -EFAULT) {
fault = VM_FAULT_BADMAP;
@@ -455,7 +458,7 @@ retry:
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
- fault = handle_mm_fault(mm, vma, address, flags);
+ fault = handle_mm_fault(vma, address, flags);
/* No reason to continue if interrupted by SIGKILL. */
if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) {
fault = VM_FAULT_SIGNAL;
@@ -623,7 +626,7 @@ void pfault_fini(void)
diag_stat_inc(DIAG_STAT_X258);
asm volatile(
" diag %0,0,0x258\n"
- "0:\n"
+ "0: nopr %%r7\n"
EX_TABLE(0b,0b)
: : "a" (&refbk), "m" (refbk) : "cc");
}
@@ -631,6 +634,29 @@ void pfault_fini(void)
static DEFINE_SPINLOCK(pfault_lock);
static LIST_HEAD(pfault_list);
+#define PF_COMPLETE 0x0080
+
+/*
+ * The mechanism of our pfault code: if Linux is running as guest, runs a user
+ * space process and the user space process accesses a page that the host has
+ * paged out we get a pfault interrupt.
+ *
+ * This allows us, within the guest, to schedule a different process. Without
+ * this mechanism the host would have to suspend the whole virtual cpu until
+ * the page has been paged in.
+ *
+ * So when we get such an interrupt then we set the state of the current task
+ * to uninterruptible and also set the need_resched flag. Both happens within
+ * interrupt context(!). If we later on want to return to user space we
+ * recognize the need_resched flag and then call schedule(). It's not very
+ * obvious how this works...
+ *
+ * Of course we have a lot of additional fun with the completion interrupt (->
+ * host signals that a page of a process has been paged in and the process can
+ * continue to run). This interrupt can arrive on any cpu and, since we have
+ * virtual cpus, actually appear before the interrupt that signals that a page
+ * is missing.
+ */
static void pfault_interrupt(struct ext_code ext_code,
unsigned int param32, unsigned long param64)
{
@@ -639,10 +665,9 @@ static void pfault_interrupt(struct ext_code ext_code,
pid_t pid;
/*
- * Get the external interruption subcode & pfault
- * initial/completion signal bit. VM stores this
- * in the 'cpu address' field associated with the
- * external interrupt.
+ * Get the external interruption subcode & pfault initial/completion
+ * signal bit. VM stores this in the 'cpu address' field associated
+ * with the external interrupt.
*/
subcode = ext_code.subcode;
if ((subcode & 0xff00) != __SUBCODE_MASK)
@@ -658,7 +683,7 @@ static void pfault_interrupt(struct ext_code ext_code,
if (!tsk)
return;
spin_lock(&pfault_lock);
- if (subcode & 0x0080) {
+ if (subcode & PF_COMPLETE) {
/* signal bit is set -> a page has been swapped in by VM */
if (tsk->thread.pfault_wait == 1) {
/* Initial interrupt was faster than the completion
@@ -687,8 +712,7 @@ static void pfault_interrupt(struct ext_code ext_code,
goto out;
if (tsk->thread.pfault_wait == 1) {
/* Already on the list with a reference: put to sleep */
- __set_task_state(tsk, TASK_UNINTERRUPTIBLE);
- set_tsk_need_resched(tsk);
+ goto block;
} else if (tsk->thread.pfault_wait == -1) {
/* Completion interrupt was faster than the initial
* interrupt (pfault_wait == -1). Set pfault_wait
@@ -703,7 +727,11 @@ static void pfault_interrupt(struct ext_code ext_code,
get_task_struct(tsk);
tsk->thread.pfault_wait = 1;
list_add(&tsk->thread.list, &pfault_list);
- __set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+block:
+ /* Since this must be a userspace fault, there
+ * is no kernel task state to trample. Rely on the
+ * return to userspace schedule() to block. */
+ __set_current_state(TASK_UNINTERRUPTIBLE);
set_tsk_need_resched(tsk);
}
}
@@ -712,28 +740,21 @@ out:
put_task_struct(tsk);
}
-static int pfault_cpu_notify(struct notifier_block *self, unsigned long action,
- void *hcpu)
+static int pfault_cpu_dead(unsigned int cpu)
{
struct thread_struct *thread, *next;
struct task_struct *tsk;
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_DEAD:
- spin_lock_irq(&pfault_lock);
- list_for_each_entry_safe(thread, next, &pfault_list, list) {
- thread->pfault_wait = 0;
- list_del(&thread->list);
- tsk = container_of(thread, struct task_struct, thread);
- wake_up_process(tsk);
- put_task_struct(tsk);
- }
- spin_unlock_irq(&pfault_lock);
- break;
- default:
- break;
+ spin_lock_irq(&pfault_lock);
+ list_for_each_entry_safe(thread, next, &pfault_list, list) {
+ thread->pfault_wait = 0;
+ list_del(&thread->list);
+ tsk = container_of(thread, struct task_struct, thread);
+ wake_up_process(tsk);
+ put_task_struct(tsk);
}
- return NOTIFY_OK;
+ spin_unlock_irq(&pfault_lock);
+ return 0;
}
static int __init pfault_irq_init(void)
@@ -747,7 +768,8 @@ static int __init pfault_irq_init(void)
if (rc)
goto out_pfault;
irq_subclass_register(IRQ_SUBCLASS_SERVICE_SIGNAL);
- hotcpu_notifier(pfault_cpu_notify, 0);
+ cpuhp_setup_state_nocalls(CPUHP_S390_PFAULT_DEAD, "s390/pfault:dead",
+ NULL, pfault_cpu_dead);
return 0;
out_pfault: