From 1717f2096b543cede7a380c858c765c41936bc35 Mon Sep 17 00:00:00 2001 From: Hidehiro Kawai Date: Mon, 14 Dec 2015 11:19:09 +0100 Subject: panic, x86: Fix re-entrance problem due to panic on NMI If panic on NMI happens just after panic() on the same CPU, panic() is recursively called. Kernel stalls, as a result, after failing to acquire panic_lock. To avoid this problem, don't call panic() in NMI context if we've already entered panic(). For that, introduce nmi_panic() macro to reduce code duplication. In the case of panic on NMI, don't return from NMI handlers if another CPU already panicked. Signed-off-by: Hidehiro Kawai Acked-by: Michal Hocko Cc: Aaron Tomlin Cc: Andrew Morton Cc: Andy Lutomirski Cc: Baoquan He Cc: Chris Metcalf Cc: David Hildenbrand Cc: Don Zickus Cc: "Eric W. Biederman" Cc: Frederic Weisbecker Cc: Gobinda Charan Maji Cc: HATAYAMA Daisuke Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Javi Merino Cc: Jonathan Corbet Cc: kexec@lists.infradead.org Cc: linux-doc@vger.kernel.org Cc: lkml Cc: Masami Hiramatsu Cc: Michal Nazarewicz Cc: Nicolas Iooss Cc: Peter Zijlstra Cc: Prarit Bhargava Cc: Rasmus Villemoes Cc: Rusty Russell Cc: Seth Jennings Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Ulrich Obergfell Cc: Vitaly Kuznetsov Cc: Vivek Goyal Link: http://lkml.kernel.org/r/20151210014626.25437.13302.stgit@softrs [ Cleanup comments, fixup formatting. ] Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner --- include/linux/kernel.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux/kernel.h') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 350dfb08aee3..750cc5c7c999 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -445,6 +445,26 @@ extern int sysctl_panic_on_stackoverflow; extern bool crash_kexec_post_notifiers; +/* + * panic_cpu is used for synchronizing panic() and crash_kexec() execution. It + * holds a CPU number which is executing panic() currently. 
A value of + * PANIC_CPU_INVALID means no CPU has entered panic() or crash_kexec(). + */ +extern atomic_t panic_cpu; +#define PANIC_CPU_INVALID -1 + +/* + * A variant of panic() called from NMI context. We return if we've already + * panicked on this CPU. + */ +#define nmi_panic(fmt, ...) \ +do { \ + int cpu = raw_smp_processor_id(); \ + \ + if (atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, cpu) != cpu) \ + panic(fmt, ##__VA_ARGS__); \ +} while (0) + /* * Only to be used by arch init code. If the user over-wrote the default * CONFIG_PANIC_TIMEOUT, honor it. -- cgit v1.2.3 From 58c5661f2144c089bbc2e5d87c9ec1dc1d2964fe Mon Sep 17 00:00:00 2001 From: Hidehiro Kawai Date: Mon, 14 Dec 2015 11:19:10 +0100 Subject: panic, x86: Allow CPUs to save registers even if looping in NMI context Currently, kdump_nmi_shootdown_cpus(), a subroutine of crash_kexec(), sends an NMI IPI to CPUs which haven't called panic() to stop them, save their register information and do some cleanups for crash dumping. However, if such a CPU is infinitely looping in NMI context, we fail to save its register information into the crash dump. For example, this can happen when unknown NMIs are broadcast to all CPUs as follows: CPU 0 CPU 1 =========================== ========================== receive an unknown NMI unknown_nmi_error() panic() receive an unknown NMI spin_trylock(&panic_lock) unknown_nmi_error() crash_kexec() panic() spin_trylock(&panic_lock) panic_smp_self_stop() infinite loop kdump_nmi_shootdown_cpus() issue NMI IPI -----------> blocked until IRET infinite loop... Here, since CPU 1 is in NMI context, the second NMI from CPU 0 is blocked until CPU 1 executes IRET. However, CPU 1 never executes IRET, so the NMI is not handled and the callback function to save registers is never called. In practice, this can happen on some servers which broadcast NMIs to all CPUs when the NMI button is pushed. 
To save registers in this case, we need to: a) Return from NMI handler instead of looping infinitely or b) Call the callback function directly from the infinite loop Inherently, a) is risky because NMI is also used to prevent corrupted data from being propagated to devices. So, we chose b). This patch does the following: 1. Move the infinite looping of CPUs which haven't called panic() in NMI context (actually done by panic_smp_self_stop()) outside of panic() to enable us to refer to pt_regs. Please note that panic_smp_self_stop() is still used for normal context. 2. Call a callback of kdump_nmi_shootdown_cpus() directly to save registers and do some cleanups after setting waiting_for_crash_ipi which is used for counting down the number of CPUs which handled the callback. Signed-off-by: Hidehiro Kawai Acked-by: Michal Hocko Cc: Aaron Tomlin Cc: Andrew Morton Cc: Andy Lutomirski Cc: Baoquan He Cc: Chris Metcalf Cc: Dave Young Cc: David Hildenbrand Cc: Don Zickus Cc: Eric Biederman Cc: Frederic Weisbecker Cc: Gobinda Charan Maji Cc: HATAYAMA Daisuke Cc: Hidehiro Kawai Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Javi Merino Cc: Jiang Liu Cc: Jonathan Corbet Cc: kexec@lists.infradead.org Cc: linux-doc@vger.kernel.org Cc: lkml Cc: Masami Hiramatsu Cc: Michal Nazarewicz Cc: Nicolas Iooss Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Prarit Bhargava Cc: Rasmus Villemoes Cc: Seth Jennings Cc: Stefan Lippers-Hollmann Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Ulrich Obergfell Cc: Vitaly Kuznetsov Cc: Vivek Goyal Cc: Yasuaki Ishimatsu Link: http://lkml.kernel.org/r/20151210014628.25437.75256.stgit@softrs [ Cleanup comments, fixup formatting. 
] Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner --- arch/x86/kernel/nmi.c | 6 +++--- arch/x86/kernel/reboot.c | 20 ++++++++++++++++++++ include/linux/kernel.h | 16 ++++++++++++---- kernel/panic.c | 9 +++++++++ kernel/watchdog.c | 2 +- 5 files changed, 45 insertions(+), 8 deletions(-) (limited to 'include/linux/kernel.h') diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index fca87938d739..424aec4a4c71 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -231,7 +231,7 @@ pci_serr_error(unsigned char reason, struct pt_regs *regs) #endif if (panic_on_unrecovered_nmi) - nmi_panic("NMI: Not continuing"); + nmi_panic(regs, "NMI: Not continuing"); pr_emerg("Dazed and confused, but trying to continue\n"); @@ -256,7 +256,7 @@ io_check_error(unsigned char reason, struct pt_regs *regs) show_regs(regs); if (panic_on_io_nmi) { - nmi_panic("NMI IOCK error: Not continuing"); + nmi_panic(regs, "NMI IOCK error: Not continuing"); /* * If we end up here, it means we have received an NMI while @@ -305,7 +305,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) pr_emerg("Do you have a strange power saving mode enabled?\n"); if (unknown_nmi_panic || panic_on_unrecovered_nmi) - nmi_panic("NMI: Not continuing"); + nmi_panic(regs, "NMI: Not continuing"); pr_emerg("Dazed and confused, but trying to continue\n"); } diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 02693dd9a079..1da13022d544 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -718,6 +718,7 @@ static int crashing_cpu; static nmi_shootdown_cb shootdown_callback; static atomic_t waiting_for_crash_ipi; +static int crash_ipi_issued; static int crash_nmi_callback(unsigned int val, struct pt_regs *regs) { @@ -780,6 +781,9 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback) smp_send_nmi_allbutself(); + /* Kick CPUs looping in NMI context. 
*/ + WRITE_ONCE(crash_ipi_issued, 1); + msecs = 1000; /* Wait at most a second for the other cpus to stop */ while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) { mdelay(1); @@ -788,6 +792,22 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback) /* Leave the nmi callback set */ } + +/* Override the weak function in kernel/panic.c */ +void nmi_panic_self_stop(struct pt_regs *regs) +{ + while (1) { + /* + * Wait for the crash dumping IPI to be issued, and then + * call its callback directly. + */ + if (READ_ONCE(crash_ipi_issued)) + crash_nmi_callback(0, regs); /* Don't return */ + + cpu_relax(); + } +} + #else /* !CONFIG_SMP */ void nmi_shootdown_cpus(nmi_shootdown_cb callback) { diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 750cc5c7c999..7311c3294e25 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -255,6 +255,7 @@ extern long (*panic_blink)(int state); __printf(1, 2) void panic(const char *fmt, ...) __noreturn __cold; +void nmi_panic_self_stop(struct pt_regs *); extern void oops_enter(void); extern void oops_exit(void); void print_oops_end_marker(void); @@ -455,14 +456,21 @@ extern atomic_t panic_cpu; /* * A variant of panic() called from NMI context. We return if we've already - * panicked on this CPU. + * panicked on this CPU. If another CPU already panicked, loop in + * nmi_panic_self_stop() which can provide architecture dependent code such + * as saving register state for crash dump. */ -#define nmi_panic(fmt, ...) \ +#define nmi_panic(regs, fmt, ...) 
\ do { \ - int cpu = raw_smp_processor_id(); \ + int old_cpu, cpu; \ \ - if (atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, cpu) != cpu) \ + cpu = raw_smp_processor_id(); \ + old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, cpu); \ + \ + if (old_cpu == PANIC_CPU_INVALID) \ panic(fmt, ##__VA_ARGS__); \ + else if (old_cpu != cpu) \ + nmi_panic_self_stop(regs); \ } while (0) /* diff --git a/kernel/panic.c b/kernel/panic.c index 3344524cf6ff..06f31b49b3b4 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -61,6 +61,15 @@ void __weak panic_smp_self_stop(void) cpu_relax(); } +/* + * Stop ourselves in NMI context if another CPU has already panicked. Arch code + * may override this to prepare for crash dumping, e.g. save regs info. + */ +void __weak nmi_panic_self_stop(struct pt_regs *regs) +{ + panic_smp_self_stop(); +} + atomic_t panic_cpu = ATOMIC_INIT(PANIC_CPU_INVALID); /** diff --git a/kernel/watchdog.c b/kernel/watchdog.c index b9be18fae154..84b5035cb6a5 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -351,7 +351,7 @@ static void watchdog_overflow_callback(struct perf_event *event, trigger_allbutself_cpu_backtrace(); if (hardlockup_panic) - nmi_panic("Hard LOCKUP"); + nmi_panic(regs, "Hard LOCKUP"); __this_cpu_write(hard_watchdog_warn, true); return; -- cgit v1.2.3