From 9b00ac06978c54788f13eefd34a07b77db48d567 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 30 Nov 2011 00:23:10 +0000 Subject: powerpc: Remove broken and complicated kdump system reset code We have a lot of complicated logic that handles possible recursion between kdump and a system reset exception. We can solve this in a much simpler way using the same setjmp/longjmp tricks xmon does. As a first step, this patch removes the old system reset code. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/crash.c | 87 +++++++-------------------------------------- 1 file changed, 12 insertions(+), 75 deletions(-) (limited to 'arch/powerpc/kernel/crash.c') diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index d879809d5c45..3d87b205d5f5 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -47,7 +47,6 @@ /* This keeps a track of which one is crashing cpu. */ int crashing_cpu = -1; static cpumask_t cpus_in_crash = CPU_MASK_NONE; -cpumask_t cpus_in_sr = CPU_MASK_NONE; #define CRASH_HANDLER_MAX 3 /* NULL terminated list of shutdown handles */ @@ -55,7 +54,6 @@ static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX+1]; static DEFINE_SPINLOCK(crash_handlers_lock); #ifdef CONFIG_SMP -static atomic_t enter_on_soft_reset = ATOMIC_INIT(0); void crash_ipi_callback(struct pt_regs *regs) { @@ -69,24 +67,9 @@ void crash_ipi_callback(struct pt_regs *regs) crash_save_cpu(regs, cpu); cpumask_set_cpu(cpu, &cpus_in_crash); - /* - * Entered via soft-reset - could be the kdump - * process is invoked using soft-reset or user activated - * it if some CPU did not respond to an IPI. - * For soft-reset, the secondary CPU can enter this func - * twice. 1 - using IPI, and 2. soft-reset. - * Tell the kexec CPU that entered via soft-reset and ready - * to go down. - */ - if (cpumask_test_cpu(cpu, &cpus_in_sr)) { - cpumask_clear_cpu(cpu, &cpus_in_sr); - atomic_inc(&enter_on_soft_reset); - } - /* * Starting the kdump boot. * This barrier is needed to make sure that all CPUs are stopped. - * If not, soft-reset will be invoked to bring other CPUs. */ while (!cpumask_test_cpu(crashing_cpu, &cpus_in_crash)) cpu_relax(); @@ -103,25 +86,14 @@ void crash_ipi_callback(struct pt_regs *regs) /* NOTREACHED */ } -/* - * Wait until all CPUs are entered via soft-reset. - */ -static void crash_soft_reset_check(int cpu) -{ - unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ - - cpumask_clear_cpu(cpu, &cpus_in_sr); - while (atomic_read(&enter_on_soft_reset) != ncpus) - cpu_relax(); -} - - static void crash_kexec_prepare_cpus(int cpu) { unsigned int msecs; unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ + printk(KERN_EMERG "Sending IPI to other CPUs\n"); + crash_send_ipi(crash_ipi_callback); smp_wmb(); @@ -131,7 +103,6 @@ static void crash_kexec_prepare_cpus(int cpu) * respond. * Delay of at least 10 seconds. */ - printk(KERN_EMERG "Sending IPI to other cpus...\n"); msecs = 10000; while ((cpumask_weight(&cpus_in_crash) < ncpus) && (--msecs > 0)) { cpu_relax(); @@ -140,69 +111,36 @@ static void crash_kexec_prepare_cpus(int cpu) /* Would it be better to replace the trap vector here? */ - /* - * FIXME: In case if we do not get all CPUs, one possibility: ask the - * user to do soft reset such that we get all. - * Soft-reset will be used until better mechanism is implemented. - */ if (cpumask_weight(&cpus_in_crash) < ncpus) { - printk(KERN_EMERG "done waiting: %d cpu(s) not responding\n", + printk(KERN_EMERG "ERROR: %d CPU(s) not responding\n", ncpus - cpumask_weight(&cpus_in_crash)); - printk(KERN_EMERG "Activate soft-reset to stop other cpu(s)\n"); - cpumask_clear(&cpus_in_sr); - atomic_set(&enter_on_soft_reset, 0); - while (cpumask_weight(&cpus_in_crash) < ncpus) - cpu_relax(); } - /* - * Make sure all CPUs are entered via soft-reset if the kdump is - * invoked using soft-reset. - */ - if (cpumask_test_cpu(cpu, &cpus_in_sr)) - crash_soft_reset_check(cpu); - /* Leave the IPI callback set */ + + printk(KERN_EMERG "IPI complete\n"); } /* - * This function will be called by secondary cpus or by kexec cpu - * if soft-reset is activated to stop some CPUs. + * This function will be called by secondary cpus. */ void crash_kexec_secondary(struct pt_regs *regs) { - int cpu = smp_processor_id(); unsigned long flags; - int msecs = 5; + int msecs = 500; local_irq_save(flags); - /* Wait 5ms if the kexec CPU is not entered yet. */ + + /* Wait 500ms for the primary crash CPU to signal its progress */ while (crashing_cpu < 0) { if (--msecs < 0) { - /* - * Either kdump image is not loaded or - * kdump process is not started - Probably xmon - * exited using 'x'(exit and recover) or - * kexec_should_crash() failed for all running tasks. - */ - cpumask_clear_cpu(cpu, &cpus_in_sr); + /* No response, kdump image may not have been loaded */ local_irq_restore(flags); return; } + mdelay(1); cpu_relax(); } - if (cpu == crashing_cpu) { - /* - * Panic CPU will enter this func only via soft-reset. - * Wait until all secondary CPUs entered and - * then start kexec boot. - */ - crash_soft_reset_check(cpu); - cpumask_set_cpu(crashing_cpu, &cpus_in_crash); - if (ppc_md.kexec_cpu_down) - ppc_md.kexec_cpu_down(1, 0); - machine_kexec(kexec_crash_image); - /* NOTREACHED */ - } + crash_ipi_callback(regs); } @@ -225,7 +163,6 @@ static void crash_kexec_prepare_cpus(int cpu) void crash_kexec_secondary(struct pt_regs *regs) { - cpumask_clear(&cpus_in_sr); } #endif /* CONFIG_SMP */ -- cgit v1.2.3 From 07fe0c6132578186773e01ffb0f63ded222effe7 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 30 Nov 2011 00:23:11 +0000 Subject: powerpc/kdump: Use setjmp/longjmp to handle kdump and system reset recursion We can handle recursion caused by system reset by reusing the crash shutdown fault handler. Since we don't have an OS triggerable NMI, if all CPUs don't make it into kdump then we tell the user to issue a system reset. However if we have a panic timeout set we cannot wait forever and must continue the kdump. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/crash.c | 72 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 57 insertions(+), 15 deletions(-) (limited to 'arch/powerpc/kernel/crash.c') diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 3d87b205d5f5..a8b6e2d705a4 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -53,6 +53,16 @@ static cpumask_t cpus_in_crash = CPU_MASK_NONE; static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX+1]; static DEFINE_SPINLOCK(crash_handlers_lock); +static unsigned long crash_shutdown_buf[JMP_BUF_LEN]; +static int crash_shutdown_cpu = -1; + +static int handle_fault(struct pt_regs *regs) +{ + if (crash_shutdown_cpu == smp_processor_id()) + longjmp(crash_shutdown_buf, 1); + return 0; +} + #ifdef CONFIG_SMP void crash_ipi_callback(struct pt_regs *regs) @@ -89,14 +99,16 @@ void crash_ipi_callback(struct pt_regs *regs) static void crash_kexec_prepare_cpus(int cpu) { unsigned int msecs; - unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ + int tries = 0; + int (*old_handler)(struct pt_regs *regs); printk(KERN_EMERG "Sending IPI to other CPUs\n"); crash_send_ipi(crash_ipi_callback); smp_wmb(); +again: /* * FIXME: Until we will have the way to stop other CPUs reliably, * the crash CPU will send an IPI and wait for other CPUs to @@ -111,12 +123,52 @@ static void crash_kexec_prepare_cpus(int cpu) /* Would it be better to replace the trap vector here? */ - if (cpumask_weight(&cpus_in_crash) < ncpus) { - printk(KERN_EMERG "ERROR: %d CPU(s) not responding\n", - ncpus - cpumask_weight(&cpus_in_crash)); + if (cpumask_weight(&cpus_in_crash) >= ncpus) { + printk(KERN_EMERG "IPI complete\n"); + return; + } + + printk(KERN_EMERG "ERROR: %d cpu(s) not responding\n", + ncpus - cpumask_weight(&cpus_in_crash)); + + /* + * If we have a panic timeout set then we can't wait indefinitely + * for someone to activate system reset. We also give up on the + * second time through if system reset fail to work. + */ + if ((panic_timeout > 0) || (tries > 0)) + return; + + /* + * A system reset will cause all CPUs to take an 0x100 exception. + * The primary CPU returns here via setjmp, and the secondary + * CPUs reexecute the crash_kexec_secondary path. + */ + old_handler = __debugger; + __debugger = handle_fault; + crash_shutdown_cpu = smp_processor_id(); + + if (setjmp(crash_shutdown_buf) == 0) { + printk(KERN_EMERG "Activate system reset (dumprestart) " + "to stop other cpu(s)\n"); + + /* + * A system reset will force all CPUs to execute the + * crash code again. We need to reset cpus_in_crash so we + * wait for everyone to do this. + */ + cpus_in_crash = CPU_MASK_NONE; + smp_mb(); + + while (cpumask_weight(&cpus_in_crash) < ncpus) + cpu_relax(); } - printk(KERN_EMERG "IPI complete\n"); + crash_shutdown_cpu = -1; + __debugger = old_handler; + + tries++; + goto again; } /* @@ -245,16 +297,6 @@ int crash_shutdown_unregister(crash_shutdown_t handler) } EXPORT_SYMBOL(crash_shutdown_unregister); -static unsigned long crash_shutdown_buf[JMP_BUF_LEN]; -static int crash_shutdown_cpu = -1; - -static int handle_fault(struct pt_regs *regs) -{ - if (crash_shutdown_cpu == smp_processor_id()) - longjmp(crash_shutdown_buf, 1); - return 0; -} - void default_machine_crash_shutdown(struct pt_regs *regs) { unsigned int i; -- cgit v1.2.3 From 8c27474a252e84e8a71ae4a43e18f0193a08e3f7 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 30 Nov 2011 00:23:12 +0000 Subject: powerpc: Cleanup crash/kexec code Remove some unnecessary defines and fix some spelling mistakes. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/kexec.h | 1 - arch/powerpc/kernel/crash.c | 21 +++------------------ 2 files changed, 3 insertions(+), 19 deletions(-) (limited to 'arch/powerpc/kernel/crash.c') diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index 209ed8ba1637..16d7e33d35e9 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -49,7 +49,6 @@ #define KEXEC_STATE_REAL_MODE 2 #ifndef __ASSEMBLY__ -#include #include typedef void (*crash_shutdown_t)(void); diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index a8b6e2d705a4..d4467557b00e 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -10,41 +10,27 @@ * */ -#undef DEBUG - #include #include #include #include -#include #include #include #include -#include -#include #include #include #include -#include #include #include #include #include #include -#include #include #include #include -#ifdef DEBUG -#include -#define DBG(fmt...) udbg_printf(fmt) -#else -#define DBG(fmt...) -#endif - -/* This keeps a track of which one is crashing cpu. */ +/* This keeps a track of which one is the crashing cpu. */ int crashing_cpu = -1; static cpumask_t cpus_in_crash = CPU_MASK_NONE; @@ -201,7 +187,7 @@ void crash_kexec_secondary(struct pt_regs *regs) static void crash_kexec_prepare_cpus(int cpu) { /* - * move the secondarys to us so that we can copy + * move the secondaries to us so that we can copy * the new kernel 0-0x100 safely * * do this if kexec in setup.c ? @@ -302,7 +288,6 @@ void default_machine_crash_shutdown(struct pt_regs *regs) unsigned int i; int (*old_handler)(struct pt_regs *regs); - /* * This function is only called after the system * has panicked or is otherwise in a critical state. @@ -328,7 +313,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs) machine_kexec_mask_interrupts(); /* - * Call registered shutdown routines savely. Swap out + * Call registered shutdown routines safely. Swap out * __debugger_fault_handler, and replace on exit. */ old_handler = __debugger_fault_handler; -- cgit v1.2.3 From 549e88a134b3b393a4312e8d76628b9260eee57f Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 30 Nov 2011 00:23:16 +0000 Subject: powerpc/kdump: Delay before sending IPI on a system reset If we enter the kdump code via system reset, wait a bit before sending the IPI to capture all secondary CPUs. Without it we race with the hypervisor that is issuing the system reset to each CPU. If the IPI gets there first the system reset oops output then shows the register state of the IPI handler which is not what we want. I took the opportunity to add defines for all the various delays we have. There's no need for cpu_relax when we are doing an mdelay, so remove them too. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/crash.c | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/kernel/crash.c') diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index d4467557b00e..b942980e9650 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -30,6 +30,20 @@ #include #include +/* + * The primary CPU waits a while for all secondary CPUs to enter. This is to + * avoid sending an IPI if the secondary CPUs are entering + * crash_kexec_secondary on their own (eg via a system reset). + * + * The secondary timeout has to be longer than the primary. Both timeouts are + * in milliseconds. + */ +#define PRIMARY_TIMEOUT 500 +#define SECONDARY_TIMEOUT 1000 + +#define IPI_TIMEOUT 10000 +#define REAL_MODE_TIMEOUT 10000 + /* This keeps a track of which one is the crashing cpu. */ int crashing_cpu = -1; static cpumask_t cpus_in_crash = CPU_MASK_NONE; @@ -99,11 +113,9 @@ again: * FIXME: Until we will have the way to stop other CPUs reliably, * the crash CPU will send an IPI and wait for other CPUs to * respond. - * Delay of at least 10 seconds. */ - msecs = 10000; + msecs = IPI_TIMEOUT; while ((cpumask_weight(&cpus_in_crash) < ncpus) && (--msecs > 0)) { - cpu_relax(); mdelay(1); } @@ -163,11 +175,11 @@ again: void crash_kexec_secondary(struct pt_regs *regs) { unsigned long flags; - int msecs = 500; + int msecs = SECONDARY_TIMEOUT; local_irq_save(flags); - /* Wait 500ms for the primary crash CPU to signal its progress */ + /* Wait for the primary crash CPU to signal its progress */ while (crashing_cpu < 0) { if (--msecs < 0) { /* No response, kdump image may not have been loaded */ @@ -176,7 +188,6 @@ void crash_kexec_secondary(struct pt_regs *regs) } mdelay(1); - cpu_relax(); } crash_ipi_callback(regs); @@ -211,7 +222,7 @@ static void crash_kexec_wait_realmode(int cpu) unsigned int msecs; int i; - msecs = 10000; + msecs = REAL_MODE_TIMEOUT; for (i=0; i < nr_cpu_ids && msecs > 0; i++) { if (i == cpu) continue; @@ -306,6 +317,14 @@ void default_machine_crash_shutdown(struct pt_regs *regs) */ crashing_cpu = smp_processor_id(); crash_save_cpu(regs, crashing_cpu); + + /* + * If we came in via system reset, wait a while for the secondary + * CPUs to enter. + */ + if (TRAP(regs) == 0x100) + mdelay(PRIMARY_TIMEOUT); + crash_kexec_prepare_cpus(crashing_cpu); cpumask_set_cpu(crashing_cpu, &cpus_in_crash); crash_kexec_wait_realmode(crashing_cpu); -- cgit v1.2.3 From 2440c01e10f07adcbc2094ba12ae4ad6094bd2b6 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 30 Nov 2011 00:23:17 +0000 Subject: powerpc/kdump: Only save CPU state first time through the secondary CPU capture code We might enter the secondary CPU capture code twice, eg if we have to unstick some CPUs with a system reset. In this case we don't want to overwrite the state on CPUs that had made it into the capture code OK, so use the cpus_state_saved cpumask for that and make it local to crash_ipi_callback. For controlling progress now use atomic_t cpus_in_crash to count how many CPUs have made it into the kdump code, and time_to_dump to tell everyone it's time to dump. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/crash.c | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) (limited to 'arch/powerpc/kernel/crash.c') diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index b942980e9650..28be3452e67a 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -46,7 +46,8 @@ /* This keeps a track of which one is the crashing cpu. */ int crashing_cpu = -1; -static cpumask_t cpus_in_crash = CPU_MASK_NONE; +static atomic_t cpus_in_crash; +static int time_to_dump; #define CRASH_HANDLER_MAX 3 /* NULL terminated list of shutdown handles */ @@ -67,21 +68,27 @@ static int handle_fault(struct pt_regs *regs) void crash_ipi_callback(struct pt_regs *regs) { + static cpumask_t cpus_state_saved = CPU_MASK_NONE; + int cpu = smp_processor_id(); if (!cpu_online(cpu)) return; hard_irq_disable(); - if (!cpumask_test_cpu(cpu, &cpus_in_crash)) + if (!cpumask_test_cpu(cpu, &cpus_state_saved)) { crash_save_cpu(regs, cpu); - cpumask_set_cpu(cpu, &cpus_in_crash); + cpumask_set_cpu(cpu, &cpus_state_saved); + } + + atomic_inc(&cpus_in_crash); + smp_mb__after_atomic_inc(); /* * Starting the kdump boot. * This barrier is needed to make sure that all CPUs are stopped. */ - while (!cpumask_test_cpu(crashing_cpu, &cpus_in_crash)) + while (!time_to_dump) cpu_relax(); if (ppc_md.kexec_cpu_down) @@ -115,19 +122,18 @@ again: * respond. */ msecs = IPI_TIMEOUT; - while ((cpumask_weight(&cpus_in_crash) < ncpus) && (--msecs > 0)) { + while ((atomic_read(&cpus_in_crash) < ncpus) && (--msecs > 0)) mdelay(1); - } /* Would it be better to replace the trap vector here? */ - if (cpumask_weight(&cpus_in_crash) >= ncpus) { + if (atomic_read(&cpus_in_crash) >= ncpus) { printk(KERN_EMERG "IPI complete\n"); return; } printk(KERN_EMERG "ERROR: %d cpu(s) not responding\n", - ncpus - cpumask_weight(&cpus_in_crash)); + ncpus - atomic_read(&cpus_in_crash)); /* * If we have a panic timeout set then we can't wait indefinitely @@ -155,10 +161,10 @@ again: * crash code again. We need to reset cpus_in_crash so we * wait for everyone to do this. */ - cpus_in_crash = CPU_MASK_NONE; + atomic_set(&cpus_in_crash, 0); smp_mb(); - while (cpumask_weight(&cpus_in_crash) < ncpus) + while (atomic_read(&cpus_in_crash) < ncpus) cpu_relax(); } @@ -316,7 +322,6 @@ void default_machine_crash_shutdown(struct pt_regs *regs) * such that another IPI will not be sent. */ crashing_cpu = smp_processor_id(); - crash_save_cpu(regs, crashing_cpu); /* * If we came in via system reset, wait a while for the secondary @@ -326,7 +331,11 @@ void default_machine_crash_shutdown(struct pt_regs *regs) mdelay(PRIMARY_TIMEOUT); crash_kexec_prepare_cpus(crashing_cpu); - cpumask_set_cpu(crashing_cpu, &cpus_in_crash); + + crash_save_cpu(regs, crashing_cpu); + + time_to_dump = 1; + crash_kexec_wait_realmode(crashing_cpu); machine_kexec_mask_interrupts(); -- cgit v1.2.3 From 897e01a08c08d86bc76bebb0ca14588b406500e5 Mon Sep 17 00:00:00 2001 From: Christian Kujau Date: Tue, 17 Jan 2012 19:13:05 +0000 Subject: powerpc/crash: Fix build error without SMP I could not find cpus_in_crash anywhere in the sourcetree, except for arch/powerpc/kernel/crash.c. Moving the definition into the CONFIG_SMP fixes it. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/crash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel/crash.c') diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 28be3452e67a..abef75176c07 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -46,7 +46,6 @@ /* This keeps a track of which one is the crashing cpu. */ int crashing_cpu = -1; -static atomic_t cpus_in_crash; static int time_to_dump; #define CRASH_HANDLER_MAX 3 @@ -66,6 +65,7 @@ static int handle_fault(struct pt_regs *regs) #ifdef CONFIG_SMP +static atomic_t cpus_in_crash; void crash_ipi_callback(struct pt_regs *regs) { static cpumask_t cpus_state_saved = CPU_MASK_NONE; -- cgit v1.2.3