diff options
author | Stephen Rothwell <sfr@canb.auug.org.au> | 2010-11-30 11:34:46 +1100 |
---|---|---|
committer | Stephen Rothwell <sfr@canb.auug.org.au> | 2010-11-30 11:34:46 +1100 |
commit | b339e5cdf803e85f8c765418ff3f01408a577bb6 (patch) | |
tree | 7ee28c52c6c166e7f8d183b5bb76506caa17e50d /arch | |
parent | 051bc26320414918b69a4513614c3987f3e29bc6 (diff) |
Revert "x86, nmi_watchdog: Remove all stub function calls from old nmi_watchdog"
This reverts commit 072b198a4ad48bd722ec6d203d65422a4698eae7.
Conflicts:
arch/x86/kernel/smpboot.c
Diffstat (limited to 'arch')
-rw-r--r-- | arch/x86/include/asm/nmi.h | 47 | ||||
-rw-r--r-- | arch/x86/include/asm/smpboot_hooks.h | 1 | ||||
-rw-r--r-- | arch/x86/include/asm/timer.h | 6 | ||||
-rw-r--r-- | arch/x86/kernel/apic/apic.c | 15 | ||||
-rw-r--r-- | arch/x86/kernel/apic/hw_nmi.c | 10 | ||||
-rw-r--r-- | arch/x86/kernel/apic/io_apic.c | 46 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event.c | 9 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perfctr-watchdog.c | 642 | ||||
-rw-r--r-- | arch/x86/kernel/smpboot.c | 11 | ||||
-rw-r--r-- | arch/x86/kernel/time.c | 18 | ||||
-rw-r--r-- | arch/x86/kernel/traps.c | 2 | ||||
-rw-r--r-- | arch/x86/oprofile/nmi_timer_int.c | 3 |
12 files changed, 809 insertions, 1 deletions
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index 3545838cddeb..33292ec848ca 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -7,13 +7,35 @@ #ifdef ARCH_HAS_NMI_WATCHDOG +/** + * do_nmi_callback + * + * Check to see if a callback exists and execute it. Return 1 + * if the handler exists and was handled successfully. + */ +int do_nmi_callback(struct pt_regs *regs, int cpu); + extern void die_nmi(char *str, struct pt_regs *regs, int do_panic); +extern int check_nmi_watchdog(void); extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); extern int reserve_perfctr_nmi(unsigned int); extern void release_perfctr_nmi(unsigned int); extern int reserve_evntsel_nmi(unsigned int); extern void release_evntsel_nmi(unsigned int); +extern void setup_apic_nmi_watchdog(void *); +extern void stop_apic_nmi_watchdog(void *); +extern void disable_timer_nmi_watchdog(void); +extern void enable_timer_nmi_watchdog(void); +extern void cpu_nmi_set_wd_enabled(void); + +extern atomic_t nmi_active; +extern unsigned int nmi_watchdog; +#define NMI_NONE 0 +#define NMI_IO_APIC 1 +#define NMI_LOCAL_APIC 2 +#define NMI_INVALID 3 + struct ctl_table; extern int proc_nmi_enabled(struct ctl_table *, int , void __user *, size_t *, loff_t *); @@ -21,8 +43,33 @@ extern int unknown_nmi_panic; void arch_trigger_all_cpu_backtrace(void); #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace + +static inline void localise_nmi_watchdog(void) +{ + if (nmi_watchdog == NMI_IO_APIC) + nmi_watchdog = NMI_LOCAL_APIC; +} + +/* check if nmi_watchdog is active (ie was specified at boot) */ +static inline int nmi_watchdog_active(void) +{ + /* + * actually it should be: + * return (nmi_watchdog == NMI_LOCAL_APIC || + * nmi_watchdog == NMI_IO_APIC) + * but since they are power of two we could use a + * cheaper way --cvg + */ + return nmi_watchdog & (NMI_LOCAL_APIC | NMI_IO_APIC); +} #endif +void lapic_watchdog_stop(void); +int lapic_watchdog_init(unsigned nmi_hz); +int lapic_wd_event(unsigned nmi_hz); +unsigned lapic_adjust_nmi_hz(unsigned hz); +void disable_lapic_nmi_watchdog(void); +void enable_lapic_nmi_watchdog(void); void stop_nmi(void); void restart_nmi(void); diff --git a/arch/x86/include/asm/smpboot_hooks.h b/arch/x86/include/asm/smpboot_hooks.h index 6c22bf353f26..1def60114906 100644 --- a/arch/x86/include/asm/smpboot_hooks.h +++ b/arch/x86/include/asm/smpboot_hooks.h @@ -48,6 +48,7 @@ static inline void __init smpboot_setup_io_apic(void) setup_IO_APIC(); else { nr_ioapics = 0; + localise_nmi_watchdog(); } #endif } diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h index fa7b9176b76c..5469630b27f5 100644 --- a/arch/x86/include/asm/timer.h +++ b/arch/x86/include/asm/timer.h @@ -10,6 +10,12 @@ unsigned long long native_sched_clock(void); extern int recalibrate_cpu_khz(void); +#if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC) +extern int timer_ack; +#else +# define timer_ack (0) +#endif + extern int no_timer_check; /* Accelerators for sched_clock() diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 9f368dcb4a8b..360b37dc1e46 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -31,6 +31,7 @@ #include <linux/init.h> #include <linux/cpu.h> #include <linux/dmi.h> +#include <linux/nmi.h> #include <linux/smp.h> #include <linux/mm.h> @@ -799,7 +800,11 @@ void __init setup_boot_APIC_clock(void) * PIT/HPET going. Otherwise register lapic as a dummy * device. */ - lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; + if (nmi_watchdog != NMI_IO_APIC) + lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; + else + pr_warning("APIC timer registered as dummy," + " due to nmi_watchdog=%d!\n", nmi_watchdog); /* Setup the lapic or request the broadcast */ setup_APIC_timer(); @@ -1383,6 +1388,7 @@ void __cpuinit end_local_APIC_setup(void) } #endif + setup_apic_nmi_watchdog(NULL); apic_pm_activate(); } @@ -1766,10 +1772,17 @@ int __init APIC_init_uniprocessor(void) setup_IO_APIC(); else { nr_ioapics = 0; + localise_nmi_watchdog(); } +#else + localise_nmi_watchdog(); #endif x86_init.timers.setup_percpu_clockev(); +#ifdef CONFIG_X86_64 + check_nmi_watchdog(); +#endif + return 0; } diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index a0e71cb4fa9c..f4a37eb89b49 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -95,4 +95,14 @@ early_initcall(register_trigger_all_cpu_backtrace); #endif /* STUB calls to mimic old nmi_watchdog behaviour */ +#if defined(CONFIG_X86_LOCAL_APIC) +unsigned int nmi_watchdog = NMI_NONE; +EXPORT_SYMBOL(nmi_watchdog); +#endif +atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ +EXPORT_SYMBOL(nmi_active); int unknown_nmi_panic; +void cpu_nmi_set_wd_enabled(void) { return; } +void stop_apic_nmi_watchdog(void *unused) { return; } +void setup_apic_nmi_watchdog(void *unused) { return; } +int __init check_nmi_watchdog(void) { return 0; } diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index ea51151e1ad8..d2e643ddbb95 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -54,6 +54,7 @@ #include <asm/dma.h> #include <asm/timer.h> #include <asm/i8259.h> +#include <asm/nmi.h> #include <asm/msidef.h> #include <asm/hypertransport.h> #include <asm/setup.h> @@ -2642,6 +2643,24 @@ static void lapic_register_intr(int irq) "edge"); } +static void __init setup_nmi(void) +{ + /* + * Dirty trick to enable the NMI watchdog ... + * We put the 8259A master into AEOI mode and + * unmask on all local APICs LVT0 as NMI. + * + * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') + * is from Maciej W. Rozycki - so we do not have to EOI from + * the NMI handler or the timer interrupt. + */ + apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ..."); + + enable_NMI_through_LVT0(); + + apic_printk(APIC_VERBOSE, " done.\n"); +} + /* * This looks a bit hackish but it's about the only one way of sending * a few INTA cycles to 8259As and any associated glue logic. ICR does @@ -2747,6 +2766,15 @@ static inline void __init check_timer(void) */ apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); legacy_pic->init(1); +#ifdef CONFIG_X86_32 + { + unsigned int ver; + + ver = apic_read(APIC_LVR); + ver = GET_APIC_VERSION(ver); + timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); + } +#endif pin1 = find_isa_irq_pin(0, mp_INT); apic1 = find_isa_irq_apic(0, mp_INT); @@ -2794,6 +2822,10 @@ static inline void __init check_timer(void) unmask_ioapic(cfg); } if (timer_irq_works()) { + if (nmi_watchdog == NMI_IO_APIC) { + setup_nmi(); + legacy_pic->unmask(0); + } if (disable_timer_pin_1 > 0) clear_IO_APIC_pin(0, pin1); goto out; @@ -2819,6 +2851,11 @@ static inline void __init check_timer(void) if (timer_irq_works()) { apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); timer_through_8259 = 1; + if (nmi_watchdog == NMI_IO_APIC) { + legacy_pic->mask(0); + setup_nmi(); + legacy_pic->unmask(0); + } goto out; } /* @@ -2830,6 +2867,15 @@ static inline void __init check_timer(void) apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); } + if (nmi_watchdog == NMI_IO_APIC) { + apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work " + "through the IO-APIC - disabling NMI Watchdog!\n"); + nmi_watchdog = NMI_NONE; + } +#ifdef CONFIG_X86_32 + timer_ack = 0; +#endif + apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer as Virtual Wire IRQ...\n"); diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 817d2b195e8e..7231bb95e630 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -330,6 +330,9 @@ static bool reserve_pmc_hardware(void) { int i; + if (nmi_watchdog == NMI_LOCAL_APIC) + disable_lapic_nmi_watchdog(); + for (i = 0; i < x86_pmu.num_counters; i++) { if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) goto perfctr_fail; @@ -352,6 +355,9 @@ perfctr_fail: for (i--; i >= 0; i--) release_perfctr_nmi(x86_pmu.perfctr + i); + if (nmi_watchdog == NMI_LOCAL_APIC) + enable_lapic_nmi_watchdog(); + return false; } @@ -363,6 +369,9 @@ static void release_pmc_hardware(void) release_perfctr_nmi(x86_pmu.perfctr + i); release_evntsel_nmi(x86_pmu.eventsel + i); } + + if (nmi_watchdog == NMI_LOCAL_APIC) + enable_lapic_nmi_watchdog(); } #else diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 14d45928c282..d9f4ff8fcd69 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -22,6 +22,26 @@ #include <asm/apic.h> #include <asm/perf_event.h> +struct nmi_watchdog_ctlblk { + unsigned int cccr_msr; + unsigned int perfctr_msr; /* the MSR to reset in NMI handler */ + unsigned int evntsel_msr; /* the MSR to select the events to handle */ +}; + +/* Interface defining a CPU specific perfctr watchdog */ +struct wd_ops { + int (*reserve)(void); + void (*unreserve)(void); + int (*setup)(unsigned nmi_hz); + void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz); + void (*stop)(void); + unsigned perfctr; + unsigned evntsel; + u64 checkbit; +}; + +static const struct wd_ops *wd_ops; + /* * this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's * offset from MSR_P4_BSU_ESCR0. @@ -40,6 +60,8 @@ static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS); static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS); +static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk); + /* converts an msr to an appropriate reservation bit */ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) { @@ -150,3 +172,623 @@ void release_evntsel_nmi(unsigned int msr) clear_bit(counter, evntsel_nmi_owner); } EXPORT_SYMBOL(release_evntsel_nmi); + +void disable_lapic_nmi_watchdog(void) +{ + BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); + + if (atomic_read(&nmi_active) <= 0) + return; + + on_each_cpu(stop_apic_nmi_watchdog, NULL, 1); + + if (wd_ops) + wd_ops->unreserve(); + + BUG_ON(atomic_read(&nmi_active) != 0); +} + +void enable_lapic_nmi_watchdog(void) +{ + BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); + + /* are we already enabled */ + if (atomic_read(&nmi_active) != 0) + return; + + /* are we lapic aware */ + if (!wd_ops) + return; + if (!wd_ops->reserve()) { + printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n"); + return; + } + + on_each_cpu(setup_apic_nmi_watchdog, NULL, 1); + touch_nmi_watchdog(); +} + +/* + * Activate the NMI watchdog via the local APIC. + */ + +static unsigned int adjust_for_32bit_ctr(unsigned int hz) +{ + u64 counter_val; + unsigned int retval = hz; + + /* + * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter + * are writable, with higher bits sign extending from bit 31. + * So, we can only program the counter with 31 bit values and + * 32nd bit should be 1, for 33.. to be 1. + * Find the appropriate nmi_hz + */ + counter_val = (u64)cpu_khz * 1000; + do_div(counter_val, retval); + if (counter_val > 0x7fffffffULL) { + u64 count = (u64)cpu_khz * 1000; + do_div(count, 0x7fffffffUL); + retval = count + 1; + } + return retval; +} + +static void write_watchdog_counter(unsigned int perfctr_msr, + const char *descr, unsigned nmi_hz) +{ + u64 count = (u64)cpu_khz * 1000; + + do_div(count, nmi_hz); + if (descr) + pr_debug("setting %s to -0x%08Lx\n", descr, count); + wrmsrl(perfctr_msr, 0 - count); +} + +static void write_watchdog_counter32(unsigned int perfctr_msr, + const char *descr, unsigned nmi_hz) +{ + u64 count = (u64)cpu_khz * 1000; + + do_div(count, nmi_hz); + if (descr) + pr_debug("setting %s to -0x%08Lx\n", descr, count); + wrmsr(perfctr_msr, (u32)(-count), 0); +} + +/* + * AMD K7/K8/Family10h/Family11h support. + * AMD keeps this interface nicely stable so there is not much variety + */ +#define K7_EVNTSEL_ENABLE (1 << 22) +#define K7_EVNTSEL_INT (1 << 20) +#define K7_EVNTSEL_OS (1 << 17) +#define K7_EVNTSEL_USR (1 << 16) +#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 +#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING + +static int setup_k7_watchdog(unsigned nmi_hz) +{ + unsigned int perfctr_msr, evntsel_msr; + unsigned int evntsel; + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + + perfctr_msr = wd_ops->perfctr; + evntsel_msr = wd_ops->evntsel; + + wrmsrl(perfctr_msr, 0UL); + + evntsel = K7_EVNTSEL_INT + | K7_EVNTSEL_OS + | K7_EVNTSEL_USR + | K7_NMI_EVENT; + + /* setup the timer */ + wrmsr(evntsel_msr, evntsel, 0); + write_watchdog_counter(perfctr_msr, "K7_PERFCTR0", nmi_hz); + + /* initialize the wd struct before enabling */ + wd->perfctr_msr = perfctr_msr; + wd->evntsel_msr = evntsel_msr; + wd->cccr_msr = 0; /* unused */ + + /* ok, everything is initialized, announce that we're set */ + cpu_nmi_set_wd_enabled(); + + apic_write(APIC_LVTPC, APIC_DM_NMI); + evntsel |= K7_EVNTSEL_ENABLE; + wrmsr(evntsel_msr, evntsel, 0); + + return 1; +} + +static void single_msr_stop_watchdog(void) +{ + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + + wrmsr(wd->evntsel_msr, 0, 0); +} + +static int single_msr_reserve(void) +{ + if (!reserve_perfctr_nmi(wd_ops->perfctr)) + return 0; + + if (!reserve_evntsel_nmi(wd_ops->evntsel)) { + release_perfctr_nmi(wd_ops->perfctr); + return 0; + } + return 1; +} + +static void single_msr_unreserve(void) +{ + release_evntsel_nmi(wd_ops->evntsel); + release_perfctr_nmi(wd_ops->perfctr); +} + +static void __kprobes +single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) +{ + /* start the cycle over again */ + write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz); +} + +static const struct wd_ops k7_wd_ops = { + .reserve = single_msr_reserve, + .unreserve = single_msr_unreserve, + .setup = setup_k7_watchdog, + .rearm = single_msr_rearm, + .stop = single_msr_stop_watchdog, + .perfctr = MSR_K7_PERFCTR0, + .evntsel = MSR_K7_EVNTSEL0, + .checkbit = 1ULL << 47, +}; + +/* + * Intel Model 6 (PPro+,P2,P3,P-M,Core1) + */ +#define P6_EVNTSEL0_ENABLE (1 << 22) +#define P6_EVNTSEL_INT (1 << 20) +#define P6_EVNTSEL_OS (1 << 17) +#define P6_EVNTSEL_USR (1 << 16) +#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79 +#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED + +static int setup_p6_watchdog(unsigned nmi_hz) +{ + unsigned int perfctr_msr, evntsel_msr; + unsigned int evntsel; + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + + perfctr_msr = wd_ops->perfctr; + evntsel_msr = wd_ops->evntsel; + + /* KVM doesn't implement this MSR */ + if (wrmsr_safe(perfctr_msr, 0, 0) < 0) + return 0; + + evntsel = P6_EVNTSEL_INT + | P6_EVNTSEL_OS + | P6_EVNTSEL_USR + | P6_NMI_EVENT; + + /* setup the timer */ + wrmsr(evntsel_msr, evntsel, 0); + nmi_hz = adjust_for_32bit_ctr(nmi_hz); + write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0", nmi_hz); + + /* initialize the wd struct before enabling */ + wd->perfctr_msr = perfctr_msr; + wd->evntsel_msr = evntsel_msr; + wd->cccr_msr = 0; /* unused */ + + /* ok, everything is initialized, announce that we're set */ + cpu_nmi_set_wd_enabled(); + + apic_write(APIC_LVTPC, APIC_DM_NMI); + evntsel |= P6_EVNTSEL0_ENABLE; + wrmsr(evntsel_msr, evntsel, 0); + + return 1; +} + +static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) +{ + /* + * P6 based Pentium M need to re-unmask + * the apic vector but it doesn't hurt + * other P6 variant. + * ArchPerfom/Core Duo also needs this + */ + apic_write(APIC_LVTPC, APIC_DM_NMI); + + /* P6/ARCH_PERFMON has 32 bit counter write */ + write_watchdog_counter32(wd->perfctr_msr, NULL, nmi_hz); +} + +static const struct wd_ops p6_wd_ops = { + .reserve = single_msr_reserve, + .unreserve = single_msr_unreserve, + .setup = setup_p6_watchdog, + .rearm = p6_rearm, + .stop = single_msr_stop_watchdog, + .perfctr = MSR_P6_PERFCTR0, + .evntsel = MSR_P6_EVNTSEL0, + .checkbit = 1ULL << 39, +}; + +/* + * Intel P4 performance counters. + * By far the most complicated of all. + */ +#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1 << 7) +#define P4_ESCR_EVENT_SELECT(N) ((N) << 25) +#define P4_ESCR_OS (1 << 3) +#define P4_ESCR_USR (1 << 2) +#define P4_CCCR_OVF_PMI0 (1 << 26) +#define P4_CCCR_OVF_PMI1 (1 << 27) +#define P4_CCCR_THRESHOLD(N) ((N) << 20) +#define P4_CCCR_COMPLEMENT (1 << 19) +#define P4_CCCR_COMPARE (1 << 18) +#define P4_CCCR_REQUIRED (3 << 16) +#define P4_CCCR_ESCR_SELECT(N) ((N) << 13) +#define P4_CCCR_ENABLE (1 << 12) +#define P4_CCCR_OVF (1 << 31) + +#define P4_CONTROLS 18 +static unsigned int p4_controls[18] = { + MSR_P4_BPU_CCCR0, + MSR_P4_BPU_CCCR1, + MSR_P4_BPU_CCCR2, + MSR_P4_BPU_CCCR3, + MSR_P4_MS_CCCR0, + MSR_P4_MS_CCCR1, + MSR_P4_MS_CCCR2, + MSR_P4_MS_CCCR3, + MSR_P4_FLAME_CCCR0, + MSR_P4_FLAME_CCCR1, + MSR_P4_FLAME_CCCR2, + MSR_P4_FLAME_CCCR3, + MSR_P4_IQ_CCCR0, + MSR_P4_IQ_CCCR1, + MSR_P4_IQ_CCCR2, + MSR_P4_IQ_CCCR3, + MSR_P4_IQ_CCCR4, + MSR_P4_IQ_CCCR5, +}; +/* + * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter + * CRU_ESCR0 (with any non-null event selector) through a complemented + * max threshold. [IA32-Vol3, Section 14.9.9] + */ +static int setup_p4_watchdog(unsigned nmi_hz) +{ + unsigned int perfctr_msr, evntsel_msr, cccr_msr; + unsigned int evntsel, cccr_val; + unsigned int misc_enable, dummy; + unsigned int ht_num; + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + + rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy); + if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL)) + return 0; + +#ifdef CONFIG_SMP + /* detect which hyperthread we are on */ + if (smp_num_siblings == 2) { + unsigned int ebx, apicid; + + ebx = cpuid_ebx(1); + apicid = (ebx >> 24) & 0xff; + ht_num = apicid & 1; + } else +#endif + ht_num = 0; + + /* + * performance counters are shared resources + * assign each hyperthread its own set + * (re-use the ESCR0 register, seems safe + * and keeps the cccr_val the same) + */ + if (!ht_num) { + /* logical cpu 0 */ + perfctr_msr = MSR_P4_IQ_PERFCTR0; + evntsel_msr = MSR_P4_CRU_ESCR0; + cccr_msr = MSR_P4_IQ_CCCR0; + cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4); + + /* + * If we're on the kdump kernel or other situation, we may + * still have other performance counter registers set to + * interrupt and they'll keep interrupting forever because + * of the P4_CCCR_OVF quirk. So we need to ACK all the + * pending interrupts and disable all the registers here, + * before reenabling the NMI delivery. Refer to p4_rearm() + * about the P4_CCCR_OVF quirk. + */ + if (reset_devices) { + unsigned int low, high; + int i; + + for (i = 0; i < P4_CONTROLS; i++) { + rdmsr(p4_controls[i], low, high); + low &= ~(P4_CCCR_ENABLE | P4_CCCR_OVF); + wrmsr(p4_controls[i], low, high); + } + } + } else { + /* logical cpu 1 */ + perfctr_msr = MSR_P4_IQ_PERFCTR1; + evntsel_msr = MSR_P4_CRU_ESCR0; + cccr_msr = MSR_P4_IQ_CCCR1; + + /* Pentium 4 D processors don't support P4_CCCR_OVF_PMI1 */ + if (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask == 4) + cccr_val = P4_CCCR_OVF_PMI0; + else + cccr_val = P4_CCCR_OVF_PMI1; + cccr_val |= P4_CCCR_ESCR_SELECT(4); + } + + evntsel = P4_ESCR_EVENT_SELECT(0x3F) + | P4_ESCR_OS + | P4_ESCR_USR; + + cccr_val |= P4_CCCR_THRESHOLD(15) + | P4_CCCR_COMPLEMENT + | P4_CCCR_COMPARE + | P4_CCCR_REQUIRED; + + wrmsr(evntsel_msr, evntsel, 0); + wrmsr(cccr_msr, cccr_val, 0); + write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz); + + wd->perfctr_msr = perfctr_msr; + wd->evntsel_msr = evntsel_msr; + wd->cccr_msr = cccr_msr; + + /* ok, everything is initialized, announce that we're set */ + cpu_nmi_set_wd_enabled(); + + apic_write(APIC_LVTPC, APIC_DM_NMI); + cccr_val |= P4_CCCR_ENABLE; + wrmsr(cccr_msr, cccr_val, 0); + return 1; +} + +static void stop_p4_watchdog(void) +{ + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + wrmsr(wd->cccr_msr, 0, 0); + wrmsr(wd->evntsel_msr, 0, 0); +} + +static int p4_reserve(void) +{ + if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0)) + return 0; +#ifdef CONFIG_SMP + if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1)) + goto fail1; +#endif + if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0)) + goto fail2; + /* RED-PEN why is ESCR1 not reserved here? */ + return 1; + fail2: +#ifdef CONFIG_SMP + if (smp_num_siblings > 1) + release_perfctr_nmi(MSR_P4_IQ_PERFCTR1); + fail1: +#endif + release_perfctr_nmi(MSR_P4_IQ_PERFCTR0); + return 0; +} + +static void p4_unreserve(void) +{ +#ifdef CONFIG_SMP + if (smp_num_siblings > 1) + release_perfctr_nmi(MSR_P4_IQ_PERFCTR1); +#endif + release_evntsel_nmi(MSR_P4_CRU_ESCR0); + release_perfctr_nmi(MSR_P4_IQ_PERFCTR0); +} + +static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) +{ + unsigned dummy; + /* + * P4 quirks: + * - An overflown perfctr will assert its interrupt + * until the OVF flag in its CCCR is cleared. + * - LVTPC is masked on interrupt and must be + * unmasked by the LVTPC handler. + */ + rdmsrl(wd->cccr_msr, dummy); + dummy &= ~P4_CCCR_OVF; + wrmsrl(wd->cccr_msr, dummy); + apic_write(APIC_LVTPC, APIC_DM_NMI); + /* start the cycle over again */ + write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz); +} + +static const struct wd_ops p4_wd_ops = { + .reserve = p4_reserve, + .unreserve = p4_unreserve, + .setup = setup_p4_watchdog, + .rearm = p4_rearm, + .stop = stop_p4_watchdog, + /* RED-PEN this is wrong for the other sibling */ + .perfctr = MSR_P4_BPU_PERFCTR0, + .evntsel = MSR_P4_BSU_ESCR0, + .checkbit = 1ULL << 39, +}; + +/* + * Watchdog using the Intel architected PerfMon. + * Used for Core2 and hopefully all future Intel CPUs. + */ +#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL +#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK + +static struct wd_ops intel_arch_wd_ops; + +static int setup_intel_arch_watchdog(unsigned nmi_hz) +{ + unsigned int ebx; + union cpuid10_eax eax; + unsigned int unused; + unsigned int perfctr_msr, evntsel_msr; + unsigned int evntsel; + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + + /* + * Check whether the Architectural PerfMon supports + * Unhalted Core Cycles Event or not. + * NOTE: Corresponding bit = 0 in ebx indicates event present. + */ + cpuid(10, &(eax.full), &ebx, &unused, &unused); + if ((eax.split.mask_length < + (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || + (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) + return 0; + + perfctr_msr = wd_ops->perfctr; + evntsel_msr = wd_ops->evntsel; + + wrmsrl(perfctr_msr, 0UL); + + evntsel = ARCH_PERFMON_EVENTSEL_INT + | ARCH_PERFMON_EVENTSEL_OS + | ARCH_PERFMON_EVENTSEL_USR + | ARCH_PERFMON_NMI_EVENT_SEL + | ARCH_PERFMON_NMI_EVENT_UMASK; + + /* setup the timer */ + wrmsr(evntsel_msr, evntsel, 0); + nmi_hz = adjust_for_32bit_ctr(nmi_hz); + write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz); + + wd->perfctr_msr = perfctr_msr; + wd->evntsel_msr = evntsel_msr; + wd->cccr_msr = 0; /* unused */ + + /* ok, everything is initialized, announce that we're set */ + cpu_nmi_set_wd_enabled(); + + apic_write(APIC_LVTPC, APIC_DM_NMI); + evntsel |= ARCH_PERFMON_EVENTSEL_ENABLE; + wrmsr(evntsel_msr, evntsel, 0); + intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1); + return 1; +} + +static struct wd_ops intel_arch_wd_ops __read_mostly = { + .reserve = single_msr_reserve, + .unreserve = single_msr_unreserve, + .setup = setup_intel_arch_watchdog, + .rearm = p6_rearm, + .stop = single_msr_stop_watchdog, + .perfctr = MSR_ARCH_PERFMON_PERFCTR1, + .evntsel = MSR_ARCH_PERFMON_EVENTSEL1, +}; + +static void probe_nmi_watchdog(void) +{ + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + if (boot_cpu_data.x86 == 6 || + (boot_cpu_data.x86 >= 0xf && boot_cpu_data.x86 <= 0x15)) + wd_ops = &k7_wd_ops; + return; + case X86_VENDOR_INTEL: + /* Work around where perfctr1 doesn't have a working enable + * bit as described in the following errata: + * AE49 Core Duo and Intel Core Solo 65 nm + * AN49 Intel Pentium Dual-Core + * AF49 Dual-Core Intel Xeon Processor LV + */ + if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) || + ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 15 && + boot_cpu_data.x86_mask == 4))) { + intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0; + intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0; + } + if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { + wd_ops = &intel_arch_wd_ops; + break; + } + switch (boot_cpu_data.x86) { + case 6: + if (boot_cpu_data.x86_model > 13) + return; + + wd_ops = &p6_wd_ops; + break; + case 15: + wd_ops = &p4_wd_ops; + break; + default: + return; + } + break; + } +} + +/* Interface to nmi.c */ + +int lapic_watchdog_init(unsigned nmi_hz) +{ + if (!wd_ops) { + probe_nmi_watchdog(); + if (!wd_ops) { + printk(KERN_INFO "NMI watchdog: CPU not supported\n"); + return -1; + } + + if (!wd_ops->reserve()) { + printk(KERN_ERR + "NMI watchdog: cannot reserve perfctrs\n"); + return -1; + } + } + + if (!(wd_ops->setup(nmi_hz))) { + printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n", + raw_smp_processor_id()); + return -1; + } + + return 0; +} + +void lapic_watchdog_stop(void) +{ + if (wd_ops) + wd_ops->stop(); +} + +unsigned lapic_adjust_nmi_hz(unsigned hz) +{ + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + if (wd->perfctr_msr == MSR_P6_PERFCTR0 || + wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1) + hz = adjust_for_32bit_ctr(hz); + return hz; +} + +int __kprobes lapic_wd_event(unsigned nmi_hz) +{ + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + u64 ctr; + + rdmsrl(wd->perfctr_msr, ctr); + if (ctr & wd_ops->checkbit) /* perfctr still running? */ + return 0; + + wd_ops->rearm(wd, nmi_hz); + return 1; +} diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 68f61ac632e1..7ca217e4b042 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -323,6 +323,12 @@ notrace static void __cpuinit start_secondary(void *unused) */ check_tsc_sync_target(); + if (nmi_watchdog == NMI_IO_APIC) { + legacy_pic->mask(0); + enable_NMI_through_LVT0(); + legacy_pic->unmask(0); + } + /* * We need to hold call_lock, so there is no inconsistency * between the time smp_call_function() determines number of @@ -1058,6 +1064,8 @@ static int __init smp_sanity_check(unsigned max_cpus) printk(KERN_INFO "SMP mode deactivated.\n"); smpboot_clear_io_apic(); + localise_nmi_watchdog(); + connect_bsp_APIC(); setup_local_APIC(); end_local_APIC_setup(); @@ -1191,6 +1199,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus) #ifdef CONFIG_X86_IO_APIC setup_ioapic_dest(); #endif + check_nmi_watchdog(); mtrr_aps_init(); } @@ -1335,6 +1344,8 @@ int native_cpu_disable(void) if (cpu == 0) return -EBUSY; + if (nmi_watchdog == NMI_LOCAL_APIC) + stop_apic_nmi_watchdog(NULL); clear_local_APIC(); cpu_disable_common(); diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index 25a28a245937..fb5cc5e14cfa 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -22,6 +22,10 @@ #include <asm/hpet.h> #include <asm/time.h> +#if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC) +int timer_ack; +#endif + #ifdef CONFIG_X86_64 volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; #endif @@ -59,6 +63,20 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id) /* Keep nmi watchdog up to date */ inc_irq_stat(irq0_irqs); + /* Optimized out for !IO_APIC and x86_64 */ + if (timer_ack) { + /* + * Subtle, when I/O APICs are used we have to ack timer IRQ + * manually to deassert NMI lines for the watchdog if run + * on an 82489DX-based system. + */ + raw_spin_lock(&i8259A_lock); + outb(0x0c, PIC_MASTER_OCW3); + /* Ack the IRQ; AEOI will end it automatically. */ + inb(PIC_MASTER_POLL); + raw_spin_unlock(&i8259A_lock); + } + global_clock_event->event_handler(global_clock_event); /* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */ diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index f02c179c2552..db30d9cb9dd6 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -437,12 +437,14 @@ do_nmi(struct pt_regs *regs, long error_code) void stop_nmi(void) { + acpi_nmi_disable(); ignore_nmis++; } void restart_nmi(void) { ignore_nmis--; + acpi_nmi_enable(); } /* May run on IST stack. */ diff --git a/arch/x86/oprofile/nmi_timer_int.c b/arch/x86/oprofile/nmi_timer_int.c index 0636dd93cef8..e3ecb71b5790 100644 --- a/arch/x86/oprofile/nmi_timer_int.c +++ b/arch/x86/oprofile/nmi_timer_int.c @@ -58,6 +58,9 @@ static void timer_stop(void) int __init op_nmi_timer_init(struct oprofile_operations *ops) { + if ((nmi_watchdog != NMI_IO_APIC) || (atomic_read(&nmi_active) <= 0)) + return -ENODEV; + ops->start = timer_start; ops->stop = timer_stop; ops->cpu_type = "timer"; |