summaryrefslogtreecommitdiff
path: root/arch/x86/kernel/cpu
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/cpu')
-rw-r--r--arch/x86/kernel/cpu/common.c5
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c44
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-severity.c26
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c179
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c2
-rw-r--r--arch/x86/kernel/cpu/perf_event.h8
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd.c37
7 files changed, 233 insertions, 68 deletions
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index d43cad74f166..c0f7d68d318f 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1044,6 +1044,9 @@ DEFINE_PER_CPU(char *, irq_stack_ptr) =
DEFINE_PER_CPU(unsigned int, irq_count) = -1;
+DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
+EXPORT_PER_CPU_SYMBOL(fpu_owner_task);
+
/*
* Special IST stacks which the CPU switches to when it calls
* an IST-marked descriptor entry. Up to 7 stacks (hardware
@@ -1111,6 +1114,8 @@ void debug_stack_reset(void)
DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
EXPORT_PER_CPU_SYMBOL(current_task);
+DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
+EXPORT_PER_CPU_SYMBOL(fpu_owner_task);
#ifdef CONFIG_CC_STACKPROTECTOR
DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 6b45e5e7a901..73d08ed98a64 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -326,8 +326,7 @@ static void __cpuinit amd_calc_l3_indices(struct amd_northbridge *nb)
l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
}
-static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf,
- int index)
+static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
{
int node;
@@ -725,14 +724,16 @@ static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info);
#define CPUID4_INFO_IDX(x, y) (&((per_cpu(ici_cpuid4_info, x))[y]))
#ifdef CONFIG_SMP
-static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
+
+static int __cpuinit cache_shared_amd_cpu_map_setup(unsigned int cpu, int index)
{
- struct _cpuid4_info *this_leaf, *sibling_leaf;
- unsigned long num_threads_sharing;
- int index_msb, i, sibling;
+ struct _cpuid4_info *this_leaf;
+ int ret, i, sibling;
struct cpuinfo_x86 *c = &cpu_data(cpu);
- if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) {
+ ret = 0;
+ if (index == 3) {
+ ret = 1;
for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
if (!per_cpu(ici_cpuid4_info, i))
continue;
@@ -743,8 +744,35 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
set_bit(sibling, this_leaf->shared_cpu_map);
}
}
- return;
+ } else if ((c->x86 == 0x15) && ((index == 1) || (index == 2))) {
+ ret = 1;
+ for_each_cpu(i, cpu_sibling_mask(cpu)) {
+ if (!per_cpu(ici_cpuid4_info, i))
+ continue;
+ this_leaf = CPUID4_INFO_IDX(i, index);
+ for_each_cpu(sibling, cpu_sibling_mask(cpu)) {
+ if (!cpu_online(sibling))
+ continue;
+ set_bit(sibling, this_leaf->shared_cpu_map);
+ }
+ }
}
+
+ return ret;
+}
+
+static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
+{
+ struct _cpuid4_info *this_leaf, *sibling_leaf;
+ unsigned long num_threads_sharing;
+ int index_msb, i;
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+ if (c->x86_vendor == X86_VENDOR_AMD) {
+ if (cache_shared_amd_cpu_map_setup(cpu, index))
+ return;
+ }
+
this_leaf = CPUID4_INFO_IDX(cpu, index);
num_threads_sharing = 1 + this_leaf->base.eax.split.num_threads_sharing;
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 7395d5f4272d..0c82091b1652 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -54,7 +54,14 @@ static struct severity {
#define MASK(x, y) .mask = x, .result = y
#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
+#define MCI_ADDR (MCI_STATUS_ADDRV|MCI_STATUS_MISCV)
#define MCACOD 0xffff
+/* Architecturally defined codes from SDM Vol. 3B Chapter 15 */
+#define MCACOD_SCRUB 0x00C0 /* 0xC0-0xCF Memory Scrubbing */
+#define MCACOD_SCRUBMSK 0xfff0
+#define MCACOD_L3WB 0x017A /* L3 Explicit Writeback */
+#define MCACOD_DATA 0x0134 /* Data Load */
+#define MCACOD_INSTR 0x0150 /* Instruction Fetch */
MCESEV(
NO, "Invalid",
@@ -102,11 +109,24 @@ static struct severity {
SER, BITCLR(MCI_STATUS_S)
),
- /* AR add known MCACODs here */
MCESEV(
PANIC, "Action required with lost events",
SER, BITSET(MCI_STATUS_OVER|MCI_UC_SAR)
),
+
+ /* known AR MCACODs: */
+#ifdef CONFIG_MEMORY_FAILURE
+ MCESEV(
+ KEEP, "HT thread notices Action required: data load error",
+ SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
+ MCGMASK(MCG_STATUS_EIPV, 0)
+ ),
+ MCESEV(
+ AR, "Action required: data load error",
+ SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
+ USER
+ ),
+#endif
MCESEV(
PANIC, "Action required: unknown MCACOD",
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR)
@@ -115,11 +135,11 @@ static struct severity {
/* known AO MCACODs: */
MCESEV(
AO, "Action optional: memory scrubbing error",
- SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|0xfff0, MCI_UC_S|0x00c0)
+ SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD_SCRUBMSK, MCI_UC_S|MCACOD_SCRUB)
),
MCESEV(
AO, "Action optional: last level cache writeback error",
- SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_S|0x017a)
+ SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_S|MCACOD_L3WB)
),
MCESEV(
SOME, "Action optional: unknown MCACOD",
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 87c56ba8080c..c614bd4de0f3 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -540,6 +540,27 @@ static void mce_report_event(struct pt_regs *regs)
irq_work_queue(&__get_cpu_var(mce_irq_work));
}
+/*
+ * Read ADDR and MISC registers.
+ */
+static void mce_read_aux(struct mce *m, int i)
+{
+ if (m->status & MCI_STATUS_MISCV)
+ m->misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
+ if (m->status & MCI_STATUS_ADDRV) {
+ m->addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));
+
+ /*
+ * Mask the reported address by the reported granularity.
+ */
+ if (mce_ser && (m->status & MCI_STATUS_MISCV)) {
+ u8 shift = MCI_MISC_ADDR_LSB(m->misc);
+ m->addr >>= shift;
+ m->addr <<= shift;
+ }
+ }
+}
+
DEFINE_PER_CPU(unsigned, mce_poll_count);
/*
@@ -590,10 +611,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
(m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)))
continue;
- if (m.status & MCI_STATUS_MISCV)
- m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
- if (m.status & MCI_STATUS_ADDRV)
- m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));
+ mce_read_aux(&m, i);
if (!(flags & MCP_TIMESTAMP))
m.tsc = 0;
@@ -917,6 +935,49 @@ static void mce_clear_state(unsigned long *toclear)
}
/*
+ * Need to save faulting physical address associated with a process
+ * in the machine check handler some place where we can grab it back
+ * later in mce_notify_process()
+ */
+#define MCE_INFO_MAX 16
+
+struct mce_info {
+ atomic_t inuse;
+ struct task_struct *t;
+ __u64 paddr;
+} mce_info[MCE_INFO_MAX];
+
+static void mce_save_info(__u64 addr)
+{
+ struct mce_info *mi;
+
+ for (mi = mce_info; mi < &mce_info[MCE_INFO_MAX]; mi++) {
+ if (atomic_cmpxchg(&mi->inuse, 0, 1) == 0) {
+ mi->t = current;
+ mi->paddr = addr;
+ return;
+ }
+ }
+
+ mce_panic("Too many concurrent recoverable errors", NULL, NULL);
+}
+
+static struct mce_info *mce_find_info(void)
+{
+ struct mce_info *mi;
+
+ for (mi = mce_info; mi < &mce_info[MCE_INFO_MAX]; mi++)
+ if (atomic_read(&mi->inuse) && mi->t == current)
+ return mi;
+ return NULL;
+}
+
+static void mce_clear_info(struct mce_info *mi)
+{
+ atomic_set(&mi->inuse, 0);
+}
+
+/*
* The actual machine check handler. This only handles real
* exceptions when something got corrupted coming in through int 18.
*
@@ -969,7 +1030,9 @@ void do_machine_check(struct pt_regs *regs, long error_code)
barrier();
/*
- * When no restart IP must always kill or panic.
+ * When no restart IP might need to kill or panic.
+ * Assume the worst for now, but if we find the
+ * severity is MCE_AR_SEVERITY we have other options.
*/
if (!(m.mcgstatus & MCG_STATUS_RIPV))
kill_it = 1;
@@ -1023,16 +1086,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
continue;
}
- /*
- * Kill on action required.
- */
- if (severity == MCE_AR_SEVERITY)
- kill_it = 1;
-
- if (m.status & MCI_STATUS_MISCV)
- m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
- if (m.status & MCI_STATUS_ADDRV)
- m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));
+ mce_read_aux(&m, i);
/*
* Action optional error. Queue address for later processing.
@@ -1052,6 +1106,9 @@ void do_machine_check(struct pt_regs *regs, long error_code)
}
}
+ /* mce_clear_state will clear *final, save locally for use later */
+ m = *final;
+
if (!no_way_out)
mce_clear_state(toclear);
@@ -1063,27 +1120,22 @@ void do_machine_check(struct pt_regs *regs, long error_code)
no_way_out = worst >= MCE_PANIC_SEVERITY;
/*
- * If we have decided that we just CAN'T continue, and the user
- * has not set tolerant to an insane level, give up and die.
- *
- * This is mainly used in the case when the system doesn't
- * support MCE broadcasting or it has been disabled.
- */
- if (no_way_out && tolerant < 3)
- mce_panic("Fatal machine check on current CPU", final, msg);
-
- /*
- * If the error seems to be unrecoverable, something should be
- * done. Try to kill as little as possible. If we can kill just
- * one task, do that. If the user has set the tolerance very
- * high, don't try to do anything at all.
+ * At insane "tolerant" levels we take no action. Otherwise
+ * we only die if we have no other choice. For less serious
+ * issues we try to recover, or limit damage to the current
+ * process.
*/
-
- if (kill_it && tolerant < 3)
- force_sig(SIGBUS, current);
-
- /* notify userspace ASAP */
- set_thread_flag(TIF_MCE_NOTIFY);
+ if (tolerant < 3) {
+ if (no_way_out)
+ mce_panic("Fatal machine check on current CPU", &m, msg);
+ if (worst == MCE_AR_SEVERITY) {
+ /* schedule action before return to userland */
+ mce_save_info(m.addr);
+ set_thread_flag(TIF_MCE_NOTIFY);
+ } else if (kill_it) {
+ force_sig(SIGBUS, current);
+ }
+ }
if (worst > 0)
mce_report_event(regs);
@@ -1094,34 +1146,57 @@ out:
}
EXPORT_SYMBOL_GPL(do_machine_check);
-/* dummy to break dependency. actual code is in mm/memory-failure.c */
-void __attribute__((weak)) memory_failure(unsigned long pfn, int vector)
+#ifndef CONFIG_MEMORY_FAILURE
+int memory_failure(unsigned long pfn, int vector, int flags)
{
- printk(KERN_ERR "Action optional memory failure at %lx ignored\n", pfn);
+ /* mce_severity() should not hand us an ACTION_REQUIRED error */
+ BUG_ON(flags & MF_ACTION_REQUIRED);
+ printk(KERN_ERR "Uncorrected memory error in page 0x%lx ignored\n"
+ "Rebuild kernel with CONFIG_MEMORY_FAILURE=y for smarter handling\n", pfn);
+
+ return 0;
}
+#endif
/*
- * Called after mce notification in process context. This code
- * is allowed to sleep. Call the high level VM handler to process
- * any corrupted pages.
- * Assume that the work queue code only calls this one at a time
- * per CPU.
- * Note we don't disable preemption, so this code might run on the wrong
- * CPU. In this case the event is picked up by the scheduled work queue.
- * This is merely a fast path to expedite processing in some common
- * cases.
+ * Called in process context that interrupted by MCE and marked with
+ * TIF_MCE_NOTIFY, just before returning to erroneous userland.
+ * This code is allowed to sleep.
+ * Attempt possible recovery such as calling the high level VM handler to
+ * process any corrupted pages, and kill/signal current process if required.
+ * Action required errors are handled here.
*/
void mce_notify_process(void)
{
unsigned long pfn;
- mce_notify_irq();
- while (mce_ring_get(&pfn))
- memory_failure(pfn, MCE_VECTOR);
+ struct mce_info *mi = mce_find_info();
+
+ if (!mi)
+ mce_panic("Lost physical address for unconsumed uncorrectable error", NULL, NULL);
+ pfn = mi->paddr >> PAGE_SHIFT;
+
+ clear_thread_flag(TIF_MCE_NOTIFY);
+
+ pr_err("Uncorrected hardware memory error in user-access at %llx",
+ mi->paddr);
+ if (memory_failure(pfn, MCE_VECTOR, MF_ACTION_REQUIRED) < 0) {
+ pr_err("Memory error not recovered");
+ force_sig(SIGBUS, current);
+ }
+ mce_clear_info(mi);
}
+/*
+ * Action optional processing happens here (picking up
+ * from the list of faulting pages that do_machine_check()
+ * placed into the "ring").
+ */
static void mce_process_work(struct work_struct *dummy)
{
- mce_notify_process();
+ unsigned long pfn;
+
+ while (mce_ring_get(&pfn))
+ memory_failure(pfn, MCE_VECTOR, 0);
}
#ifdef CONFIG_X86_MCE_INTEL
@@ -1211,8 +1286,6 @@ int mce_notify_irq(void)
/* Not more than two messages every minute */
static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
- clear_thread_flag(TIF_MCE_NOTIFY);
-
if (test_and_clear_bit(0, &mce_need_notify)) {
/* wake processes polling /dev/mcelog */
wake_up_interruptible(&mce_chrdev_wait);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index a4bf9d23cdba..99b57179f912 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -528,6 +528,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
sprintf(name, "threshold_bank%i", bank);
+#ifdef CONFIG_SMP
if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */
i = cpumask_first(cpu_llc_shared_mask(cpu));
@@ -553,6 +554,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
goto out;
}
+#endif
b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL);
if (!b) {
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 8944062f46e2..c30c807ddc72 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -147,7 +147,9 @@ struct cpu_hw_events {
/*
* AMD specific bits
*/
- struct amd_nb *amd_nb;
+ struct amd_nb *amd_nb;
+ /* Inverted mask of bits to clear in the perf_ctr ctrl registers */
+ u64 perf_ctr_virt_mask;
void *kfree_on_online;
};
@@ -417,9 +419,11 @@ void x86_pmu_disable_all(void);
static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
u64 enable_mask)
{
+ u64 disable_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask);
+
if (hwc->extra_reg.reg)
wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config);
- wrmsrl(hwc->config_base, hwc->config | enable_mask);
+ wrmsrl(hwc->config_base, (hwc->config | enable_mask) & ~disable_mask);
}
void x86_pmu_enable_all(int added);
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 0397b23be8e9..67250a52430b 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -1,4 +1,5 @@
#include <linux/perf_event.h>
+#include <linux/export.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/slab.h>
@@ -357,7 +358,9 @@ static void amd_pmu_cpu_starting(int cpu)
struct amd_nb *nb;
int i, nb_id;
- if (boot_cpu_data.x86_max_cores < 2)
+ cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY;
+
+ if (boot_cpu_data.x86_max_cores < 2 || boot_cpu_data.x86 == 0x15)
return;
nb_id = amd_get_nb_id(cpu);
@@ -587,9 +590,9 @@ static __initconst const struct x86_pmu amd_pmu_f15h = {
.put_event_constraints = amd_put_event_constraints,
.cpu_prepare = amd_pmu_cpu_prepare,
- .cpu_starting = amd_pmu_cpu_starting,
.cpu_dead = amd_pmu_cpu_dead,
#endif
+ .cpu_starting = amd_pmu_cpu_starting,
};
__init int amd_pmu_init(void)
@@ -621,3 +624,33 @@ __init int amd_pmu_init(void)
return 0;
}
+
+void amd_pmu_enable_virt(void)
+{
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+ cpuc->perf_ctr_virt_mask = 0;
+
+ /* Reload all events */
+ x86_pmu_disable_all();
+ x86_pmu_enable_all(0);
+}
+EXPORT_SYMBOL_GPL(amd_pmu_enable_virt);
+
+void amd_pmu_disable_virt(void)
+{
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+ /*
+ * We only mask out the Host-only bit so that host-only counting works
+ * when SVM is disabled. If someone sets up a guest-only counter when
+ * SVM is disabled the Guest-only bits still gets set and the counter
+ * will not count anything.
+ */
+ cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY;
+
+ /* Reload all events */
+ x86_pmu_disable_all();
+ x86_pmu_enable_all(0);
+}
+EXPORT_SYMBOL_GPL(amd_pmu_disable_virt);