summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
author Stephen Rothwell <sfr@canb.auug.org.au> 2008-07-25 15:49:02 +1000
committer Stephen Rothwell <sfr@canb.auug.org.au> 2008-07-25 15:49:02 +1000
commit aeb852cd5f44f865d679bd756c624c67716deef2 (patch)
tree 14cc3ec6bc854777ae41e4c092f1475aae4ba1c0 /arch
parent c1fce54cb25c1c47a14e2cd16421482d413eb22e (diff)
parent 996e75a47a1a54e6da02aea22fda1a73e99a8cbe (diff)
Merge commit 'kmemcheck/auto-kmemcheck-next'
Conflicts: kernel/sysctl.c
Diffstat (limited to 'arch')
-rw-r--r-- arch/x86/Kconfig.debug 109
-rw-r--r-- arch/x86/kernel/process.c 2
-rw-r--r-- arch/x86/kernel/process_32.c 4
-rw-r--r-- arch/x86/kernel/process_64.c 12
-rw-r--r-- arch/x86/kernel/stacktrace.c 7
-rw-r--r-- arch/x86/kernel/traps_32.c 11
-rw-r--r-- arch/x86/kernel/traps_64.c 11
-rw-r--r-- arch/x86/mm/Makefile 2
-rw-r--r-- arch/x86/mm/fault.c 18
-rw-r--r-- arch/x86/mm/kmemcheck/Makefile 3
-rw-r--r-- arch/x86/mm/kmemcheck/error.c 216
-rw-r--r-- arch/x86/mm/kmemcheck/error.h 15
-rw-r--r-- arch/x86/mm/kmemcheck/kmemcheck.c 485
-rw-r--r-- arch/x86/mm/kmemcheck/opcode.c 72
-rw-r--r-- arch/x86/mm/kmemcheck/opcode.h 9
-rw-r--r-- arch/x86/mm/kmemcheck/pte.c 22
-rw-r--r-- arch/x86/mm/kmemcheck/pte.h 10
-rw-r--r-- arch/x86/mm/kmemcheck/shadow.c 176
-rw-r--r-- arch/x86/mm/kmemcheck/shadow.h 16
-rw-r--r-- arch/x86/mm/kmemcheck/smp.c 80
-rw-r--r-- arch/x86/mm/kmemcheck/smp.h 23
-rw-r--r-- arch/x86/mm/kmemcheck/string.c 95
22 files changed, 1386 insertions, 12 deletions
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index ab46c9a701e5..1d43119d7bb2 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -275,6 +275,115 @@ config DEFAULT_IO_DELAY_TYPE
default IO_DELAY_TYPE_NONE
endif
+menuconfig KMEMCHECK
+ bool "kmemcheck: trap use of uninitialized memory"
+ depends on X86
+ depends on !X86_USE_3DNOW
+ depends on SLUB || (SLAB && !DEBUG_SLAB)
+ depends on !CC_OPTIMIZE_FOR_SIZE
+ depends on !DEBUG_PAGEALLOC
+ select FRAME_POINTER
+ select STACKTRACE
+ default n
+ help
+ This option enables tracing of dynamically allocated kernel memory
+ to see if memory is used before it has been given an initial value.
+ Be aware that this requires half of your memory for bookkeeping and
+ will insert extra code at *every* read and write to tracked memory
+ and thus slow down the kernel code (but user code is unaffected).
+
+ The kernel may be started with kmemcheck=0 or kmemcheck=1 to disable
+ or enable kmemcheck at boot-time. If the kernel is started with
+ kmemcheck=0, the large memory and CPU overhead is not incurred.
+
+choice
+ prompt "kmemcheck: default mode at boot"
+ depends on KMEMCHECK
+ default KMEMCHECK_ONESHOT_BY_DEFAULT
+ help
+ This option controls the default behaviour of kmemcheck when the
+ kernel boots and no kmemcheck= parameter is given.
+
+config KMEMCHECK_DISABLED_BY_DEFAULT
+ bool "disabled"
+ depends on KMEMCHECK
+
+config KMEMCHECK_ENABLED_BY_DEFAULT
+ bool "enabled"
+ depends on KMEMCHECK
+
+config KMEMCHECK_ONESHOT_BY_DEFAULT
+ bool "one-shot"
+ depends on KMEMCHECK
+ help
+ In one-shot mode, only the first error detected is reported before
+ kmemcheck is disabled.
+
+endchoice
+
+config KMEMCHECK_USE_SMP
+ bool "kmemcheck: use multiple CPUs"
+ depends on KMEMCHECK
+ depends on SMP
+ depends on BROKEN
+ default n
+ help
+ This option will prevent kmemcheck from disabling all but one CPU
+ on boot. This means that whenever a page fault is taken, all the
+ other CPUs in the system are halted. This is potentially extremely
+ expensive, depending on the number of CPUs in the system (the more
+ the worse).
+
+ The upside is that kmemcheck can be compiled into the kernel with
+ very little overhead by default if kmemcheck is disabled at run-
+ time.
+
+ If you want to compile a kernel specifically for the purpose of
+ playing with kmemcheck, you should say N here. If you want a normal
+ kernel with the possibility of enabling kmemcheck without
+ recompiling, you should say Y here.
+
+config KMEMCHECK_QUEUE_SIZE
+ int "kmemcheck: error queue size"
+ depends on KMEMCHECK
+ default 64
+ help
+ Select the maximum number of errors to store in the queue. This
+ queue will be emptied once every second, so this is effectively a
+ limit on how many reports to print in one go. Note however, that
+ if the number of errors occurring between two bursts is larger than
+ this number, the extra error reports will get lost.
+
+config KMEMCHECK_SHADOW_COPY_SHIFT
+ int "kmemcheck: shadow copy size (5 => 32 bytes, 6 => 64 bytes)"
+ depends on KMEMCHECK
+ range 2 8
+ default 6
+ help
+ Select the number of shadow bytes to save along with each entry of
+ the queue. These bytes indicate what parts of an allocation are
+ initialized, uninitialized, etc. and will be displayed when an
+ error is detected to help the debugging of a particular problem.
+
+config KMEMCHECK_PARTIAL_OK
+ bool "kmemcheck: allow partially uninitialized memory"
+ depends on KMEMCHECK
+ default y
+ help
+ This option works around certain GCC optimizations that produce
+ 32-bit reads from 16-bit variables where the upper 16 bits are
+ thrown away afterwards. This may of course also hide some real
+ bugs.
+
+config KMEMCHECK_BITOPS_OK
+ bool "kmemcheck: allow bit-field manipulation"
+ depends on KMEMCHECK
+ default n
+ help
+ This option silences warnings that would be generated for bit-field
+ accesses where not all the bits are initialized at the same time.
+ This may also hide some real bugs.
+
config DEBUG_BOOT_PARAMS
bool "Debug boot parameters"
depends on DEBUG_KERNEL
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 7fc4d5b0a6a0..472348922d62 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -50,7 +50,7 @@ void arch_task_cache_init(void)
task_xstate_cachep =
kmem_cache_create("task_xstate", xstate_size,
__alignof__(union thread_xstate),
- SLAB_PANIC, NULL);
+ SLAB_PANIC | SLAB_NOTRACK, NULL);
}
/*
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 53bc653ed5ca..09cd686c8860 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -154,7 +154,7 @@ void cpu_idle(void)
}
}
-void __show_registers(struct pt_regs *regs, int all)
+void __show_regs(struct pt_regs *regs, int all)
{
unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
unsigned long d0, d1, d2, d3, d6, d7;
@@ -215,7 +215,7 @@ void __show_registers(struct pt_regs *regs, int all)
void show_regs(struct pt_regs *regs)
{
- __show_registers(regs, 1);
+ __show_regs(regs, 1);
show_trace(NULL, regs, &regs->sp, regs->bp);
}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 35a06ca4e4bc..9abe0279534a 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -164,7 +164,7 @@ void cpu_idle(void)
}
/* Prints also some state that isn't saved in the pt_regs */
-void __show_regs(struct pt_regs * regs)
+void __show_regs(struct pt_regs * regs, int all)
{
unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
unsigned long d0, d1, d2, d3, d6, d7;
@@ -203,13 +203,17 @@ void __show_regs(struct pt_regs * regs)
rdmsrl(MSR_GS_BASE, gs);
rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
+ printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
+ fs,fsindex,gs,gsindex,shadowgs);
+
+ if (!all)
+ return;
+
cr0 = read_cr0();
cr2 = read_cr2();
cr3 = read_cr3();
cr4 = read_cr4();
- printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
- fs,fsindex,gs,gsindex,shadowgs);
printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);
@@ -226,7 +230,7 @@ void __show_regs(struct pt_regs * regs)
void show_regs(struct pt_regs *regs)
{
printk("CPU %d:", smp_processor_id());
- __show_regs(regs);
+ __show_regs(regs, 1);
show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
}
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index a03e7f6d90c3..d1d850a8c3f5 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -76,6 +76,13 @@ void save_stack_trace(struct stack_trace *trace)
}
EXPORT_SYMBOL_GPL(save_stack_trace);
+void save_stack_trace_bp(struct stack_trace *trace, unsigned long bp)
+{
+ dump_trace(current, NULL, NULL, bp, &save_stack_ops, trace);
+ if (trace->nr_entries < trace->max_entries)
+ trace->entries[trace->nr_entries++] = ULONG_MAX;
+}
+
void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
{
dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace);
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index d87ad8ad395e..ff469b585ca9 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -59,6 +59,7 @@
#include <asm/smp.h>
#include <asm/io.h>
#include <asm/traps.h>
+#include <asm/kmemcheck.h>
#include "mach_traps.h"
@@ -311,7 +312,7 @@ void show_registers(struct pt_regs *regs)
int i;
print_modules();
- __show_registers(regs, 0);
+ __show_regs(regs, 0);
printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)",
TASK_COMM_LEN, current->comm, task_pid_nr(current),
@@ -897,6 +898,14 @@ void __kprobes do_debug(struct pt_regs *regs, long error_code)
get_debugreg(condition, 6);
+ /* Catch kmemcheck conditions first of all! */
+ if (condition & DR_STEP) {
+ if (kmemcheck_active(regs)) {
+ kmemcheck_hide(regs);
+ return;
+ }
+ }
+
/*
* The processor cleared BTF, so don't mark that we need it set.
*/
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index 8d961fcce9b8..cb436d8820a0 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -48,6 +48,7 @@
#include <asm/nmi.h>
#include <asm/smp.h>
#include <asm/io.h>
+#include <asm/kmemcheck.h>
#include <asm/pgalloc.h>
#include <asm/proto.h>
#include <asm/pda.h>
@@ -426,7 +427,7 @@ void show_registers(struct pt_regs *regs)
sp = regs->sp;
printk("CPU %d ", cpu);
- __show_regs(regs);
+ __show_regs(regs, 1);
printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
cur->comm, cur->pid, task_thread_info(cur), cur);
@@ -903,6 +904,14 @@ asmlinkage void __kprobes do_debug(struct pt_regs * regs,
get_debugreg(condition, 6);
+ /* Catch kmemcheck conditions first of all! */
+ if (condition & DR_STEP) {
+ if (kmemcheck_active(regs)) {
+ kmemcheck_hide(regs);
+ return;
+ }
+ }
+
/*
* The processor cleared BTF, so don't mark that we need it set.
*/
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 1fbb844c3d7a..b371052cc976 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -8,6 +8,8 @@ obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o
obj-$(CONFIG_HIGHMEM) += highmem_32.o
+obj-$(CONFIG_KMEMCHECK) += kmemcheck/
+
obj-$(CONFIG_MMIOTRACE_HOOKS) += kmmio.o
obj-$(CONFIG_MMIOTRACE) += mmiotrace.o
mmiotrace-y := pf_in.o mmio-mod.o
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 302e5feddfea..28623cd667be 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -35,6 +35,7 @@
#include <asm/smp.h>
#include <asm/tlbflush.h>
#include <asm/proto.h>
+#include <asm/kmemcheck.h>
#include <asm-generic/sections.h>
/*
@@ -610,6 +611,13 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
si_code = SEGV_MAPERR;
+ /*
+ * Detect and handle instructions that would cause a page fault for
+ * both a tracked kernel page and a userspace page.
+ */
+ if(kmemcheck_active(regs))
+ kmemcheck_hide(regs);
+
if (notify_page_fault(regs))
return;
if (unlikely(kmmio_fault(regs, address)))
@@ -633,9 +641,13 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
#else
if (unlikely(address >= TASK_SIZE64)) {
#endif
- if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
- vmalloc_fault(address) >= 0)
- return;
+ if (!(error_code & (PF_RSVD | PF_USER | PF_PROT))) {
+ if (vmalloc_fault(address) >= 0)
+ return;
+
+ if (kmemcheck_fault(regs, address, error_code))
+ return;
+ }
/* Can handle a stale RO->RW TLB */
if (spurious_fault(address, error_code))
diff --git a/arch/x86/mm/kmemcheck/Makefile b/arch/x86/mm/kmemcheck/Makefile
new file mode 100644
index 000000000000..f888b5c934be
--- /dev/null
+++ b/arch/x86/mm/kmemcheck/Makefile
@@ -0,0 +1,3 @@
+obj-y := error.o kmemcheck.o opcode.o pte.o shadow.o string.o
+
+obj-$(CONFIG_KMEMCHECK_USE_SMP) += smp.o
diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c
new file mode 100644
index 000000000000..56410c63b465
--- /dev/null
+++ b/arch/x86/mm/kmemcheck/error.c
@@ -0,0 +1,216 @@
+#include <linux/interrupt.h>
+#include <linux/kdebug.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/stacktrace.h>
+#include <linux/string.h>
+
+#include "error.h"
+#include "shadow.h"
+
+enum kmemcheck_error_type {
+ KMEMCHECK_ERROR_INVALID_ACCESS,
+ KMEMCHECK_ERROR_BUG,
+};
+
+#define SHADOW_COPY_SIZE (1 << CONFIG_KMEMCHECK_SHADOW_COPY_SHIFT)
+
+struct kmemcheck_error {
+ enum kmemcheck_error_type type;
+
+ union {
+ /* KMEMCHECK_ERROR_INVALID_ACCESS */
+ struct {
+ /* Shadow state of the memory that was accessed */
+ enum kmemcheck_shadow state;
+ /* Address and size of the erroneous read */
+ unsigned long address;
+ unsigned int size;
+ };
+ };
+
+ struct pt_regs regs;
+ struct stack_trace trace;
+ unsigned long trace_entries[32];
+
+ /* We compress it to a char. */
+ unsigned char shadow_copy[SHADOW_COPY_SIZE];
+};
+
+/*
+ * Create a ring queue of errors to output. We can't call printk() directly
+ * from the kmemcheck traps, since this may call the console drivers and
+ * result in a recursive fault.
+ */
+static struct kmemcheck_error error_fifo[CONFIG_KMEMCHECK_QUEUE_SIZE];
+static unsigned int error_count;
+static unsigned int error_rd;
+static unsigned int error_wr;
+static unsigned int error_missed_count;
+
+static struct kmemcheck_error *error_next_wr(void)
+{
+ struct kmemcheck_error *e;
+
+ if (error_count == ARRAY_SIZE(error_fifo)) {
+ ++error_missed_count;
+ return NULL;
+ }
+
+ e = &error_fifo[error_wr];
+ if (++error_wr == ARRAY_SIZE(error_fifo))
+ error_wr = 0;
+ ++error_count;
+ return e;
+}
+
+static struct kmemcheck_error *error_next_rd(void)
+{
+ struct kmemcheck_error *e;
+
+ if (error_count == 0)
+ return NULL;
+
+ e = &error_fifo[error_rd];
+ if (++error_rd == ARRAY_SIZE(error_fifo))
+ error_rd = 0;
+ --error_count;
+ return e;
+}
+
+static void do_wakeup(unsigned long);
+static DECLARE_TASKLET(kmemcheck_tasklet, &do_wakeup, 0);
+
+/*
+ * Save the context of an error report.
+ */
+void kmemcheck_error_save(enum kmemcheck_shadow state,
+ unsigned long address, unsigned int size, struct pt_regs *regs)
+{
+ static unsigned long prev_ip;
+
+ struct kmemcheck_error *e;
+ enum shadow *shadow_copy;
+
+ /* Don't report several adjacent errors from the same EIP. */
+ if (regs->ip == prev_ip)
+ return;
+ prev_ip = regs->ip;
+
+ e = error_next_wr();
+ if (!e)
+ return;
+
+ e->type = KMEMCHECK_ERROR_INVALID_ACCESS;
+
+ e->state = state;
+ e->address = address;
+ e->size = size;
+
+ /* Save regs */
+ memcpy(&e->regs, regs, sizeof(*regs));
+
+ /* Save stack trace */
+ e->trace.nr_entries = 0;
+ e->trace.entries = e->trace_entries;
+ e->trace.max_entries = ARRAY_SIZE(e->trace_entries);
+ e->trace.skip = 0;
+ save_stack_trace_bp(&e->trace, regs->bp);
+
+ /* Round address down to a multiple of SHADOW_COPY_SIZE */
+ shadow_copy = kmemcheck_shadow_lookup(address
+ & ~(SHADOW_COPY_SIZE - 1));
+ BUG_ON(!shadow_copy);
+
+ memcpy(e->shadow_copy, shadow_copy, SHADOW_COPY_SIZE);
+
+ tasklet_hi_schedule_first(&kmemcheck_tasklet);
+}
+
+/*
+ * Save the context of a kmemcheck bug.
+ */
+void kmemcheck_error_save_bug(struct pt_regs *regs)
+{
+ struct kmemcheck_error *e;
+
+ e = error_next_wr();
+ if (!e)
+ return;
+
+ e->type = KMEMCHECK_ERROR_BUG;
+
+ memcpy(&e->regs, regs, sizeof(*regs));
+
+ e->trace.nr_entries = 0;
+ e->trace.entries = e->trace_entries;
+ e->trace.max_entries = ARRAY_SIZE(e->trace_entries);
+ e->trace.skip = 1;
+ save_stack_trace(&e->trace);
+
+ tasklet_hi_schedule_first(&kmemcheck_tasklet);
+}
+
+void kmemcheck_error_recall(void)
+{
+ static const char *desc[] = {
+ [KMEMCHECK_SHADOW_UNALLOCATED] = "unallocated",
+ [KMEMCHECK_SHADOW_UNINITIALIZED] = "uninitialized",
+ [KMEMCHECK_SHADOW_INITIALIZED] = "initialized",
+ [KMEMCHECK_SHADOW_FREED] = "freed",
+ };
+
+ static const char short_desc[] = {
+ [KMEMCHECK_SHADOW_UNALLOCATED] = 'a',
+ [KMEMCHECK_SHADOW_UNINITIALIZED] = 'u',
+ [KMEMCHECK_SHADOW_INITIALIZED] = 'i',
+ [KMEMCHECK_SHADOW_FREED] = 'f',
+ };
+
+ struct kmemcheck_error *e;
+ unsigned int i;
+
+ e = error_next_rd();
+ if (!e)
+ return;
+
+ switch (e->type) {
+ case KMEMCHECK_ERROR_INVALID_ACCESS:
+ printk(KERN_ERR "kmemcheck: Caught %d-bit read "
+ "from %s memory (%p)\n",
+ e->size, e->state < ARRAY_SIZE(desc) ?
+ desc[e->state] : "(invalid shadow state)",
+ (void *) e->address);
+
+ printk(KERN_INFO);
+ for (i = 0; i < SHADOW_COPY_SIZE; ++i) {
+ if (e->shadow_copy[i] < ARRAY_SIZE(short_desc))
+ printk("%c", short_desc[e->shadow_copy[i]]);
+ else
+ printk("?");
+ }
+ printk("\n");
+ printk(KERN_INFO "%*c\n",
+ 1 + (int) (e->address & (SHADOW_COPY_SIZE - 1)), '^');
+ break;
+ case KMEMCHECK_ERROR_BUG:
+ printk(KERN_EMERG "kmemcheck: Fatal error\n");
+ break;
+ }
+
+ __show_regs(&e->regs, 1);
+ print_stack_trace(&e->trace, 0);
+}
+
+static void do_wakeup(unsigned long data)
+{
+ while (error_count > 0)
+ kmemcheck_error_recall();
+
+ if (error_missed_count > 0) {
+ printk(KERN_WARNING "kmemcheck: Lost %d error reports because "
+ "the queue was too small\n", error_missed_count);
+ error_missed_count = 0;
+ }
+}
diff --git a/arch/x86/mm/kmemcheck/error.h b/arch/x86/mm/kmemcheck/error.h
new file mode 100644
index 000000000000..0efc2e8d0a20
--- /dev/null
+++ b/arch/x86/mm/kmemcheck/error.h
@@ -0,0 +1,15 @@
+#ifndef ARCH__X86__MM__KMEMCHECK__ERROR_H
+#define ARCH__X86__MM__KMEMCHECK__ERROR_H
+
+#include <linux/ptrace.h>
+
+#include "shadow.h"
+
+void kmemcheck_error_save(enum kmemcheck_shadow state,
+ unsigned long address, unsigned int size, struct pt_regs *regs);
+
+void kmemcheck_error_save_bug(struct pt_regs *regs);
+
+void kmemcheck_error_recall(void);
+
+#endif
diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c
new file mode 100644
index 000000000000..37949c3a5859
--- /dev/null
+++ b/arch/x86/mm/kmemcheck/kmemcheck.c
@@ -0,0 +1,485 @@
+/**
+ * kmemcheck - a heavyweight memory checker for the linux kernel
+ * Copyright (C) 2007, 2008 Vegard Nossum <vegardno@ifi.uio.no>
+ * (With a lot of help from Ingo Molnar and Pekka Enberg.)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2) as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/kallsyms.h>
+#include <linux/kernel.h>
+#include <linux/kmemcheck.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/page-flags.h>
+#include <linux/percpu.h>
+#include <linux/ptrace.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#include <asm/cacheflush.h>
+#include <asm/kmemcheck.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+
+#include "error.h"
+#include "opcode.h"
+#include "pte.h"
+#include "shadow.h"
+#include "smp.h"
+
+void __init kmemcheck_init(void)
+{
+ printk(KERN_INFO "kmemcheck: \"Bugs, beware!\"\n");
+
+ kmemcheck_smp_init();
+
+#if defined(CONFIG_SMP) && !defined(CONFIG_KMEMCHECK_USE_SMP)
+ /*
+ * Limit SMP to use a single CPU. We rely on the fact that this code
+ * runs before SMP is set up.
+ */
+ if (setup_max_cpus > 1) {
+ printk(KERN_INFO
+ "kmemcheck: Limiting number of CPUs to 1.\n");
+ setup_max_cpus = 1;
+ }
+#endif
+}
+
+#ifdef CONFIG_KMEMCHECK_DISABLED_BY_DEFAULT
+int kmemcheck_enabled = 0;
+#endif
+
+#ifdef CONFIG_KMEMCHECK_ENABLED_BY_DEFAULT
+int kmemcheck_enabled = 1;
+#endif
+
+#ifdef CONFIG_KMEMCHECK_ONESHOT_BY_DEFAULT
+int kmemcheck_enabled = 2;
+#endif
+
+/*
+ * We need to parse the kmemcheck= option before any memory is allocated.
+ */
+static int __init param_kmemcheck(char *str)
+{
+ if (!str)
+ return -EINVAL;
+
+ sscanf(str, "%d", &kmemcheck_enabled);
+ return 0;
+}
+
+early_param("kmemcheck", param_kmemcheck);
+
+int kmemcheck_show_addr(unsigned long address)
+{
+ pte_t *pte;
+
+ pte = kmemcheck_pte_lookup(address);
+ if (!pte)
+ return 0;
+
+ set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
+ __flush_tlb_one(address);
+ return 1;
+}
+
+int kmemcheck_hide_addr(unsigned long address)
+{
+ pte_t *pte;
+
+ pte = kmemcheck_pte_lookup(address);
+ if (!pte)
+ return 0;
+
+ set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
+ __flush_tlb_one(address);
+ return 1;
+}
+
+struct kmemcheck_context {
+ bool busy;
+ int balance;
+
+ unsigned long addr1;
+ unsigned long addr2;
+ unsigned long flags;
+};
+
+static DEFINE_PER_CPU(struct kmemcheck_context, kmemcheck_context);
+
+bool kmemcheck_active(struct pt_regs *regs)
+{
+ struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
+
+ return data->balance > 0;
+}
+
+/*
+ * Called from the #PF handler.
+ */
+void kmemcheck_show(struct pt_regs *regs)
+{
+ struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
+ int n;
+
+ BUG_ON(!irqs_disabled());
+
+ kmemcheck_pause_allbutself();
+
+ if (unlikely(data->balance != 0)) {
+ kmemcheck_show_addr(data->addr1);
+ kmemcheck_show_addr(data->addr2);
+ kmemcheck_error_save_bug(regs);
+ data->balance = 0;
+ kmemcheck_resume();
+ return;
+ }
+
+ n = 0;
+ n += kmemcheck_show_addr(data->addr1);
+ n += kmemcheck_show_addr(data->addr2);
+
+ /*
+ * None of the addresses actually belonged to kmemcheck. Note that
+ * this is not an error.
+ */
+ if (n == 0) {
+ kmemcheck_resume();
+ return;
+ }
+
+ ++data->balance;
+
+ /*
+ * The IF needs to be cleared as well, so that the faulting
+ * instruction can run "uninterrupted". Otherwise, we might take
+ * an interrupt and start executing that before we've had a chance
+ * to hide the page again.
+ *
+ * NOTE: In the rare case of multiple faults, we must not override
+ * the original flags:
+ */
+ if (!(regs->flags & X86_EFLAGS_TF))
+ data->flags = regs->flags;
+
+ regs->flags |= X86_EFLAGS_TF;
+ regs->flags &= ~X86_EFLAGS_IF;
+}
+
+/*
+ * Called from the #DB handler, and from the #PF handler if kmemcheck is active.
+ */
+void kmemcheck_hide(struct pt_regs *regs)
+{
+ struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
+ int n;
+
+ BUG_ON(!irqs_disabled());
+
+ if (data->balance == 0) {
+ kmemcheck_resume();
+ return;
+ }
+
+ if (unlikely(data->balance != 1)) {
+ kmemcheck_show_addr(data->addr1);
+ kmemcheck_show_addr(data->addr2);
+ kmemcheck_error_save_bug(regs);
+ data->addr1 = 0;
+ data->addr2 = 0;
+ data->balance = 0;
+
+ if (!(data->flags & X86_EFLAGS_TF))
+ regs->flags &= ~X86_EFLAGS_TF;
+ if (data->flags & X86_EFLAGS_IF)
+ regs->flags |= X86_EFLAGS_IF;
+ kmemcheck_resume();
+ return;
+ }
+
+ n = 0;
+ if (kmemcheck_enabled) {
+ n += kmemcheck_hide_addr(data->addr1);
+ n += kmemcheck_hide_addr(data->addr2);
+ } else {
+ n += kmemcheck_show_addr(data->addr1);
+ n += kmemcheck_show_addr(data->addr2);
+ }
+
+ if (n == 0) {
+ kmemcheck_resume();
+ return;
+ }
+
+ --data->balance;
+
+ data->addr1 = 0;
+ data->addr2 = 0;
+
+ if (!(data->flags & X86_EFLAGS_TF))
+ regs->flags &= ~X86_EFLAGS_TF;
+ if (data->flags & X86_EFLAGS_IF)
+ regs->flags |= X86_EFLAGS_IF;
+ kmemcheck_resume();
+}
+
+void kmemcheck_show_pages(struct page *p, unsigned int n)
+{
+ unsigned int i;
+
+ for (i = 0; i < n; ++i) {
+ unsigned long address;
+ pte_t *pte;
+ unsigned int level;
+
+ address = (unsigned long) page_address(&p[i]);
+ pte = lookup_address(address, &level);
+ BUG_ON(!pte);
+ BUG_ON(level != PG_LEVEL_4K);
+
+ set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
+ set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_HIDDEN));
+ __flush_tlb_one(address);
+ }
+}
+
+bool kmemcheck_page_is_tracked(struct page *p)
+{
+ /* This will also check the "hidden" flag of the PTE. */
+ return kmemcheck_pte_lookup((unsigned long) page_address(p));
+}
+
+void kmemcheck_hide_pages(struct page *p, unsigned int n)
+{
+ unsigned int i;
+
+ set_memory_4k((unsigned long) page_address(p), n);
+
+ for (i = 0; i < n; ++i) {
+ unsigned long address;
+ pte_t *pte;
+ unsigned int level;
+
+ address = (unsigned long) page_address(&p[i]);
+ pte = lookup_address(address, &level);
+ BUG_ON(!pte);
+ BUG_ON(level != PG_LEVEL_4K);
+
+ set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
+ set_pte(pte, __pte(pte_val(*pte) | _PAGE_HIDDEN));
+ __flush_tlb_one(address);
+ }
+}
+
+/*
+ * Check that an access does not span across two different pages, because
+ * that will mess up our shadow lookup.
+ */
+static bool check_page_boundary(struct pt_regs *regs,
+ unsigned long addr, unsigned int size)
+{
+ if (size == 8)
+ return false;
+ if (size == 16 && (addr & PAGE_MASK) == ((addr + 1) & PAGE_MASK))
+ return false;
+ if (size == 32 && (addr & PAGE_MASK) == ((addr + 3) & PAGE_MASK))
+ return false;
+#ifdef CONFIG_X86_64
+ if (size == 64 && (addr & PAGE_MASK) == ((addr + 7) & PAGE_MASK))
+ return false;
+#endif
+
+ /*
+ * XXX: The addr/size data is also really interesting if this
+ * case ever triggers. We should make a separate class of errors
+ * for this case. -Vegard
+ */
+ kmemcheck_error_save_bug(regs);
+ return true;
+}
+
+static void kmemcheck_read(struct pt_regs *regs,
+ unsigned long address, unsigned int size)
+{
+ void *shadow;
+ enum kmemcheck_shadow status;
+
+ shadow = kmemcheck_shadow_lookup(address);
+ if (!shadow)
+ return;
+
+ if (check_page_boundary(regs, address, size))
+ return;
+
+ status = kmemcheck_shadow_test(shadow, size);
+ if (status == KMEMCHECK_SHADOW_INITIALIZED)
+ return;
+
+ if (kmemcheck_enabled)
+ kmemcheck_error_save(status, address, size, regs);
+
+ if (kmemcheck_enabled == 2)
+ kmemcheck_enabled = 0;
+
+ /* Don't warn about it again. */
+ kmemcheck_shadow_set(shadow, size);
+}
+
+static void kmemcheck_write(struct pt_regs *regs,
+ unsigned long address, unsigned int size)
+{
+ void *shadow;
+
+ shadow = kmemcheck_shadow_lookup(address);
+ if (!shadow)
+ return;
+
+ if (check_page_boundary(regs, address, size))
+ return;
+
+ kmemcheck_shadow_set(shadow, size);
+}
+
+enum kmemcheck_method {
+ KMEMCHECK_READ,
+ KMEMCHECK_WRITE,
+};
+
+static void kmemcheck_access(struct pt_regs *regs,
+ unsigned long fallback_address, enum kmemcheck_method fallback_method)
+{
+ const uint8_t *insn;
+ const uint8_t *insn_primary;
+ unsigned int size;
+
+ struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
+
+ /* Recursive fault -- ouch. */
+ if (data->busy) {
+ kmemcheck_show_addr(fallback_address);
+ kmemcheck_error_save_bug(regs);
+ return;
+ }
+
+ data->busy = true;
+
+ insn = (const uint8_t *) regs->ip;
+ insn_primary = kmemcheck_opcode_get_primary(insn);
+
+ size = kmemcheck_opcode_get_size(insn);
+
+ switch (insn_primary[0]) {
+#ifdef CONFIG_KMEMCHECK_BITOPS_OK
+ /* AND, OR, XOR */
+ /*
+ * Unfortunately, these instructions have to be excluded from
+ * our regular checking since they access only some (and not
+ * all) bits. This clears out "bogus" bitfield-access warnings.
+ */
+ case 0x80:
+ case 0x81:
+ case 0x82:
+ case 0x83:
+ switch ((insn_primary[1] >> 3) & 7) {
+ /* OR */
+ case 1:
+ /* AND */
+ case 4:
+ /* XOR */
+ case 6:
+ kmemcheck_write(regs, fallback_address, size);
+ data->addr1 = fallback_address;
+ data->addr2 = 0;
+ data->busy = false;
+ return;
+
+ /* ADD */
+ case 0:
+ /* ADC */
+ case 2:
+ /* SBB */
+ case 3:
+ /* SUB */
+ case 5:
+ /* CMP */
+ case 7:
+ break;
+ }
+ break;
+#endif
+
+ /* MOVS, MOVSB, MOVSW, MOVSD */
+ case 0xa4:
+ case 0xa5:
+ /*
+ * These instructions are special because they take two
+ * addresses, but we only get one page fault.
+ */
+ kmemcheck_read(regs, regs->si, size);
+ kmemcheck_write(regs, regs->di, size);
+ data->addr1 = regs->si;
+ data->addr2 = regs->di;
+ data->busy = false;
+ return;
+
+ /* CMPS, CMPSB, CMPSW, CMPSD */
+ case 0xa6:
+ case 0xa7:
+ kmemcheck_read(regs, regs->si, size);
+ kmemcheck_read(regs, regs->di, size);
+ data->addr1 = regs->si;
+ data->addr2 = regs->di;
+ data->busy = false;
+ return;
+ }
+
+ /*
+ * If the opcode isn't special in any way, we use the data from the
+ * page fault handler to determine the address and type of memory
+ * access.
+ */
+ switch (fallback_method) {
+ case KMEMCHECK_READ:
+ kmemcheck_read(regs, fallback_address, size);
+ data->addr1 = fallback_address;
+ data->addr2 = 0;
+ data->busy = false;
+ return;
+ case KMEMCHECK_WRITE:
+ kmemcheck_write(regs, fallback_address, size);
+ data->addr1 = fallback_address;
+ data->addr2 = 0;
+ data->busy = false;
+ return;
+ }
+}
+
+bool kmemcheck_fault(struct pt_regs *regs, unsigned long address,
+ unsigned long error_code)
+{
+ pte_t *pte;
+ unsigned int level;
+
+ pte = lookup_address(address, &level);
+ if (!pte)
+ return false;
+ if (level != PG_LEVEL_4K)
+ return false;
+ if (!pte_hidden(*pte))
+ return false;
+
+ if (error_code & 2)
+ kmemcheck_access(regs, address, KMEMCHECK_WRITE);
+ else
+ kmemcheck_access(regs, address, KMEMCHECK_READ);
+
+ kmemcheck_show(regs);
+ return true;
+}
diff --git a/arch/x86/mm/kmemcheck/opcode.c b/arch/x86/mm/kmemcheck/opcode.c
new file mode 100644
index 000000000000..194aeee366a9
--- /dev/null
+++ b/arch/x86/mm/kmemcheck/opcode.c
@@ -0,0 +1,72 @@
+#include <linux/types.h>
+
+#include "opcode.h"
+
+static bool opcode_is_prefix(uint8_t b)
+{
+ return
+ /* Group 1 */
+ b == 0xf0 || b == 0xf2 || b == 0xf3
+ /* Group 2 */
+ || b == 0x2e || b == 0x36 || b == 0x3e || b == 0x26
+ || b == 0x64 || b == 0x65 || b == 0x2e || b == 0x3e
+ /* Group 3 */
+ || b == 0x66
+ /* Group 4 */
+ || b == 0x67;
+}
+
+static bool opcode_is_rex_prefix(uint8_t b)
+{
+ return (b & 0xf0) == 0x40;
+}
+
+/*
+ * This is a VERY crude opcode decoder. We only need to find the size of the
+ * load/store that caused our #PF and this should work for all the opcodes
+ * that we care about. Moreover, the ones who invented this instruction set
+ * should be shot.
+ */
+unsigned int kmemcheck_opcode_get_size(const uint8_t *op)
+{
+ /* Default operand size */
+ int operand_size_override = 32;
+
+ /* prefixes */
+ for (; opcode_is_prefix(*op); ++op) {
+ if (*op == 0x66)
+ operand_size_override = 16;
+ }
+
+#ifdef CONFIG_X86_64
+ /* REX prefix */
+ if (opcode_is_rex_prefix(*op)) {
+ if (*op & 0x08)
+ return 64;
+ ++op;
+ }
+#endif
+
+ /* escape opcode */
+ if (*op == 0x0f) {
+ ++op;
+
+ if (*op == 0xb6)
+ return 8;
+ if (*op == 0xb7)
+ return 16;
+ }
+
+ return (*op & 1) ? operand_size_override : 8;
+}
+
+const uint8_t *kmemcheck_opcode_get_primary(const uint8_t *op)
+{
+ /* skip prefixes */
+ while (opcode_is_prefix(*op))
+ ++op;
+ if (opcode_is_rex_prefix(*op))
+ ++op;
+ return op;
+}
+
diff --git a/arch/x86/mm/kmemcheck/opcode.h b/arch/x86/mm/kmemcheck/opcode.h
new file mode 100644
index 000000000000..a19b8fa37660
--- /dev/null
+++ b/arch/x86/mm/kmemcheck/opcode.h
@@ -0,0 +1,9 @@
+#ifndef ARCH__X86__MM__KMEMCHECK__OPCODE_H
+#define ARCH__X86__MM__KMEMCHECK__OPCODE_H
+
+#include <linux/types.h>
+
+unsigned int kmemcheck_opcode_get_size(const uint8_t *op);
+const uint8_t *kmemcheck_opcode_get_primary(const uint8_t *op);
+
+#endif
diff --git a/arch/x86/mm/kmemcheck/pte.c b/arch/x86/mm/kmemcheck/pte.c
new file mode 100644
index 000000000000..4ead26eeaf96
--- /dev/null
+++ b/arch/x86/mm/kmemcheck/pte.c
@@ -0,0 +1,22 @@
+#include <linux/mm.h>
+
+#include <asm/pgtable.h>
+
+#include "pte.h"
+
+pte_t *kmemcheck_pte_lookup(unsigned long address)
+{
+ pte_t *pte;
+ unsigned int level;
+
+ pte = lookup_address(address, &level);
+ if (!pte)
+ return NULL;
+ if (level != PG_LEVEL_4K)
+ return NULL;
+ if (!pte_hidden(*pte))
+ return NULL;
+
+ return pte;
+}
+
diff --git a/arch/x86/mm/kmemcheck/pte.h b/arch/x86/mm/kmemcheck/pte.h
new file mode 100644
index 000000000000..9f5966456492
--- /dev/null
+++ b/arch/x86/mm/kmemcheck/pte.h
@@ -0,0 +1,10 @@
+#ifndef ARCH__X86__MM__KMEMCHECK__PTE_H
+#define ARCH__X86__MM__KMEMCHECK__PTE_H
+
+#include <linux/mm.h>
+
+#include <asm/pgtable.h>
+
+pte_t *kmemcheck_pte_lookup(unsigned long address);
+
+#endif
diff --git a/arch/x86/mm/kmemcheck/shadow.c b/arch/x86/mm/kmemcheck/shadow.c
new file mode 100644
index 000000000000..9e3795e2b26a
--- /dev/null
+++ b/arch/x86/mm/kmemcheck/shadow.c
@@ -0,0 +1,176 @@
+#include <linux/kmemcheck.h>
+#include <linux/mm.h>
+
+#include <asm/page.h>
+#include <asm/pgtable.h>
+
+#include "pte.h"
+#include "shadow.h"
+
+/*
+ * Translate an address into the address of its shadow byte.
+ *
+ * NULL is returned when the address is not tracked, i.e. when it fails
+ * virt_addr_valid(), when kmemcheck_pte_lookup() finds no entry for it, or
+ * when its struct page has no shadow attached.
+ *
+ * This can be called for *any* possible address, so nothing is dereferenced
+ * until the address has passed the checks above, in that order.
+ */
+void *kmemcheck_shadow_lookup(unsigned long address)
+{
+	struct page *pg;
+	unsigned long offset = address & (PAGE_SIZE - 1);
+
+	if (!virt_addr_valid(address) || !kmemcheck_pte_lookup(address))
+		return NULL;
+
+	pg = virt_to_page(address);
+
+	/* The shadow byte sits at the same offset within the shadow page. */
+	return pg->shadow ? pg->shadow + offset : NULL;
+}
+
+/*
+ * Write status into the n shadow bytes that start at the shadow of address.
+ * Silently does nothing when the address has no shadow.
+ *
+ * NOTE(review): n is not clamped to the end of the page containing address;
+ * this appears to rely on the shadow being contiguous for the whole range --
+ * confirm against the callers.
+ */
+static void mark_shadow(void *address, unsigned int n,
+	enum kmemcheck_shadow status)
+{
+	void *shadow = kmemcheck_shadow_lookup((unsigned long) address);
+
+	if (shadow)
+		__memset(shadow, status, n);
+}
+
+/*
+ * Set the shadow of the n bytes at address to KMEMCHECK_SHADOW_UNALLOCATED.
+ * Does nothing when kmemcheck_shadow_lookup() finds no shadow for address.
+ */
+void kmemcheck_mark_unallocated(void *address, unsigned int n)
+{
+ mark_shadow(address, n, KMEMCHECK_SHADOW_UNALLOCATED);
+}
+
+/*
+ * Set the shadow of the n bytes at address to
+ * KMEMCHECK_SHADOW_UNINITIALIZED. Does nothing when
+ * kmemcheck_shadow_lookup() finds no shadow for address.
+ */
+void kmemcheck_mark_uninitialized(void *address, unsigned int n)
+{
+ mark_shadow(address, n, KMEMCHECK_SHADOW_UNINITIALIZED);
+}
+
+/*
+ * Fill the shadow memory of the given address such that the n bytes of
+ * memory at that address are marked as being initialized
+ * (KMEMCHECK_SHADOW_INITIALIZED). Does nothing when
+ * kmemcheck_shadow_lookup() finds no shadow for address.
+ *
+ * Exported with EXPORT_SYMBOL_GPL.
+ */
+void kmemcheck_mark_initialized(void *address, unsigned int n)
+{
+ mark_shadow(address, n, KMEMCHECK_SHADOW_INITIALIZED);
+}
+EXPORT_SYMBOL_GPL(kmemcheck_mark_initialized);
+
+/*
+ * Set the shadow of the n bytes at address to KMEMCHECK_SHADOW_FREED.
+ * Does nothing when kmemcheck_shadow_lookup() finds no shadow for address.
+ */
+void kmemcheck_mark_freed(void *address, unsigned int n)
+{
+ mark_shadow(address, n, KMEMCHECK_SHADOW_FREED);
+}
+
+/*
+ * Mark n consecutive struct pages, starting at p, as unallocated. Each page
+ * is handled separately: PAGE_SIZE bytes at its page_address().
+ */
+void kmemcheck_mark_unallocated_pages(struct page *p, unsigned int n)
+{
+	struct page *cur = p;
+	struct page *const end = p + n;
+
+	while (cur != end) {
+		kmemcheck_mark_unallocated(page_address(cur), PAGE_SIZE);
+		cur++;
+	}
+}
+
+/*
+ * Mark n consecutive struct pages, starting at p, as uninitialized. Each
+ * page is handled separately: PAGE_SIZE bytes at its page_address().
+ */
+void kmemcheck_mark_uninitialized_pages(struct page *p, unsigned int n)
+{
+	struct page *cur = p;
+	struct page *const end = p + n;
+
+	while (cur != end) {
+		kmemcheck_mark_uninitialized(page_address(cur), PAGE_SIZE);
+		cur++;
+	}
+}
+
+/*
+ * Inspect the shadow bytes covering one memory access and report a single
+ * status for it.
+ *
+ * @shadow: first shadow byte of the access
+ * @size:   access width in bits (8, 16 or 32; 64 only with CONFIG_X86_64)
+ *
+ * The bytes are examined from the highest offset down to offset 0. With
+ * CONFIG_KMEMCHECK_PARTIAL_OK the first byte found to be
+ * KMEMCHECK_SHADOW_INITIALIZED decides the result (it is enough that _some_
+ * bytes are initialized, because gcc frequently generates code to access
+ * neighboring bytes); otherwise the first byte that is NOT initialized
+ * decides it. If no byte decides, or @size is not one of the widths above,
+ * the status of byte 0 is returned.
+ */
+enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size)
+{
+#ifdef CONFIG_KMEMCHECK_PARTIAL_OK
+	const int stop_on_initialized = 1;
+#else
+	const int stop_on_initialized = 0;
+#endif
+	uint8_t *bytes = shadow;
+	unsigned int idx;
+
+	/* Translate the access width into a number of shadow bytes. */
+	switch (size) {
+#ifdef CONFIG_X86_64
+	case 64:
+		idx = 8;
+		break;
+#endif
+	case 32:
+		idx = 4;
+		break;
+	case 16:
+		idx = 2;
+		break;
+	case 8:
+		idx = 1;
+		break;
+	default:
+		idx = 0;
+		break;
+	}
+
+	/* Walk from the last byte of the access towards the first. */
+	while (idx-- > 0) {
+		int initialized = bytes[idx] == KMEMCHECK_SHADOW_INITIALIZED;
+
+		if (initialized == stop_on_initialized)
+			return bytes[idx];
+	}
+
+	return bytes[0];
+}
+
+/*
+ * Mark the shadow bytes covering one memory access as initialized.
+ *
+ * @shadow: first shadow byte of the access
+ * @size:   access width in bits (8, 16 or 32; 64 only with CONFIG_X86_64)
+ *
+ * Any other @size leaves the shadow untouched.
+ */
+void kmemcheck_shadow_set(void *shadow, unsigned int size)
+{
+	uint8_t *bytes = shadow;
+	unsigned int idx;
+
+	/* Translate the access width into a number of shadow bytes. */
+	switch (size) {
+#ifdef CONFIG_X86_64
+	case 64:
+		idx = 8;
+		break;
+#endif
+	case 32:
+		idx = 4;
+		break;
+	case 16:
+		idx = 2;
+		break;
+	case 8:
+		idx = 1;
+		break;
+	default:
+		idx = 0;
+		break;
+	}
+
+	/* Store from the last byte of the access down to the first. */
+	while (idx-- > 0)
+		bytes[idx] = KMEMCHECK_SHADOW_INITIALIZED;
+}
diff --git a/arch/x86/mm/kmemcheck/shadow.h b/arch/x86/mm/kmemcheck/shadow.h
new file mode 100644
index 000000000000..af46d9ab9d86
--- /dev/null
+++ b/arch/x86/mm/kmemcheck/shadow.h
@@ -0,0 +1,16 @@
+#ifndef ARCH__X86__MM__KMEMCHECK__SHADOW_H
+#define ARCH__X86__MM__KMEMCHECK__SHADOW_H
+
+/*
+ * State recorded in a shadow byte; one shadow byte describes one byte of
+ * tracked memory.
+ */
+enum kmemcheck_shadow {
+ KMEMCHECK_SHADOW_UNALLOCATED,
+ KMEMCHECK_SHADOW_UNINITIALIZED,
+ KMEMCHECK_SHADOW_INITIALIZED,
+ KMEMCHECK_SHADOW_FREED,
+};
+
+/* Shadow address for the given address, or NULL if the address is not tracked. */
+void *kmemcheck_shadow_lookup(unsigned long address);
+
+/*
+ * Test, respectively mark as initialized, the shadow bytes of one access.
+ * size is the access width in bits (8, 16, 32 or 64).
+ */
+enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size);
+void kmemcheck_shadow_set(void *shadow, unsigned int size);
+
+#endif
diff --git a/arch/x86/mm/kmemcheck/smp.c b/arch/x86/mm/kmemcheck/smp.c
new file mode 100644
index 000000000000..62b604992c63
--- /dev/null
+++ b/arch/x86/mm/kmemcheck/smp.c
@@ -0,0 +1,80 @@
+#include <linux/kdebug.h>
+#include <linux/notifier.h>
+#include <linux/smp.h>
+
+#include <mach_ipi.h>
+
+#include "smp.h"
+#include <asm/irq_vectors.h>
+
+/*
+ * Taken by kmemcheck_pause_allbutself() and released by kmemcheck_resume(),
+ * so the state below is used by one pause/resume cycle at a time.
+ */
+static DEFINE_SPINLOCK(nmi_spinlock);
+
+/* Set to 1 while the pausing CPU waits for the others to enter nmi_notifier(). */
+static atomic_t nmi_wait;
+/* Set to 1 by kmemcheck_resume() to release the CPUs spinning in nmi_notifier(). */
+static atomic_t nmi_resume;
+/* Number of CPUs currently spinning in nmi_notifier(). */
+static atomic_t paused;
+
+/*
+ * Die-notifier callback. For a DIE_NMI_IPI event that arrives while
+ * nmi_wait is set, count this CPU in 'paused' and spin until nmi_resume
+ * becomes non-zero, then uncount it and return NOTIFY_STOP. Any other
+ * event, or an NMI IPI while nmi_wait is clear, returns NOTIFY_DONE
+ * without touching the counters.
+ */
+static int nmi_notifier(struct notifier_block *self,
+ unsigned long val, void *data)
+{
+ if (val != DIE_NMI_IPI || !atomic_read(&nmi_wait))
+ return NOTIFY_DONE;
+
+ /* Tell kmemcheck_pause_allbutself() that this CPU has stopped. */
+ atomic_inc(&paused);
+
+ /* Pause until the fault has been handled */
+ while (!atomic_read(&nmi_resume))
+ cpu_relax();
+
+ /* Tell kmemcheck_resume() that this CPU is running again. */
+ atomic_dec(&paused);
+
+ return NOTIFY_STOP;
+}
+
+/* Notifier block that kmemcheck_smp_init() registers; it dispatches to nmi_notifier(). */
+static struct notifier_block nmi_nb = {
+ .notifier_call = &nmi_notifier,
+};
+
+/*
+ * Register nmi_nb on the die-notifier chain. A registration failure is
+ * treated as fatal (BUG_ON).
+ */
+void kmemcheck_smp_init(void)
+{
+ int err;
+
+ err = register_die_notifier(&nmi_nb);
+ BUG_ON(err);
+}
+
+/*
+ * Stop every other online CPU by sending it an NMI, and wait until all of
+ * them are spinning in nmi_notifier().
+ *
+ * Returns with nmi_spinlock held; the matching kmemcheck_resume() call
+ * releases it.
+ */
+void kmemcheck_pause_allbutself(void)
+{
+ int cpus;
+ /*
+ * NOTE(review): the online mask is copied before nmi_spinlock is taken,
+ * while the CPU count below is read after it -- confirm that the two
+ * cannot disagree.
+ */
+ cpumask_t mask = cpu_online_map;
+
+ spin_lock(&nmi_spinlock);
+
+ /* Number of CPUs expected to check in: everyone except this one. */
+ cpus = num_online_cpus() - 1;
+
+ /* Reset the handshake state before any NMI is sent. */
+ atomic_set(&paused, 0);
+ atomic_set(&nmi_wait, 1);
+ atomic_set(&nmi_resume, 0);
+
+ /* Do not send the NMI to ourselves. */
+ cpu_clear(safe_smp_processor_id(), mask);
+ if (!cpus_empty(mask))
+ send_IPI_mask(mask, NMI_VECTOR);
+
+ /* Wait for every other CPU to increment 'paused'. */
+ while (atomic_read(&paused) != cpus)
+ cpu_relax();
+
+ /* From here on nmi_notifier() returns NOTIFY_DONE for NMI IPIs. */
+ atomic_set(&nmi_wait, 0);
+}
+
+/*
+ * Release the CPUs stopped by kmemcheck_pause_allbutself().
+ *
+ * Sets nmi_resume, waits until every paused CPU has left nmi_notifier()
+ * (the 'paused' counter drops back to zero) and then unlocks nmi_spinlock,
+ * which kmemcheck_pause_allbutself() left held.
+ */
+void kmemcheck_resume(void)
+{
+	atomic_set(&nmi_resume, 1);
+
+	/* Each CPU decrements 'paused' on its way out of nmi_notifier(). */
+	while (atomic_read(&paused) != 0)
+		cpu_relax();
+
+	spin_unlock(&nmi_spinlock);
+}
diff --git a/arch/x86/mm/kmemcheck/smp.h b/arch/x86/mm/kmemcheck/smp.h
new file mode 100644
index 000000000000..dc65f16e3ac6
--- /dev/null
+++ b/arch/x86/mm/kmemcheck/smp.h
@@ -0,0 +1,23 @@
+#ifndef ARCH__X86__MM__KMEMCHECK__SMP_H
+#define ARCH__X86__MM__KMEMCHECK__SMP_H
+
+/*
+ * With CONFIG_KMEMCHECK_USE_SMP these are real functions, declared here.
+ * Without it they are empty inline stubs, so callers need no #ifdefs.
+ */
+#ifdef CONFIG_KMEMCHECK_USE_SMP
+void kmemcheck_smp_init(void);
+
+void kmemcheck_pause_allbutself(void);
+void kmemcheck_resume(void);
+#else
+static inline void kmemcheck_smp_init(void)
+{
+}
+
+static inline void kmemcheck_pause_allbutself(void)
+{
+}
+
+static inline void kmemcheck_resume(void)
+{
+}
+#endif
+
+#endif
diff --git a/arch/x86/mm/kmemcheck/string.c b/arch/x86/mm/kmemcheck/string.c
new file mode 100644
index 000000000000..1a62bf0479fa
--- /dev/null
+++ b/arch/x86/mm/kmemcheck/string.c
@@ -0,0 +1,95 @@
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/kmemcheck.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#include "shadow.h"
+#include "smp.h"
+
+/*
+ * A faster implementation of memset() when tracking is enabled where the
+ * whole memory area is within a single page.
+ *
+ * If the page has no shadow this is a plain __memset(). Otherwise the n
+ * bytes at s are filled with c and the corresponding n shadow bytes are set
+ * to KMEMCHECK_SHADOW_INITIALIZED.
+ */
+static void memset_one_page(void *s, int c, size_t n)
+{
+ unsigned long addr;
+ void *x;
+ unsigned long flags;
+
+ addr = (unsigned long) s;
+
+ x = kmemcheck_shadow_lookup(addr);
+ if (!x) {
+ /* The page isn't being tracked. */
+ __memset(s, c, n);
+ return;
+ }
+
+ /*
+ * While we are not guarding the page in question, nobody else
+ * should be able to change it: local interrupts are disabled and
+ * kmemcheck_pause_allbutself() is called before the page is shown.
+ */
+ local_irq_save(flags);
+
+ kmemcheck_pause_allbutself();
+ kmemcheck_show_addr(addr);
+ __memset(s, c, n);
+ __memset(x, KMEMCHECK_SHADOW_INITIALIZED, n);
+ /* NOTE(review): presumably the page stays visible once checking has been disabled -- confirm. */
+ if (kmemcheck_enabled)
+ kmemcheck_hide_addr(addr);
+ kmemcheck_resume();
+
+ local_irq_restore(flags);
+}
+
+/*
+ * A faster implementation of memset() when tracking is enabled. We cannot
+ * assume that all pages within the range are tracked, so the fill is done
+ * in chunks that never cross a page boundary, each one handed to
+ * memset_one_page().
+ *
+ * Returns s, like memset().
+ */
+void *kmemcheck_memset(void *s, int c, size_t n)
+{
+	unsigned long cursor = (unsigned long) s;
+	size_t remaining = n;
+
+	if (n == 0)
+		return s;
+
+	/* Until the slab allocator is available, just do a plain fill. */
+	if (!slab_is_available()) {
+		__memset(s, c, n);
+		return s;
+	}
+
+	while (remaining > 0) {
+		/* Bytes between the cursor and the end of its page. */
+		size_t chunk = PAGE_SIZE - (cursor & ~PAGE_MASK);
+
+		if (chunk > remaining)
+			chunk = remaining;
+
+		memset_one_page((void *) cursor, c, chunk);
+
+		cursor += chunk;
+		remaining -= chunk;
+	}
+
+	return s;
+}
+
+EXPORT_SYMBOL(kmemcheck_memset);