summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig67
-rw-r--r--arch/x86/Makefile4
-rw-r--r--arch/x86/boot/video-vesa.c7
-rw-r--r--arch/x86/boot/video-vga.c10
-rw-r--r--arch/x86/boot/video.c5
-rw-r--r--arch/x86/boot/video.h20
-rw-r--r--arch/x86/ia32/ia32entry.S2
-rw-r--r--arch/x86/ia32/sys_ia32.c14
-rw-r--r--arch/x86/include/asm/apic.h7
-rw-r--r--arch/x86/include/asm/current.h2
-rw-r--r--arch/x86/include/asm/desc.h13
-rw-r--r--arch/x86/include/asm/desc_defs.h6
-rw-r--r--arch/x86/include/asm/dma-mapping.h18
-rw-r--r--arch/x86/include/asm/entry_arch.h2
-rw-r--r--arch/x86/include/asm/i387.h1
-rw-r--r--arch/x86/include/asm/ioctls.h95
-rw-r--r--arch/x86/include/asm/ipcbuf.h29
-rw-r--r--arch/x86/include/asm/lguest.h5
-rw-r--r--arch/x86/include/asm/mce.h20
-rw-r--r--arch/x86/include/asm/mman.h14
-rw-r--r--arch/x86/include/asm/module.h13
-rw-r--r--arch/x86/include/asm/msgbuf.h40
-rw-r--r--arch/x86/include/asm/msr-index.h11
-rw-r--r--arch/x86/include/asm/nmi.h4
-rw-r--r--arch/x86/include/asm/param.h23
-rw-r--r--arch/x86/include/asm/paravirt.h711
-rw-r--r--arch/x86/include/asm/paravirt_types.h720
-rw-r--r--arch/x86/include/asm/percpu.h26
-rw-r--r--arch/x86/include/asm/perf_counter.h10
-rw-r--r--arch/x86/include/asm/pgtable.h16
-rw-r--r--arch/x86/include/asm/scatterlist.h27
-rw-r--r--arch/x86/include/asm/shmbuf.h52
-rw-r--r--arch/x86/include/asm/socket.h64
-rw-r--r--arch/x86/include/asm/sockios.h14
-rw-r--r--arch/x86/include/asm/stackprotector.h6
-rw-r--r--arch/x86/include/asm/system.h27
-rw-r--r--arch/x86/include/asm/termbits.h199
-rw-r--r--arch/x86/include/asm/termios.h115
-rw-r--r--arch/x86/include/asm/thread_info.h2
-rw-r--r--arch/x86/include/asm/traps.h4
-rw-r--r--arch/x86/include/asm/types.h12
-rw-r--r--arch/x86/include/asm/ucontext.h8
-rw-r--r--arch/x86/kernel/acpi/boot.c3
-rw-r--r--arch/x86/kernel/apic/apic.c84
-rw-r--r--arch/x86/kernel/apic/io_apic.c195
-rw-r--r--arch/x86/kernel/apic/ipi.c2
-rw-r--r--arch/x86/kernel/apic/nmi.c22
-rw-r--r--arch/x86/kernel/apic/probe_64.c6
-rw-r--r--arch/x86/kernel/apm_32.c18
-rw-r--r--arch/x86/kernel/cpu/amd.c37
-rw-r--r--arch/x86/kernel/cpu/bugs.c10
-rw-r--r--arch/x86/kernel/cpu/bugs_64.c2
-rw-r--r--arch/x86/kernel/cpu/common.c68
-rw-r--r--arch/x86/kernel/cpu/cyrix.c19
-rw-r--r--arch/x86/kernel/cpu/hypervisor.c5
-rw-r--r--arch/x86/kernel/cpu/intel.c11
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c116
-rw-r--r--arch/x86/kernel/cpu/mcheck/Makefile5
-rw-r--r--arch/x86/kernel/cpu/mcheck/k7.c116
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-internal.h14
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-severity.c2
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c224
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel.c10
-rw-r--r--arch/x86/kernel/cpu/mcheck/non-fatal.c94
-rw-r--r--arch/x86/kernel/cpu/mcheck/p4.c163
-rw-r--r--arch/x86/kernel/cpu/mcheck/p6.c127
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c13
-rw-r--r--arch/x86/kernel/cpu/mtrr/amd.c97
-rw-r--r--arch/x86/kernel/cpu/mtrr/centaur.c168
-rw-r--r--arch/x86/kernel/cpu/mtrr/cleanup.c390
-rw-r--r--arch/x86/kernel/cpu/mtrr/cyrix.c94
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c304
-rw-r--r--arch/x86/kernel/cpu/mtrr/if.c135
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c455
-rw-r--r--arch/x86/kernel/cpu/mtrr/mtrr.h19
-rw-r--r--arch/x86/kernel/cpu/mtrr/state.c68
-rw-r--r--arch/x86/kernel/cpu/perf_counter.c325
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c45
-rw-r--r--arch/x86/kernel/cpu/proc.c4
-rw-r--r--arch/x86/kernel/cpu/vmware.c18
-rw-r--r--arch/x86/kernel/doublefault_32.c4
-rw-r--r--arch/x86/kernel/ds.c6
-rw-r--r--arch/x86/kernel/dumpstack.c1
-rw-r--r--arch/x86/kernel/ftrace.c4
-rw-r--r--arch/x86/kernel/irq.c4
-rw-r--r--arch/x86/kernel/irqinit.c2
-rw-r--r--arch/x86/kernel/pci-dma.c2
-rw-r--r--arch/x86/kernel/pci-gart_64.c5
-rw-r--r--arch/x86/kernel/pci-nommu.c2
-rw-r--r--arch/x86/kernel/pci-swiotlb.c25
-rw-r--r--arch/x86/kernel/process_32.c30
-rw-r--r--arch/x86/kernel/process_64.c36
-rw-r--r--arch/x86/kernel/signal.c2
-rw-r--r--arch/x86/kernel/step.c9
-rw-r--r--arch/x86/kernel/traps.c54
-rw-r--r--arch/x86/mm/fault.c51
-rw-r--r--arch/x86/mm/highmem_32.c2
-rw-r--r--arch/x86/oprofile/nmi_int.c404
-rw-r--r--arch/x86/oprofile/op_counter.h2
-rw-r--r--arch/x86/oprofile/op_model_amd.c372
-rw-r--r--arch/x86/oprofile/op_model_p4.c72
-rw-r--r--arch/x86/oprofile/op_model_ppro.c101
-rw-r--r--arch/x86/oprofile/op_x86_model.h59
103 files changed, 3204 insertions, 3978 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 738bdc6b0f8b..cd2788c78a8a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -777,41 +777,17 @@ config X86_REROUTE_FOR_BROKEN_BOOT_IRQS
increased on these systems.
config X86_MCE
- bool "Machine Check Exception"
+ bool "Machine Check / overheating reporting"
---help---
- Machine Check Exception support allows the processor to notify the
- kernel if it detects a problem (e.g. overheating, component failure).
+ Machine Check support allows the processor to notify the
+ kernel if it detects a problem (e.g. overheating, data corruption).
The action the kernel takes depends on the severity of the problem,
- ranging from a warning message on the console, to halting the machine.
- Your processor must be a Pentium or newer to support this - check the
- flags in /proc/cpuinfo for mce. Note that some older Pentium systems
- have a design flaw which leads to false MCE events - hence MCE is
- disabled on all P5 processors, unless explicitly enabled with "mce"
- as a boot argument. Similarly, if MCE is built in and creates a
- problem on some new non-standard machine, you can boot with "nomce"
- to disable it. MCE support simply ignores non-MCE processors like
- the 386 and 486, so nearly everyone can say Y here.
-
-config X86_OLD_MCE
- depends on X86_32 && X86_MCE
- bool "Use legacy machine check code (will go away)"
- default n
- select X86_ANCIENT_MCE
- ---help---
- Use the old i386 machine check code. This is merely intended for
- testing in a transition period. Try this if you run into any machine
- check related software problems, but report the problem to
- linux-kernel. When in doubt say no.
-
-config X86_NEW_MCE
- depends on X86_MCE
- bool
- default y if (!X86_OLD_MCE && X86_32) || X86_64
+ ranging from warning messages to halting the machine.
config X86_MCE_INTEL
def_bool y
prompt "Intel MCE features"
- depends on X86_NEW_MCE && X86_LOCAL_APIC
+ depends on X86_MCE && X86_LOCAL_APIC
---help---
Additional support for intel specific MCE features such as
the thermal monitor.
@@ -819,14 +795,14 @@ config X86_MCE_INTEL
config X86_MCE_AMD
def_bool y
prompt "AMD MCE features"
- depends on X86_NEW_MCE && X86_LOCAL_APIC
+ depends on X86_MCE && X86_LOCAL_APIC
---help---
Additional support for AMD specific MCE features such as
the DRAM Error Threshold.
config X86_ANCIENT_MCE
def_bool n
- depends on X86_32
+ depends on X86_32 && X86_MCE
prompt "Support for old Pentium 5 / WinChip machine checks"
---help---
Include support for machine check handling on old Pentium 5 or WinChip
@@ -839,36 +815,16 @@ config X86_MCE_THRESHOLD
default y
config X86_MCE_INJECT
- depends on X86_NEW_MCE
+ depends on X86_MCE
tristate "Machine check injector support"
---help---
Provide support for injecting machine checks for testing purposes.
If you don't know what a machine check is and you don't do kernel
QA it is safe to say n.
-config X86_MCE_NONFATAL
- tristate "Check for non-fatal errors on AMD Athlon/Duron / Intel Pentium 4"
- depends on X86_OLD_MCE
- ---help---
- Enabling this feature starts a timer that triggers every 5 seconds which
- will look at the machine check registers to see if anything happened.
- Non-fatal problems automatically get corrected (but still logged).
- Disable this if you don't want to see these messages.
- Seeing the messages this option prints out may be indicative of dying
- or out-of-spec (ie, overclocked) hardware.
- This option only does something on certain CPUs.
- (AMD Athlon/Duron and Intel Pentium 4)
-
-config X86_MCE_P4THERMAL
- bool "check for P4 thermal throttling interrupt."
- depends on X86_OLD_MCE && X86_MCE && (X86_UP_APIC || SMP)
- ---help---
- Enabling this feature will cause a message to be printed when the P4
- enters thermal throttling.
-
config X86_THERMAL_VECTOR
def_bool y
- depends on X86_MCE_P4THERMAL || X86_MCE_INTEL
+ depends on X86_MCE_INTEL
config VM86
bool "Enable VM86 support" if EMBEDDED
@@ -1247,6 +1203,11 @@ config ARCH_MEMORY_PROBE
def_bool X86_64
depends on MEMORY_HOTPLUG
+config ILLEGAL_POINTER_VALUE
+ hex
+ default 0 if X86_32
+ default 0xdead000000000000 if X86_64
+
source "mm/Kconfig"
config HIGHPTE
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 1b68659c41b4..1f3851a626d3 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -32,8 +32,8 @@ ifeq ($(CONFIG_X86_32),y)
# Disable unit-at-a-time mode on pre-gcc-4.0 compilers, it makes gcc use
# a lot more stack due to the lack of sharing of stacklots:
- KBUILD_CFLAGS += $(shell if [ $(call cc-version) -lt 0400 ] ; then \
- echo $(call cc-option,-fno-unit-at-a-time); fi ;)
+ KBUILD_CFLAGS += $(call cc-ifversion, -lt, 0400, \
+ $(call cc-option,-fno-unit-at-a-time))
# CPU-specific tuning. Anything which can be shared with UML should go here.
include $(srctree)/arch/x86/Makefile_32.cpu
diff --git a/arch/x86/boot/video-vesa.c b/arch/x86/boot/video-vesa.c
index 275dd177f198..11e8c6eb80a1 100644
--- a/arch/x86/boot/video-vesa.c
+++ b/arch/x86/boot/video-vesa.c
@@ -31,7 +31,6 @@ static inline void vesa_store_mode_params_graphics(void) {}
static int vesa_probe(void)
{
-#if defined(CONFIG_VIDEO_VESA) || defined(CONFIG_FIRMWARE_EDID)
struct biosregs ireg, oreg;
u16 mode;
addr_t mode_ptr;
@@ -49,8 +48,7 @@ static int vesa_probe(void)
vginfo.signature != VESA_MAGIC ||
vginfo.version < 0x0102)
return 0; /* Not present */
-#endif /* CONFIG_VIDEO_VESA || CONFIG_FIRMWARE_EDID */
-#ifdef CONFIG_VIDEO_VESA
+
set_fs(vginfo.video_mode_ptr.seg);
mode_ptr = vginfo.video_mode_ptr.off;
@@ -102,9 +100,6 @@ static int vesa_probe(void)
}
return nmodes;
-#else
- return 0;
-#endif /* CONFIG_VIDEO_VESA */
}
static int vesa_set_mode(struct mode_info *mode)
diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c
index 8f8d827e254d..819caa1f2008 100644
--- a/arch/x86/boot/video-vga.c
+++ b/arch/x86/boot/video-vga.c
@@ -47,14 +47,6 @@ static u8 vga_set_basic_mode(void)
initregs(&ireg);
-#ifdef CONFIG_VIDEO_400_HACK
- if (adapter >= ADAPTER_VGA) {
- ireg.ax = 0x1202;
- ireg.bx = 0x0030;
- intcall(0x10, &ireg, NULL);
- }
-#endif
-
ax = 0x0f00;
intcall(0x10, &ireg, &oreg);
mode = oreg.al;
@@ -62,11 +54,9 @@ static u8 vga_set_basic_mode(void)
set_fs(0);
rows = rdfs8(0x484); /* rows minus one */
-#ifndef CONFIG_VIDEO_400_HACK
if ((oreg.ax == 0x5003 || oreg.ax == 0x5007) &&
(rows == 0 || rows == 24))
return mode;
-#endif
if (mode != 3 && mode != 7)
mode = 3;
diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c
index bad728b76fc2..d42da3802499 100644
--- a/arch/x86/boot/video.c
+++ b/arch/x86/boot/video.c
@@ -221,7 +221,6 @@ static unsigned int mode_menu(void)
}
}
-#ifdef CONFIG_VIDEO_RETAIN
/* Save screen content to the heap */
static struct saved_screen {
int x, y;
@@ -299,10 +298,6 @@ static void restore_screen(void)
ireg.dl = saved.curx;
intcall(0x10, &ireg, NULL);
}
-#else
-#define save_screen() ((void)0)
-#define restore_screen() ((void)0)
-#endif
void set_video(void)
{
diff --git a/arch/x86/boot/video.h b/arch/x86/boot/video.h
index 5bb174a997fc..ff339c5db311 100644
--- a/arch/x86/boot/video.h
+++ b/arch/x86/boot/video.h
@@ -17,19 +17,8 @@
#include <linux/types.h>
-/* Enable autodetection of SVGA adapters and modes. */
-#undef CONFIG_VIDEO_SVGA
-
-/* Enable autodetection of VESA modes */
-#define CONFIG_VIDEO_VESA
-
-/* Retain screen contents when switching modes */
-#define CONFIG_VIDEO_RETAIN
-
-/* Force 400 scan lines for standard modes (hack to fix bad BIOS behaviour */
-#undef CONFIG_VIDEO_400_HACK
-
-/* This code uses an extended set of video mode numbers. These include:
+/*
+ * This code uses an extended set of video mode numbers. These include:
* Aliases for standard modes
* NORMAL_VGA (-1)
* EXTENDED_VGA (-2)
@@ -67,13 +56,8 @@
/* The "recalculate timings" flag */
#define VIDEO_RECALC 0x8000
-/* Define DO_STORE according to CONFIG_VIDEO_RETAIN */
-#ifdef CONFIG_VIDEO_RETAIN
void store_screen(void);
#define DO_STORE() store_screen()
-#else
-#define DO_STORE() ((void)0)
-#endif /* CONFIG_VIDEO_RETAIN */
/*
* Mode table structures
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index e590261ba059..ba331bfd1112 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -537,7 +537,7 @@ ia32_sys_call_table:
.quad sys_mkdir
.quad sys_rmdir /* 40 */
.quad sys_dup
- .quad sys32_pipe
+ .quad sys_pipe
.quad compat_sys_times
.quad quiet_ni_syscall /* old prof syscall holder */
.quad sys_brk /* 45 */
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index 085a8c35f149..9f5527198825 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -189,20 +189,6 @@ asmlinkage long sys32_mprotect(unsigned long start, size_t len,
return sys_mprotect(start, len, prot);
}
-asmlinkage long sys32_pipe(int __user *fd)
-{
- int retval;
- int fds[2];
-
- retval = do_pipe_flags(fds, 0);
- if (retval)
- goto out;
- if (copy_to_user(fd, fds, sizeof(fds)))
- retval = -EFAULT;
-out:
- return retval;
-}
-
asmlinkage long sys32_rt_sigaction(int sig, struct sigaction32 __user *act,
struct sigaction32 __user *oact,
unsigned int sigsetsize)
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index bb7d47925847..586b7adb8e53 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -183,6 +183,10 @@ static inline int x2apic_enabled(void)
}
#define x2apic_supported() (cpu_has_x2apic)
+static inline void x2apic_force_phys(void)
+{
+ x2apic_phys = 1;
+}
#else
static inline void check_x2apic(void)
{
@@ -194,6 +198,9 @@ static inline int x2apic_enabled(void)
{
return 0;
}
+static inline void x2apic_force_phys(void)
+{
+}
#define x2apic_preenabled 0
#define x2apic_supported() 0
diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h
index c68c361697e1..4d447b732d82 100644
--- a/arch/x86/include/asm/current.h
+++ b/arch/x86/include/asm/current.h
@@ -11,7 +11,7 @@ DECLARE_PER_CPU(struct task_struct *, current_task);
static __always_inline struct task_struct *get_current(void)
{
- return percpu_read(current_task);
+ return percpu_read_stable(current_task);
}
#define current get_current()
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index c993e9e0fed4..e8de2f6f5ca5 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -291,11 +291,24 @@ static inline unsigned long get_desc_base(const struct desc_struct *desc)
return desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24);
}
+static inline void set_desc_base(struct desc_struct *desc, unsigned long base)
+{
+ desc->base0 = base & 0xffff;
+ desc->base1 = (base >> 16) & 0xff;
+ desc->base2 = (base >> 24) & 0xff;
+}
+
static inline unsigned long get_desc_limit(const struct desc_struct *desc)
{
return desc->limit0 | (desc->limit << 16);
}
+static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit)
+{
+ desc->limit0 = limit & 0xffff;
+ desc->limit = (limit >> 16) & 0xf;
+}
+
static inline void _set_gate(int gate, unsigned type, void *addr,
unsigned dpl, unsigned ist, unsigned seg)
{
diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h
index a6adefa28b94..9d6684849fd9 100644
--- a/arch/x86/include/asm/desc_defs.h
+++ b/arch/x86/include/asm/desc_defs.h
@@ -34,6 +34,12 @@ struct desc_struct {
};
} __attribute__((packed));
+#define GDT_ENTRY_INIT(flags, base, limit) { { { \
+ .a = ((limit) & 0xffff) | (((base) & 0xffff) << 16), \
+ .b = (((base) & 0xff0000) >> 16) | (((flags) & 0xf0ff) << 8) | \
+ ((limit) & 0xf0000) | ((base) & 0xff000000), \
+ } } }
+
enum {
GATE_INTERRUPT = 0xE,
GATE_TRAP = 0xF,
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 1c3f9435f1c9..0ee770d23d0e 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -55,6 +55,24 @@ extern int dma_set_mask(struct device *dev, u64 mask);
extern void *dma_generic_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_addr, gfp_t flag);
+static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
+{
+ if (!dev->dma_mask)
+ return 0;
+
+ return addr + size <= *dev->dma_mask;
+}
+
+static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+{
+ return paddr;
+}
+
+static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
+{
+ return daddr;
+}
+
static inline void
dma_cache_sync(struct device *dev, void *vaddr, size_t size,
enum dma_data_direction dir)
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index ff8cbfa07851..5e3f2044f0d3 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -61,7 +61,7 @@ BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR)
BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR)
#endif
-#ifdef CONFIG_X86_NEW_MCE
+#ifdef CONFIG_X86_MCE
BUILD_INTERRUPT(mce_self_interrupt,MCE_SELF_VECTOR)
#endif
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 175adf58dd4f..2e7529295f55 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -26,6 +26,7 @@ extern void fpu_init(void);
extern void mxcsr_feature_mask_init(void);
extern int init_fpu(struct task_struct *child);
extern asmlinkage void math_state_restore(void);
+extern void __math_state_restore(void);
extern void init_thread_xstate(void);
extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
diff --git a/arch/x86/include/asm/ioctls.h b/arch/x86/include/asm/ioctls.h
index 0d5b23b7b06e..ec34c760665e 100644
--- a/arch/x86/include/asm/ioctls.h
+++ b/arch/x86/include/asm/ioctls.h
@@ -1,94 +1 @@
-#ifndef _ASM_X86_IOCTLS_H
-#define _ASM_X86_IOCTLS_H
-
-#include <asm/ioctl.h>
-
-/* 0x54 is just a magic number to make these relatively unique ('T') */
-
-#define TCGETS 0x5401
-#define TCSETS 0x5402 /* Clashes with SNDCTL_TMR_START sound ioctl */
-#define TCSETSW 0x5403
-#define TCSETSF 0x5404
-#define TCGETA 0x5405
-#define TCSETA 0x5406
-#define TCSETAW 0x5407
-#define TCSETAF 0x5408
-#define TCSBRK 0x5409
-#define TCXONC 0x540A
-#define TCFLSH 0x540B
-#define TIOCEXCL 0x540C
-#define TIOCNXCL 0x540D
-#define TIOCSCTTY 0x540E
-#define TIOCGPGRP 0x540F
-#define TIOCSPGRP 0x5410
-#define TIOCOUTQ 0x5411
-#define TIOCSTI 0x5412
-#define TIOCGWINSZ 0x5413
-#define TIOCSWINSZ 0x5414
-#define TIOCMGET 0x5415
-#define TIOCMBIS 0x5416
-#define TIOCMBIC 0x5417
-#define TIOCMSET 0x5418
-#define TIOCGSOFTCAR 0x5419
-#define TIOCSSOFTCAR 0x541A
-#define FIONREAD 0x541B
-#define TIOCINQ FIONREAD
-#define TIOCLINUX 0x541C
-#define TIOCCONS 0x541D
-#define TIOCGSERIAL 0x541E
-#define TIOCSSERIAL 0x541F
-#define TIOCPKT 0x5420
-#define FIONBIO 0x5421
-#define TIOCNOTTY 0x5422
-#define TIOCSETD 0x5423
-#define TIOCGETD 0x5424
-#define TCSBRKP 0x5425 /* Needed for POSIX tcsendbreak() */
-/* #define TIOCTTYGSTRUCT 0x5426 - Former debugging-only ioctl */
-#define TIOCSBRK 0x5427 /* BSD compatibility */
-#define TIOCCBRK 0x5428 /* BSD compatibility */
-#define TIOCGSID 0x5429 /* Return the session ID of FD */
-#define TCGETS2 _IOR('T', 0x2A, struct termios2)
-#define TCSETS2 _IOW('T', 0x2B, struct termios2)
-#define TCSETSW2 _IOW('T', 0x2C, struct termios2)
-#define TCSETSF2 _IOW('T', 0x2D, struct termios2)
-#define TIOCGRS485 0x542E
-#define TIOCSRS485 0x542F
-#define TIOCGPTN _IOR('T', 0x30, unsigned int)
- /* Get Pty Number (of pty-mux device) */
-#define TIOCSPTLCK _IOW('T', 0x31, int) /* Lock/unlock Pty */
-#define TCGETX 0x5432 /* SYS5 TCGETX compatibility */
-#define TCSETX 0x5433
-#define TCSETXF 0x5434
-#define TCSETXW 0x5435
-
-#define FIONCLEX 0x5450
-#define FIOCLEX 0x5451
-#define FIOASYNC 0x5452
-#define TIOCSERCONFIG 0x5453
-#define TIOCSERGWILD 0x5454
-#define TIOCSERSWILD 0x5455
-#define TIOCGLCKTRMIOS 0x5456
-#define TIOCSLCKTRMIOS 0x5457
-#define TIOCSERGSTRUCT 0x5458 /* For debugging only */
-#define TIOCSERGETLSR 0x5459 /* Get line status register */
-#define TIOCSERGETMULTI 0x545A /* Get multiport config */
-#define TIOCSERSETMULTI 0x545B /* Set multiport config */
-
-#define TIOCMIWAIT 0x545C /* wait for a change on serial input line(s) */
-#define TIOCGICOUNT 0x545D /* read serial port inline interrupt counts */
-#define TIOCGHAYESESP 0x545E /* Get Hayes ESP configuration */
-#define TIOCSHAYESESP 0x545F /* Set Hayes ESP configuration */
-#define FIOQSIZE 0x5460
-
-/* Used for packet mode */
-#define TIOCPKT_DATA 0
-#define TIOCPKT_FLUSHREAD 1
-#define TIOCPKT_FLUSHWRITE 2
-#define TIOCPKT_STOP 4
-#define TIOCPKT_START 8
-#define TIOCPKT_NOSTOP 16
-#define TIOCPKT_DOSTOP 32
-
-#define TIOCSER_TEMT 0x01 /* Transmitter physically empty */
-
-#endif /* _ASM_X86_IOCTLS_H */
+#include <asm-generic/ioctls.h>
diff --git a/arch/x86/include/asm/ipcbuf.h b/arch/x86/include/asm/ipcbuf.h
index ee678fd51594..84c7e51cb6d0 100644
--- a/arch/x86/include/asm/ipcbuf.h
+++ b/arch/x86/include/asm/ipcbuf.h
@@ -1,28 +1 @@
-#ifndef _ASM_X86_IPCBUF_H
-#define _ASM_X86_IPCBUF_H
-
-/*
- * The ipc64_perm structure for x86 architecture.
- * Note extra padding because this structure is passed back and forth
- * between kernel and user space.
- *
- * Pad space is left for:
- * - 32-bit mode_t and seq
- * - 2 miscellaneous 32-bit values
- */
-
-struct ipc64_perm {
- __kernel_key_t key;
- __kernel_uid32_t uid;
- __kernel_gid32_t gid;
- __kernel_uid32_t cuid;
- __kernel_gid32_t cgid;
- __kernel_mode_t mode;
- unsigned short __pad1;
- unsigned short seq;
- unsigned short __pad2;
- unsigned long __unused1;
- unsigned long __unused2;
-};
-
-#endif /* _ASM_X86_IPCBUF_H */
+#include <asm-generic/ipcbuf.h>
diff --git a/arch/x86/include/asm/lguest.h b/arch/x86/include/asm/lguest.h
index 5136dad57cbb..0d97deba1e35 100644
--- a/arch/x86/include/asm/lguest.h
+++ b/arch/x86/include/asm/lguest.h
@@ -90,8 +90,9 @@ static inline void lguest_set_ts(void)
}
/* Full 4G segment descriptors, suitable for CS and DS. */
-#define FULL_EXEC_SEGMENT ((struct desc_struct){ { {0x0000ffff, 0x00cf9b00} } })
-#define FULL_SEGMENT ((struct desc_struct){ { {0x0000ffff, 0x00cf9300} } })
+#define FULL_EXEC_SEGMENT \
+ ((struct desc_struct)GDT_ENTRY_INIT(0xc09b, 0, 0xfffff))
+#define FULL_SEGMENT ((struct desc_struct)GDT_ENTRY_INIT(0xc093, 0, 0xfffff))
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 5cdd8d100ec9..ad7535372918 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -9,7 +9,7 @@
*/
#define MCG_BANKCNT_MASK 0xff /* Number of Banks */
-#define MCG_CTL_P (1ULL<<8) /* MCG_CAP register available */
+#define MCG_CTL_P (1ULL<<8) /* MCG_CTL register available */
#define MCG_EXT_P (1ULL<<9) /* Extended registers available */
#define MCG_CMCI_P (1ULL<<10) /* CMCI supported */
#define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */
@@ -115,13 +115,6 @@ void mcheck_init(struct cpuinfo_x86 *c);
static inline void mcheck_init(struct cpuinfo_x86 *c) {}
#endif
-#ifdef CONFIG_X86_OLD_MCE
-extern int nr_mce_banks;
-void amd_mcheck_init(struct cpuinfo_x86 *c);
-void intel_p4_mcheck_init(struct cpuinfo_x86 *c);
-void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
-#endif
-
#ifdef CONFIG_X86_ANCIENT_MCE
void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
void winchip_mcheck_init(struct cpuinfo_x86 *c);
@@ -137,10 +130,11 @@ void mce_log(struct mce *m);
DECLARE_PER_CPU(struct sys_device, mce_dev);
/*
- * To support more than 128 would need to escape the predefined
- * Linux defined extended banks first.
+ * Maximum banks number.
+ * This is the limit of the current register layout on
+ * Intel CPUs.
*/
-#define MAX_NR_BANKS (MCE_EXTENDED_BANK - 1)
+#define MAX_NR_BANKS 32
#ifdef CONFIG_X86_MCE_INTEL
extern int mce_cmci_disabled;
@@ -208,11 +202,7 @@ extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
void intel_init_thermal(struct cpuinfo_x86 *c);
-#ifdef CONFIG_X86_NEW_MCE
void mce_log_therm_throt_event(__u64 status);
-#else
-static inline void mce_log_therm_throt_event(__u64 status) {}
-#endif
#endif /* __KERNEL__ */
#endif /* _ASM_X86_MCE_H */
diff --git a/arch/x86/include/asm/mman.h b/arch/x86/include/asm/mman.h
index 751af2550ed9..593e51d4643f 100644
--- a/arch/x86/include/asm/mman.h
+++ b/arch/x86/include/asm/mman.h
@@ -1,20 +1,8 @@
#ifndef _ASM_X86_MMAN_H
#define _ASM_X86_MMAN_H
-#include <asm-generic/mman-common.h>
-
#define MAP_32BIT 0x40 /* only give out 32bit addresses */
-#define MAP_GROWSDOWN 0x0100 /* stack-like segment */
-#define MAP_DENYWRITE 0x0800 /* ETXTBSY */
-#define MAP_EXECUTABLE 0x1000 /* mark it as an executable */
-#define MAP_LOCKED 0x2000 /* pages are locked */
-#define MAP_NORESERVE 0x4000 /* don't check for reservations */
-#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */
-#define MAP_NONBLOCK 0x10000 /* do not block on IO */
-#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */
-
-#define MCL_CURRENT 1 /* lock all current mappings */
-#define MCL_FUTURE 2 /* lock all future mappings */
+#include <asm-generic/mman.h>
#endif /* _ASM_X86_MMAN_H */
diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h
index 47d62743c4d5..555bc12bdcd6 100644
--- a/arch/x86/include/asm/module.h
+++ b/arch/x86/include/asm/module.h
@@ -1,18 +1,7 @@
#ifndef _ASM_X86_MODULE_H
#define _ASM_X86_MODULE_H
-/* x86_32/64 are simple */
-struct mod_arch_specific {};
-
-#ifdef CONFIG_X86_32
-# define Elf_Shdr Elf32_Shdr
-# define Elf_Sym Elf32_Sym
-# define Elf_Ehdr Elf32_Ehdr
-#else
-# define Elf_Shdr Elf64_Shdr
-# define Elf_Sym Elf64_Sym
-# define Elf_Ehdr Elf64_Ehdr
-#endif
+#include <asm-generic/module.h>
#ifdef CONFIG_X86_64
/* X86_64 does not define MODULE_PROC_FAMILY */
diff --git a/arch/x86/include/asm/msgbuf.h b/arch/x86/include/asm/msgbuf.h
index 7e4e9481f51c..809134c644a6 100644
--- a/arch/x86/include/asm/msgbuf.h
+++ b/arch/x86/include/asm/msgbuf.h
@@ -1,39 +1 @@
-#ifndef _ASM_X86_MSGBUF_H
-#define _ASM_X86_MSGBUF_H
-
-/*
- * The msqid64_ds structure for i386 architecture.
- * Note extra padding because this structure is passed back and forth
- * between kernel and user space.
- *
- * Pad space on i386 is left for:
- * - 64-bit time_t to solve y2038 problem
- * - 2 miscellaneous 32-bit values
- *
- * Pad space on x8664 is left for:
- * - 2 miscellaneous 64-bit values
- */
-struct msqid64_ds {
- struct ipc64_perm msg_perm;
- __kernel_time_t msg_stime; /* last msgsnd time */
-#ifdef __i386__
- unsigned long __unused1;
-#endif
- __kernel_time_t msg_rtime; /* last msgrcv time */
-#ifdef __i386__
- unsigned long __unused2;
-#endif
- __kernel_time_t msg_ctime; /* last change time */
-#ifdef __i386__
- unsigned long __unused3;
-#endif
- unsigned long msg_cbytes; /* current number of bytes on queue */
- unsigned long msg_qnum; /* number of messages in queue */
- unsigned long msg_qbytes; /* max number of bytes on queue */
- __kernel_pid_t msg_lspid; /* pid of last msgsnd */
- __kernel_pid_t msg_lrpid; /* last receive pid */
- unsigned long __unused4;
- unsigned long __unused5;
-};
-
-#endif /* _ASM_X86_MSGBUF_H */
+#include <asm-generic/msgbuf.h>
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index bd5549034a95..4ffe09b2ad75 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -81,8 +81,15 @@
#define MSR_IA32_MC0_ADDR 0x00000402
#define MSR_IA32_MC0_MISC 0x00000403
+#define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x))
+#define MSR_IA32_MCx_STATUS(x) (MSR_IA32_MC0_STATUS + 4*(x))
+#define MSR_IA32_MCx_ADDR(x) (MSR_IA32_MC0_ADDR + 4*(x))
+#define MSR_IA32_MCx_MISC(x) (MSR_IA32_MC0_MISC + 4*(x))
+
/* These are consecutive and not in the normal 4er MCE bank block */
#define MSR_IA32_MC0_CTL2 0x00000280
+#define MSR_IA32_MCx_CTL2(x) (MSR_IA32_MC0_CTL2 + (x))
+
#define CMCI_EN (1ULL << 30)
#define CMCI_THRESHOLD_MASK 0xffffULL
@@ -215,6 +222,10 @@
#define THERM_STATUS_PROCHOT (1 << 0)
+#define MSR_THERM2_CTL 0x0000019d
+
+#define MSR_THERM2_CTL_TM_SELECT (1ULL << 16)
+
#define MSR_IA32_MISC_ENABLE 0x000001a0
/* MISC_ENABLE bits: architectural */
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index c86e5ed4af51..e63cf7d441e1 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -45,8 +45,8 @@ extern int proc_nmi_enabled(struct ctl_table *, int , struct file *,
void __user *, size_t *, loff_t *);
extern int unknown_nmi_panic;
-void __trigger_all_cpu_backtrace(void);
-#define trigger_all_cpu_backtrace() __trigger_all_cpu_backtrace()
+void arch_trigger_all_cpu_backtrace(void);
+#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
static inline void localise_nmi_watchdog(void)
{
diff --git a/arch/x86/include/asm/param.h b/arch/x86/include/asm/param.h
index 6f0d0422f4ca..965d45427975 100644
--- a/arch/x86/include/asm/param.h
+++ b/arch/x86/include/asm/param.h
@@ -1,22 +1 @@
-#ifndef _ASM_X86_PARAM_H
-#define _ASM_X86_PARAM_H
-
-#ifdef __KERNEL__
-# define HZ CONFIG_HZ /* Internal kernel timer frequency */
-# define USER_HZ 100 /* some user interfaces are */
-# define CLOCKS_PER_SEC (USER_HZ) /* in "ticks" like times() */
-#endif
-
-#ifndef HZ
-#define HZ 100
-#endif
-
-#define EXEC_PAGESIZE 4096
-
-#ifndef NOGROUP
-#define NOGROUP (-1)
-#endif
-
-#define MAXHOSTNAMELEN 64 /* max length of hostname */
-
-#endif /* _ASM_X86_PARAM_H */
+#include <asm-generic/param.h>
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 4fb37c8a0832..6a07af432c81 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -7,689 +7,11 @@
#include <asm/pgtable_types.h>
#include <asm/asm.h>
-/* Bitmask of what can be clobbered: usually at least eax. */
-#define CLBR_NONE 0
-#define CLBR_EAX (1 << 0)
-#define CLBR_ECX (1 << 1)
-#define CLBR_EDX (1 << 2)
-#define CLBR_EDI (1 << 3)
-
-#ifdef CONFIG_X86_32
-/* CLBR_ANY should match all regs platform has. For i386, that's just it */
-#define CLBR_ANY ((1 << 4) - 1)
-
-#define CLBR_ARG_REGS (CLBR_EAX | CLBR_EDX | CLBR_ECX)
-#define CLBR_RET_REG (CLBR_EAX | CLBR_EDX)
-#define CLBR_SCRATCH (0)
-#else
-#define CLBR_RAX CLBR_EAX
-#define CLBR_RCX CLBR_ECX
-#define CLBR_RDX CLBR_EDX
-#define CLBR_RDI CLBR_EDI
-#define CLBR_RSI (1 << 4)
-#define CLBR_R8 (1 << 5)
-#define CLBR_R9 (1 << 6)
-#define CLBR_R10 (1 << 7)
-#define CLBR_R11 (1 << 8)
-
-#define CLBR_ANY ((1 << 9) - 1)
-
-#define CLBR_ARG_REGS (CLBR_RDI | CLBR_RSI | CLBR_RDX | \
- CLBR_RCX | CLBR_R8 | CLBR_R9)
-#define CLBR_RET_REG (CLBR_RAX)
-#define CLBR_SCRATCH (CLBR_R10 | CLBR_R11)
-
-#include <asm/desc_defs.h>
-#endif /* X86_64 */
-
-#define CLBR_CALLEE_SAVE ((CLBR_ARG_REGS | CLBR_SCRATCH) & ~CLBR_RET_REG)
+#include <asm/paravirt_types.h>
#ifndef __ASSEMBLY__
#include <linux/types.h>
#include <linux/cpumask.h>
-#include <asm/kmap_types.h>
-#include <asm/desc_defs.h>
-
-struct page;
-struct thread_struct;
-struct desc_ptr;
-struct tss_struct;
-struct mm_struct;
-struct desc_struct;
-struct task_struct;
-
-/*
- * Wrapper type for pointers to code which uses the non-standard
- * calling convention. See PV_CALL_SAVE_REGS_THUNK below.
- */
-struct paravirt_callee_save {
- void *func;
-};
-
-/* general info */
-struct pv_info {
- unsigned int kernel_rpl;
- int shared_kernel_pmd;
- int paravirt_enabled;
- const char *name;
-};
-
-struct pv_init_ops {
- /*
- * Patch may replace one of the defined code sequences with
- * arbitrary code, subject to the same register constraints.
- * This generally means the code is not free to clobber any
- * registers other than EAX. The patch function should return
- * the number of bytes of code generated, as we nop pad the
- * rest in generic code.
- */
- unsigned (*patch)(u8 type, u16 clobber, void *insnbuf,
- unsigned long addr, unsigned len);
-
- /* Basic arch-specific setup */
- void (*arch_setup)(void);
- char *(*memory_setup)(void);
- void (*post_allocator_init)(void);
-
- /* Print a banner to identify the environment */
- void (*banner)(void);
-};
-
-
-struct pv_lazy_ops {
- /* Set deferred update mode, used for batching operations. */
- void (*enter)(void);
- void (*leave)(void);
-};
-
-struct pv_time_ops {
- void (*time_init)(void);
-
- /* Set and set time of day */
- unsigned long (*get_wallclock)(void);
- int (*set_wallclock)(unsigned long);
-
- unsigned long long (*sched_clock)(void);
- unsigned long (*get_tsc_khz)(void);
-};
-
-struct pv_cpu_ops {
- /* hooks for various privileged instructions */
- unsigned long (*get_debugreg)(int regno);
- void (*set_debugreg)(int regno, unsigned long value);
-
- void (*clts)(void);
-
- unsigned long (*read_cr0)(void);
- void (*write_cr0)(unsigned long);
-
- unsigned long (*read_cr4_safe)(void);
- unsigned long (*read_cr4)(void);
- void (*write_cr4)(unsigned long);
-
-#ifdef CONFIG_X86_64
- unsigned long (*read_cr8)(void);
- void (*write_cr8)(unsigned long);
-#endif
-
- /* Segment descriptor handling */
- void (*load_tr_desc)(void);
- void (*load_gdt)(const struct desc_ptr *);
- void (*load_idt)(const struct desc_ptr *);
- void (*store_gdt)(struct desc_ptr *);
- void (*store_idt)(struct desc_ptr *);
- void (*set_ldt)(const void *desc, unsigned entries);
- unsigned long (*store_tr)(void);
- void (*load_tls)(struct thread_struct *t, unsigned int cpu);
-#ifdef CONFIG_X86_64
- void (*load_gs_index)(unsigned int idx);
-#endif
- void (*write_ldt_entry)(struct desc_struct *ldt, int entrynum,
- const void *desc);
- void (*write_gdt_entry)(struct desc_struct *,
- int entrynum, const void *desc, int size);
- void (*write_idt_entry)(gate_desc *,
- int entrynum, const gate_desc *gate);
- void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries);
- void (*free_ldt)(struct desc_struct *ldt, unsigned entries);
-
- void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t);
-
- void (*set_iopl_mask)(unsigned mask);
-
- void (*wbinvd)(void);
- void (*io_delay)(void);
-
- /* cpuid emulation, mostly so that caps bits can be disabled */
- void (*cpuid)(unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx);
-
- /* MSR, PMC and TSR operations.
- err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */
- u64 (*read_msr_amd)(unsigned int msr, int *err);
- u64 (*read_msr)(unsigned int msr, int *err);
- int (*write_msr)(unsigned int msr, unsigned low, unsigned high);
-
- u64 (*read_tsc)(void);
- u64 (*read_pmc)(int counter);
- unsigned long long (*read_tscp)(unsigned int *aux);
-
- /*
- * Atomically enable interrupts and return to userspace. This
- * is only ever used to return to 32-bit processes; in a
- * 64-bit kernel, it's used for 32-on-64 compat processes, but
- * never native 64-bit processes. (Jump, not call.)
- */
- void (*irq_enable_sysexit)(void);
-
- /*
- * Switch to usermode gs and return to 64-bit usermode using
- * sysret. Only used in 64-bit kernels to return to 64-bit
- * processes. Usermode register state, including %rsp, must
- * already be restored.
- */
- void (*usergs_sysret64)(void);
-
- /*
- * Switch to usermode gs and return to 32-bit usermode using
- * sysret. Used to return to 32-on-64 compat processes.
- * Other usermode register state, including %esp, must already
- * be restored.
- */
- void (*usergs_sysret32)(void);
-
- /* Normal iret. Jump to this with the standard iret stack
- frame set up. */
- void (*iret)(void);
-
- void (*swapgs)(void);
-
- void (*start_context_switch)(struct task_struct *prev);
- void (*end_context_switch)(struct task_struct *next);
-};
-
-struct pv_irq_ops {
- void (*init_IRQ)(void);
-
- /*
- * Get/set interrupt state. save_fl and restore_fl are only
- * expected to use X86_EFLAGS_IF; all other bits
- * returned from save_fl are undefined, and may be ignored by
- * restore_fl.
- *
- * NOTE: These functions callers expect the callee to preserve
- * more registers than the standard C calling convention.
- */
- struct paravirt_callee_save save_fl;
- struct paravirt_callee_save restore_fl;
- struct paravirt_callee_save irq_disable;
- struct paravirt_callee_save irq_enable;
-
- void (*safe_halt)(void);
- void (*halt)(void);
-
-#ifdef CONFIG_X86_64
- void (*adjust_exception_frame)(void);
-#endif
-};
-
-struct pv_apic_ops {
-#ifdef CONFIG_X86_LOCAL_APIC
- void (*setup_boot_clock)(void);
- void (*setup_secondary_clock)(void);
-
- void (*startup_ipi_hook)(int phys_apicid,
- unsigned long start_eip,
- unsigned long start_esp);
-#endif
-};
-
-struct pv_mmu_ops {
- /*
- * Called before/after init_mm pagetable setup. setup_start
- * may reset %cr3, and may pre-install parts of the pagetable;
- * pagetable setup is expected to preserve any existing
- * mapping.
- */
- void (*pagetable_setup_start)(pgd_t *pgd_base);
- void (*pagetable_setup_done)(pgd_t *pgd_base);
-
- unsigned long (*read_cr2)(void);
- void (*write_cr2)(unsigned long);
-
- unsigned long (*read_cr3)(void);
- void (*write_cr3)(unsigned long);
-
- /*
- * Hooks for intercepting the creation/use/destruction of an
- * mm_struct.
- */
- void (*activate_mm)(struct mm_struct *prev,
- struct mm_struct *next);
- void (*dup_mmap)(struct mm_struct *oldmm,
- struct mm_struct *mm);
- void (*exit_mmap)(struct mm_struct *mm);
-
-
- /* TLB operations */
- void (*flush_tlb_user)(void);
- void (*flush_tlb_kernel)(void);
- void (*flush_tlb_single)(unsigned long addr);
- void (*flush_tlb_others)(const struct cpumask *cpus,
- struct mm_struct *mm,
- unsigned long va);
-
- /* Hooks for allocating and freeing a pagetable top-level */
- int (*pgd_alloc)(struct mm_struct *mm);
- void (*pgd_free)(struct mm_struct *mm, pgd_t *pgd);
-
- /*
- * Hooks for allocating/releasing pagetable pages when they're
- * attached to a pagetable
- */
- void (*alloc_pte)(struct mm_struct *mm, unsigned long pfn);
- void (*alloc_pmd)(struct mm_struct *mm, unsigned long pfn);
- void (*alloc_pmd_clone)(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count);
- void (*alloc_pud)(struct mm_struct *mm, unsigned long pfn);
- void (*release_pte)(unsigned long pfn);
- void (*release_pmd)(unsigned long pfn);
- void (*release_pud)(unsigned long pfn);
-
- /* Pagetable manipulation functions */
- void (*set_pte)(pte_t *ptep, pte_t pteval);
- void (*set_pte_at)(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pteval);
- void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval);
- void (*pte_update)(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep);
- void (*pte_update_defer)(struct mm_struct *mm,
- unsigned long addr, pte_t *ptep);
-
- pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep);
- void (*ptep_modify_prot_commit)(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pte);
-
- struct paravirt_callee_save pte_val;
- struct paravirt_callee_save make_pte;
-
- struct paravirt_callee_save pgd_val;
- struct paravirt_callee_save make_pgd;
-
-#if PAGETABLE_LEVELS >= 3
-#ifdef CONFIG_X86_PAE
- void (*set_pte_atomic)(pte_t *ptep, pte_t pteval);
- void (*pte_clear)(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep);
- void (*pmd_clear)(pmd_t *pmdp);
-
-#endif /* CONFIG_X86_PAE */
-
- void (*set_pud)(pud_t *pudp, pud_t pudval);
-
- struct paravirt_callee_save pmd_val;
- struct paravirt_callee_save make_pmd;
-
-#if PAGETABLE_LEVELS == 4
- struct paravirt_callee_save pud_val;
- struct paravirt_callee_save make_pud;
-
- void (*set_pgd)(pgd_t *pudp, pgd_t pgdval);
-#endif /* PAGETABLE_LEVELS == 4 */
-#endif /* PAGETABLE_LEVELS >= 3 */
-
-#ifdef CONFIG_HIGHPTE
- void *(*kmap_atomic_pte)(struct page *page, enum km_type type);
-#endif
-
- struct pv_lazy_ops lazy_mode;
-
- /* dom0 ops */
-
- /* Sometimes the physical address is a pfn, and sometimes its
- an mfn. We can tell which is which from the index. */
- void (*set_fixmap)(unsigned /* enum fixed_addresses */ idx,
- phys_addr_t phys, pgprot_t flags);
-};
-
-struct raw_spinlock;
-struct pv_lock_ops {
- int (*spin_is_locked)(struct raw_spinlock *lock);
- int (*spin_is_contended)(struct raw_spinlock *lock);
- void (*spin_lock)(struct raw_spinlock *lock);
- void (*spin_lock_flags)(struct raw_spinlock *lock, unsigned long flags);
- int (*spin_trylock)(struct raw_spinlock *lock);
- void (*spin_unlock)(struct raw_spinlock *lock);
-};
-
-/* This contains all the paravirt structures: we get a convenient
- * number for each function using the offset which we use to indicate
- * what to patch. */
-struct paravirt_patch_template {
- struct pv_init_ops pv_init_ops;
- struct pv_time_ops pv_time_ops;
- struct pv_cpu_ops pv_cpu_ops;
- struct pv_irq_ops pv_irq_ops;
- struct pv_apic_ops pv_apic_ops;
- struct pv_mmu_ops pv_mmu_ops;
- struct pv_lock_ops pv_lock_ops;
-};
-
-extern struct pv_info pv_info;
-extern struct pv_init_ops pv_init_ops;
-extern struct pv_time_ops pv_time_ops;
-extern struct pv_cpu_ops pv_cpu_ops;
-extern struct pv_irq_ops pv_irq_ops;
-extern struct pv_apic_ops pv_apic_ops;
-extern struct pv_mmu_ops pv_mmu_ops;
-extern struct pv_lock_ops pv_lock_ops;
-
-#define PARAVIRT_PATCH(x) \
- (offsetof(struct paravirt_patch_template, x) / sizeof(void *))
-
-#define paravirt_type(op) \
- [paravirt_typenum] "i" (PARAVIRT_PATCH(op)), \
- [paravirt_opptr] "i" (&(op))
-#define paravirt_clobber(clobber) \
- [paravirt_clobber] "i" (clobber)
-
-/*
- * Generate some code, and mark it as patchable by the
- * apply_paravirt() alternate instruction patcher.
- */
-#define _paravirt_alt(insn_string, type, clobber) \
- "771:\n\t" insn_string "\n" "772:\n" \
- ".pushsection .parainstructions,\"a\"\n" \
- _ASM_ALIGN "\n" \
- _ASM_PTR " 771b\n" \
- " .byte " type "\n" \
- " .byte 772b-771b\n" \
- " .short " clobber "\n" \
- ".popsection\n"
-
-/* Generate patchable code, with the default asm parameters. */
-#define paravirt_alt(insn_string) \
- _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]")
-
-/* Simple instruction patching code. */
-#define DEF_NATIVE(ops, name, code) \
- extern const char start_##ops##_##name[], end_##ops##_##name[]; \
- asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":")
-
-unsigned paravirt_patch_nop(void);
-unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len);
-unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len);
-unsigned paravirt_patch_ignore(unsigned len);
-unsigned paravirt_patch_call(void *insnbuf,
- const void *target, u16 tgt_clobbers,
- unsigned long addr, u16 site_clobbers,
- unsigned len);
-unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
- unsigned long addr, unsigned len);
-unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
- unsigned long addr, unsigned len);
-
-unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
- const char *start, const char *end);
-
-unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
- unsigned long addr, unsigned len);
-
-int paravirt_disable_iospace(void);
-
-/*
- * This generates an indirect call based on the operation type number.
- * The type number, computed in PARAVIRT_PATCH, is derived from the
- * offset into the paravirt_patch_template structure, and can therefore be
- * freely converted back into a structure offset.
- */
-#define PARAVIRT_CALL "call *%c[paravirt_opptr];"
-
-/*
- * These macros are intended to wrap calls through one of the paravirt
- * ops structs, so that they can be later identified and patched at
- * runtime.
- *
- * Normally, a call to a pv_op function is a simple indirect call:
- * (pv_op_struct.operations)(args...).
- *
- * Unfortunately, this is a relatively slow operation for modern CPUs,
- * because it cannot necessarily determine what the destination
- * address is. In this case, the address is a runtime constant, so at
- * the very least we can patch the call to e a simple direct call, or
- * ideally, patch an inline implementation into the callsite. (Direct
- * calls are essentially free, because the call and return addresses
- * are completely predictable.)
- *
- * For i386, these macros rely on the standard gcc "regparm(3)" calling
- * convention, in which the first three arguments are placed in %eax,
- * %edx, %ecx (in that order), and the remaining arguments are placed
- * on the stack. All caller-save registers (eax,edx,ecx) are expected
- * to be modified (either clobbered or used for return values).
- * X86_64, on the other hand, already specifies a register-based calling
- * conventions, returning at %rax, with parameteres going on %rdi, %rsi,
- * %rdx, and %rcx. Note that for this reason, x86_64 does not need any
- * special handling for dealing with 4 arguments, unlike i386.
- * However, x86_64 also have to clobber all caller saved registers, which
- * unfortunately, are quite a bit (r8 - r11)
- *
- * The call instruction itself is marked by placing its start address
- * and size into the .parainstructions section, so that
- * apply_paravirt() in arch/i386/kernel/alternative.c can do the
- * appropriate patching under the control of the backend pv_init_ops
- * implementation.
- *
- * Unfortunately there's no way to get gcc to generate the args setup
- * for the call, and then allow the call itself to be generated by an
- * inline asm. Because of this, we must do the complete arg setup and
- * return value handling from within these macros. This is fairly
- * cumbersome.
- *
- * There are 5 sets of PVOP_* macros for dealing with 0-4 arguments.
- * It could be extended to more arguments, but there would be little
- * to be gained from that. For each number of arguments, there are
- * the two VCALL and CALL variants for void and non-void functions.
- *
- * When there is a return value, the invoker of the macro must specify
- * the return type. The macro then uses sizeof() on that type to
- * determine whether its a 32 or 64 bit value, and places the return
- * in the right register(s) (just %eax for 32-bit, and %edx:%eax for
- * 64-bit). For x86_64 machines, it just returns at %rax regardless of
- * the return value size.
- *
- * 64-bit arguments are passed as a pair of adjacent 32-bit arguments
- * i386 also passes 64-bit arguments as a pair of adjacent 32-bit arguments
- * in low,high order
- *
- * Small structures are passed and returned in registers. The macro
- * calling convention can't directly deal with this, so the wrapper
- * functions must do this.
- *
- * These PVOP_* macros are only defined within this header. This
- * means that all uses must be wrapped in inline functions. This also
- * makes sure the incoming and outgoing types are always correct.
- */
-#ifdef CONFIG_X86_32
-#define PVOP_VCALL_ARGS \
- unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx
-#define PVOP_CALL_ARGS PVOP_VCALL_ARGS
-
-#define PVOP_CALL_ARG1(x) "a" ((unsigned long)(x))
-#define PVOP_CALL_ARG2(x) "d" ((unsigned long)(x))
-#define PVOP_CALL_ARG3(x) "c" ((unsigned long)(x))
-
-#define PVOP_VCALL_CLOBBERS "=a" (__eax), "=d" (__edx), \
- "=c" (__ecx)
-#define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS
-
-#define PVOP_VCALLEE_CLOBBERS "=a" (__eax), "=d" (__edx)
-#define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS
-
-#define EXTRA_CLOBBERS
-#define VEXTRA_CLOBBERS
-#else /* CONFIG_X86_64 */
-#define PVOP_VCALL_ARGS \
- unsigned long __edi = __edi, __esi = __esi, \
- __edx = __edx, __ecx = __ecx
-#define PVOP_CALL_ARGS PVOP_VCALL_ARGS, __eax
-
-#define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x))
-#define PVOP_CALL_ARG2(x) "S" ((unsigned long)(x))
-#define PVOP_CALL_ARG3(x) "d" ((unsigned long)(x))
-#define PVOP_CALL_ARG4(x) "c" ((unsigned long)(x))
-
-#define PVOP_VCALL_CLOBBERS "=D" (__edi), \
- "=S" (__esi), "=d" (__edx), \
- "=c" (__ecx)
-#define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax)
-
-#define PVOP_VCALLEE_CLOBBERS "=a" (__eax)
-#define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS
-
-#define EXTRA_CLOBBERS , "r8", "r9", "r10", "r11"
-#define VEXTRA_CLOBBERS , "rax", "r8", "r9", "r10", "r11"
-#endif /* CONFIG_X86_32 */
-
-#ifdef CONFIG_PARAVIRT_DEBUG
-#define PVOP_TEST_NULL(op) BUG_ON(op == NULL)
-#else
-#define PVOP_TEST_NULL(op) ((void)op)
-#endif
-
-#define ____PVOP_CALL(rettype, op, clbr, call_clbr, extra_clbr, \
- pre, post, ...) \
- ({ \
- rettype __ret; \
- PVOP_CALL_ARGS; \
- PVOP_TEST_NULL(op); \
- /* This is 32-bit specific, but is okay in 64-bit */ \
- /* since this condition will never hold */ \
- if (sizeof(rettype) > sizeof(unsigned long)) { \
- asm volatile(pre \
- paravirt_alt(PARAVIRT_CALL) \
- post \
- : call_clbr \
- : paravirt_type(op), \
- paravirt_clobber(clbr), \
- ##__VA_ARGS__ \
- : "memory", "cc" extra_clbr); \
- __ret = (rettype)((((u64)__edx) << 32) | __eax); \
- } else { \
- asm volatile(pre \
- paravirt_alt(PARAVIRT_CALL) \
- post \
- : call_clbr \
- : paravirt_type(op), \
- paravirt_clobber(clbr), \
- ##__VA_ARGS__ \
- : "memory", "cc" extra_clbr); \
- __ret = (rettype)__eax; \
- } \
- __ret; \
- })
-
-#define __PVOP_CALL(rettype, op, pre, post, ...) \
- ____PVOP_CALL(rettype, op, CLBR_ANY, PVOP_CALL_CLOBBERS, \
- EXTRA_CLOBBERS, pre, post, ##__VA_ARGS__)
-
-#define __PVOP_CALLEESAVE(rettype, op, pre, post, ...) \
- ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \
- PVOP_CALLEE_CLOBBERS, , \
- pre, post, ##__VA_ARGS__)
-
-
-#define ____PVOP_VCALL(op, clbr, call_clbr, extra_clbr, pre, post, ...) \
- ({ \
- PVOP_VCALL_ARGS; \
- PVOP_TEST_NULL(op); \
- asm volatile(pre \
- paravirt_alt(PARAVIRT_CALL) \
- post \
- : call_clbr \
- : paravirt_type(op), \
- paravirt_clobber(clbr), \
- ##__VA_ARGS__ \
- : "memory", "cc" extra_clbr); \
- })
-
-#define __PVOP_VCALL(op, pre, post, ...) \
- ____PVOP_VCALL(op, CLBR_ANY, PVOP_VCALL_CLOBBERS, \
- VEXTRA_CLOBBERS, \
- pre, post, ##__VA_ARGS__)
-
-#define __PVOP_VCALLEESAVE(rettype, op, pre, post, ...) \
- ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \
- PVOP_VCALLEE_CLOBBERS, , \
- pre, post, ##__VA_ARGS__)
-
-
-
-#define PVOP_CALL0(rettype, op) \
- __PVOP_CALL(rettype, op, "", "")
-#define PVOP_VCALL0(op) \
- __PVOP_VCALL(op, "", "")
-
-#define PVOP_CALLEE0(rettype, op) \
- __PVOP_CALLEESAVE(rettype, op, "", "")
-#define PVOP_VCALLEE0(op) \
- __PVOP_VCALLEESAVE(op, "", "")
-
-
-#define PVOP_CALL1(rettype, op, arg1) \
- __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1))
-#define PVOP_VCALL1(op, arg1) \
- __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1))
-
-#define PVOP_CALLEE1(rettype, op, arg1) \
- __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1))
-#define PVOP_VCALLEE1(op, arg1) \
- __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1))
-
-
-#define PVOP_CALL2(rettype, op, arg1, arg2) \
- __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \
- PVOP_CALL_ARG2(arg2))
-#define PVOP_VCALL2(op, arg1, arg2) \
- __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \
- PVOP_CALL_ARG2(arg2))
-
-#define PVOP_CALLEE2(rettype, op, arg1, arg2) \
- __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \
- PVOP_CALL_ARG2(arg2))
-#define PVOP_VCALLEE2(op, arg1, arg2) \
- __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1), \
- PVOP_CALL_ARG2(arg2))
-
-
-#define PVOP_CALL3(rettype, op, arg1, arg2, arg3) \
- __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \
- PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3))
-#define PVOP_VCALL3(op, arg1, arg2, arg3) \
- __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \
- PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3))
-
-/* This is the only difference in x86_64. We can make it much simpler */
-#ifdef CONFIG_X86_32
-#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \
- __PVOP_CALL(rettype, op, \
- "push %[_arg4];", "lea 4(%%esp),%%esp;", \
- PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \
- PVOP_CALL_ARG3(arg3), [_arg4] "mr" ((u32)(arg4)))
-#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \
- __PVOP_VCALL(op, \
- "push %[_arg4];", "lea 4(%%esp),%%esp;", \
- "0" ((u32)(arg1)), "1" ((u32)(arg2)), \
- "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4)))
-#else
-#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \
- __PVOP_CALL(rettype, op, "", "", \
- PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \
- PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4))
-#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \
- __PVOP_VCALL(op, "", "", \
- PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \
- PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4))
-#endif
static inline int paravirt_enabled(void)
{
@@ -1393,20 +715,6 @@ static inline void pmd_clear(pmd_t *pmdp)
}
#endif /* CONFIG_X86_PAE */
-/* Lazy mode for batching updates / context switch */
-enum paravirt_lazy_mode {
- PARAVIRT_LAZY_NONE,
- PARAVIRT_LAZY_MMU,
- PARAVIRT_LAZY_CPU,
-};
-
-enum paravirt_lazy_mode paravirt_get_lazy_mode(void);
-void paravirt_start_context_switch(struct task_struct *prev);
-void paravirt_end_context_switch(struct task_struct *next);
-
-void paravirt_enter_lazy_mmu(void);
-void paravirt_leave_lazy_mmu(void);
-
#define __HAVE_ARCH_START_CONTEXT_SWITCH
static inline void arch_start_context_switch(struct task_struct *prev)
{
@@ -1437,12 +745,6 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
pv_mmu_ops.set_fixmap(idx, phys, flags);
}
-void _paravirt_nop(void);
-u32 _paravirt_ident_32(u32);
-u64 _paravirt_ident_64(u64);
-
-#define paravirt_nop ((void *)_paravirt_nop)
-
#if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS)
static inline int __raw_spin_is_locked(struct raw_spinlock *lock)
@@ -1479,17 +781,6 @@ static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock)
#endif
-/* These all sit in the .parainstructions section to tell us what to patch. */
-struct paravirt_patch_site {
- u8 *instr; /* original instructions */
- u8 instrtype; /* type of this instruction */
- u8 len; /* length of original instruction */
- u16 clobbers; /* what registers you may clobber */
-};
-
-extern struct paravirt_patch_site __parainstructions[],
- __parainstructions_end[];
-
#ifdef CONFIG_X86_32
#define PV_SAVE_REGS "pushl %ecx; pushl %edx;"
#define PV_RESTORE_REGS "popl %edx; popl %ecx;"
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
new file mode 100644
index 000000000000..2b3371bae295
--- /dev/null
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -0,0 +1,720 @@
+#ifndef _ASM_X86_PARAVIRT_TYPES_H
+#define _ASM_X86_PARAVIRT_TYPES_H
+
+/* Bitmask of what can be clobbered: usually at least eax. */
+#define CLBR_NONE 0
+#define CLBR_EAX (1 << 0)
+#define CLBR_ECX (1 << 1)
+#define CLBR_EDX (1 << 2)
+#define CLBR_EDI (1 << 3)
+
+#ifdef CONFIG_X86_32
+/* CLBR_ANY should match all regs platform has. For i386, that's just it */
+#define CLBR_ANY ((1 << 4) - 1)
+
+#define CLBR_ARG_REGS (CLBR_EAX | CLBR_EDX | CLBR_ECX)
+#define CLBR_RET_REG (CLBR_EAX | CLBR_EDX)
+#define CLBR_SCRATCH (0)
+#else
+#define CLBR_RAX CLBR_EAX
+#define CLBR_RCX CLBR_ECX
+#define CLBR_RDX CLBR_EDX
+#define CLBR_RDI CLBR_EDI
+#define CLBR_RSI (1 << 4)
+#define CLBR_R8 (1 << 5)
+#define CLBR_R9 (1 << 6)
+#define CLBR_R10 (1 << 7)
+#define CLBR_R11 (1 << 8)
+
+#define CLBR_ANY ((1 << 9) - 1)
+
+#define CLBR_ARG_REGS (CLBR_RDI | CLBR_RSI | CLBR_RDX | \
+ CLBR_RCX | CLBR_R8 | CLBR_R9)
+#define CLBR_RET_REG (CLBR_RAX)
+#define CLBR_SCRATCH (CLBR_R10 | CLBR_R11)
+
+#endif /* X86_64 */
+
+#define CLBR_CALLEE_SAVE ((CLBR_ARG_REGS | CLBR_SCRATCH) & ~CLBR_RET_REG)
+
+#ifndef __ASSEMBLY__
+
+#include <asm/desc_defs.h>
+#include <asm/kmap_types.h>
+
+struct page;
+struct thread_struct;
+struct desc_ptr;
+struct tss_struct;
+struct mm_struct;
+struct desc_struct;
+struct task_struct;
+struct cpumask;
+
+/*
+ * Wrapper type for pointers to code which uses the non-standard
+ * calling convention. See PV_CALL_SAVE_REGS_THUNK below.
+ */
+struct paravirt_callee_save {
+ void *func;
+};
+
+/* general info */
+struct pv_info {
+ unsigned int kernel_rpl;
+ int shared_kernel_pmd;
+ int paravirt_enabled;
+ const char *name;
+};
+
+struct pv_init_ops {
+ /*
+ * Patch may replace one of the defined code sequences with
+ * arbitrary code, subject to the same register constraints.
+ * This generally means the code is not free to clobber any
+ * registers other than EAX. The patch function should return
+ * the number of bytes of code generated, as we nop pad the
+ * rest in generic code.
+ */
+ unsigned (*patch)(u8 type, u16 clobber, void *insnbuf,
+ unsigned long addr, unsigned len);
+
+ /* Basic arch-specific setup */
+ void (*arch_setup)(void);
+ char *(*memory_setup)(void);
+ void (*post_allocator_init)(void);
+
+ /* Print a banner to identify the environment */
+ void (*banner)(void);
+};
+
+
+struct pv_lazy_ops {
+ /* Set deferred update mode, used for batching operations. */
+ void (*enter)(void);
+ void (*leave)(void);
+};
+
+struct pv_time_ops {
+ void (*time_init)(void);
+
+ /* Set and set time of day */
+ unsigned long (*get_wallclock)(void);
+ int (*set_wallclock)(unsigned long);
+
+ unsigned long long (*sched_clock)(void);
+ unsigned long (*get_tsc_khz)(void);
+};
+
+struct pv_cpu_ops {
+ /* hooks for various privileged instructions */
+ unsigned long (*get_debugreg)(int regno);
+ void (*set_debugreg)(int regno, unsigned long value);
+
+ void (*clts)(void);
+
+ unsigned long (*read_cr0)(void);
+ void (*write_cr0)(unsigned long);
+
+ unsigned long (*read_cr4_safe)(void);
+ unsigned long (*read_cr4)(void);
+ void (*write_cr4)(unsigned long);
+
+#ifdef CONFIG_X86_64
+ unsigned long (*read_cr8)(void);
+ void (*write_cr8)(unsigned long);
+#endif
+
+ /* Segment descriptor handling */
+ void (*load_tr_desc)(void);
+ void (*load_gdt)(const struct desc_ptr *);
+ void (*load_idt)(const struct desc_ptr *);
+ void (*store_gdt)(struct desc_ptr *);
+ void (*store_idt)(struct desc_ptr *);
+ void (*set_ldt)(const void *desc, unsigned entries);
+ unsigned long (*store_tr)(void);
+ void (*load_tls)(struct thread_struct *t, unsigned int cpu);
+#ifdef CONFIG_X86_64
+ void (*load_gs_index)(unsigned int idx);
+#endif
+ void (*write_ldt_entry)(struct desc_struct *ldt, int entrynum,
+ const void *desc);
+ void (*write_gdt_entry)(struct desc_struct *,
+ int entrynum, const void *desc, int size);
+ void (*write_idt_entry)(gate_desc *,
+ int entrynum, const gate_desc *gate);
+ void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries);
+ void (*free_ldt)(struct desc_struct *ldt, unsigned entries);
+
+ void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t);
+
+ void (*set_iopl_mask)(unsigned mask);
+
+ void (*wbinvd)(void);
+ void (*io_delay)(void);
+
+ /* cpuid emulation, mostly so that caps bits can be disabled */
+ void (*cpuid)(unsigned int *eax, unsigned int *ebx,
+ unsigned int *ecx, unsigned int *edx);
+
+ /* MSR, PMC and TSR operations.
+ err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */
+ u64 (*read_msr_amd)(unsigned int msr, int *err);
+ u64 (*read_msr)(unsigned int msr, int *err);
+ int (*write_msr)(unsigned int msr, unsigned low, unsigned high);
+
+ u64 (*read_tsc)(void);
+ u64 (*read_pmc)(int counter);
+ unsigned long long (*read_tscp)(unsigned int *aux);
+
+ /*
+ * Atomically enable interrupts and return to userspace. This
+ * is only ever used to return to 32-bit processes; in a
+ * 64-bit kernel, it's used for 32-on-64 compat processes, but
+ * never native 64-bit processes. (Jump, not call.)
+ */
+ void (*irq_enable_sysexit)(void);
+
+ /*
+ * Switch to usermode gs and return to 64-bit usermode using
+ * sysret. Only used in 64-bit kernels to return to 64-bit
+ * processes. Usermode register state, including %rsp, must
+ * already be restored.
+ */
+ void (*usergs_sysret64)(void);
+
+ /*
+ * Switch to usermode gs and return to 32-bit usermode using
+ * sysret. Used to return to 32-on-64 compat processes.
+ * Other usermode register state, including %esp, must already
+ * be restored.
+ */
+ void (*usergs_sysret32)(void);
+
+ /* Normal iret. Jump to this with the standard iret stack
+ frame set up. */
+ void (*iret)(void);
+
+ void (*swapgs)(void);
+
+ void (*start_context_switch)(struct task_struct *prev);
+ void (*end_context_switch)(struct task_struct *next);
+};
+
+struct pv_irq_ops {
+ void (*init_IRQ)(void);
+
+ /*
+ * Get/set interrupt state. save_fl and restore_fl are only
+ * expected to use X86_EFLAGS_IF; all other bits
+ * returned from save_fl are undefined, and may be ignored by
+ * restore_fl.
+ *
+ * NOTE: These functions callers expect the callee to preserve
+ * more registers than the standard C calling convention.
+ */
+ struct paravirt_callee_save save_fl;
+ struct paravirt_callee_save restore_fl;
+ struct paravirt_callee_save irq_disable;
+ struct paravirt_callee_save irq_enable;
+
+ void (*safe_halt)(void);
+ void (*halt)(void);
+
+#ifdef CONFIG_X86_64
+ void (*adjust_exception_frame)(void);
+#endif
+};
+
+struct pv_apic_ops {
+#ifdef CONFIG_X86_LOCAL_APIC
+ void (*setup_boot_clock)(void);
+ void (*setup_secondary_clock)(void);
+
+ void (*startup_ipi_hook)(int phys_apicid,
+ unsigned long start_eip,
+ unsigned long start_esp);
+#endif
+};
+
+struct pv_mmu_ops {
+ /*
+ * Called before/after init_mm pagetable setup. setup_start
+ * may reset %cr3, and may pre-install parts of the pagetable;
+ * pagetable setup is expected to preserve any existing
+ * mapping.
+ */
+ void (*pagetable_setup_start)(pgd_t *pgd_base);
+ void (*pagetable_setup_done)(pgd_t *pgd_base);
+
+ unsigned long (*read_cr2)(void);
+ void (*write_cr2)(unsigned long);
+
+ unsigned long (*read_cr3)(void);
+ void (*write_cr3)(unsigned long);
+
+ /*
+ * Hooks for intercepting the creation/use/destruction of an
+ * mm_struct.
+ */
+ void (*activate_mm)(struct mm_struct *prev,
+ struct mm_struct *next);
+ void (*dup_mmap)(struct mm_struct *oldmm,
+ struct mm_struct *mm);
+ void (*exit_mmap)(struct mm_struct *mm);
+
+
+ /* TLB operations */
+ void (*flush_tlb_user)(void);
+ void (*flush_tlb_kernel)(void);
+ void (*flush_tlb_single)(unsigned long addr);
+ void (*flush_tlb_others)(const struct cpumask *cpus,
+ struct mm_struct *mm,
+ unsigned long va);
+
+ /* Hooks for allocating and freeing a pagetable top-level */
+ int (*pgd_alloc)(struct mm_struct *mm);
+ void (*pgd_free)(struct mm_struct *mm, pgd_t *pgd);
+
+ /*
+ * Hooks for allocating/releasing pagetable pages when they're
+ * attached to a pagetable
+ */
+ void (*alloc_pte)(struct mm_struct *mm, unsigned long pfn);
+ void (*alloc_pmd)(struct mm_struct *mm, unsigned long pfn);
+ void (*alloc_pmd_clone)(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count);
+ void (*alloc_pud)(struct mm_struct *mm, unsigned long pfn);
+ void (*release_pte)(unsigned long pfn);
+ void (*release_pmd)(unsigned long pfn);
+ void (*release_pud)(unsigned long pfn);
+
+ /* Pagetable manipulation functions */
+ void (*set_pte)(pte_t *ptep, pte_t pteval);
+ void (*set_pte_at)(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pteval);
+ void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval);
+ void (*pte_update)(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep);
+ void (*pte_update_defer)(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep);
+
+ pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep);
+ void (*ptep_modify_prot_commit)(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte);
+
+ struct paravirt_callee_save pte_val;
+ struct paravirt_callee_save make_pte;
+
+ struct paravirt_callee_save pgd_val;
+ struct paravirt_callee_save make_pgd;
+
+#if PAGETABLE_LEVELS >= 3
+#ifdef CONFIG_X86_PAE
+ void (*set_pte_atomic)(pte_t *ptep, pte_t pteval);
+ void (*pte_clear)(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep);
+ void (*pmd_clear)(pmd_t *pmdp);
+
+#endif /* CONFIG_X86_PAE */
+
+ void (*set_pud)(pud_t *pudp, pud_t pudval);
+
+ struct paravirt_callee_save pmd_val;
+ struct paravirt_callee_save make_pmd;
+
+#if PAGETABLE_LEVELS == 4
+ struct paravirt_callee_save pud_val;
+ struct paravirt_callee_save make_pud;
+
+ void (*set_pgd)(pgd_t *pudp, pgd_t pgdval);
+#endif /* PAGETABLE_LEVELS == 4 */
+#endif /* PAGETABLE_LEVELS >= 3 */
+
+#ifdef CONFIG_HIGHPTE
+ void *(*kmap_atomic_pte)(struct page *page, enum km_type type);
+#endif
+
+ struct pv_lazy_ops lazy_mode;
+
+ /* dom0 ops */
+
+ /* Sometimes the physical address is a pfn, and sometimes its
+ an mfn. We can tell which is which from the index. */
+ void (*set_fixmap)(unsigned /* enum fixed_addresses */ idx,
+ phys_addr_t phys, pgprot_t flags);
+};
+
+struct raw_spinlock;
+struct pv_lock_ops {
+ int (*spin_is_locked)(struct raw_spinlock *lock);
+ int (*spin_is_contended)(struct raw_spinlock *lock);
+ void (*spin_lock)(struct raw_spinlock *lock);
+ void (*spin_lock_flags)(struct raw_spinlock *lock, unsigned long flags);
+ int (*spin_trylock)(struct raw_spinlock *lock);
+ void (*spin_unlock)(struct raw_spinlock *lock);
+};
+
+/* This contains all the paravirt structures: we get a convenient
+ * number for each function using the offset which we use to indicate
+ * what to patch. */
+struct paravirt_patch_template {
+ struct pv_init_ops pv_init_ops;
+ struct pv_time_ops pv_time_ops;
+ struct pv_cpu_ops pv_cpu_ops;
+ struct pv_irq_ops pv_irq_ops;
+ struct pv_apic_ops pv_apic_ops;
+ struct pv_mmu_ops pv_mmu_ops;
+ struct pv_lock_ops pv_lock_ops;
+};
+
+extern struct pv_info pv_info;
+extern struct pv_init_ops pv_init_ops;
+extern struct pv_time_ops pv_time_ops;
+extern struct pv_cpu_ops pv_cpu_ops;
+extern struct pv_irq_ops pv_irq_ops;
+extern struct pv_apic_ops pv_apic_ops;
+extern struct pv_mmu_ops pv_mmu_ops;
+extern struct pv_lock_ops pv_lock_ops;
+
+#define PARAVIRT_PATCH(x) \
+ (offsetof(struct paravirt_patch_template, x) / sizeof(void *))
+
+#define paravirt_type(op) \
+ [paravirt_typenum] "i" (PARAVIRT_PATCH(op)), \
+ [paravirt_opptr] "i" (&(op))
+#define paravirt_clobber(clobber) \
+ [paravirt_clobber] "i" (clobber)
+
+/*
+ * Generate some code, and mark it as patchable by the
+ * apply_paravirt() alternate instruction patcher.
+ */
+#define _paravirt_alt(insn_string, type, clobber) \
+ "771:\n\t" insn_string "\n" "772:\n" \
+ ".pushsection .parainstructions,\"a\"\n" \
+ _ASM_ALIGN "\n" \
+ _ASM_PTR " 771b\n" \
+ " .byte " type "\n" \
+ " .byte 772b-771b\n" \
+ " .short " clobber "\n" \
+ ".popsection\n"
+
+/* Generate patchable code, with the default asm parameters. */
+#define paravirt_alt(insn_string) \
+ _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]")
+
+/* Simple instruction patching code. */
+#define DEF_NATIVE(ops, name, code) \
+ extern const char start_##ops##_##name[], end_##ops##_##name[]; \
+ asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":")
+
+unsigned paravirt_patch_nop(void);
+unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len);
+unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len);
+unsigned paravirt_patch_ignore(unsigned len);
+unsigned paravirt_patch_call(void *insnbuf,
+ const void *target, u16 tgt_clobbers,
+ unsigned long addr, u16 site_clobbers,
+ unsigned len);
+unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
+ unsigned long addr, unsigned len);
+unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
+ unsigned long addr, unsigned len);
+
+unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
+ const char *start, const char *end);
+
+unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
+ unsigned long addr, unsigned len);
+
+int paravirt_disable_iospace(void);
+
+/*
+ * This generates an indirect call based on the operation type number.
+ * The type number, computed in PARAVIRT_PATCH, is derived from the
+ * offset into the paravirt_patch_template structure, and can therefore be
+ * freely converted back into a structure offset.
+ */
+#define PARAVIRT_CALL "call *%c[paravirt_opptr];"
+
+/*
+ * These macros are intended to wrap calls through one of the paravirt
+ * ops structs, so that they can be later identified and patched at
+ * runtime.
+ *
+ * Normally, a call to a pv_op function is a simple indirect call:
+ * (pv_op_struct.operations)(args...).
+ *
+ * Unfortunately, this is a relatively slow operation for modern CPUs,
+ * because it cannot necessarily determine what the destination
+ * address is. In this case, the address is a runtime constant, so at
+ * the very least we can patch the call to e a simple direct call, or
+ * ideally, patch an inline implementation into the callsite. (Direct
+ * calls are essentially free, because the call and return addresses
+ * are completely predictable.)
+ *
+ * For i386, these macros rely on the standard gcc "regparm(3)" calling
+ * convention, in which the first three arguments are placed in %eax,
+ * %edx, %ecx (in that order), and the remaining arguments are placed
+ * on the stack. All caller-save registers (eax,edx,ecx) are expected
+ * to be modified (either clobbered or used for return values).
+ * X86_64, on the other hand, already specifies a register-based calling
+ * conventions, returning at %rax, with parameteres going on %rdi, %rsi,
+ * %rdx, and %rcx. Note that for this reason, x86_64 does not need any
+ * special handling for dealing with 4 arguments, unlike i386.
+ * However, x86_64 also have to clobber all caller saved registers, which
+ * unfortunately, are quite a bit (r8 - r11)
+ *
+ * The call instruction itself is marked by placing its start address
+ * and size into the .parainstructions section, so that
+ * apply_paravirt() in arch/i386/kernel/alternative.c can do the
+ * appropriate patching under the control of the backend pv_init_ops
+ * implementation.
+ *
+ * Unfortunately there's no way to get gcc to generate the args setup
+ * for the call, and then allow the call itself to be generated by an
+ * inline asm. Because of this, we must do the complete arg setup and
+ * return value handling from within these macros. This is fairly
+ * cumbersome.
+ *
+ * There are 5 sets of PVOP_* macros for dealing with 0-4 arguments.
+ * It could be extended to more arguments, but there would be little
+ * to be gained from that. For each number of arguments, there are
+ * the two VCALL and CALL variants for void and non-void functions.
+ *
+ * When there is a return value, the invoker of the macro must specify
+ * the return type. The macro then uses sizeof() on that type to
+ * determine whether its a 32 or 64 bit value, and places the return
+ * in the right register(s) (just %eax for 32-bit, and %edx:%eax for
+ * 64-bit). For x86_64 machines, it just returns at %rax regardless of
+ * the return value size.
+ *
+ * 64-bit arguments are passed as a pair of adjacent 32-bit arguments
+ * i386 also passes 64-bit arguments as a pair of adjacent 32-bit arguments
+ * in low,high order
+ *
+ * Small structures are passed and returned in registers. The macro
+ * calling convention can't directly deal with this, so the wrapper
+ * functions must do this.
+ *
+ * These PVOP_* macros are only defined within this header. This
+ * means that all uses must be wrapped in inline functions. This also
+ * makes sure the incoming and outgoing types are always correct.
+ */
+#ifdef CONFIG_X86_32
+#define PVOP_VCALL_ARGS \
+ unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx
+#define PVOP_CALL_ARGS PVOP_VCALL_ARGS
+
+#define PVOP_CALL_ARG1(x) "a" ((unsigned long)(x))
+#define PVOP_CALL_ARG2(x) "d" ((unsigned long)(x))
+#define PVOP_CALL_ARG3(x) "c" ((unsigned long)(x))
+
+#define PVOP_VCALL_CLOBBERS "=a" (__eax), "=d" (__edx), \
+ "=c" (__ecx)
+#define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS
+
+#define PVOP_VCALLEE_CLOBBERS "=a" (__eax), "=d" (__edx)
+#define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS
+
+#define EXTRA_CLOBBERS
+#define VEXTRA_CLOBBERS
+#else /* CONFIG_X86_64 */
+#define PVOP_VCALL_ARGS \
+ unsigned long __edi = __edi, __esi = __esi, \
+ __edx = __edx, __ecx = __ecx
+#define PVOP_CALL_ARGS PVOP_VCALL_ARGS, __eax
+
+#define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x))
+#define PVOP_CALL_ARG2(x) "S" ((unsigned long)(x))
+#define PVOP_CALL_ARG3(x) "d" ((unsigned long)(x))
+#define PVOP_CALL_ARG4(x) "c" ((unsigned long)(x))
+
+#define PVOP_VCALL_CLOBBERS "=D" (__edi), \
+ "=S" (__esi), "=d" (__edx), \
+ "=c" (__ecx)
+#define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax)
+
+#define PVOP_VCALLEE_CLOBBERS "=a" (__eax)
+#define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS
+
+#define EXTRA_CLOBBERS , "r8", "r9", "r10", "r11"
+#define VEXTRA_CLOBBERS , "rax", "r8", "r9", "r10", "r11"
+#endif /* CONFIG_X86_32 */
+
+#ifdef CONFIG_PARAVIRT_DEBUG
+#define PVOP_TEST_NULL(op) BUG_ON(op == NULL)
+#else
+#define PVOP_TEST_NULL(op) ((void)op)
+#endif
+
+#define ____PVOP_CALL(rettype, op, clbr, call_clbr, extra_clbr, \
+ pre, post, ...) \
+ ({ \
+ rettype __ret; \
+ PVOP_CALL_ARGS; \
+ PVOP_TEST_NULL(op); \
+ /* This is 32-bit specific, but is okay in 64-bit */ \
+ /* since this condition will never hold */ \
+ if (sizeof(rettype) > sizeof(unsigned long)) { \
+ asm volatile(pre \
+ paravirt_alt(PARAVIRT_CALL) \
+ post \
+ : call_clbr \
+ : paravirt_type(op), \
+ paravirt_clobber(clbr), \
+ ##__VA_ARGS__ \
+ : "memory", "cc" extra_clbr); \
+ __ret = (rettype)((((u64)__edx) << 32) | __eax); \
+ } else { \
+ asm volatile(pre \
+ paravirt_alt(PARAVIRT_CALL) \
+ post \
+ : call_clbr \
+ : paravirt_type(op), \
+ paravirt_clobber(clbr), \
+ ##__VA_ARGS__ \
+ : "memory", "cc" extra_clbr); \
+ __ret = (rettype)__eax; \
+ } \
+ __ret; \
+ })
+
+#define __PVOP_CALL(rettype, op, pre, post, ...) \
+ ____PVOP_CALL(rettype, op, CLBR_ANY, PVOP_CALL_CLOBBERS, \
+ EXTRA_CLOBBERS, pre, post, ##__VA_ARGS__)
+
+#define __PVOP_CALLEESAVE(rettype, op, pre, post, ...) \
+ ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \
+ PVOP_CALLEE_CLOBBERS, , \
+ pre, post, ##__VA_ARGS__)
+
+
+#define ____PVOP_VCALL(op, clbr, call_clbr, extra_clbr, pre, post, ...) \
+ ({ \
+ PVOP_VCALL_ARGS; \
+ PVOP_TEST_NULL(op); \
+ asm volatile(pre \
+ paravirt_alt(PARAVIRT_CALL) \
+ post \
+ : call_clbr \
+ : paravirt_type(op), \
+ paravirt_clobber(clbr), \
+ ##__VA_ARGS__ \
+ : "memory", "cc" extra_clbr); \
+ })
+
+#define __PVOP_VCALL(op, pre, post, ...) \
+ ____PVOP_VCALL(op, CLBR_ANY, PVOP_VCALL_CLOBBERS, \
+ VEXTRA_CLOBBERS, \
+ pre, post, ##__VA_ARGS__)
+
+#define __PVOP_VCALLEESAVE(rettype, op, pre, post, ...) \
+ ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \
+ PVOP_VCALLEE_CLOBBERS, , \
+ pre, post, ##__VA_ARGS__)
+
+
+
+#define PVOP_CALL0(rettype, op) \
+ __PVOP_CALL(rettype, op, "", "")
+#define PVOP_VCALL0(op) \
+ __PVOP_VCALL(op, "", "")
+
+#define PVOP_CALLEE0(rettype, op) \
+ __PVOP_CALLEESAVE(rettype, op, "", "")
+#define PVOP_VCALLEE0(op) \
+ __PVOP_VCALLEESAVE(op, "", "")
+
+
+#define PVOP_CALL1(rettype, op, arg1) \
+ __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1))
+#define PVOP_VCALL1(op, arg1) \
+ __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1))
+
+#define PVOP_CALLEE1(rettype, op, arg1) \
+ __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1))
+#define PVOP_VCALLEE1(op, arg1) \
+ __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1))
+
+
+#define PVOP_CALL2(rettype, op, arg1, arg2) \
+ __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \
+ PVOP_CALL_ARG2(arg2))
+#define PVOP_VCALL2(op, arg1, arg2) \
+ __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \
+ PVOP_CALL_ARG2(arg2))
+
+#define PVOP_CALLEE2(rettype, op, arg1, arg2) \
+ __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \
+ PVOP_CALL_ARG2(arg2))
+#define PVOP_VCALLEE2(op, arg1, arg2) \
+ __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1), \
+ PVOP_CALL_ARG2(arg2))
+
+
+#define PVOP_CALL3(rettype, op, arg1, arg2, arg3) \
+ __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \
+ PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3))
+#define PVOP_VCALL3(op, arg1, arg2, arg3) \
+ __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \
+ PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3))
+
+/* This is the only difference in x86_64. We can make it much simpler */
+#ifdef CONFIG_X86_32
+#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \
+ __PVOP_CALL(rettype, op, \
+ "push %[_arg4];", "lea 4(%%esp),%%esp;", \
+ PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \
+ PVOP_CALL_ARG3(arg3), [_arg4] "mr" ((u32)(arg4)))
+#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \
+ __PVOP_VCALL(op, \
+ "push %[_arg4];", "lea 4(%%esp),%%esp;", \
+ "0" ((u32)(arg1)), "1" ((u32)(arg2)), \
+ "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4)))
+#else
+#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \
+ __PVOP_CALL(rettype, op, "", "", \
+ PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \
+ PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4))
+#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \
+ __PVOP_VCALL(op, "", "", \
+ PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \
+ PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4))
+#endif
+
+/* Lazy mode for batching updates / context switch */
+enum paravirt_lazy_mode {
+ PARAVIRT_LAZY_NONE,
+ PARAVIRT_LAZY_MMU,
+ PARAVIRT_LAZY_CPU,
+};
+
+enum paravirt_lazy_mode paravirt_get_lazy_mode(void);
+void paravirt_start_context_switch(struct task_struct *prev);
+void paravirt_end_context_switch(struct task_struct *next);
+
+void paravirt_enter_lazy_mmu(void);
+void paravirt_leave_lazy_mmu(void);
+
+void _paravirt_nop(void);
+u32 _paravirt_ident_32(u32);
+u64 _paravirt_ident_64(u64);
+
+#define paravirt_nop ((void *)_paravirt_nop)
+
+/* These all sit in the .parainstructions section to tell us what to patch. */
+struct paravirt_patch_site {
+ u8 *instr; /* original instructions */
+ u8 instrtype; /* type of this instruction */
+ u8 len; /* length of original instruction */
+ u16 clobbers; /* what registers you may clobber */
+};
+
+extern struct paravirt_patch_site __parainstructions[],
+ __parainstructions_end[];
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_X86_PARAVIRT_TYPES_H */
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 103f1ddb0d85..04eacefcfd26 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -49,7 +49,7 @@
#define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x
#define __my_cpu_offset percpu_read(this_cpu_off)
#else
-#define __percpu_arg(x) "%" #x
+#define __percpu_arg(x) "%P" #x
#endif
/*
@@ -104,36 +104,48 @@ do { \
} \
} while (0)
-#define percpu_from_op(op, var) \
+#define percpu_from_op(op, var, constraint) \
({ \
typeof(var) ret__; \
switch (sizeof(var)) { \
case 1: \
asm(op "b "__percpu_arg(1)",%0" \
: "=q" (ret__) \
- : "m" (var)); \
+ : constraint); \
break; \
case 2: \
asm(op "w "__percpu_arg(1)",%0" \
: "=r" (ret__) \
- : "m" (var)); \
+ : constraint); \
break; \
case 4: \
asm(op "l "__percpu_arg(1)",%0" \
: "=r" (ret__) \
- : "m" (var)); \
+ : constraint); \
break; \
case 8: \
asm(op "q "__percpu_arg(1)",%0" \
: "=r" (ret__) \
- : "m" (var)); \
+ : constraint); \
break; \
default: __bad_percpu_size(); \
} \
ret__; \
})
-#define percpu_read(var) percpu_from_op("mov", per_cpu__##var)
+/*
+ * percpu_read() makes gcc load the percpu variable every time it is
+ * accessed while percpu_read_stable() allows the value to be cached.
+ * percpu_read_stable() is more efficient and can be used if its value
+ * is guaranteed to be valid across cpus. The current users include
+ * get_current() and get_thread_info() both of which are actually
+ * per-thread variables implemented as per-cpu variables and thus
+ * stable for the duration of the respective task.
+ */
+#define percpu_read(var) percpu_from_op("mov", per_cpu__##var, \
+ "m" (per_cpu__##var))
+#define percpu_read_stable(var) percpu_from_op("mov", per_cpu__##var, \
+ "p" (&per_cpu__##var))
#define percpu_write(var, val) percpu_to_op("mov", per_cpu__##var, val)
#define percpu_add(var, val) percpu_to_op("add", per_cpu__##var, val)
#define percpu_sub(var, val) percpu_to_op("sub", per_cpu__##var, val)
diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h
index fa64e401589d..e7b7c938ae27 100644
--- a/arch/x86/include/asm/perf_counter.h
+++ b/arch/x86/include/asm/perf_counter.h
@@ -84,6 +84,16 @@ union cpuid10_edx {
#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b
#define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2)
+/*
+ * We model BTS tracing as another fixed-mode PMC.
+ *
+ * We choose a value in the middle of the fixed counter range, since lower
+ * values are used by actual fixed counters and higher values are used
+ * to indicate other overflow conditions in the PERF_GLOBAL_STATUS msr.
+ */
+#define X86_PMC_IDX_FIXED_BTS (X86_PMC_IDX_FIXED + 16)
+
+
#ifdef CONFIG_PERF_COUNTERS
extern void init_hw_perf_counters(void);
extern void perf_counters_lapic_init(void);
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 3cc06e3fceb8..9de8729c1c8f 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -134,6 +134,11 @@ static inline unsigned long pte_pfn(pte_t pte)
return (pte_val(pte) & PTE_PFN_MASK) >> PAGE_SHIFT;
}
+static inline unsigned long pmd_pfn(pmd_t pmd)
+{
+ return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT;
+}
+
#define pte_page(pte) pfn_to_page(pte_pfn(pte))
static inline int pmd_large(pmd_t pte)
@@ -351,7 +356,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
* this macro returns the index of the entry in the pmd page which would
* control the given virtual address
*/
-static inline unsigned pmd_index(unsigned long address)
+static inline unsigned long pmd_index(unsigned long address)
{
return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1);
}
@@ -371,7 +376,7 @@ static inline unsigned pmd_index(unsigned long address)
* this function returns the index of the entry in the pte page which would
* control the given virtual address
*/
-static inline unsigned pte_index(unsigned long address)
+static inline unsigned long pte_index(unsigned long address)
{
return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
}
@@ -422,11 +427,6 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address);
}
-static inline unsigned long pmd_pfn(pmd_t pmd)
-{
- return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT;
-}
-
static inline int pud_large(pud_t pud)
{
return (pud_val(pud) & (_PAGE_PSE | _PAGE_PRESENT)) ==
@@ -462,7 +462,7 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd)
#define pgd_page(pgd) pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT)
/* to find an entry in a page-table-directory. */
-static inline unsigned pud_index(unsigned long address)
+static inline unsigned long pud_index(unsigned long address)
{
return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1);
}
diff --git a/arch/x86/include/asm/scatterlist.h b/arch/x86/include/asm/scatterlist.h
index 263d397d2eef..75af592677ec 100644
--- a/arch/x86/include/asm/scatterlist.h
+++ b/arch/x86/include/asm/scatterlist.h
@@ -1,33 +1,8 @@
#ifndef _ASM_X86_SCATTERLIST_H
#define _ASM_X86_SCATTERLIST_H
-#include <asm/types.h>
-
-struct scatterlist {
-#ifdef CONFIG_DEBUG_SG
- unsigned long sg_magic;
-#endif
- unsigned long page_link;
- unsigned int offset;
- unsigned int length;
- dma_addr_t dma_address;
- unsigned int dma_length;
-};
-
-#define ARCH_HAS_SG_CHAIN
#define ISA_DMA_THRESHOLD (0x00ffffff)
-/*
- * These macros should be used after a pci_map_sg call has been done
- * to get bus addresses of each of the SG entries and their lengths.
- * You should only work with the number of sg entries pci_map_sg
- * returns.
- */
-#define sg_dma_address(sg) ((sg)->dma_address)
-#ifdef CONFIG_X86_32
-# define sg_dma_len(sg) ((sg)->length)
-#else
-# define sg_dma_len(sg) ((sg)->dma_length)
-#endif
+#include <asm-generic/scatterlist.h>
#endif /* _ASM_X86_SCATTERLIST_H */
diff --git a/arch/x86/include/asm/shmbuf.h b/arch/x86/include/asm/shmbuf.h
index b51413b74971..83c05fc2de38 100644
--- a/arch/x86/include/asm/shmbuf.h
+++ b/arch/x86/include/asm/shmbuf.h
@@ -1,51 +1 @@
-#ifndef _ASM_X86_SHMBUF_H
-#define _ASM_X86_SHMBUF_H
-
-/*
- * The shmid64_ds structure for x86 architecture.
- * Note extra padding because this structure is passed back and forth
- * between kernel and user space.
- *
- * Pad space on 32 bit is left for:
- * - 64-bit time_t to solve y2038 problem
- * - 2 miscellaneous 32-bit values
- *
- * Pad space on 64 bit is left for:
- * - 2 miscellaneous 64-bit values
- */
-
-struct shmid64_ds {
- struct ipc64_perm shm_perm; /* operation perms */
- size_t shm_segsz; /* size of segment (bytes) */
- __kernel_time_t shm_atime; /* last attach time */
-#ifdef __i386__
- unsigned long __unused1;
-#endif
- __kernel_time_t shm_dtime; /* last detach time */
-#ifdef __i386__
- unsigned long __unused2;
-#endif
- __kernel_time_t shm_ctime; /* last change time */
-#ifdef __i386__
- unsigned long __unused3;
-#endif
- __kernel_pid_t shm_cpid; /* pid of creator */
- __kernel_pid_t shm_lpid; /* pid of last operator */
- unsigned long shm_nattch; /* no. of current attaches */
- unsigned long __unused4;
- unsigned long __unused5;
-};
-
-struct shminfo64 {
- unsigned long shmmax;
- unsigned long shmmin;
- unsigned long shmmni;
- unsigned long shmseg;
- unsigned long shmall;
- unsigned long __unused1;
- unsigned long __unused2;
- unsigned long __unused3;
- unsigned long __unused4;
-};
-
-#endif /* _ASM_X86_SHMBUF_H */
+#include <asm-generic/shmbuf.h>
diff --git a/arch/x86/include/asm/socket.h b/arch/x86/include/asm/socket.h
index b2a8c74f2d06..6b71384b9d8b 100644
--- a/arch/x86/include/asm/socket.h
+++ b/arch/x86/include/asm/socket.h
@@ -1,63 +1 @@
-#ifndef _ASM_X86_SOCKET_H
-#define _ASM_X86_SOCKET_H
-
-#include <asm/sockios.h>
-
-/* For setsockopt(2) */
-#define SOL_SOCKET 1
-
-#define SO_DEBUG 1
-#define SO_REUSEADDR 2
-#define SO_TYPE 3
-#define SO_ERROR 4
-#define SO_DONTROUTE 5
-#define SO_BROADCAST 6
-#define SO_SNDBUF 7
-#define SO_RCVBUF 8
-#define SO_SNDBUFFORCE 32
-#define SO_RCVBUFFORCE 33
-#define SO_KEEPALIVE 9
-#define SO_OOBINLINE 10
-#define SO_NO_CHECK 11
-#define SO_PRIORITY 12
-#define SO_LINGER 13
-#define SO_BSDCOMPAT 14
-/* To add :#define SO_REUSEPORT 15 */
-#define SO_PASSCRED 16
-#define SO_PEERCRED 17
-#define SO_RCVLOWAT 18
-#define SO_SNDLOWAT 19
-#define SO_RCVTIMEO 20
-#define SO_SNDTIMEO 21
-
-/* Security levels - as per NRL IPv6 - don't actually do anything */
-#define SO_SECURITY_AUTHENTICATION 22
-#define SO_SECURITY_ENCRYPTION_TRANSPORT 23
-#define SO_SECURITY_ENCRYPTION_NETWORK 24
-
-#define SO_BINDTODEVICE 25
-
-/* Socket filtering */
-#define SO_ATTACH_FILTER 26
-#define SO_DETACH_FILTER 27
-
-#define SO_PEERNAME 28
-#define SO_TIMESTAMP 29
-#define SCM_TIMESTAMP SO_TIMESTAMP
-
-#define SO_ACCEPTCONN 30
-
-#define SO_PEERSEC 31
-#define SO_PASSSEC 34
-#define SO_TIMESTAMPNS 35
-#define SCM_TIMESTAMPNS SO_TIMESTAMPNS
-
-#define SO_MARK 36
-
-#define SO_TIMESTAMPING 37
-#define SCM_TIMESTAMPING SO_TIMESTAMPING
-
-#define SO_PROTOCOL 38
-#define SO_DOMAIN 39
-
-#endif /* _ASM_X86_SOCKET_H */
+#include <asm-generic/socket.h>
diff --git a/arch/x86/include/asm/sockios.h b/arch/x86/include/asm/sockios.h
index 49cc72b5d3c9..def6d4746ee7 100644
--- a/arch/x86/include/asm/sockios.h
+++ b/arch/x86/include/asm/sockios.h
@@ -1,13 +1 @@
-#ifndef _ASM_X86_SOCKIOS_H
-#define _ASM_X86_SOCKIOS_H
-
-/* Socket-level I/O control calls. */
-#define FIOSETOWN 0x8901
-#define SIOCSPGRP 0x8902
-#define FIOGETOWN 0x8903
-#define SIOCGPGRP 0x8904
-#define SIOCATMARK 0x8905
-#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */
-#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */
-
-#endif /* _ASM_X86_SOCKIOS_H */
+#include <asm-generic/sockios.h>
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index c2d742c6e15f..44efdff3975d 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -48,7 +48,7 @@
* head_32 for boot CPU and setup_per_cpu_areas() for others.
*/
#define GDT_STACK_CANARY_INIT \
- [GDT_ENTRY_STACK_CANARY] = { { { 0x00000018, 0x00409000 } } },
+ [GDT_ENTRY_STACK_CANARY] = GDT_ENTRY_INIT(0x4090, 0, 0x18),
/*
* Initialize the stackprotector canary value.
@@ -90,9 +90,7 @@ static inline void setup_stack_canary_segment(int cpu)
struct desc_struct desc;
desc = gdt_table[GDT_ENTRY_STACK_CANARY];
- desc.base0 = canary & 0xffff;
- desc.base1 = (canary >> 16) & 0xff;
- desc.base2 = (canary >> 24) & 0xff;
+ set_desc_base(&desc, canary);
write_gdt_entry(gdt_table, GDT_ENTRY_STACK_CANARY, &desc, DESCTYPE_S);
#endif
}
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index 643c59b4bc6e..75c49c782e20 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -150,33 +150,6 @@ do { \
#endif
#ifdef __KERNEL__
-#define _set_base(addr, base) do { unsigned long __pr; \
-__asm__ __volatile__ ("movw %%dx,%1\n\t" \
- "rorl $16,%%edx\n\t" \
- "movb %%dl,%2\n\t" \
- "movb %%dh,%3" \
- :"=&d" (__pr) \
- :"m" (*((addr)+2)), \
- "m" (*((addr)+4)), \
- "m" (*((addr)+7)), \
- "0" (base) \
- ); } while (0)
-
-#define _set_limit(addr, limit) do { unsigned long __lr; \
-__asm__ __volatile__ ("movw %%dx,%1\n\t" \
- "rorl $16,%%edx\n\t" \
- "movb %2,%%dh\n\t" \
- "andb $0xf0,%%dh\n\t" \
- "orb %%dh,%%dl\n\t" \
- "movb %%dl,%2" \
- :"=&d" (__lr) \
- :"m" (*(addr)), \
- "m" (*((addr)+6)), \
- "0" (limit) \
- ); } while (0)
-
-#define set_base(ldt, base) _set_base(((char *)&(ldt)) , (base))
-#define set_limit(ldt, limit) _set_limit(((char *)&(ldt)) , ((limit)-1))
extern void native_load_gs_index(unsigned);
diff --git a/arch/x86/include/asm/termbits.h b/arch/x86/include/asm/termbits.h
index af1b70ea440f..3935b106de79 100644
--- a/arch/x86/include/asm/termbits.h
+++ b/arch/x86/include/asm/termbits.h
@@ -1,198 +1 @@
-#ifndef _ASM_X86_TERMBITS_H
-#define _ASM_X86_TERMBITS_H
-
-#include <linux/posix_types.h>
-
-typedef unsigned char cc_t;
-typedef unsigned int speed_t;
-typedef unsigned int tcflag_t;
-
-#define NCCS 19
-struct termios {
- tcflag_t c_iflag; /* input mode flags */
- tcflag_t c_oflag; /* output mode flags */
- tcflag_t c_cflag; /* control mode flags */
- tcflag_t c_lflag; /* local mode flags */
- cc_t c_line; /* line discipline */
- cc_t c_cc[NCCS]; /* control characters */
-};
-
-struct termios2 {
- tcflag_t c_iflag; /* input mode flags */
- tcflag_t c_oflag; /* output mode flags */
- tcflag_t c_cflag; /* control mode flags */
- tcflag_t c_lflag; /* local mode flags */
- cc_t c_line; /* line discipline */
- cc_t c_cc[NCCS]; /* control characters */
- speed_t c_ispeed; /* input speed */
- speed_t c_ospeed; /* output speed */
-};
-
-struct ktermios {
- tcflag_t c_iflag; /* input mode flags */
- tcflag_t c_oflag; /* output mode flags */
- tcflag_t c_cflag; /* control mode flags */
- tcflag_t c_lflag; /* local mode flags */
- cc_t c_line; /* line discipline */
- cc_t c_cc[NCCS]; /* control characters */
- speed_t c_ispeed; /* input speed */
- speed_t c_ospeed; /* output speed */
-};
-
-/* c_cc characters */
-#define VINTR 0
-#define VQUIT 1
-#define VERASE 2
-#define VKILL 3
-#define VEOF 4
-#define VTIME 5
-#define VMIN 6
-#define VSWTC 7
-#define VSTART 8
-#define VSTOP 9
-#define VSUSP 10
-#define VEOL 11
-#define VREPRINT 12
-#define VDISCARD 13
-#define VWERASE 14
-#define VLNEXT 15
-#define VEOL2 16
-
-/* c_iflag bits */
-#define IGNBRK 0000001
-#define BRKINT 0000002
-#define IGNPAR 0000004
-#define PARMRK 0000010
-#define INPCK 0000020
-#define ISTRIP 0000040
-#define INLCR 0000100
-#define IGNCR 0000200
-#define ICRNL 0000400
-#define IUCLC 0001000
-#define IXON 0002000
-#define IXANY 0004000
-#define IXOFF 0010000
-#define IMAXBEL 0020000
-#define IUTF8 0040000
-
-/* c_oflag bits */
-#define OPOST 0000001
-#define OLCUC 0000002
-#define ONLCR 0000004
-#define OCRNL 0000010
-#define ONOCR 0000020
-#define ONLRET 0000040
-#define OFILL 0000100
-#define OFDEL 0000200
-#define NLDLY 0000400
-#define NL0 0000000
-#define NL1 0000400
-#define CRDLY 0003000
-#define CR0 0000000
-#define CR1 0001000
-#define CR2 0002000
-#define CR3 0003000
-#define TABDLY 0014000
-#define TAB0 0000000
-#define TAB1 0004000
-#define TAB2 0010000
-#define TAB3 0014000
-#define XTABS 0014000
-#define BSDLY 0020000
-#define BS0 0000000
-#define BS1 0020000
-#define VTDLY 0040000
-#define VT0 0000000
-#define VT1 0040000
-#define FFDLY 0100000
-#define FF0 0000000
-#define FF1 0100000
-
-/* c_cflag bit meaning */
-#define CBAUD 0010017
-#define B0 0000000 /* hang up */
-#define B50 0000001
-#define B75 0000002
-#define B110 0000003
-#define B134 0000004
-#define B150 0000005
-#define B200 0000006
-#define B300 0000007
-#define B600 0000010
-#define B1200 0000011
-#define B1800 0000012
-#define B2400 0000013
-#define B4800 0000014
-#define B9600 0000015
-#define B19200 0000016
-#define B38400 0000017
-#define EXTA B19200
-#define EXTB B38400
-#define CSIZE 0000060
-#define CS5 0000000
-#define CS6 0000020
-#define CS7 0000040
-#define CS8 0000060
-#define CSTOPB 0000100
-#define CREAD 0000200
-#define PARENB 0000400
-#define PARODD 0001000
-#define HUPCL 0002000
-#define CLOCAL 0004000
-#define CBAUDEX 0010000
-#define BOTHER 0010000 /* non standard rate */
-#define B57600 0010001
-#define B115200 0010002
-#define B230400 0010003
-#define B460800 0010004
-#define B500000 0010005
-#define B576000 0010006
-#define B921600 0010007
-#define B1000000 0010010
-#define B1152000 0010011
-#define B1500000 0010012
-#define B2000000 0010013
-#define B2500000 0010014
-#define B3000000 0010015
-#define B3500000 0010016
-#define B4000000 0010017
-#define CIBAUD 002003600000 /* input baud rate */
-#define CMSPAR 010000000000 /* mark or space (stick) parity */
-#define CRTSCTS 020000000000 /* flow control */
-
-#define IBSHIFT 16 /* Shift from CBAUD to CIBAUD */
-
-/* c_lflag bits */
-#define ISIG 0000001
-#define ICANON 0000002
-#define XCASE 0000004
-#define ECHO 0000010
-#define ECHOE 0000020
-#define ECHOK 0000040
-#define ECHONL 0000100
-#define NOFLSH 0000200
-#define TOSTOP 0000400
-#define ECHOCTL 0001000
-#define ECHOPRT 0002000
-#define ECHOKE 0004000
-#define FLUSHO 0010000
-#define PENDIN 0040000
-#define IEXTEN 0100000
-
-/* tcflow() and TCXONC use these */
-#define TCOOFF 0
-#define TCOON 1
-#define TCIOFF 2
-#define TCION 3
-
-/* tcflush() and TCFLSH use these */
-#define TCIFLUSH 0
-#define TCOFLUSH 1
-#define TCIOFLUSH 2
-
-/* tcsetattr uses these */
-#define TCSANOW 0
-#define TCSADRAIN 1
-#define TCSAFLUSH 2
-
-#endif /* _ASM_X86_TERMBITS_H */
+#include <asm-generic/termbits.h>
diff --git a/arch/x86/include/asm/termios.h b/arch/x86/include/asm/termios.h
index c4ee8056baca..280d78a9d966 100644
--- a/arch/x86/include/asm/termios.h
+++ b/arch/x86/include/asm/termios.h
@@ -1,114 +1 @@
-#ifndef _ASM_X86_TERMIOS_H
-#define _ASM_X86_TERMIOS_H
-
-#include <asm/termbits.h>
-#include <asm/ioctls.h>
-
-struct winsize {
- unsigned short ws_row;
- unsigned short ws_col;
- unsigned short ws_xpixel;
- unsigned short ws_ypixel;
-};
-
-#define NCC 8
-struct termio {
- unsigned short c_iflag; /* input mode flags */
- unsigned short c_oflag; /* output mode flags */
- unsigned short c_cflag; /* control mode flags */
- unsigned short c_lflag; /* local mode flags */
- unsigned char c_line; /* line discipline */
- unsigned char c_cc[NCC]; /* control characters */
-};
-
-/* modem lines */
-#define TIOCM_LE 0x001
-#define TIOCM_DTR 0x002
-#define TIOCM_RTS 0x004
-#define TIOCM_ST 0x008
-#define TIOCM_SR 0x010
-#define TIOCM_CTS 0x020
-#define TIOCM_CAR 0x040
-#define TIOCM_RNG 0x080
-#define TIOCM_DSR 0x100
-#define TIOCM_CD TIOCM_CAR
-#define TIOCM_RI TIOCM_RNG
-#define TIOCM_OUT1 0x2000
-#define TIOCM_OUT2 0x4000
-#define TIOCM_LOOP 0x8000
-
-/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */
-
-#ifdef __KERNEL__
-
-#include <asm/uaccess.h>
-
-/* intr=^C quit=^\ erase=del kill=^U
- eof=^D vtime=\0 vmin=\1 sxtc=\0
- start=^Q stop=^S susp=^Z eol=\0
- reprint=^R discard=^U werase=^W lnext=^V
- eol2=\0
-*/
-#define INIT_C_CC "\003\034\177\025\004\0\1\0\021\023\032\0\022\017\027\026\0"
-
-/*
- * Translate a "termio" structure into a "termios". Ugh.
- */
-#define SET_LOW_TERMIOS_BITS(termios, termio, x) { \
- unsigned short __tmp; \
- get_user(__tmp,&(termio)->x); \
- *(unsigned short *) &(termios)->x = __tmp; \
-}
-
-static inline int user_termio_to_kernel_termios(struct ktermios *termios,
- struct termio __user *termio)
-{
- SET_LOW_TERMIOS_BITS(termios, termio, c_iflag);
- SET_LOW_TERMIOS_BITS(termios, termio, c_oflag);
- SET_LOW_TERMIOS_BITS(termios, termio, c_cflag);
- SET_LOW_TERMIOS_BITS(termios, termio, c_lflag);
- get_user(termios->c_line, &termio->c_line);
- return copy_from_user(termios->c_cc, termio->c_cc, NCC);
-}
-
-/*
- * Translate a "termios" structure into a "termio". Ugh.
- */
-static inline int kernel_termios_to_user_termio(struct termio __user *termio,
- struct ktermios *termios)
-{
- put_user((termios)->c_iflag, &(termio)->c_iflag);
- put_user((termios)->c_oflag, &(termio)->c_oflag);
- put_user((termios)->c_cflag, &(termio)->c_cflag);
- put_user((termios)->c_lflag, &(termio)->c_lflag);
- put_user((termios)->c_line, &(termio)->c_line);
- return copy_to_user((termio)->c_cc, (termios)->c_cc, NCC);
-}
-
-static inline int user_termios_to_kernel_termios(struct ktermios *k,
- struct termios2 __user *u)
-{
- return copy_from_user(k, u, sizeof(struct termios2));
-}
-
-static inline int kernel_termios_to_user_termios(struct termios2 __user *u,
- struct ktermios *k)
-{
- return copy_to_user(u, k, sizeof(struct termios2));
-}
-
-static inline int user_termios_to_kernel_termios_1(struct ktermios *k,
- struct termios __user *u)
-{
- return copy_from_user(k, u, sizeof(struct termios));
-}
-
-static inline int kernel_termios_to_user_termios_1(struct termios __user *u,
- struct ktermios *k)
-{
- return copy_to_user(u, k, sizeof(struct termios));
-}
-
-#endif /* __KERNEL__ */
-
-#endif /* _ASM_X86_TERMIOS_H */
+#include <asm-generic/termios.h>
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index f26432ac1c57..ae0f88de09ab 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -218,7 +218,7 @@ DECLARE_PER_CPU(unsigned long, kernel_stack);
static inline struct thread_info *current_thread_info(void)
{
struct thread_info *ti;
- ti = (void *)(percpu_read(kernel_stack) +
+ ti = (void *)(percpu_read_stable(kernel_stack) +
KERNEL_STACK_OFFSET - THREAD_SIZE);
return ti;
}
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index bfd74c032fca..4da91ad69e0d 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -81,9 +81,7 @@ extern int panic_on_unrecovered_nmi;
void math_error(void __user *);
void math_emulate(struct math_emu_info *);
-#ifdef CONFIG_X86_32
-unsigned long patch_espfix_desc(unsigned long, unsigned long);
-#else
+#ifndef CONFIG_X86_32
asmlinkage void smp_thermal_interrupt(void);
asmlinkage void mce_threshold_interrupt(void);
#endif
diff --git a/arch/x86/include/asm/types.h b/arch/x86/include/asm/types.h
index 09b97745772f..df1da20f4534 100644
--- a/arch/x86/include/asm/types.h
+++ b/arch/x86/include/asm/types.h
@@ -1,19 +1,11 @@
#ifndef _ASM_X86_TYPES_H
#define _ASM_X86_TYPES_H
-#include <asm-generic/int-ll64.h>
+#define dma_addr_t dma_addr_t
-#ifndef __ASSEMBLY__
-
-typedef unsigned short umode_t;
+#include <asm-generic/types.h>
-#endif /* __ASSEMBLY__ */
-
-/*
- * These aren't exported outside the kernel to avoid name space clashes
- */
#ifdef __KERNEL__
-
#ifndef __ASSEMBLY__
typedef u64 dma64_addr_t;
diff --git a/arch/x86/include/asm/ucontext.h b/arch/x86/include/asm/ucontext.h
index 87324cf439d9..b7c29c8017f2 100644
--- a/arch/x86/include/asm/ucontext.h
+++ b/arch/x86/include/asm/ucontext.h
@@ -7,12 +7,6 @@
* sigcontext struct (uc_mcontext).
*/
-struct ucontext {
- unsigned long uc_flags;
- struct ucontext *uc_link;
- stack_t uc_stack;
- struct sigcontext uc_mcontext;
- sigset_t uc_sigmask; /* mask last for extensibility */
-};
+#include <asm-generic/ucontext.h>
#endif /* _ASM_X86_UCONTEXT_H */
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index a7f9e89741c2..d29eba3c04d2 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1157,9 +1157,8 @@ static int __init acpi_parse_madt_ioapic_entries(void)
* If MPS is present, it will handle them,
* otherwise the system will stay in PIC mode
*/
- if (acpi_disabled || acpi_noirq) {
+ if (acpi_disabled || acpi_noirq)
return -ENODEV;
- }
if (!cpu_has_apic)
return -ENODEV;
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 0a1c2830ec66..de039fcdd053 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -49,6 +49,7 @@
#include <asm/mtrr.h>
#include <asm/smp.h>
#include <asm/mce.h>
+#include <asm/kvm_para.h>
unsigned int num_processors;
@@ -1361,52 +1362,76 @@ void enable_x2apic(void)
}
#endif /* CONFIG_X86_X2APIC */
-void __init enable_IR_x2apic(void)
+int __init enable_IR(void)
{
#ifdef CONFIG_INTR_REMAP
int ret;
- unsigned long flags;
- struct IO_APIC_route_entry **ioapic_entries = NULL;
ret = dmar_table_init();
if (ret) {
pr_debug("dmar_table_init() failed with %d:\n", ret);
- goto ir_failed;
+ return 0;
}
if (!intr_remapping_supported()) {
pr_debug("intr-remapping not supported\n");
- goto ir_failed;
+ return 0;
}
-
if (!x2apic_preenabled && skip_ioapic_setup) {
pr_info("Skipped enabling intr-remap because of skipping "
"io-apic setup\n");
- return;
+ return 0;
}
+ if (enable_intr_remapping(x2apic_supported()))
+ return 0;
+
+ pr_info("Enabled Interrupt-remapping\n");
+
+ return 1;
+
+#endif
+ return 0;
+}
+
+void __init enable_IR_x2apic(void)
+{
+ unsigned long flags;
+ struct IO_APIC_route_entry **ioapic_entries = NULL;
+ int ret, x2apic_enabled = 0;
+
ioapic_entries = alloc_ioapic_entries();
if (!ioapic_entries) {
- pr_info("Allocate ioapic_entries failed: %d\n", ret);
- goto end;
+ pr_err("Allocate ioapic_entries failed\n");
+ goto out;
}
ret = save_IO_APIC_setup(ioapic_entries);
if (ret) {
pr_info("Saving IO-APIC state failed: %d\n", ret);
- goto end;
+ goto out;
}
local_irq_save(flags);
- mask_IO_APIC_setup(ioapic_entries);
mask_8259A();
+ mask_IO_APIC_setup(ioapic_entries);
- ret = enable_intr_remapping(x2apic_supported());
- if (ret)
- goto end_restore;
+ ret = enable_IR();
+ if (!ret) {
+ /* IR is required if there is APIC ID > 255 even when running
+ * under KVM
+ */
+ if (max_physical_apicid > 255 || !kvm_para_available())
+ goto nox2apic;
+ /*
+ * without IR all CPUs can be addressed by IOAPIC/MSI
+ * only in physical mode
+ */
+ x2apic_force_phys();
+ }
- pr_info("Enabled Interrupt-remapping\n");
+ x2apic_enabled = 1;
if (x2apic_supported() && !x2apic_mode) {
x2apic_mode = 1;
@@ -1414,41 +1439,25 @@ void __init enable_IR_x2apic(void)
pr_info("Enabled x2apic\n");
}
-end_restore:
- if (ret)
- /*
- * IR enabling failed
- */
+nox2apic:
+ if (!ret) /* IR enabling failed */
restore_IO_APIC_setup(ioapic_entries);
-
unmask_8259A();
local_irq_restore(flags);
-end:
+out:
if (ioapic_entries)
free_ioapic_entries(ioapic_entries);
- if (!ret)
+ if (x2apic_enabled)
return;
-ir_failed:
if (x2apic_preenabled)
- panic("x2apic enabled by bios. But IR enabling failed");
+ panic("x2apic: enabled by BIOS but kernel init failed.");
else if (cpu_has_x2apic)
- pr_info("Not enabling x2apic,Intr-remapping\n");
-#else
- if (!cpu_has_x2apic)
- return;
-
- if (x2apic_preenabled)
- panic("x2apic enabled prior OS handover,"
- " enable CONFIG_X86_X2APIC, CONFIG_INTR_REMAP");
-#endif
-
- return;
+ pr_info("Not enabling x2apic, Intr-remapping init failed.\n");
}
-
#ifdef CONFIG_X86_64
/*
* Detect and enable local APICs on non-SMP boards.
@@ -1651,7 +1660,6 @@ int __init APIC_init_uniprocessor(void)
APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
pr_err("BIOS bug, local APIC 0x%x not detected!...\n",
boot_cpu_physical_apicid);
- clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
return -1;
}
#endif
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 7d52343356e8..78ca97fffa94 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -66,6 +66,8 @@
#include <asm/apic.h>
#define __apicdebuginit(type) static type __init
+#define for_each_irq_pin(entry, head) \
+ for (entry = head; entry; entry = entry->next)
/*
* Is the SiS APIC rmw bug present ?
@@ -116,15 +118,6 @@ static int __init parse_noapic(char *str)
}
early_param("noapic", parse_noapic);
-struct irq_pin_list;
-
-/*
- * This is performance-critical, we want to do it O(1)
- *
- * the indexing order of this array favors 1:1 mappings
- * between pins and IRQs.
- */
-
struct irq_pin_list {
int apic, pin;
struct irq_pin_list *next;
@@ -139,6 +132,11 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int node)
return pin;
}
+/*
+ * This is performance-critical, we want to do it O(1)
+ *
+ * Most irqs are mapped 1:1 with pins.
+ */
struct irq_cfg {
struct irq_pin_list *irq_2_pin;
cpumask_var_t domain;
@@ -411,13 +409,10 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
unsigned long flags;
spin_lock_irqsave(&ioapic_lock, flags);
- entry = cfg->irq_2_pin;
- for (;;) {
+ for_each_irq_pin(entry, cfg->irq_2_pin) {
unsigned int reg;
int pin;
- if (!entry)
- break;
pin = entry->pin;
reg = io_apic_read(entry->apic, 0x10 + pin*2);
/* Is the remote IRR bit set? */
@@ -425,9 +420,6 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
spin_unlock_irqrestore(&ioapic_lock, flags);
return true;
}
- if (!entry->next)
- break;
- entry = entry->next;
}
spin_unlock_irqrestore(&ioapic_lock, flags);
@@ -495,72 +487,68 @@ static void ioapic_mask_entry(int apic, int pin)
* shared ISA-space IRQs, so we have to support them. We are super
* fast in the common case, and fast for shared ISA-space IRQs.
*/
-static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
+static int
+add_pin_to_irq_node_nopanic(struct irq_cfg *cfg, int node, int apic, int pin)
{
- struct irq_pin_list *entry;
-
- entry = cfg->irq_2_pin;
- if (!entry) {
- entry = get_one_free_irq_2_pin(node);
- if (!entry) {
- printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n",
- apic, pin);
- return;
- }
- cfg->irq_2_pin = entry;
- entry->apic = apic;
- entry->pin = pin;
- return;
- }
+ struct irq_pin_list **last, *entry;
- while (entry->next) {
- /* not again, please */
+ /* don't allow duplicates */
+ last = &cfg->irq_2_pin;
+ for_each_irq_pin(entry, cfg->irq_2_pin) {
if (entry->apic == apic && entry->pin == pin)
- return;
-
- entry = entry->next;
+ return 0;
+ last = &entry->next;
}
- entry->next = get_one_free_irq_2_pin(node);
- entry = entry->next;
+ entry = get_one_free_irq_2_pin(node);
+ if (!entry) {
+ printk(KERN_ERR "can not alloc irq_pin_list (%d,%d,%d)\n",
+ node, apic, pin);
+ return -ENOMEM;
+ }
entry->apic = apic;
entry->pin = pin;
+
+ *last = entry;
+ return 0;
+}
+
+static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
+{
+ if (add_pin_to_irq_node_nopanic(cfg, node, apic, pin))
+ panic("IO-APIC: failed to add irq-pin. Can not proceed\n");
}
/*
* Reroute an IRQ to a different pin.
*/
static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node,
- int oldapic, int oldpin,
- int newapic, int newpin)
+ int oldapic, int oldpin,
+ int newapic, int newpin)
{
- struct irq_pin_list *entry = cfg->irq_2_pin;
- int replaced = 0;
+ struct irq_pin_list *entry;
- while (entry) {
+ for_each_irq_pin(entry, cfg->irq_2_pin) {
if (entry->apic == oldapic && entry->pin == oldpin) {
entry->apic = newapic;
entry->pin = newpin;
- replaced = 1;
/* every one is different, right? */
- break;
+ return;
}
- entry = entry->next;
}
- /* why? call replace before add? */
- if (!replaced)
- add_pin_to_irq_node(cfg, node, newapic, newpin);
+ /* old apic/pin didn't exist, so just add new ones */
+ add_pin_to_irq_node(cfg, node, newapic, newpin);
}
-static inline void io_apic_modify_irq(struct irq_cfg *cfg,
- int mask_and, int mask_or,
- void (*final)(struct irq_pin_list *entry))
+static void io_apic_modify_irq(struct irq_cfg *cfg,
+ int mask_and, int mask_or,
+ void (*final)(struct irq_pin_list *entry))
{
int pin;
struct irq_pin_list *entry;
- for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) {
+ for_each_irq_pin(entry, cfg->irq_2_pin) {
unsigned int reg;
pin = entry->pin;
reg = io_apic_read(entry->apic, 0x10 + pin * 2);
@@ -577,7 +565,6 @@ static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
}
-#ifdef CONFIG_X86_64
static void io_apic_sync(struct irq_pin_list *entry)
{
/*
@@ -593,11 +580,6 @@ static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
{
io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
}
-#else /* CONFIG_X86_32 */
-static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
-{
- io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, NULL);
-}
static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg)
{
@@ -610,7 +592,6 @@ static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg)
io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED,
IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
}
-#endif /* CONFIG_X86_32 */
static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
{
@@ -1699,12 +1680,8 @@ __apicdebuginit(void) print_IO_APIC(void)
if (!entry)
continue;
printk(KERN_DEBUG "IRQ%d ", irq);
- for (;;) {
+ for_each_irq_pin(entry, cfg->irq_2_pin)
printk("-> %d:%d", entry->apic, entry->pin);
- if (!entry->next)
- break;
- entry = entry->next;
- }
printk("\n");
}
@@ -2208,7 +2185,6 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
return was_pending;
}
-#ifdef CONFIG_X86_64
static int ioapic_retrigger_irq(unsigned int irq)
{
@@ -2221,14 +2197,6 @@ static int ioapic_retrigger_irq(unsigned int irq)
return 1;
}
-#else
-static int ioapic_retrigger_irq(unsigned int irq)
-{
- apic->send_IPI_self(irq_cfg(irq)->vector);
-
- return 1;
-}
-#endif
/*
* Level and edge triggered IO-APIC interrupts need different handling,
@@ -2266,13 +2234,9 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq
struct irq_pin_list *entry;
u8 vector = cfg->vector;
- entry = cfg->irq_2_pin;
- for (;;) {
+ for_each_irq_pin(entry, cfg->irq_2_pin) {
unsigned int reg;
- if (!entry)
- break;
-
apic = entry->apic;
pin = entry->pin;
/*
@@ -2285,9 +2249,6 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq
reg &= ~IO_APIC_REDIR_VECTOR_MASK;
reg |= vector;
io_apic_modify(apic, 0x10 + pin*2, reg);
- if (!entry->next)
- break;
- entry = entry->next;
}
}
@@ -2512,11 +2473,8 @@ atomic_t irq_mis_count;
static void ack_apic_level(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
-
-#ifdef CONFIG_X86_32
unsigned long v;
int i;
-#endif
struct irq_cfg *cfg;
int do_unmask_irq = 0;
@@ -2529,31 +2487,28 @@ static void ack_apic_level(unsigned int irq)
}
#endif
-#ifdef CONFIG_X86_32
/*
- * It appears there is an erratum which affects at least version 0x11
- * of I/O APIC (that's the 82093AA and cores integrated into various
- * chipsets). Under certain conditions a level-triggered interrupt is
- * erroneously delivered as edge-triggered one but the respective IRR
- * bit gets set nevertheless. As a result the I/O unit expects an EOI
- * message but it will never arrive and further interrupts are blocked
- * from the source. The exact reason is so far unknown, but the
- * phenomenon was observed when two consecutive interrupt requests
- * from a given source get delivered to the same CPU and the source is
- * temporarily disabled in between.
- *
- * A workaround is to simulate an EOI message manually. We achieve it
- * by setting the trigger mode to edge and then to level when the edge
- * trigger mode gets detected in the TMR of a local APIC for a
- * level-triggered interrupt. We mask the source for the time of the
- * operation to prevent an edge-triggered interrupt escaping meanwhile.
- * The idea is from Manfred Spraul. --macro
- */
+ * It appears there is an erratum which affects at least version 0x11
+ * of I/O APIC (that's the 82093AA and cores integrated into various
+ * chipsets). Under certain conditions a level-triggered interrupt is
+ * erroneously delivered as edge-triggered one but the respective IRR
+ * bit gets set nevertheless. As a result the I/O unit expects an EOI
+ * message but it will never arrive and further interrupts are blocked
+ * from the source. The exact reason is so far unknown, but the
+ * phenomenon was observed when two consecutive interrupt requests
+ * from a given source get delivered to the same CPU and the source is
+ * temporarily disabled in between.
+ *
+ * A workaround is to simulate an EOI message manually. We achieve it
+ * by setting the trigger mode to edge and then to level when the edge
+ * trigger mode gets detected in the TMR of a local APIC for a
+ * level-triggered interrupt. We mask the source for the time of the
+ * operation to prevent an edge-triggered interrupt escaping meanwhile.
+ * The idea is from Manfred Spraul. --macro
+ */
cfg = desc->chip_data;
i = cfg->vector;
-
v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
-#endif
/*
* We must acknowledge the irq before we move it or the acknowledge will
@@ -2595,7 +2550,7 @@ static void ack_apic_level(unsigned int irq)
unmask_IO_APIC_irq_desc(desc);
}
-#ifdef CONFIG_X86_32
+ /* Tail end of version 0x11 I/O APIC bug workaround */
if (!(v & (1 << (i & 0x1f)))) {
atomic_inc(&irq_mis_count);
spin_lock(&ioapic_lock);
@@ -2603,26 +2558,15 @@ static void ack_apic_level(unsigned int irq)
__unmask_and_level_IO_APIC_irq(cfg);
spin_unlock(&ioapic_lock);
}
-#endif
}
#ifdef CONFIG_INTR_REMAP
static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
{
- int apic, pin;
struct irq_pin_list *entry;
- entry = cfg->irq_2_pin;
- for (;;) {
-
- if (!entry)
- break;
-
- apic = entry->apic;
- pin = entry->pin;
- io_apic_eoi(apic, pin);
- entry = entry->next;
- }
+ for_each_irq_pin(entry, cfg->irq_2_pin)
+ io_apic_eoi(entry->apic, entry->pin);
}
static void
@@ -3238,8 +3182,7 @@ void destroy_irq(unsigned int irq)
cfg = desc->chip_data;
dynamic_irq_cleanup(irq);
/* connect back irq_cfg */
- if (desc)
- desc->chip_data = cfg;
+ desc->chip_data = cfg;
free_irte(irq);
spin_lock_irqsave(&vector_lock, flags);
@@ -3909,7 +3852,11 @@ static int __io_apic_set_pci_routing(struct device *dev, int irq,
*/
if (irq >= NR_IRQS_LEGACY) {
cfg = desc->chip_data;
- add_pin_to_irq_node(cfg, node, ioapic, pin);
+ if (add_pin_to_irq_node_nopanic(cfg, node, ioapic, pin)) {
+ printk(KERN_INFO "can not add pin %d for irq %d\n",
+ pin, irq);
+ return 0;
+ }
}
setup_IO_APIC_irq(ioapic, pin, irq, desc, trigger, polarity);
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index dbf5445727a9..e6b4f517fcfe 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -150,7 +150,7 @@ int safe_smp_processor_id(void)
{
int apicid, cpuid;
- if (!boot_cpu_has(X86_FEATURE_APIC))
+ if (!cpu_has_apic)
return 0;
apicid = hard_smp_processor_id();
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
index b3025b43b63a..cb66a22d98ad 100644
--- a/arch/x86/kernel/apic/nmi.c
+++ b/arch/x86/kernel/apic/nmi.c
@@ -39,7 +39,7 @@
int unknown_nmi_panic;
int nmi_watchdog_enabled;
-static cpumask_var_t backtrace_mask;
+static cpumask_t backtrace_mask __read_mostly;
/* nmi_active:
* >0: the lapic NMI watchdog is active, but can be disabled
@@ -66,7 +66,7 @@ static inline unsigned int get_nmi_count(int cpu)
static inline int mce_in_progress(void)
{
-#if defined(CONFIG_X86_NEW_MCE)
+#if defined(CONFIG_X86_MCE)
return atomic_read(&mce_entry) > 0;
#endif
return 0;
@@ -138,7 +138,6 @@ int __init check_nmi_watchdog(void)
if (!prev_nmi_count)
goto error;
- alloc_cpumask_var(&backtrace_mask, GFP_KERNEL|__GFP_ZERO);
printk(KERN_INFO "Testing NMI watchdog ... ");
#ifdef CONFIG_SMP
@@ -415,14 +414,17 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
}
/* We can be called before check_nmi_watchdog, hence NULL check. */
- if (backtrace_mask != NULL && cpumask_test_cpu(cpu, backtrace_mask)) {
+ if (cpumask_test_cpu(cpu, &backtrace_mask)) {
static DEFINE_SPINLOCK(lock); /* Serialise the printks */
spin_lock(&lock);
printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
+ show_regs(regs);
dump_stack();
spin_unlock(&lock);
- cpumask_clear_cpu(cpu, backtrace_mask);
+ cpumask_clear_cpu(cpu, &backtrace_mask);
+
+ rc = 1;
}
/* Could check oops_in_progress here too, but it's safer not to */
@@ -552,14 +554,18 @@ int do_nmi_callback(struct pt_regs *regs, int cpu)
return 0;
}
-void __trigger_all_cpu_backtrace(void)
+void arch_trigger_all_cpu_backtrace(void)
{
int i;
- cpumask_copy(backtrace_mask, cpu_online_mask);
+ cpumask_copy(&backtrace_mask, cpu_online_mask);
+
+ printk(KERN_INFO "sending NMI to all CPUs:\n");
+ apic->send_IPI_all(NMI_VECTOR);
+
/* Wait for up to 10 seconds for all CPUs to do the backtrace */
for (i = 0; i < 10 * 1000; i++) {
- if (cpumask_empty(backtrace_mask))
+ if (cpumask_empty(&backtrace_mask))
break;
mdelay(1);
}
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
index bc3e880f9b82..f3b1037076e4 100644
--- a/arch/x86/kernel/apic/probe_64.c
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -50,11 +50,11 @@ static struct apic *apic_probe[] __initdata = {
void __init default_setup_apic_routing(void)
{
#ifdef CONFIG_X86_X2APIC
- if (x2apic_mode && (apic != &apic_x2apic_phys &&
+ if (x2apic_mode
#ifdef CONFIG_X86_UV
- apic != &apic_x2apic_uv_x &&
+ && apic != &apic_x2apic_uv_x
#endif
- apic != &apic_x2apic_cluster)) {
+ ) {
if (x2apic_phys)
apic = &apic_x2apic_phys;
else
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 442b5508893f..bfe0815c4f96 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -403,7 +403,7 @@ static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue);
static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue);
static struct apm_user *user_list;
static DEFINE_SPINLOCK(user_list_lock);
-static const struct desc_struct bad_bios_desc = { { { 0, 0x00409200 } } };
+static struct desc_struct bad_bios_desc = GDT_ENTRY_INIT(0x4092, 0, 0);
static const char driver_version[] = "1.16ac"; /* no spaces */
@@ -2337,8 +2337,8 @@ static int __init apm_init(void)
* This is for buggy BIOS's that refer to (real mode) segment 0x40
* even though they are called in protected mode.
*/
- set_base(bad_bios_desc, __va((unsigned long)0x40 << 4));
- _set_limit((char *)&bad_bios_desc, 4095 - (0x40 << 4));
+ set_desc_base(&bad_bios_desc, (unsigned long)__va(0x40UL << 4));
+ set_desc_limit(&bad_bios_desc, 4095 - (0x40 << 4));
/*
* Set up the long jump entry point to the APM BIOS, which is called
@@ -2358,12 +2358,12 @@ static int __init apm_init(void)
* code to that CPU.
*/
gdt = get_cpu_gdt_table(0);
- set_base(gdt[APM_CS >> 3],
- __va((unsigned long)apm_info.bios.cseg << 4));
- set_base(gdt[APM_CS_16 >> 3],
- __va((unsigned long)apm_info.bios.cseg_16 << 4));
- set_base(gdt[APM_DS >> 3],
- __va((unsigned long)apm_info.bios.dseg << 4));
+ set_desc_base(&gdt[APM_CS >> 3],
+ (unsigned long)__va((unsigned long)apm_info.bios.cseg << 4));
+ set_desc_base(&gdt[APM_CS_16 >> 3],
+ (unsigned long)__va((unsigned long)apm_info.bios.cseg_16 << 4));
+ set_desc_base(&gdt[APM_DS >> 3],
+ (unsigned long)__va((unsigned long)apm_info.bios.dseg << 4));
proc_create("apm", 0, NULL, &apm_file_ops);
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 82c1b37d38f4..0e05b9f7b8ca 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -2,7 +2,7 @@
#include <linux/bitops.h>
#include <linux/mm.h>
-#include <asm/io.h>
+#include <linux/io.h>
#include <asm/processor.h>
#include <asm/apic.h>
#include <asm/cpu.h>
@@ -45,8 +45,8 @@ static void __cpuinit init_amd_k5(struct cpuinfo_x86 *c)
#define CBAR_ENB (0x80000000)
#define CBAR_KEY (0X000000CB)
if (c->x86_model == 9 || c->x86_model == 10) {
- if (inl (CBAR) & CBAR_ENB)
- outl (0 | CBAR_KEY, CBAR);
+ if (inl(CBAR) & CBAR_ENB)
+ outl(0 | CBAR_KEY, CBAR);
}
}
@@ -87,9 +87,10 @@ static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c)
d = d2-d;
if (d > 20*K6_BUG_LOOP)
- printk("system stability may be impaired when more than 32 MB are used.\n");
+ printk(KERN_CONT
+ "system stability may be impaired when more than 32 MB are used.\n");
else
- printk("probably OK (after B9730xxxx).\n");
+ printk(KERN_CONT "probably OK (after B9730xxxx).\n");
printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n");
}
@@ -219,8 +220,9 @@ static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) {
rdmsr(MSR_K7_CLK_CTL, l, h);
if ((l & 0xfff00000) != 0x20000000) {
- printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l,
- ((l & 0x000fffff)|0x20000000));
+ printk(KERN_INFO
+ "CPU: CLK_CTL MSR was %x. Reprogramming to %x\n",
+ l, ((l & 0x000fffff)|0x20000000));
wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h);
}
}
@@ -398,7 +400,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
u32 level;
level = cpuid_eax(1);
- if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)
+ if ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)
set_cpu_cap(c, X86_FEATURE_REP_GOOD);
}
if (c->x86 == 0x10 || c->x86 == 0x11)
@@ -487,27 +489,30 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
* benefit in doing so.
*/
if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
- printk(KERN_DEBUG "tseg: %010llx\n", tseg);
- if ((tseg>>PMD_SHIFT) <
+ printk(KERN_DEBUG "tseg: %010llx\n", tseg);
+ if ((tseg>>PMD_SHIFT) <
(max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) ||
- ((tseg>>PMD_SHIFT) <
+ ((tseg>>PMD_SHIFT) <
(max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) &&
- (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT))))
- set_memory_4k((unsigned long)__va(tseg), 1);
+ (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT))))
+ set_memory_4k((unsigned long)__va(tseg), 1);
}
}
#endif
}
#ifdef CONFIG_X86_32
-static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size)
+static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c,
+ unsigned int size)
{
/* AMD errata T13 (order #21922) */
if ((c->x86 == 6)) {
- if (c->x86_model == 3 && c->x86_mask == 0) /* Duron Rev A0 */
+ /* Duron Rev A0 */
+ if (c->x86_model == 3 && c->x86_mask == 0)
size = 64;
+ /* Tbird rev A1/A2 */
if (c->x86_model == 4 &&
- (c->x86_mask == 0 || c->x86_mask == 1)) /* Tbird rev A1/A2 */
+ (c->x86_mask == 0 || c->x86_mask == 1))
size = 256;
}
return size;
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index c8e315f1aa83..01a265212395 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -81,7 +81,7 @@ static void __init check_fpu(void)
boot_cpu_data.fdiv_bug = fdiv_bug;
if (boot_cpu_data.fdiv_bug)
- printk("Hmm, FPU with FDIV bug.\n");
+ printk(KERN_WARNING "Hmm, FPU with FDIV bug.\n");
}
static void __init check_hlt(void)
@@ -98,7 +98,7 @@ static void __init check_hlt(void)
halt();
halt();
halt();
- printk("OK.\n");
+ printk(KERN_CONT "OK.\n");
}
/*
@@ -122,9 +122,9 @@ static void __init check_popad(void)
* CPU hard. Too bad.
*/
if (res != 12345678)
- printk("Buggy.\n");
+ printk(KERN_CONT "Buggy.\n");
else
- printk("OK.\n");
+ printk(KERN_CONT "OK.\n");
#endif
}
@@ -156,7 +156,7 @@ void __init check_bugs(void)
{
identify_boot_cpu();
#ifndef CONFIG_SMP
- printk("CPU: ");
+ printk(KERN_INFO "CPU: ");
print_cpu_info(&boot_cpu_data);
#endif
check_config();
diff --git a/arch/x86/kernel/cpu/bugs_64.c b/arch/x86/kernel/cpu/bugs_64.c
index 9a3ed0649d4e..04f0fe5af83e 100644
--- a/arch/x86/kernel/cpu/bugs_64.c
+++ b/arch/x86/kernel/cpu/bugs_64.c
@@ -15,7 +15,7 @@ void __init check_bugs(void)
{
identify_boot_cpu();
#if !defined(CONFIG_SMP)
- printk("CPU: ");
+ printk(KERN_INFO "CPU: ");
print_cpu_info(&boot_cpu_data);
#endif
alternative_instructions();
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index f1961c07af9a..54a3ead5cfb2 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -18,8 +18,8 @@
#include <asm/hypervisor.h>
#include <asm/processor.h>
#include <asm/sections.h>
-#include <asm/topology.h>
-#include <asm/cpumask.h>
+#include <linux/topology.h>
+#include <linux/cpumask.h>
#include <asm/pgtable.h>
#include <asm/atomic.h>
#include <asm/proto.h>
@@ -28,13 +28,13 @@
#include <asm/desc.h>
#include <asm/i387.h>
#include <asm/mtrr.h>
-#include <asm/numa.h>
+#include <linux/numa.h>
#include <asm/asm.h>
#include <asm/cpu.h>
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/pat.h>
-#include <asm/smp.h>
+#include <linux/smp.h>
#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/uv/uv.h>
@@ -71,45 +71,45 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
* TLS descriptors are currently at a different place compared to i386.
* Hopefully nobody expects them at a fixed place (Wine?)
*/
- [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
- [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
- [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
- [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
- [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
- [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
+ [GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff),
+ [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff),
+ [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc093, 0, 0xfffff),
+ [GDT_ENTRY_DEFAULT_USER32_CS] = GDT_ENTRY_INIT(0xc0fb, 0, 0xfffff),
+ [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f3, 0, 0xfffff),
+ [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xa0fb, 0, 0xfffff),
#else
- [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } },
- [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } },
- [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } },
- [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff200 } } },
+ [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xc09a, 0, 0xfffff),
+ [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff),
+ [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xc0fa, 0, 0xfffff),
+ [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f2, 0, 0xfffff),
/*
* Segments used for calling PnP BIOS have byte granularity.
* They code segments and data segments have fixed 64k limits,
* the transfer segment sizes are set at run time.
*/
/* 32-bit code */
- [GDT_ENTRY_PNPBIOS_CS32] = { { { 0x0000ffff, 0x00409a00 } } },
+ [GDT_ENTRY_PNPBIOS_CS32] = GDT_ENTRY_INIT(0x409a, 0, 0xffff),
/* 16-bit code */
- [GDT_ENTRY_PNPBIOS_CS16] = { { { 0x0000ffff, 0x00009a00 } } },
+ [GDT_ENTRY_PNPBIOS_CS16] = GDT_ENTRY_INIT(0x009a, 0, 0xffff),
/* 16-bit data */
- [GDT_ENTRY_PNPBIOS_DS] = { { { 0x0000ffff, 0x00009200 } } },
+ [GDT_ENTRY_PNPBIOS_DS] = GDT_ENTRY_INIT(0x0092, 0, 0xffff),
/* 16-bit data */
- [GDT_ENTRY_PNPBIOS_TS1] = { { { 0x00000000, 0x00009200 } } },
+ [GDT_ENTRY_PNPBIOS_TS1] = GDT_ENTRY_INIT(0x0092, 0, 0),
/* 16-bit data */
- [GDT_ENTRY_PNPBIOS_TS2] = { { { 0x00000000, 0x00009200 } } },
+ [GDT_ENTRY_PNPBIOS_TS2] = GDT_ENTRY_INIT(0x0092, 0, 0),
/*
* The APM segments have byte granularity and their bases
* are set at run time. All have 64k limits.
*/
/* 32-bit code */
- [GDT_ENTRY_APMBIOS_BASE] = { { { 0x0000ffff, 0x00409a00 } } },
+ [GDT_ENTRY_APMBIOS_BASE] = GDT_ENTRY_INIT(0x409a, 0, 0xffff),
/* 16-bit code */
- [GDT_ENTRY_APMBIOS_BASE+1] = { { { 0x0000ffff, 0x00009a00 } } },
+ [GDT_ENTRY_APMBIOS_BASE+1] = GDT_ENTRY_INIT(0x009a, 0, 0xffff),
/* data */
- [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } },
+ [GDT_ENTRY_APMBIOS_BASE+2] = GDT_ENTRY_INIT(0x4092, 0, 0xffff),
- [GDT_ENTRY_ESPFIX_SS] = { { { 0x0000ffff, 0x00cf9200 } } },
- [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } },
+ [GDT_ENTRY_ESPFIX_SS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff),
+ [GDT_ENTRY_PERCPU] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff),
GDT_STACK_CANARY_INIT
#endif
} };
@@ -982,18 +982,26 @@ static __init int setup_disablecpuid(char *arg)
__setup("clearcpuid=", setup_disablecpuid);
#ifdef CONFIG_X86_64
-struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
+struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
DEFINE_PER_CPU_FIRST(union irq_stack_union,
irq_stack_union) __aligned(PAGE_SIZE);
-DEFINE_PER_CPU(char *, irq_stack_ptr) =
- init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
+/*
+ * The following four percpu variables are hot. Align current_task to
+ * cacheline size such that all four fall in the same cacheline.
+ */
+DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
+ &init_task;
+EXPORT_PER_CPU_SYMBOL(current_task);
DEFINE_PER_CPU(unsigned long, kernel_stack) =
(unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
EXPORT_PER_CPU_SYMBOL(kernel_stack);
+DEFINE_PER_CPU(char *, irq_stack_ptr) =
+ init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
+
DEFINE_PER_CPU(unsigned int, irq_count) = -1;
/*
@@ -1008,8 +1016,7 @@ static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
};
static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
- [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ])
- __aligned(PAGE_SIZE);
+ [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
/* May not be marked __init: used by software suspend */
void syscall_init(void)
@@ -1042,6 +1049,9 @@ DEFINE_PER_CPU(struct orig_ist, orig_ist);
#else /* CONFIG_X86_64 */
+DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
+EXPORT_PER_CPU_SYMBOL(current_task);
+
#ifdef CONFIG_CC_STACKPROTECTOR
DEFINE_PER_CPU(unsigned long, stack_canary);
#endif
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index 593171e967ef..19807b89f058 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -3,10 +3,10 @@
#include <linux/delay.h>
#include <linux/pci.h>
#include <asm/dma.h>
-#include <asm/io.h>
+#include <linux/io.h>
#include <asm/processor-cyrix.h>
#include <asm/processor-flags.h>
-#include <asm/timer.h>
+#include <linux/timer.h>
#include <asm/pci-direct.h>
#include <asm/tsc.h>
@@ -282,7 +282,8 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
* The 5510/5520 companion chips have a funky PIT.
*/
if (vendor == PCI_VENDOR_ID_CYRIX &&
- (device == PCI_DEVICE_ID_CYRIX_5510 || device == PCI_DEVICE_ID_CYRIX_5520))
+ (device == PCI_DEVICE_ID_CYRIX_5510 ||
+ device == PCI_DEVICE_ID_CYRIX_5520))
mark_tsc_unstable("cyrix 5510/5520 detected");
}
#endif
@@ -299,7 +300,8 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
* ? : 0x7x
* GX1 : 0x8x GX1 datasheet 56
*/
- if ((0x30 <= dir1 && dir1 <= 0x6f) || (0x80 <= dir1 && dir1 <= 0x8f))
+ if ((0x30 <= dir1 && dir1 <= 0x6f) ||
+ (0x80 <= dir1 && dir1 <= 0x8f))
geode_configure();
return;
} else { /* MediaGX */
@@ -427,9 +429,12 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c)
printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n");
local_irq_save(flags);
ccr3 = getCx86(CX86_CCR3);
- setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
- setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x80); /* enable cpuid */
- setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
+ /* enable MAPEN */
+ setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10);
+ /* enable cpuid */
+ setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x80);
+ /* disable MAPEN */
+ setCx86(CX86_CCR3, ccr3);
local_irq_restore(flags);
}
}
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index fb5b86af0b01..93ba8eeb100a 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -28,11 +28,10 @@
static inline void __cpuinit
detect_hypervisor_vendor(struct cpuinfo_x86 *c)
{
- if (vmware_platform()) {
+ if (vmware_platform())
c->x86_hyper_vendor = X86_HYPER_VENDOR_VMWARE;
- } else {
+ else
c->x86_hyper_vendor = X86_HYPER_VENDOR_NONE;
- }
}
unsigned long get_hypervisor_tsc_freq(void)
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 3260ab044996..80a722a071b5 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -7,17 +7,17 @@
#include <linux/sched.h>
#include <linux/thread_info.h>
#include <linux/module.h>
+#include <linux/uaccess.h>
#include <asm/processor.h>
#include <asm/pgtable.h>
#include <asm/msr.h>
-#include <asm/uaccess.h>
#include <asm/ds.h>
#include <asm/bugs.h>
#include <asm/cpu.h>
#ifdef CONFIG_X86_64
-#include <asm/topology.h>
+#include <linux/topology.h>
#include <asm/numa_64.h>
#endif
@@ -174,7 +174,8 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
#ifdef CONFIG_X86_F00F_BUG
/*
* All current models of Pentium and Pentium with MMX technology CPUs
- * have the F0 0F bug, which lets nonprivileged users lock up the system.
+ * have the F0 0F bug, which lets nonprivileged users lock up the
+ * system.
* Note that the workaround only should be initialized once...
*/
c->f00f_bug = 0;
@@ -207,7 +208,7 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n");
printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n");
lo |= MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE;
- wrmsr (MSR_IA32_MISC_ENABLE, lo, hi);
+ wrmsr(MSR_IA32_MISC_ENABLE, lo, hi);
}
}
@@ -283,7 +284,7 @@ static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c)
/* Intel has a non-standard dependency on %ecx for this CPUID level. */
cpuid_count(4, 0, &eax, &ebx, &ecx, &edx);
if (eax & 0x1f)
- return ((eax >> 26) + 1);
+ return (eax >> 26) + 1;
else
return 1;
}
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 789efe217e1a..306bf0dca061 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -3,7 +3,7 @@
*
* Changes:
* Venkatesh Pallipadi : Adding cache identification through cpuid(4)
- * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
+ * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
* Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD.
*/
@@ -16,7 +16,7 @@
#include <linux/pci.h>
#include <asm/processor.h>
-#include <asm/smp.h>
+#include <linux/smp.h>
#include <asm/k8.h>
#define LVL_1_INST 1
@@ -25,14 +25,15 @@
#define LVL_3 4
#define LVL_TRACE 5
-struct _cache_table
-{
+struct _cache_table {
unsigned char descriptor;
char cache_type;
short size;
};
-/* all the cache descriptor types we care about (no TLB or trace cache entries) */
+/* All the cache descriptor types we care about (no TLB or
+ trace cache entries) */
+
static const struct _cache_table __cpuinitconst cache_table[] =
{
{ 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */
@@ -105,8 +106,7 @@ static const struct _cache_table __cpuinitconst cache_table[] =
};
-enum _cache_type
-{
+enum _cache_type {
CACHE_TYPE_NULL = 0,
CACHE_TYPE_DATA = 1,
CACHE_TYPE_INST = 2,
@@ -170,31 +170,31 @@ unsigned short num_cache_leaves;
Maybe later */
union l1_cache {
struct {
- unsigned line_size : 8;
- unsigned lines_per_tag : 8;
- unsigned assoc : 8;
- unsigned size_in_kb : 8;
+ unsigned line_size:8;
+ unsigned lines_per_tag:8;
+ unsigned assoc:8;
+ unsigned size_in_kb:8;
};
unsigned val;
};
union l2_cache {
struct {
- unsigned line_size : 8;
- unsigned lines_per_tag : 4;
- unsigned assoc : 4;
- unsigned size_in_kb : 16;
+ unsigned line_size:8;
+ unsigned lines_per_tag:4;
+ unsigned assoc:4;
+ unsigned size_in_kb:16;
};
unsigned val;
};
union l3_cache {
struct {
- unsigned line_size : 8;
- unsigned lines_per_tag : 4;
- unsigned assoc : 4;
- unsigned res : 2;
- unsigned size_encoded : 14;
+ unsigned line_size:8;
+ unsigned lines_per_tag:4;
+ unsigned assoc:4;
+ unsigned res:2;
+ unsigned size_encoded:14;
};
unsigned val;
};
@@ -350,7 +350,8 @@ static int __cpuinit find_num_cache_leaves(void)
unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
{
- unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */
+ /* Cache sizes */
+ unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
@@ -377,8 +378,8 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
retval = cpuid4_cache_lookup_regs(i, &this_leaf);
if (retval >= 0) {
- switch(this_leaf.eax.split.level) {
- case 1:
+ switch (this_leaf.eax.split.level) {
+ case 1:
if (this_leaf.eax.split.type ==
CACHE_TYPE_DATA)
new_l1d = this_leaf.size/1024;
@@ -386,19 +387,20 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
CACHE_TYPE_INST)
new_l1i = this_leaf.size/1024;
break;
- case 2:
+ case 2:
new_l2 = this_leaf.size/1024;
num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
index_msb = get_count_order(num_threads_sharing);
l2_id = c->apicid >> index_msb;
break;
- case 3:
+ case 3:
new_l3 = this_leaf.size/1024;
num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
- index_msb = get_count_order(num_threads_sharing);
+ index_msb = get_count_order(
+ num_threads_sharing);
l3_id = c->apicid >> index_msb;
break;
- default:
+ default:
break;
}
}
@@ -421,22 +423,21 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
/* Number of times to iterate */
n = cpuid_eax(2) & 0xFF;
- for ( i = 0 ; i < n ; i++ ) {
+ for (i = 0 ; i < n ; i++) {
cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
/* If bit 31 is set, this is an unknown format */
- for ( j = 0 ; j < 3 ; j++ ) {
- if (regs[j] & (1 << 31)) regs[j] = 0;
- }
+ for (j = 0 ; j < 3 ; j++)
+ if (regs[j] & (1 << 31))
+ regs[j] = 0;
/* Byte 0 is level count, not a descriptor */
- for ( j = 1 ; j < 16 ; j++ ) {
+ for (j = 1 ; j < 16 ; j++) {
unsigned char des = dp[j];
unsigned char k = 0;
/* look up this descriptor in the table */
- while (cache_table[k].descriptor != 0)
- {
+ while (cache_table[k].descriptor != 0) {
if (cache_table[k].descriptor == des) {
if (only_trace && cache_table[k].cache_type != LVL_TRACE)
break;
@@ -488,14 +489,14 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
}
if (trace)
- printk (KERN_INFO "CPU: Trace cache: %dK uops", trace);
- else if ( l1i )
- printk (KERN_INFO "CPU: L1 I cache: %dK", l1i);
+ printk(KERN_INFO "CPU: Trace cache: %dK uops", trace);
+ else if (l1i)
+ printk(KERN_INFO "CPU: L1 I cache: %dK", l1i);
if (l1d)
- printk(", L1 D cache: %dK\n", l1d);
+ printk(KERN_CONT ", L1 D cache: %dK\n", l1d);
else
- printk("\n");
+ printk(KERN_CONT "\n");
if (l2)
printk(KERN_INFO "CPU: L2 cache: %dK\n", l2);
@@ -558,8 +559,13 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
}
}
#else
-static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) {}
-static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) {}
+static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
+{
+}
+
+static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
+{
+}
#endif
static void __cpuinit free_cache_attributes(unsigned int cpu)
@@ -645,7 +651,7 @@ static DEFINE_PER_CPU(struct _index_kobject *, index_kobject);
static ssize_t show_##file_name \
(struct _cpuid4_info *this_leaf, char *buf) \
{ \
- return sprintf (buf, "%lu\n", (unsigned long)this_leaf->object + val); \
+ return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \
}
show_one_plus(level, eax.split.level, 0);
@@ -656,7 +662,7 @@ show_one_plus(number_of_sets, ecx.split.number_of_sets, 1);
static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf)
{
- return sprintf (buf, "%luK\n", this_leaf->size / 1024);
+ return sprintf(buf, "%luK\n", this_leaf->size / 1024);
}
static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
@@ -669,7 +675,7 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
const struct cpumask *mask;
mask = to_cpumask(this_leaf->shared_cpu_map);
- n = type?
+ n = type ?
cpulist_scnprintf(buf, len-2, mask) :
cpumask_scnprintf(buf, len-2, mask);
buf[n++] = '\n';
@@ -800,7 +806,7 @@ static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
show_cache_disable_1, store_cache_disable_1);
-static struct attribute * default_attrs[] = {
+static struct attribute *default_attrs[] = {
&type.attr,
&level.attr,
&coherency_line_size.attr,
@@ -815,7 +821,7 @@ static struct attribute * default_attrs[] = {
NULL
};
-static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf)
+static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
{
struct _cache_attr *fattr = to_attr(attr);
struct _index_kobject *this_leaf = to_object(kobj);
@@ -828,8 +834,8 @@ static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf)
return ret;
}
-static ssize_t store(struct kobject * kobj, struct attribute * attr,
- const char * buf, size_t count)
+static ssize_t store(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t count)
{
struct _cache_attr *fattr = to_attr(attr);
struct _index_kobject *this_leaf = to_object(kobj);
@@ -883,7 +889,7 @@ static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu)
goto err_out;
per_cpu(index_kobject, cpu) = kzalloc(
- sizeof(struct _index_kobject ) * num_cache_leaves, GFP_KERNEL);
+ sizeof(struct _index_kobject) * num_cache_leaves, GFP_KERNEL);
if (unlikely(per_cpu(index_kobject, cpu) == NULL))
goto err_out;
@@ -917,7 +923,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
}
for (i = 0; i < num_cache_leaves; i++) {
- this_object = INDEX_KOBJECT_PTR(cpu,i);
+ this_object = INDEX_KOBJECT_PTR(cpu, i);
this_object->cpu = cpu;
this_object->index = i;
retval = kobject_init_and_add(&(this_object->kobj),
@@ -925,9 +931,8 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
per_cpu(cache_kobject, cpu),
"index%1lu", i);
if (unlikely(retval)) {
- for (j = 0; j < i; j++) {
- kobject_put(&(INDEX_KOBJECT_PTR(cpu,j)->kobj));
- }
+ for (j = 0; j < i; j++)
+ kobject_put(&(INDEX_KOBJECT_PTR(cpu, j)->kobj));
kobject_put(per_cpu(cache_kobject, cpu));
cpuid4_cache_sysfs_exit(cpu);
return retval;
@@ -952,7 +957,7 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map));
for (i = 0; i < num_cache_leaves; i++)
- kobject_put(&(INDEX_KOBJECT_PTR(cpu,i)->kobj));
+ kobject_put(&(INDEX_KOBJECT_PTR(cpu, i)->kobj));
kobject_put(per_cpu(cache_kobject, cpu));
cpuid4_cache_sysfs_exit(cpu);
}
@@ -977,8 +982,7 @@ static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb,
return NOTIFY_OK;
}
-static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier =
-{
+static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = {
.notifier_call = cacheinfo_cpu_callback,
};
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile
index 188a1ca5ad2b..4ac6d48fe11b 100644
--- a/arch/x86/kernel/cpu/mcheck/Makefile
+++ b/arch/x86/kernel/cpu/mcheck/Makefile
@@ -1,11 +1,8 @@
-obj-y = mce.o
+obj-y = mce.o mce-severity.o
-obj-$(CONFIG_X86_NEW_MCE) += mce-severity.o
-obj-$(CONFIG_X86_OLD_MCE) += k7.o p4.o p6.o
obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o
obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o
obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o
-obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o
obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o
obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o
diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c
deleted file mode 100644
index b945d5dbc609..000000000000
--- a/arch/x86/kernel/cpu/mcheck/k7.c
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Athlon specific Machine Check Exception Reporting
- * (C) Copyright 2002 Dave Jones <davej@redhat.com>
- */
-#include <linux/interrupt.h>
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/smp.h>
-
-#include <asm/processor.h>
-#include <asm/system.h>
-#include <asm/mce.h>
-#include <asm/msr.h>
-
-/* Machine Check Handler For AMD Athlon/Duron: */
-static void k7_machine_check(struct pt_regs *regs, long error_code)
-{
- u32 alow, ahigh, high, low;
- u32 mcgstl, mcgsth;
- int recover = 1;
- int i;
-
- rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
- if (mcgstl & (1<<0)) /* Recoverable ? */
- recover = 0;
-
- printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
- smp_processor_id(), mcgsth, mcgstl);
-
- for (i = 1; i < nr_mce_banks; i++) {
- rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
- if (high & (1<<31)) {
- char misc[20];
- char addr[24];
-
- misc[0] = '\0';
- addr[0] = '\0';
-
- if (high & (1<<29))
- recover |= 1;
- if (high & (1<<25))
- recover |= 2;
- high &= ~(1<<31);
-
- if (high & (1<<27)) {
- rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
- snprintf(misc, 20, "[%08x%08x]", ahigh, alow);
- }
- if (high & (1<<26)) {
- rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
- snprintf(addr, 24, " at %08x%08x", ahigh, alow);
- }
-
- printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
- smp_processor_id(), i, high, low, misc, addr);
-
- /* Clear it: */
- wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
- /* Serialize: */
- wmb();
- add_taint(TAINT_MACHINE_CHECK);
- }
- }
-
- if (recover & 2)
- panic("CPU context corrupt");
- if (recover & 1)
- panic("Unable to continue");
-
- printk(KERN_EMERG "Attempting to continue.\n");
-
- mcgstl &= ~(1<<2);
- wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
-}
-
-
-/* AMD K7 machine check is Intel like: */
-void amd_mcheck_init(struct cpuinfo_x86 *c)
-{
- u32 l, h;
- int i;
-
- if (!cpu_has(c, X86_FEATURE_MCE))
- return;
-
- machine_check_vector = k7_machine_check;
- /* Make sure the vector pointer is visible before we enable MCEs: */
- wmb();
-
- printk(KERN_INFO "Intel machine check architecture supported.\n");
-
- rdmsr(MSR_IA32_MCG_CAP, l, h);
- if (l & (1<<8)) /* Control register present ? */
- wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
- nr_mce_banks = l & 0xff;
-
- /*
- * Clear status for MC index 0 separately, we don't touch CTL,
- * as some K7 Athlons cause spurious MCEs when its enabled:
- */
- if (boot_cpu_data.x86 == 6) {
- wrmsr(MSR_IA32_MC0_STATUS, 0x0, 0x0);
- i = 1;
- } else
- i = 0;
-
- for (; i < nr_mce_banks; i++) {
- wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
- wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
- }
-
- set_in_cr4(X86_CR4_MCE);
- printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
- smp_processor_id());
-}
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 54dcb8ff12e5..6bd51e7ba87b 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -1,3 +1,4 @@
+#include <linux/sysdev.h>
#include <asm/mce.h>
enum severity_level {
@@ -10,6 +11,19 @@ enum severity_level {
MCE_PANIC_SEVERITY,
};
+#define ATTR_LEN 16
+
+/* One object for each MCE bank, shared by all CPUs */
+struct mce_bank {
+ u64 ctl; /* subevents to enable */
+ unsigned char init; /* initialise bank? */
+ struct sysdev_attribute attr; /* sysdev attribute */
+ char attrname[ATTR_LEN]; /* attribute name */
+};
+
int mce_severity(struct mce *a, int tolerant, char **msg);
extern int mce_ser;
+
+extern struct mce_bank *mce_banks;
+
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index ff0807f97056..51f7c725dab5 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -209,8 +209,6 @@ static int __init severities_debugfs_init(void)
return 0;
err_out:
- if (fseverities_coverage)
- debugfs_remove(fseverities_coverage);
if (dmce)
debugfs_remove(dmce);
return -ENOMEM;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 1cfb623ce11c..97cd5b0baab4 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -45,21 +45,8 @@
#include "mce-internal.h"
-/* Handle unconfigured int18 (should never happen) */
-static void unexpected_machine_check(struct pt_regs *regs, long error_code)
-{
- printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n",
- smp_processor_id());
-}
-
-/* Call the installed machine check handler for this CPU setup. */
-void (*machine_check_vector)(struct pt_regs *, long error_code) =
- unexpected_machine_check;
-
int mce_disabled __read_mostly;
-#ifdef CONFIG_X86_NEW_MCE
-
#define MISC_MCELOG_MINOR 227
#define SPINUNIT 100 /* 100ns */
@@ -77,7 +64,6 @@ DEFINE_PER_CPU(unsigned, mce_exception_count);
*/
static int tolerant __read_mostly = 1;
static int banks __read_mostly;
-static u64 *bank __read_mostly;
static int rip_msr __read_mostly;
static int mce_bootlog __read_mostly = -1;
static int monarch_timeout __read_mostly = -1;
@@ -87,13 +73,13 @@ int mce_cmci_disabled __read_mostly;
int mce_ignore_ce __read_mostly;
int mce_ser __read_mostly;
+struct mce_bank *mce_banks __read_mostly;
+
/* User mode helper program triggered by machine check event */
static unsigned long mce_need_notify;
static char mce_helper[128];
static char *mce_helper_argv[2] = { mce_helper, NULL };
-static unsigned long dont_init_banks;
-
static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
static DEFINE_PER_CPU(struct mce, mces_seen);
static int cpu_missing;
@@ -104,11 +90,6 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
[0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
};
-static inline int skip_bank_init(int i)
-{
- return i < BITS_PER_LONG && test_bit(i, &dont_init_banks);
-}
-
static DEFINE_PER_CPU(struct work_struct, mce_work);
/* Do initial initialization of a struct mce */
@@ -242,7 +223,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
/*
* Make sure only one CPU runs in machine check panic
*/
- if (atomic_add_return(1, &mce_paniced) > 1)
+ if (atomic_inc_return(&mce_paniced) > 1)
wait_for_panic();
barrier();
@@ -286,11 +267,11 @@ static int msr_to_offset(u32 msr)
unsigned bank = __get_cpu_var(injectm.bank);
if (msr == rip_msr)
return offsetof(struct mce, ip);
- if (msr == MSR_IA32_MC0_STATUS + bank*4)
+ if (msr == MSR_IA32_MCx_STATUS(bank))
return offsetof(struct mce, status);
- if (msr == MSR_IA32_MC0_ADDR + bank*4)
+ if (msr == MSR_IA32_MCx_ADDR(bank))
return offsetof(struct mce, addr);
- if (msr == MSR_IA32_MC0_MISC + bank*4)
+ if (msr == MSR_IA32_MCx_MISC(bank))
return offsetof(struct mce, misc);
if (msr == MSR_IA32_MCG_STATUS)
return offsetof(struct mce, mcgstatus);
@@ -495,7 +476,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
for (i = 0; i < banks; i++) {
- if (!bank[i] || !test_bit(i, *b))
+ if (!mce_banks[i].ctl || !test_bit(i, *b))
continue;
m.misc = 0;
@@ -504,7 +485,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
m.tsc = 0;
barrier();
- m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4);
+ m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
if (!(m.status & MCI_STATUS_VAL))
continue;
@@ -519,9 +500,9 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
continue;
if (m.status & MCI_STATUS_MISCV)
- m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4);
+ m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
if (m.status & MCI_STATUS_ADDRV)
- m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4);
+ m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));
if (!(flags & MCP_TIMESTAMP))
m.tsc = 0;
@@ -537,7 +518,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
/*
* Clear state for this bank.
*/
- mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
+ mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
}
/*
@@ -558,7 +539,7 @@ static int mce_no_way_out(struct mce *m, char **msg)
int i;
for (i = 0; i < banks; i++) {
- m->status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4);
+ m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY)
return 1;
}
@@ -705,7 +686,7 @@ static int mce_start(int *no_way_out)
* global_nwo should be updated before mce_callin
*/
smp_wmb();
- order = atomic_add_return(1, &mce_callin);
+ order = atomic_inc_return(&mce_callin);
/*
* Wait for everyone.
@@ -842,7 +823,7 @@ static void mce_clear_state(unsigned long *toclear)
for (i = 0; i < banks; i++) {
if (test_bit(i, toclear))
- mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
+ mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
}
}
@@ -916,14 +897,14 @@ void do_machine_check(struct pt_regs *regs, long error_code)
order = mce_start(&no_way_out);
for (i = 0; i < banks; i++) {
__clear_bit(i, toclear);
- if (!bank[i])
+ if (!mce_banks[i].ctl)
continue;
m.misc = 0;
m.addr = 0;
m.bank = i;
- m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4);
+ m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
if ((m.status & MCI_STATUS_VAL) == 0)
continue;
@@ -964,9 +945,9 @@ void do_machine_check(struct pt_regs *regs, long error_code)
kill_it = 1;
if (m.status & MCI_STATUS_MISCV)
- m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4);
+ m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
if (m.status & MCI_STATUS_ADDRV)
- m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4);
+ m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));
/*
* Action optional error. Queue address for later processing.
@@ -1159,10 +1140,25 @@ int mce_notify_irq(void)
}
EXPORT_SYMBOL_GPL(mce_notify_irq);
+static int mce_banks_init(void)
+{
+ int i;
+
+ mce_banks = kzalloc(banks * sizeof(struct mce_bank), GFP_KERNEL);
+ if (!mce_banks)
+ return -ENOMEM;
+ for (i = 0; i < banks; i++) {
+ struct mce_bank *b = &mce_banks[i];
+ b->ctl = -1ULL;
+ b->init = 1;
+ }
+ return 0;
+}
+
/*
* Initialize Machine Checks for a CPU.
*/
-static int mce_cap_init(void)
+static int __cpuinit mce_cap_init(void)
{
unsigned b;
u64 cap;
@@ -1182,11 +1178,10 @@ static int mce_cap_init(void)
/* Don't support asymmetric configurations today */
WARN_ON(banks != 0 && b != banks);
banks = b;
- if (!bank) {
- bank = kmalloc(banks * sizeof(u64), GFP_KERNEL);
- if (!bank)
- return -ENOMEM;
- memset(bank, 0xff, banks * sizeof(u64));
+ if (!mce_banks) {
+ int err = mce_banks_init();
+ if (err)
+ return err;
}
/* Use accurate RIP reporting if available. */
@@ -1218,15 +1213,16 @@ static void mce_init(void)
wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
for (i = 0; i < banks; i++) {
- if (skip_bank_init(i))
+ struct mce_bank *b = &mce_banks[i];
+ if (!b->init)
continue;
- wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
- wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
+ wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
+ wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
}
}
/* Add per CPU specific workarounds here */
-static void mce_cpu_quirks(struct cpuinfo_x86 *c)
+static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
{
/* This should be disabled by the BIOS, but isn't always */
if (c->x86_vendor == X86_VENDOR_AMD) {
@@ -1236,7 +1232,7 @@ static void mce_cpu_quirks(struct cpuinfo_x86 *c)
* trips off incorrectly with the IOMMU & 3ware
* & Cerberus:
*/
- clear_bit(10, (unsigned long *)&bank[4]);
+ clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
}
if (c->x86 <= 17 && mce_bootlog < 0) {
/*
@@ -1250,7 +1246,7 @@ static void mce_cpu_quirks(struct cpuinfo_x86 *c)
* by default.
*/
if (c->x86 == 6 && banks > 0)
- bank[0] = 0;
+ mce_banks[0].ctl = 0;
}
if (c->x86_vendor == X86_VENDOR_INTEL) {
@@ -1263,8 +1259,8 @@ static void mce_cpu_quirks(struct cpuinfo_x86 *c)
* valid event later, merely don't write CTL0.
*/
- if (c->x86 == 6 && c->x86_model < 0x1A)
- __set_bit(0, &dont_init_banks);
+ if (c->x86 == 6 && c->x86_model < 0x1A && banks > 0)
+ mce_banks[0].init = 0;
/*
* All newer Intel systems support MCE broadcasting. Enable
@@ -1273,6 +1269,10 @@ static void mce_cpu_quirks(struct cpuinfo_x86 *c)
if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) &&
monarch_timeout < 0)
monarch_timeout = USEC_PER_SEC;
+
+ /* There are also broken BIOSes on some Pentium M systems. */
+ if (c->x86 == 6 && c->x86_model == 13 && mce_bootlog < 0)
+ mce_bootlog = 0;
}
if (monarch_timeout < 0)
monarch_timeout = 0;
@@ -1324,6 +1324,17 @@ static void mce_init_timer(void)
add_timer_on(t, smp_processor_id());
}
+/* Handle unconfigured int18 (should never happen) */
+static void unexpected_machine_check(struct pt_regs *regs, long error_code)
+{
+ printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n",
+ smp_processor_id());
+}
+
+/* Call the installed machine check handler for this CPU setup. */
+void (*machine_check_vector)(struct pt_regs *, long error_code) =
+ unexpected_machine_check;
+
/*
* Called for each booted CPU to set up machine checks.
* Must be called with preempt off:
@@ -1538,8 +1549,10 @@ static struct miscdevice mce_log_device = {
*/
static int __init mcheck_enable(char *str)
{
- if (*str == 0)
+ if (*str == 0) {
enable_p5_mce();
+ return 1;
+ }
if (*str == '=')
str++;
if (!strcmp(str, "off"))
@@ -1580,8 +1593,9 @@ static int mce_disable(void)
int i;
for (i = 0; i < banks; i++) {
- if (!skip_bank_init(i))
- wrmsrl(MSR_IA32_MC0_CTL + i*4, 0);
+ struct mce_bank *b = &mce_banks[i];
+ if (b->init)
+ wrmsrl(MSR_IA32_MCx_CTL(i), 0);
}
return 0;
}
@@ -1656,14 +1670,15 @@ DEFINE_PER_CPU(struct sys_device, mce_dev);
__cpuinitdata
void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
-static struct sysdev_attribute *bank_attrs;
+static inline struct mce_bank *attr_to_bank(struct sysdev_attribute *attr)
+{
+ return container_of(attr, struct mce_bank, attr);
+}
static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr,
char *buf)
{
- u64 b = bank[attr - bank_attrs];
-
- return sprintf(buf, "%llx\n", b);
+ return sprintf(buf, "%llx\n", attr_to_bank(attr)->ctl);
}
static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
@@ -1674,7 +1689,7 @@ static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
if (strict_strtoull(buf, 0, &new) < 0)
return -EINVAL;
- bank[attr - bank_attrs] = new;
+ attr_to_bank(attr)->ctl = new;
mce_restart();
return size;
@@ -1816,7 +1831,7 @@ static __cpuinit int mce_create_device(unsigned int cpu)
}
for (j = 0; j < banks; j++) {
err = sysdev_create_file(&per_cpu(mce_dev, cpu),
- &bank_attrs[j]);
+ &mce_banks[j].attr);
if (err)
goto error2;
}
@@ -1825,10 +1840,10 @@ static __cpuinit int mce_create_device(unsigned int cpu)
return 0;
error2:
while (--j >= 0)
- sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[j]);
+ sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[j].attr);
error:
while (--i >= 0)
- sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
+ sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[i].attr);
sysdev_unregister(&per_cpu(mce_dev, cpu));
@@ -1846,7 +1861,7 @@ static __cpuinit void mce_remove_device(unsigned int cpu)
sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
for (i = 0; i < banks; i++)
- sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]);
+ sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[i].attr);
sysdev_unregister(&per_cpu(mce_dev, cpu));
cpumask_clear_cpu(cpu, mce_dev_initialized);
@@ -1863,8 +1878,9 @@ static void mce_disable_cpu(void *h)
if (!(action & CPU_TASKS_FROZEN))
cmci_clear();
for (i = 0; i < banks; i++) {
- if (!skip_bank_init(i))
- wrmsrl(MSR_IA32_MC0_CTL + i*4, 0);
+ struct mce_bank *b = &mce_banks[i];
+ if (b->init)
+ wrmsrl(MSR_IA32_MCx_CTL(i), 0);
}
}
@@ -1879,8 +1895,9 @@ static void mce_reenable_cpu(void *h)
if (!(action & CPU_TASKS_FROZEN))
cmci_reenable();
for (i = 0; i < banks; i++) {
- if (!skip_bank_init(i))
- wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]);
+ struct mce_bank *b = &mce_banks[i];
+ if (b->init)
+ wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
}
}
@@ -1928,35 +1945,21 @@ static struct notifier_block mce_cpu_notifier __cpuinitdata = {
.notifier_call = mce_cpu_callback,
};
-static __init int mce_init_banks(void)
+static __init void mce_init_banks(void)
{
int i;
- bank_attrs = kzalloc(sizeof(struct sysdev_attribute) * banks,
- GFP_KERNEL);
- if (!bank_attrs)
- return -ENOMEM;
-
for (i = 0; i < banks; i++) {
- struct sysdev_attribute *a = &bank_attrs[i];
+ struct mce_bank *b = &mce_banks[i];
+ struct sysdev_attribute *a = &b->attr;
- a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i);
- if (!a->attr.name)
- goto nomem;
+ a->attr.name = b->attrname;
+ snprintf(b->attrname, ATTR_LEN, "bank%d", i);
a->attr.mode = 0644;
a->show = show_bank;
a->store = set_bank;
}
- return 0;
-
-nomem:
- while (--i >= 0)
- kfree(bank_attrs[i].attr.name);
- kfree(bank_attrs);
- bank_attrs = NULL;
-
- return -ENOMEM;
}
static __init int mce_init_device(void)
@@ -1969,9 +1972,7 @@ static __init int mce_init_device(void)
zalloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL);
- err = mce_init_banks();
- if (err)
- return err;
+ mce_init_banks();
err = sysdev_class_register(&mce_sysclass);
if (err)
@@ -1991,51 +1992,6 @@ static __init int mce_init_device(void)
device_initcall(mce_init_device);
-#else /* CONFIG_X86_OLD_MCE: */
-
-int nr_mce_banks;
-EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */
-
-/* This has to be run for each processor */
-void mcheck_init(struct cpuinfo_x86 *c)
-{
- if (mce_disabled)
- return;
-
- switch (c->x86_vendor) {
- case X86_VENDOR_AMD:
- amd_mcheck_init(c);
- break;
-
- case X86_VENDOR_INTEL:
- if (c->x86 == 5)
- intel_p5_mcheck_init(c);
- if (c->x86 == 6)
- intel_p6_mcheck_init(c);
- if (c->x86 == 15)
- intel_p4_mcheck_init(c);
- break;
-
- case X86_VENDOR_CENTAUR:
- if (c->x86 == 5)
- winchip_mcheck_init(c);
- break;
-
- default:
- break;
- }
- printk(KERN_INFO "mce: CPU supports %d MCE banks\n", nr_mce_banks);
-}
-
-static int __init mcheck_enable(char *str)
-{
- mce_p5_enabled = 1;
- return 1;
-}
-__setup("mce", mcheck_enable);
-
-#endif /* CONFIG_X86_OLD_MCE */
-
/*
* Old style boot options parsing. Only for compatibility.
*/
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index e1acec0f7a32..889f665fe93d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -90,7 +90,7 @@ static void cmci_discover(int banks, int boot)
if (test_bit(i, owned))
continue;
- rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
+ rdmsrl(MSR_IA32_MCx_CTL2(i), val);
/* Already owned by someone else? */
if (val & CMCI_EN) {
@@ -101,8 +101,8 @@ static void cmci_discover(int banks, int boot)
}
val |= CMCI_EN | CMCI_THRESHOLD;
- wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
- rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
+ wrmsrl(MSR_IA32_MCx_CTL2(i), val);
+ rdmsrl(MSR_IA32_MCx_CTL2(i), val);
/* Did the enable bit stick? -- the bank supports CMCI */
if (val & CMCI_EN) {
@@ -152,9 +152,9 @@ void cmci_clear(void)
if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
continue;
/* Disable CMCI */
- rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
+ rdmsrl(MSR_IA32_MCx_CTL2(i), val);
val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK);
- wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
+ wrmsrl(MSR_IA32_MCx_CTL2(i), val);
__clear_bit(i, __get_cpu_var(mce_banks_owned));
}
spin_unlock_irqrestore(&cmci_discover_lock, flags);
diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c
deleted file mode 100644
index f5f2d6f71fb6..000000000000
--- a/arch/x86/kernel/cpu/mcheck/non-fatal.c
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Non Fatal Machine Check Exception Reporting
- *
- * (C) Copyright 2002 Dave Jones. <davej@redhat.com>
- *
- * This file contains routines to check for non-fatal MCEs every 15s
- *
- */
-#include <linux/interrupt.h>
-#include <linux/workqueue.h>
-#include <linux/jiffies.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/smp.h>
-
-#include <asm/processor.h>
-#include <asm/system.h>
-#include <asm/mce.h>
-#include <asm/msr.h>
-
-static int firstbank;
-
-#define MCE_RATE (15*HZ) /* timer rate is 15s */
-
-static void mce_checkregs(void *info)
-{
- u32 low, high;
- int i;
-
- for (i = firstbank; i < nr_mce_banks; i++) {
- rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
-
- if (!(high & (1<<31)))
- continue;
-
- printk(KERN_INFO "MCE: The hardware reports a non fatal, "
- "correctable incident occurred on CPU %d.\n",
- smp_processor_id());
-
- printk(KERN_INFO "Bank %d: %08x%08x\n", i, high, low);
-
- /*
- * Scrub the error so we don't pick it up in MCE_RATE
- * seconds time:
- */
- wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
-
- /* Serialize: */
- wmb();
- add_taint(TAINT_MACHINE_CHECK);
- }
-}
-
-static void mce_work_fn(struct work_struct *work);
-static DECLARE_DELAYED_WORK(mce_work, mce_work_fn);
-
-static void mce_work_fn(struct work_struct *work)
-{
- on_each_cpu(mce_checkregs, NULL, 1);
- schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE));
-}
-
-static int __init init_nonfatal_mce_checker(void)
-{
- struct cpuinfo_x86 *c = &boot_cpu_data;
-
- /* Check for MCE support */
- if (!cpu_has(c, X86_FEATURE_MCE))
- return -ENODEV;
-
- /* Check for PPro style MCA */
- if (!cpu_has(c, X86_FEATURE_MCA))
- return -ENODEV;
-
- /* Some Athlons misbehave when we frob bank 0 */
- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
- boot_cpu_data.x86 == 6)
- firstbank = 1;
- else
- firstbank = 0;
-
- /*
- * Check for non-fatal errors every MCE_RATE s
- */
- schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE));
- printk(KERN_INFO "Machine check exception polling timer started.\n");
-
- return 0;
-}
-module_init(init_nonfatal_mce_checker);
-
-MODULE_LICENSE("GPL");
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c
deleted file mode 100644
index 4482aea9aa2e..000000000000
--- a/arch/x86/kernel/cpu/mcheck/p4.c
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * P4 specific Machine Check Exception Reporting
- */
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/smp.h>
-
-#include <asm/processor.h>
-#include <asm/mce.h>
-#include <asm/msr.h>
-
-/* as supported by the P4/Xeon family */
-struct intel_mce_extended_msrs {
- u32 eax;
- u32 ebx;
- u32 ecx;
- u32 edx;
- u32 esi;
- u32 edi;
- u32 ebp;
- u32 esp;
- u32 eflags;
- u32 eip;
- /* u32 *reserved[]; */
-};
-
-static int mce_num_extended_msrs;
-
-/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */
-static void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
-{
- u32 h;
-
- rdmsr(MSR_IA32_MCG_EAX, r->eax, h);
- rdmsr(MSR_IA32_MCG_EBX, r->ebx, h);
- rdmsr(MSR_IA32_MCG_ECX, r->ecx, h);
- rdmsr(MSR_IA32_MCG_EDX, r->edx, h);
- rdmsr(MSR_IA32_MCG_ESI, r->esi, h);
- rdmsr(MSR_IA32_MCG_EDI, r->edi, h);
- rdmsr(MSR_IA32_MCG_EBP, r->ebp, h);
- rdmsr(MSR_IA32_MCG_ESP, r->esp, h);
- rdmsr(MSR_IA32_MCG_EFLAGS, r->eflags, h);
- rdmsr(MSR_IA32_MCG_EIP, r->eip, h);
-}
-
-static void intel_machine_check(struct pt_regs *regs, long error_code)
-{
- u32 alow, ahigh, high, low;
- u32 mcgstl, mcgsth;
- int recover = 1;
- int i;
-
- rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
- if (mcgstl & (1<<0)) /* Recoverable ? */
- recover = 0;
-
- printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
- smp_processor_id(), mcgsth, mcgstl);
-
- if (mce_num_extended_msrs > 0) {
- struct intel_mce_extended_msrs dbg;
-
- intel_get_extended_msrs(&dbg);
-
- printk(KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n"
- "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n"
- "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n",
- smp_processor_id(), dbg.eip, dbg.eflags,
- dbg.eax, dbg.ebx, dbg.ecx, dbg.edx,
- dbg.esi, dbg.edi, dbg.ebp, dbg.esp);
- }
-
- for (i = 0; i < nr_mce_banks; i++) {
- rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
- if (high & (1<<31)) {
- char misc[20];
- char addr[24];
-
- misc[0] = addr[0] = '\0';
- if (high & (1<<29))
- recover |= 1;
- if (high & (1<<25))
- recover |= 2;
- high &= ~(1<<31);
- if (high & (1<<27)) {
- rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
- snprintf(misc, 20, "[%08x%08x]", ahigh, alow);
- }
- if (high & (1<<26)) {
- rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
- snprintf(addr, 24, " at %08x%08x", ahigh, alow);
- }
- printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
- smp_processor_id(), i, high, low, misc, addr);
- }
- }
-
- if (recover & 2)
- panic("CPU context corrupt");
- if (recover & 1)
- panic("Unable to continue");
-
- printk(KERN_EMERG "Attempting to continue.\n");
-
- /*
- * Do not clear the MSR_IA32_MCi_STATUS if the error is not
- * recoverable/continuable.This will allow BIOS to look at the MSRs
- * for errors if the OS could not log the error.
- */
- for (i = 0; i < nr_mce_banks; i++) {
- u32 msr;
- msr = MSR_IA32_MC0_STATUS+i*4;
- rdmsr(msr, low, high);
- if (high&(1<<31)) {
- /* Clear it */
- wrmsr(msr, 0UL, 0UL);
- /* Serialize */
- wmb();
- add_taint(TAINT_MACHINE_CHECK);
- }
- }
- mcgstl &= ~(1<<2);
- wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
-}
-
-void intel_p4_mcheck_init(struct cpuinfo_x86 *c)
-{
- u32 l, h;
- int i;
-
- machine_check_vector = intel_machine_check;
- wmb();
-
- printk(KERN_INFO "Intel machine check architecture supported.\n");
- rdmsr(MSR_IA32_MCG_CAP, l, h);
- if (l & (1<<8)) /* Control register present ? */
- wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
- nr_mce_banks = l & 0xff;
-
- for (i = 0; i < nr_mce_banks; i++) {
- wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
- wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
- }
-
- set_in_cr4(X86_CR4_MCE);
- printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
- smp_processor_id());
-
- /* Check for P4/Xeon extended MCE MSRs */
- rdmsr(MSR_IA32_MCG_CAP, l, h);
- if (l & (1<<9)) {/* MCG_EXT_P */
- mce_num_extended_msrs = (l >> 16) & 0xff;
- printk(KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)"
- " available\n",
- smp_processor_id(), mce_num_extended_msrs);
-
-#ifdef CONFIG_X86_MCE_P4THERMAL
- /* Check for P4/Xeon Thermal monitor */
- intel_init_thermal(c);
-#endif
- }
-}
diff --git a/arch/x86/kernel/cpu/mcheck/p6.c b/arch/x86/kernel/cpu/mcheck/p6.c
deleted file mode 100644
index 01e4f8178183..000000000000
--- a/arch/x86/kernel/cpu/mcheck/p6.c
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * P6 specific Machine Check Exception Reporting
- * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk>
- */
-#include <linux/interrupt.h>
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/smp.h>
-
-#include <asm/processor.h>
-#include <asm/system.h>
-#include <asm/mce.h>
-#include <asm/msr.h>
-
-/* Machine Check Handler For PII/PIII */
-static void intel_machine_check(struct pt_regs *regs, long error_code)
-{
- u32 alow, ahigh, high, low;
- u32 mcgstl, mcgsth;
- int recover = 1;
- int i;
-
- rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
- if (mcgstl & (1<<0)) /* Recoverable ? */
- recover = 0;
-
- printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
- smp_processor_id(), mcgsth, mcgstl);
-
- for (i = 0; i < nr_mce_banks; i++) {
- rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
- if (high & (1<<31)) {
- char misc[20];
- char addr[24];
-
- misc[0] = '\0';
- addr[0] = '\0';
-
- if (high & (1<<29))
- recover |= 1;
- if (high & (1<<25))
- recover |= 2;
- high &= ~(1<<31);
-
- if (high & (1<<27)) {
- rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
- snprintf(misc, 20, "[%08x%08x]", ahigh, alow);
- }
- if (high & (1<<26)) {
- rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
- snprintf(addr, 24, " at %08x%08x", ahigh, alow);
- }
-
- printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
- smp_processor_id(), i, high, low, misc, addr);
- }
- }
-
- if (recover & 2)
- panic("CPU context corrupt");
- if (recover & 1)
- panic("Unable to continue");
-
- printk(KERN_EMERG "Attempting to continue.\n");
- /*
- * Do not clear the MSR_IA32_MCi_STATUS if the error is not
- * recoverable/continuable.This will allow BIOS to look at the MSRs
- * for errors if the OS could not log the error:
- */
- for (i = 0; i < nr_mce_banks; i++) {
- unsigned int msr;
-
- msr = MSR_IA32_MC0_STATUS+i*4;
- rdmsr(msr, low, high);
- if (high & (1<<31)) {
- /* Clear it: */
- wrmsr(msr, 0UL, 0UL);
- /* Serialize: */
- wmb();
- add_taint(TAINT_MACHINE_CHECK);
- }
- }
- mcgstl &= ~(1<<2);
- wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
-}
-
-/* Set up machine check reporting for processors with Intel style MCE: */
-void intel_p6_mcheck_init(struct cpuinfo_x86 *c)
-{
- u32 l, h;
- int i;
-
- /* Check for MCE support */
- if (!cpu_has(c, X86_FEATURE_MCE))
- return;
-
- /* Check for PPro style MCA */
- if (!cpu_has(c, X86_FEATURE_MCA))
- return;
-
- /* Ok machine check is available */
- machine_check_vector = intel_machine_check;
- /* Make sure the vector pointer is visible before we enable MCEs: */
- wmb();
-
- printk(KERN_INFO "Intel machine check architecture supported.\n");
- rdmsr(MSR_IA32_MCG_CAP, l, h);
- if (l & (1<<8)) /* Control register present ? */
- wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
- nr_mce_banks = l & 0xff;
-
- /*
- * Following the example in IA-32 SDM Vol 3:
- * - MC0_CTL should not be written
- * - Status registers on all banks should be cleared on reset
- */
- for (i = 1; i < nr_mce_banks; i++)
- wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
-
- for (i = 0; i < nr_mce_banks; i++)
- wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
-
- set_in_cr4(X86_CR4_MCE);
- printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
- smp_processor_id());
-}
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index bff8dd191dd5..15f2bc07bb60 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -253,9 +253,6 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
return;
}
- if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2))
- tm2 = 1;
-
/* Check whether a vector already exists */
if (h & APIC_VECTOR_MASK) {
printk(KERN_DEBUG
@@ -264,6 +261,16 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
return;
}
+ /* early Pentium M models use different method for enabling TM2 */
+ if (cpu_has(c, X86_FEATURE_TM2)) {
+ if (c->x86 == 6 && (c->x86_model == 9 || c->x86_model == 13)) {
+ rdmsr(MSR_THERM2_CTL, l, h);
+ if (l & MSR_THERM2_CTL_TM_SELECT)
+ tm2 = 1;
+ } else if (l & MSR_IA32_MISC_ENABLE_TM2)
+ tm2 = 1;
+ }
+
/* We'll mask the thermal vector in the lapic till we're ready: */
h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
apic_write(APIC_LVTTHMR, h);
diff --git a/arch/x86/kernel/cpu/mtrr/amd.c b/arch/x86/kernel/cpu/mtrr/amd.c
index ee2331b0e58f..33af14110dfd 100644
--- a/arch/x86/kernel/cpu/mtrr/amd.c
+++ b/arch/x86/kernel/cpu/mtrr/amd.c
@@ -7,15 +7,15 @@
static void
amd_get_mtrr(unsigned int reg, unsigned long *base,
- unsigned long *size, mtrr_type * type)
+ unsigned long *size, mtrr_type *type)
{
unsigned long low, high;
rdmsr(MSR_K6_UWCCR, low, high);
- /* Upper dword is region 1, lower is region 0 */
+ /* Upper dword is region 1, lower is region 0 */
if (reg == 1)
low = high;
- /* The base masks off on the right alignment */
+ /* The base masks off on the right alignment */
*base = (low & 0xFFFE0000) >> PAGE_SHIFT;
*type = 0;
if (low & 1)
@@ -27,74 +27,81 @@ amd_get_mtrr(unsigned int reg, unsigned long *base,
return;
}
/*
- * This needs a little explaining. The size is stored as an
- * inverted mask of bits of 128K granularity 15 bits long offset
- * 2 bits
+ * This needs a little explaining. The size is stored as an
+ * inverted mask of bits of 128K granularity 15 bits long offset
+ * 2 bits.
*
- * So to get a size we do invert the mask and add 1 to the lowest
- * mask bit (4 as its 2 bits in). This gives us a size we then shift
- * to turn into 128K blocks
+ * So to get a size we do invert the mask and add 1 to the lowest
+ * mask bit (4 as its 2 bits in). This gives us a size we then shift
+ * to turn into 128K blocks.
*
- * eg 111 1111 1111 1100 is 512K
+ * eg 111 1111 1111 1100 is 512K
*
- * invert 000 0000 0000 0011
- * +1 000 0000 0000 0100
- * *128K ...
+ * invert 000 0000 0000 0011
+ * +1 000 0000 0000 0100
+ * *128K ...
*/
low = (~low) & 0x1FFFC;
*size = (low + 4) << (15 - PAGE_SHIFT);
- return;
}
-static void amd_set_mtrr(unsigned int reg, unsigned long base,
- unsigned long size, mtrr_type type)
-/* [SUMMARY] Set variable MTRR register on the local CPU.
- <reg> The register to set.
- <base> The base address of the region.
- <size> The size of the region. If this is 0 the region is disabled.
- <type> The type of the region.
- [RETURNS] Nothing.
-*/
+/**
+ * amd_set_mtrr - Set variable MTRR register on the local CPU.
+ *
+ * @reg The register to set.
+ * @base The base address of the region.
+ * @size The size of the region. If this is 0 the region is disabled.
+ * @type The type of the region.
+ *
+ * Returns nothing.
+ */
+static void
+amd_set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type)
{
u32 regs[2];
/*
- * Low is MTRR0 , High MTRR 1
+ * Low is MTRR0, High MTRR 1
*/
rdmsr(MSR_K6_UWCCR, regs[0], regs[1]);
/*
- * Blank to disable
+ * Blank to disable
*/
- if (size == 0)
+ if (size == 0) {
regs[reg] = 0;
- else
- /* Set the register to the base, the type (off by one) and an
- inverted bitmask of the size The size is the only odd
- bit. We are fed say 512K We invert this and we get 111 1111
- 1111 1011 but if you subtract one and invert you get the
- desired 111 1111 1111 1100 mask
-
- But ~(x - 1) == ~x + 1 == -x. Two's complement rocks! */
+ } else {
+ /*
+ * Set the register to the base, the type (off by one) and an
+ * inverted bitmask of the size The size is the only odd
+ * bit. We are fed say 512K We invert this and we get 111 1111
+ * 1111 1011 but if you subtract one and invert you get the
+ * desired 111 1111 1111 1100 mask
+ *
+ * But ~(x - 1) == ~x + 1 == -x. Two's complement rocks!
+ */
regs[reg] = (-size >> (15 - PAGE_SHIFT) & 0x0001FFFC)
| (base << PAGE_SHIFT) | (type + 1);
+ }
/*
- * The writeback rule is quite specific. See the manual. Its
- * disable local interrupts, write back the cache, set the mtrr
+ * The writeback rule is quite specific. See the manual. Its
+ * disable local interrupts, write back the cache, set the mtrr
*/
wbinvd();
wrmsr(MSR_K6_UWCCR, regs[0], regs[1]);
}
-static int amd_validate_add_page(unsigned long base, unsigned long size, unsigned int type)
+static int
+amd_validate_add_page(unsigned long base, unsigned long size, unsigned int type)
{
- /* Apply the K6 block alignment and size rules
- In order
- o Uncached or gathering only
- o 128K or bigger block
- o Power of 2 block
- o base suitably aligned to the power
- */
+ /*
+ * Apply the K6 block alignment and size rules
+ * In order
+ * o Uncached or gathering only
+ * o 128K or bigger block
+ * o Power of 2 block
+ * o base suitably aligned to the power
+ */
if (type > MTRR_TYPE_WRCOMB || size < (1 << (17 - PAGE_SHIFT))
|| (size & ~(size - 1)) - size || (base & (size - 1)))
return -EINVAL;
@@ -115,5 +122,3 @@ int __init amd_init_mtrr(void)
set_mtrr_ops(&amd_mtrr_ops);
return 0;
}
-
-//arch_initcall(amd_mtrr_init);
diff --git a/arch/x86/kernel/cpu/mtrr/centaur.c b/arch/x86/kernel/cpu/mtrr/centaur.c
index cb9aa3a7a7ab..de89f14eff3a 100644
--- a/arch/x86/kernel/cpu/mtrr/centaur.c
+++ b/arch/x86/kernel/cpu/mtrr/centaur.c
@@ -1,7 +1,9 @@
#include <linux/init.h>
#include <linux/mm.h>
+
#include <asm/mtrr.h>
#include <asm/msr.h>
+
#include "mtrr.h"
static struct {
@@ -12,25 +14,25 @@ static struct {
static u8 centaur_mcr_reserved;
static u8 centaur_mcr_type; /* 0 for winchip, 1 for winchip2 */
-/*
- * Report boot time MCR setups
+/**
+ * centaur_get_free_region - Get a free MTRR.
+ *
+ * @base: The starting (base) address of the region.
+ * @size: The size (in bytes) of the region.
+ *
+ * Returns: the index of the region on success, else -1 on error.
*/
-
static int
centaur_get_free_region(unsigned long base, unsigned long size, int replace_reg)
-/* [SUMMARY] Get a free MTRR.
- <base> The starting (base) address of the region.
- <size> The size (in bytes) of the region.
- [RETURNS] The index of the region on success, else -1 on error.
-*/
{
- int i, max;
- mtrr_type ltype;
unsigned long lbase, lsize;
+ mtrr_type ltype;
+ int i, max;
max = num_var_ranges;
if (replace_reg >= 0 && replace_reg < max)
return replace_reg;
+
for (i = 0; i < max; ++i) {
if (centaur_mcr_reserved & (1 << i))
continue;
@@ -38,11 +40,14 @@ centaur_get_free_region(unsigned long base, unsigned long size, int replace_reg)
if (lsize == 0)
return i;
}
+
return -ENOSPC;
}
-void
-mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi)
+/*
+ * Report boot time MCR setups
+ */
+void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi)
{
centaur_mcr[mcr].low = lo;
centaur_mcr[mcr].high = hi;
@@ -54,33 +59,35 @@ centaur_get_mcr(unsigned int reg, unsigned long *base,
{
*base = centaur_mcr[reg].high >> PAGE_SHIFT;
*size = -(centaur_mcr[reg].low & 0xfffff000) >> PAGE_SHIFT;
- *type = MTRR_TYPE_WRCOMB; /* If it is there, it is write-combining */
+ *type = MTRR_TYPE_WRCOMB; /* write-combining */
+
if (centaur_mcr_type == 1 && ((centaur_mcr[reg].low & 31) & 2))
*type = MTRR_TYPE_UNCACHABLE;
if (centaur_mcr_type == 1 && (centaur_mcr[reg].low & 31) == 25)
*type = MTRR_TYPE_WRBACK;
if (centaur_mcr_type == 0 && (centaur_mcr[reg].low & 31) == 31)
*type = MTRR_TYPE_WRBACK;
-
}
-static void centaur_set_mcr(unsigned int reg, unsigned long base,
- unsigned long size, mtrr_type type)
+static void
+centaur_set_mcr(unsigned int reg, unsigned long base,
+ unsigned long size, mtrr_type type)
{
unsigned long low, high;
if (size == 0) {
- /* Disable */
+ /* Disable */
high = low = 0;
} else {
high = base << PAGE_SHIFT;
- if (centaur_mcr_type == 0)
- low = -size << PAGE_SHIFT | 0x1f; /* only support write-combining... */
- else {
+ if (centaur_mcr_type == 0) {
+ /* Only support write-combining... */
+ low = -size << PAGE_SHIFT | 0x1f;
+ } else {
if (type == MTRR_TYPE_UNCACHABLE)
- low = -size << PAGE_SHIFT | 0x02; /* NC */
+ low = -size << PAGE_SHIFT | 0x02; /* NC */
else
- low = -size << PAGE_SHIFT | 0x09; /* WWO,WC */
+ low = -size << PAGE_SHIFT | 0x09; /* WWO, WC */
}
}
centaur_mcr[reg].high = high;
@@ -88,118 +95,16 @@ static void centaur_set_mcr(unsigned int reg, unsigned long base,
wrmsr(MSR_IDT_MCR0 + reg, low, high);
}
-#if 0
-/*
- * Initialise the later (saner) Winchip MCR variant. In this version
- * the BIOS can pass us the registers it has used (but not their values)
- * and the control register is read/write
- */
-
-static void __init
-centaur_mcr1_init(void)
-{
- unsigned i;
- u32 lo, hi;
-
- /* Unfortunately, MCR's are read-only, so there is no way to
- * find out what the bios might have done.
- */
-
- rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
- if (((lo >> 17) & 7) == 1) { /* Type 1 Winchip2 MCR */
- lo &= ~0x1C0; /* clear key */
- lo |= 0x040; /* set key to 1 */
- wrmsr(MSR_IDT_MCR_CTRL, lo, hi); /* unlock MCR */
- }
-
- centaur_mcr_type = 1;
-
- /*
- * Clear any unconfigured MCR's.
- */
-
- for (i = 0; i < 8; ++i) {
- if (centaur_mcr[i].high == 0 && centaur_mcr[i].low == 0) {
- if (!(lo & (1 << (9 + i))))
- wrmsr(MSR_IDT_MCR0 + i, 0, 0);
- else
- /*
- * If the BIOS set up an MCR we cannot see it
- * but we don't wish to obliterate it
- */
- centaur_mcr_reserved |= (1 << i);
- }
- }
- /*
- * Throw the main write-combining switch...
- * However if OOSTORE is enabled then people have already done far
- * cleverer things and we should behave.
- */
-
- lo |= 15; /* Write combine enables */
- wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
-}
-
-/*
- * Initialise the original winchip with read only MCR registers
- * no used bitmask for the BIOS to pass on and write only control
- */
-
-static void __init
-centaur_mcr0_init(void)
-{
- unsigned i;
-
- /* Unfortunately, MCR's are read-only, so there is no way to
- * find out what the bios might have done.
- */
-
- /* Clear any unconfigured MCR's.
- * This way we are sure that the centaur_mcr array contains the actual
- * values. The disadvantage is that any BIOS tweaks are thus undone.
- *
- */
- for (i = 0; i < 8; ++i) {
- if (centaur_mcr[i].high == 0 && centaur_mcr[i].low == 0)
- wrmsr(MSR_IDT_MCR0 + i, 0, 0);
- }
-
- wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0); /* Write only */
-}
-
-/*
- * Initialise Winchip series MCR registers
- */
-
-static void __init
-centaur_mcr_init(void)
-{
- struct set_mtrr_context ctxt;
-
- set_mtrr_prepare_save(&ctxt);
- set_mtrr_cache_disable(&ctxt);
-
- if (boot_cpu_data.x86_model == 4)
- centaur_mcr0_init();
- else if (boot_cpu_data.x86_model == 8 || boot_cpu_data.x86_model == 9)
- centaur_mcr1_init();
-
- set_mtrr_done(&ctxt);
-}
-#endif
-
-static int centaur_validate_add_page(unsigned long base,
- unsigned long size, unsigned int type)
+static int
+centaur_validate_add_page(unsigned long base, unsigned long size, unsigned int type)
{
/*
- * FIXME: Winchip2 supports uncached
+ * FIXME: Winchip2 supports uncached
*/
- if (type != MTRR_TYPE_WRCOMB &&
+ if (type != MTRR_TYPE_WRCOMB &&
(centaur_mcr_type == 0 || type != MTRR_TYPE_UNCACHABLE)) {
- printk(KERN_WARNING
- "mtrr: only write-combining%s supported\n",
- centaur_mcr_type ? " and uncacheable are"
- : " is");
+ pr_warning("mtrr: only write-combining%s supported\n",
+ centaur_mcr_type ? " and uncacheable are" : " is");
return -EINVAL;
}
return 0;
@@ -207,7 +112,6 @@ static int centaur_validate_add_page(unsigned long base,
static struct mtrr_ops centaur_mtrr_ops = {
.vendor = X86_VENDOR_CENTAUR,
-// .init = centaur_mcr_init,
.set = centaur_set_mcr,
.get = centaur_get_mcr,
.get_free_region = centaur_get_free_region,
@@ -220,5 +124,3 @@ int __init centaur_init_mtrr(void)
set_mtrr_ops(&centaur_mtrr_ops);
return 0;
}
-
-//arch_initcall(centaur_init_mtrr);
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index 1d584a18a50d..315738c74aad 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -1,51 +1,75 @@
-/* MTRR (Memory Type Range Register) cleanup
-
- Copyright (C) 2009 Yinghai Lu
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public
- License as published by the Free Software Foundation; either
- version 2 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
-
- You should have received a copy of the GNU Library General Public
- License along with this library; if not, write to the Free
- Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-*/
-
+/*
+ * MTRR (Memory Type Range Register) cleanup
+ *
+ * Copyright (C) 2009 Yinghai Lu
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the Free
+ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/smp.h>
#include <linux/cpu.h>
-#include <linux/mutex.h>
#include <linux/sort.h>
+#include <linux/mutex.h>
+#include <linux/uaccess.h>
+#include <linux/kvm_para.h>
+#include <asm/processor.h>
#include <asm/e820.h>
#include <asm/mtrr.h>
-#include <asm/uaccess.h>
-#include <asm/processor.h>
#include <asm/msr.h>
-#include <asm/kvm_para.h>
-#include "mtrr.h"
-/* should be related to MTRR_VAR_RANGES nums */
-#define RANGE_NUM 256
+#include "mtrr.h"
struct res_range {
- unsigned long start;
- unsigned long end;
+ unsigned long start;
+ unsigned long end;
+};
+
+struct var_mtrr_range_state {
+ unsigned long base_pfn;
+ unsigned long size_pfn;
+ mtrr_type type;
+};
+
+struct var_mtrr_state {
+ unsigned long range_startk;
+ unsigned long range_sizek;
+ unsigned long chunk_sizek;
+ unsigned long gran_sizek;
+ unsigned int reg;
};
+/* Should be related to MTRR_VAR_RANGES nums */
+#define RANGE_NUM 256
+
+static struct res_range __initdata range[RANGE_NUM];
+static int __initdata nr_range;
+
+static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
+
+static int __initdata debug_print;
+#define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0)
+
+
static int __init
-add_range(struct res_range *range, int nr_range, unsigned long start,
- unsigned long end)
+add_range(struct res_range *range, int nr_range,
+ unsigned long start, unsigned long end)
{
- /* out of slots */
+ /* Out of slots: */
if (nr_range >= RANGE_NUM)
return nr_range;
@@ -58,12 +82,12 @@ add_range(struct res_range *range, int nr_range, unsigned long start,
}
static int __init
-add_range_with_merge(struct res_range *range, int nr_range, unsigned long start,
- unsigned long end)
+add_range_with_merge(struct res_range *range, int nr_range,
+ unsigned long start, unsigned long end)
{
int i;
- /* try to merge it with old one */
+ /* Try to merge it with old one: */
for (i = 0; i < nr_range; i++) {
unsigned long final_start, final_end;
unsigned long common_start, common_end;
@@ -84,7 +108,7 @@ add_range_with_merge(struct res_range *range, int nr_range, unsigned long start,
return nr_range;
}
- /* need to add that */
+ /* Need to add it: */
return add_range(range, nr_range, start, end);
}
@@ -117,7 +141,7 @@ subtract_range(struct res_range *range, unsigned long start, unsigned long end)
}
if (start > range[j].start && end < range[j].end) {
- /* find the new spare */
+ /* Find the new spare: */
for (i = 0; i < RANGE_NUM; i++) {
if (range[i].end == 0)
break;
@@ -146,14 +170,8 @@ static int __init cmp_range(const void *x1, const void *x2)
return start1 - start2;
}
-struct var_mtrr_range_state {
- unsigned long base_pfn;
- unsigned long size_pfn;
- mtrr_type type;
-};
-
-static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
-static int __initdata debug_print;
+#define BIOS_BUG_MSG KERN_WARNING \
+ "WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n"
static int __init
x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
@@ -180,7 +198,7 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
range[i].start, range[i].end + 1);
}
- /* take out UC ranges */
+ /* Take out UC ranges: */
for (i = 0; i < num_var_ranges; i++) {
type = range_state[i].type;
if (type != MTRR_TYPE_UNCACHABLE &&
@@ -193,9 +211,7 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
if (base < (1<<(20-PAGE_SHIFT)) && mtrr_state.have_fixed &&
(mtrr_state.enabled & 1)) {
/* Var MTRR contains UC entry below 1M? Skip it: */
- printk(KERN_WARNING "WARNING: BIOS bug: VAR MTRR %d "
- "contains strange UC entry under 1M, check "
- "with your system vendor!\n", i);
+ printk(BIOS_BUG_MSG, i);
if (base + size <= (1<<(20-PAGE_SHIFT)))
continue;
size -= (1<<(20-PAGE_SHIFT)) - base;
@@ -237,17 +253,13 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
return nr_range;
}
-static struct res_range __initdata range[RANGE_NUM];
-static int __initdata nr_range;
-
#ifdef CONFIG_MTRR_SANITIZER
static unsigned long __init sum_ranges(struct res_range *range, int nr_range)
{
- unsigned long sum;
+ unsigned long sum = 0;
int i;
- sum = 0;
for (i = 0; i < nr_range; i++)
sum += range[i].end + 1 - range[i].start;
@@ -278,17 +290,9 @@ static int __init mtrr_cleanup_debug_setup(char *str)
}
early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup);
-struct var_mtrr_state {
- unsigned long range_startk;
- unsigned long range_sizek;
- unsigned long chunk_sizek;
- unsigned long gran_sizek;
- unsigned int reg;
-};
-
static void __init
set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
- unsigned char type, unsigned int address_bits)
+ unsigned char type, unsigned int address_bits)
{
u32 base_lo, base_hi, mask_lo, mask_hi;
u64 base, mask;
@@ -301,7 +305,7 @@ set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
mask = (1ULL << address_bits) - 1;
mask &= ~((((u64)sizek) << 10) - 1);
- base = ((u64)basek) << 10;
+ base = ((u64)basek) << 10;
base |= type;
mask |= 0x800;
@@ -317,15 +321,14 @@ set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
static void __init
save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
- unsigned char type)
+ unsigned char type)
{
range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10);
range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10);
range_state[reg].type = type;
}
-static void __init
-set_var_mtrr_all(unsigned int address_bits)
+static void __init set_var_mtrr_all(unsigned int address_bits)
{
unsigned long basek, sizek;
unsigned char type;
@@ -342,11 +345,11 @@ set_var_mtrr_all(unsigned int address_bits)
static unsigned long to_size_factor(unsigned long sizek, char *factorp)
{
- char factor;
unsigned long base = sizek;
+ char factor;
if (base & ((1<<10) - 1)) {
- /* not MB alignment */
+ /* Not MB-aligned: */
factor = 'K';
} else if (base & ((1<<20) - 1)) {
factor = 'M';
@@ -372,11 +375,12 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk,
unsigned long max_align, align;
unsigned long sizek;
- /* Compute the maximum size I can make a range */
+ /* Compute the maximum size with which we can make a range: */
if (range_startk)
max_align = ffs(range_startk) - 1;
else
max_align = 32;
+
align = fls(range_sizek) - 1;
if (align > max_align)
align = max_align;
@@ -386,11 +390,10 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk,
char start_factor = 'K', size_factor = 'K';
unsigned long start_base, size_base;
- start_base = to_size_factor(range_startk,
- &start_factor),
- size_base = to_size_factor(sizek, &size_factor),
+ start_base = to_size_factor(range_startk, &start_factor);
+ size_base = to_size_factor(sizek, &size_factor);
- printk(KERN_DEBUG "Setting variable MTRR %d, "
+ Dprintk("Setting variable MTRR %d, "
"base: %ld%cB, range: %ld%cB, type %s\n",
reg, start_base, start_factor,
size_base, size_factor,
@@ -425,10 +428,11 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
chunk_sizek = state->chunk_sizek;
gran_sizek = state->gran_sizek;
- /* align with gran size, prevent small block used up MTRRs */
+ /* Align with gran size, prevent small block used up MTRRs: */
range_basek = ALIGN(state->range_startk, gran_sizek);
if ((range_basek > basek) && basek)
return second_sizek;
+
state->range_sizek -= (range_basek - state->range_startk);
range_sizek = ALIGN(state->range_sizek, gran_sizek);
@@ -439,22 +443,21 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
}
state->range_sizek = range_sizek;
- /* try to append some small hole */
+ /* Try to append some small hole: */
range0_basek = state->range_startk;
range0_sizek = ALIGN(state->range_sizek, chunk_sizek);
- /* no increase */
+ /* No increase: */
if (range0_sizek == state->range_sizek) {
- if (debug_print)
- printk(KERN_DEBUG "rangeX: %016lx - %016lx\n",
- range0_basek<<10,
- (range0_basek + state->range_sizek)<<10);
+ Dprintk("rangeX: %016lx - %016lx\n",
+ range0_basek<<10,
+ (range0_basek + state->range_sizek)<<10);
state->reg = range_to_mtrr(state->reg, range0_basek,
state->range_sizek, MTRR_TYPE_WRBACK);
return 0;
}
- /* only cut back, when it is not the last */
+ /* Only cut back when it is not the last: */
if (sizek) {
while (range0_basek + range0_sizek > (basek + sizek)) {
if (range0_sizek >= chunk_sizek)
@@ -470,16 +473,16 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
second_try:
range_basek = range0_basek + range0_sizek;
- /* one hole in the middle */
+ /* One hole in the middle: */
if (range_basek > basek && range_basek <= (basek + sizek))
second_sizek = range_basek - basek;
if (range0_sizek > state->range_sizek) {
- /* one hole in middle or at end */
+ /* One hole in middle or at the end: */
hole_sizek = range0_sizek - state->range_sizek - second_sizek;
- /* hole size should be less than half of range0 size */
+ /* Hole size should be less than half of range0 size: */
if (hole_sizek >= (range0_sizek >> 1) &&
range0_sizek >= chunk_sizek) {
range0_sizek -= chunk_sizek;
@@ -491,32 +494,30 @@ second_try:
}
if (range0_sizek) {
- if (debug_print)
- printk(KERN_DEBUG "range0: %016lx - %016lx\n",
- range0_basek<<10,
- (range0_basek + range0_sizek)<<10);
+ Dprintk("range0: %016lx - %016lx\n",
+ range0_basek<<10,
+ (range0_basek + range0_sizek)<<10);
state->reg = range_to_mtrr(state->reg, range0_basek,
range0_sizek, MTRR_TYPE_WRBACK);
}
if (range0_sizek < state->range_sizek) {
- /* need to handle left over */
+ /* Need to handle left over range: */
range_sizek = state->range_sizek - range0_sizek;
- if (debug_print)
- printk(KERN_DEBUG "range: %016lx - %016lx\n",
- range_basek<<10,
- (range_basek + range_sizek)<<10);
+ Dprintk("range: %016lx - %016lx\n",
+ range_basek<<10,
+ (range_basek + range_sizek)<<10);
+
state->reg = range_to_mtrr(state->reg, range_basek,
range_sizek, MTRR_TYPE_WRBACK);
}
if (hole_sizek) {
hole_basek = range_basek - hole_sizek - second_sizek;
- if (debug_print)
- printk(KERN_DEBUG "hole: %016lx - %016lx\n",
- hole_basek<<10,
- (hole_basek + hole_sizek)<<10);
+ Dprintk("hole: %016lx - %016lx\n",
+ hole_basek<<10,
+ (hole_basek + hole_sizek)<<10);
state->reg = range_to_mtrr(state->reg, hole_basek,
hole_sizek, MTRR_TYPE_UNCACHABLE);
}
@@ -537,23 +538,23 @@ set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn,
basek = base_pfn << (PAGE_SHIFT - 10);
sizek = size_pfn << (PAGE_SHIFT - 10);
- /* See if I can merge with the last range */
+ /* See if I can merge with the last range: */
if ((basek <= 1024) ||
(state->range_startk + state->range_sizek == basek)) {
unsigned long endk = basek + sizek;
state->range_sizek = endk - state->range_startk;
return;
}
- /* Write the range mtrrs */
+ /* Write the range mtrrs: */
if (state->range_sizek != 0)
second_sizek = range_to_mtrr_with_hole(state, basek, sizek);
- /* Allocate an msr */
+ /* Allocate an msr: */
state->range_startk = basek + second_sizek;
state->range_sizek = sizek - second_sizek;
}
-/* mininum size of mtrr block that can take hole */
+/* Mininum size of mtrr block that can take hole: */
static u64 mtrr_chunk_size __initdata = (256ULL<<20);
static int __init parse_mtrr_chunk_size_opt(char *p)
@@ -565,7 +566,7 @@ static int __init parse_mtrr_chunk_size_opt(char *p)
}
early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt);
-/* granity of mtrr of block */
+/* Granularity of mtrr of block: */
static u64 mtrr_gran_size __initdata;
static int __init parse_mtrr_gran_size_opt(char *p)
@@ -577,7 +578,7 @@ static int __init parse_mtrr_gran_size_opt(char *p)
}
early_param("mtrr_gran_size", parse_mtrr_gran_size_opt);
-static int nr_mtrr_spare_reg __initdata =
+static unsigned long nr_mtrr_spare_reg __initdata =
CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT;
static int __init parse_mtrr_spare_reg(char *arg)
@@ -586,7 +587,6 @@ static int __init parse_mtrr_spare_reg(char *arg)
nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0);
return 0;
}
-
early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg);
static int __init
@@ -594,8 +594,8 @@ x86_setup_var_mtrrs(struct res_range *range, int nr_range,
u64 chunk_size, u64 gran_size)
{
struct var_mtrr_state var_state;
- int i;
int num_reg;
+ int i;
var_state.range_startk = 0;
var_state.range_sizek = 0;
@@ -605,17 +605,18 @@ x86_setup_var_mtrrs(struct res_range *range, int nr_range,
memset(range_state, 0, sizeof(range_state));
- /* Write the range etc */
- for (i = 0; i < nr_range; i++)
+ /* Write the range: */
+ for (i = 0; i < nr_range; i++) {
set_var_mtrr_range(&var_state, range[i].start,
range[i].end - range[i].start + 1);
+ }
- /* Write the last range */
+ /* Write the last range: */
if (var_state.range_sizek != 0)
range_to_mtrr_with_hole(&var_state, 0, 0);
num_reg = var_state.reg;
- /* Clear out the extra MTRR's */
+ /* Clear out the extra MTRR's: */
while (var_state.reg < num_var_ranges) {
save_var_mtrr(var_state.reg, 0, 0, 0);
var_state.reg++;
@@ -625,11 +626,11 @@ x86_setup_var_mtrrs(struct res_range *range, int nr_range,
}
struct mtrr_cleanup_result {
- unsigned long gran_sizek;
- unsigned long chunk_sizek;
- unsigned long lose_cover_sizek;
- unsigned int num_reg;
- int bad;
+ unsigned long gran_sizek;
+ unsigned long chunk_sizek;
+ unsigned long lose_cover_sizek;
+ unsigned int num_reg;
+ int bad;
};
/*
@@ -645,10 +646,10 @@ static unsigned long __initdata min_loss_pfn[RANGE_NUM];
static void __init print_out_mtrr_range_state(void)
{
- int i;
char start_factor = 'K', size_factor = 'K';
unsigned long start_base, size_base;
mtrr_type type;
+ int i;
for (i = 0; i < num_var_ranges; i++) {
@@ -676,10 +677,10 @@ static int __init mtrr_need_cleanup(void)
int i;
mtrr_type type;
unsigned long size;
- /* extra one for all 0 */
+ /* Extra one for all 0: */
int num[MTRR_NUM_TYPES + 1];
- /* check entries number */
+ /* Check entries number: */
memset(num, 0, sizeof(num));
for (i = 0; i < num_var_ranges; i++) {
type = range_state[i].type;
@@ -693,88 +694,86 @@ static int __init mtrr_need_cleanup(void)
num[type]++;
}
- /* check if we got UC entries */
+ /* Check if we got UC entries: */
if (!num[MTRR_TYPE_UNCACHABLE])
return 0;
- /* check if we only had WB and UC */
+ /* Check if we only had WB and UC */
if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
- num_var_ranges - num[MTRR_NUM_TYPES])
+ num_var_ranges - num[MTRR_NUM_TYPES])
return 0;
return 1;
}
static unsigned long __initdata range_sums;
-static void __init mtrr_calc_range_state(u64 chunk_size, u64 gran_size,
- unsigned long extra_remove_base,
- unsigned long extra_remove_size,
- int i)
+
+static void __init
+mtrr_calc_range_state(u64 chunk_size, u64 gran_size,
+ unsigned long x_remove_base,
+ unsigned long x_remove_size, int i)
{
- int num_reg;
static struct res_range range_new[RANGE_NUM];
- static int nr_range_new;
unsigned long range_sums_new;
+ static int nr_range_new;
+ int num_reg;
- /* convert ranges to var ranges state */
- num_reg = x86_setup_var_mtrrs(range, nr_range,
- chunk_size, gran_size);
+ /* Convert ranges to var ranges state: */
+ num_reg = x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
- /* we got new setting in range_state, check it */
+ /* We got new setting in range_state, check it: */
memset(range_new, 0, sizeof(range_new));
nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
- extra_remove_base, extra_remove_size);
+ x_remove_base, x_remove_size);
range_sums_new = sum_ranges(range_new, nr_range_new);
result[i].chunk_sizek = chunk_size >> 10;
result[i].gran_sizek = gran_size >> 10;
result[i].num_reg = num_reg;
+
if (range_sums < range_sums_new) {
- result[i].lose_cover_sizek =
- (range_sums_new - range_sums) << PSHIFT;
+ result[i].lose_cover_sizek = (range_sums_new - range_sums) << PSHIFT;
result[i].bad = 1;
- } else
- result[i].lose_cover_sizek =
- (range_sums - range_sums_new) << PSHIFT;
+ } else {
+ result[i].lose_cover_sizek = (range_sums - range_sums_new) << PSHIFT;
+ }
- /* double check it */
+ /* Double check it: */
if (!result[i].bad && !result[i].lose_cover_sizek) {
- if (nr_range_new != nr_range ||
- memcmp(range, range_new, sizeof(range)))
- result[i].bad = 1;
+ if (nr_range_new != nr_range || memcmp(range, range_new, sizeof(range)))
+ result[i].bad = 1;
}
- if (!result[i].bad && (range_sums - range_sums_new <
- min_loss_pfn[num_reg])) {
- min_loss_pfn[num_reg] =
- range_sums - range_sums_new;
- }
+ if (!result[i].bad && (range_sums - range_sums_new < min_loss_pfn[num_reg]))
+ min_loss_pfn[num_reg] = range_sums - range_sums_new;
}
static void __init mtrr_print_out_one_result(int i)
{
- char gran_factor, chunk_factor, lose_factor;
unsigned long gran_base, chunk_base, lose_base;
+ char gran_factor, chunk_factor, lose_factor;
gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
- printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t",
- result[i].bad ? "*BAD*" : " ",
- gran_base, gran_factor, chunk_base, chunk_factor);
- printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n",
- result[i].num_reg, result[i].bad ? "-" : "",
- lose_base, lose_factor);
+
+ pr_info("%sgran_size: %ld%c \tchunk_size: %ld%c \t",
+ result[i].bad ? "*BAD*" : " ",
+ gran_base, gran_factor, chunk_base, chunk_factor);
+ pr_cont("num_reg: %d \tlose cover RAM: %s%ld%c\n",
+ result[i].num_reg, result[i].bad ? "-" : "",
+ lose_base, lose_factor);
}
static int __init mtrr_search_optimal_index(void)
{
- int i;
int num_reg_good;
int index_good;
+ int i;
if (nr_mtrr_spare_reg >= num_var_ranges)
nr_mtrr_spare_reg = num_var_ranges - 1;
+
num_reg_good = -1;
for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) {
if (!min_loss_pfn[i])
@@ -796,24 +795,24 @@ static int __init mtrr_search_optimal_index(void)
return index_good;
}
-
int __init mtrr_cleanup(unsigned address_bits)
{
- unsigned long extra_remove_base, extra_remove_size;
+ unsigned long x_remove_base, x_remove_size;
unsigned long base, size, def, dummy;
- mtrr_type type;
u64 chunk_size, gran_size;
+ mtrr_type type;
int index_good;
int i;
if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
return 0;
+
rdmsr(MSR_MTRRdefType, def, dummy);
def &= 0xff;
if (def != MTRR_TYPE_UNCACHABLE)
return 0;
- /* get it and store it aside */
+ /* Get it and store it aside: */
memset(range_state, 0, sizeof(range_state));
for (i = 0; i < num_var_ranges; i++) {
mtrr_if->get(i, &base, &size, &type);
@@ -822,29 +821,28 @@ int __init mtrr_cleanup(unsigned address_bits)
range_state[i].type = type;
}
- /* check if we need handle it and can handle it */
+ /* Check if we need handle it and can handle it: */
if (!mtrr_need_cleanup())
return 0;
- /* print original var MTRRs at first, for debugging: */
+ /* Print original var MTRRs at first, for debugging: */
printk(KERN_DEBUG "original variable MTRRs\n");
print_out_mtrr_range_state();
memset(range, 0, sizeof(range));
- extra_remove_size = 0;
- extra_remove_base = 1 << (32 - PAGE_SHIFT);
+ x_remove_size = 0;
+ x_remove_base = 1 << (32 - PAGE_SHIFT);
if (mtrr_tom2)
- extra_remove_size =
- (mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base;
- nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base,
- extra_remove_size);
+ x_remove_size = (mtrr_tom2 >> PAGE_SHIFT) - x_remove_base;
+
+ nr_range = x86_get_mtrr_mem_range(range, 0, x_remove_base, x_remove_size);
/*
- * [0, 1M) should always be coverred by var mtrr with WB
- * and fixed mtrrs should take effective before var mtrr for it
+ * [0, 1M) should always be covered by var mtrr with WB
+ * and fixed mtrrs should take effect before var mtrr for it:
*/
nr_range = add_range_with_merge(range, nr_range, 0,
(1ULL<<(20 - PAGE_SHIFT)) - 1);
- /* sort the ranges */
+ /* Sort the ranges: */
sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
range_sums = sum_ranges(range, nr_range);
@@ -854,7 +852,7 @@ int __init mtrr_cleanup(unsigned address_bits)
if (mtrr_chunk_size && mtrr_gran_size) {
i = 0;
mtrr_calc_range_state(mtrr_chunk_size, mtrr_gran_size,
- extra_remove_base, extra_remove_size, i);
+ x_remove_base, x_remove_size, i);
mtrr_print_out_one_result(i);
@@ -880,7 +878,7 @@ int __init mtrr_cleanup(unsigned address_bits)
continue;
mtrr_calc_range_state(chunk_size, gran_size,
- extra_remove_base, extra_remove_size, i);
+ x_remove_base, x_remove_size, i);
if (debug_print) {
mtrr_print_out_one_result(i);
printk(KERN_INFO "\n");
@@ -890,7 +888,7 @@ int __init mtrr_cleanup(unsigned address_bits)
}
}
- /* try to find the optimal index */
+ /* Try to find the optimal index: */
index_good = mtrr_search_optimal_index();
if (index_good != -1) {
@@ -898,7 +896,7 @@ int __init mtrr_cleanup(unsigned address_bits)
i = index_good;
mtrr_print_out_one_result(i);
- /* convert ranges to var ranges state */
+ /* Convert ranges to var ranges state: */
chunk_size = result[i].chunk_sizek;
chunk_size <<= 10;
gran_size = result[i].gran_sizek;
@@ -941,8 +939,8 @@ early_param("disable_mtrr_trim", disable_mtrr_trim_setup);
* Note this won't check if the MTRRs < 4GB where the magic bit doesn't
* apply to are wrong, but so far we don't know of any such case in the wild.
*/
-#define Tom2Enabled (1U << 21)
-#define Tom2ForceMemTypeWB (1U << 22)
+#define Tom2Enabled (1U << 21)
+#define Tom2ForceMemTypeWB (1U << 22)
int __init amd_special_default_mtrr(void)
{
@@ -952,7 +950,7 @@ int __init amd_special_default_mtrr(void)
return 0;
if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11)
return 0;
- /* In case some hypervisor doesn't pass SYSCFG through */
+ /* In case some hypervisor doesn't pass SYSCFG through: */
if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0)
return 0;
/*
@@ -965,19 +963,21 @@ int __init amd_special_default_mtrr(void)
return 0;
}
-static u64 __init real_trim_memory(unsigned long start_pfn,
- unsigned long limit_pfn)
+static u64 __init
+real_trim_memory(unsigned long start_pfn, unsigned long limit_pfn)
{
u64 trim_start, trim_size;
+
trim_start = start_pfn;
trim_start <<= PAGE_SHIFT;
+
trim_size = limit_pfn;
trim_size <<= PAGE_SHIFT;
trim_size -= trim_start;
- return e820_update_range(trim_start, trim_size, E820_RAM,
- E820_RESERVED);
+ return e820_update_range(trim_start, trim_size, E820_RAM, E820_RESERVED);
}
+
/**
* mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
* @end_pfn: ending page frame number
@@ -985,7 +985,7 @@ static u64 __init real_trim_memory(unsigned long start_pfn,
* Some buggy BIOSes don't setup the MTRRs properly for systems with certain
* memory configurations. This routine checks that the highest MTRR matches
* the end of memory, to make sure the MTRRs having a write back type cover
- * all of the memory the kernel is intending to use. If not, it'll trim any
+ * all of the memory the kernel is intending to use. If not, it'll trim any
* memory off the end by adjusting end_pfn, removing it from the kernel's
* allocation pools, warning the user with an obnoxious message.
*/
@@ -994,21 +994,22 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
unsigned long i, base, size, highest_pfn = 0, def, dummy;
mtrr_type type;
u64 total_trim_size;
-
/* extra one for all 0 */
int num[MTRR_NUM_TYPES + 1];
+
/*
* Make sure we only trim uncachable memory on machines that
* support the Intel MTRR architecture:
*/
if (!is_cpu(INTEL) || disable_mtrr_trim)
return 0;
+
rdmsr(MSR_MTRRdefType, def, dummy);
def &= 0xff;
if (def != MTRR_TYPE_UNCACHABLE)
return 0;
- /* get it and store it aside */
+ /* Get it and store it aside: */
memset(range_state, 0, sizeof(range_state));
for (i = 0; i < num_var_ranges; i++) {
mtrr_if->get(i, &base, &size, &type);
@@ -1017,7 +1018,7 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
range_state[i].type = type;
}
- /* Find highest cached pfn */
+ /* Find highest cached pfn: */
for (i = 0; i < num_var_ranges; i++) {
type = range_state[i].type;
if (type != MTRR_TYPE_WRBACK)
@@ -1028,13 +1029,13 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
highest_pfn = base + size;
}
- /* kvm/qemu doesn't have mtrr set right, don't trim them all */
+ /* kvm/qemu doesn't have mtrr set right, don't trim them all: */
if (!highest_pfn) {
printk(KERN_INFO "CPU MTRRs all blank - virtualized system.\n");
return 0;
}
- /* check entries number */
+ /* Check entries number: */
memset(num, 0, sizeof(num));
for (i = 0; i < num_var_ranges; i++) {
type = range_state[i].type;
@@ -1046,11 +1047,11 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
num[type]++;
}
- /* no entry for WB? */
+ /* No entry for WB? */
if (!num[MTRR_TYPE_WRBACK])
return 0;
- /* check if we only had WB and UC */
+ /* Check if we only had WB and UC: */
if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
num_var_ranges - num[MTRR_NUM_TYPES])
return 0;
@@ -1066,31 +1067,31 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
}
nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0);
+ /* Check the head: */
total_trim_size = 0;
- /* check the head */
if (range[0].start)
total_trim_size += real_trim_memory(0, range[0].start);
- /* check the holes */
+
+ /* Check the holes: */
for (i = 0; i < nr_range - 1; i++) {
if (range[i].end + 1 < range[i+1].start)
total_trim_size += real_trim_memory(range[i].end + 1,
range[i+1].start);
}
- /* check the top */
+
+ /* Check the top: */
i = nr_range - 1;
if (range[i].end + 1 < end_pfn)
total_trim_size += real_trim_memory(range[i].end + 1,
end_pfn);
if (total_trim_size) {
- printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover"
- " all of memory, losing %lluMB of RAM.\n",
- total_trim_size >> 20);
+ pr_warning("WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing %lluMB of RAM.\n", total_trim_size >> 20);
if (!changed_by_mtrr_cleanup)
WARN_ON(1);
- printk(KERN_INFO "update e820 for mtrr\n");
+ pr_info("update e820 for mtrr\n");
update_e820();
return 1;
@@ -1098,4 +1099,3 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
return 0;
}
-
diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c
index ff14c320040c..228d982ce09c 100644
--- a/arch/x86/kernel/cpu/mtrr/cyrix.c
+++ b/arch/x86/kernel/cpu/mtrr/cyrix.c
@@ -1,38 +1,40 @@
#include <linux/init.h>
+#include <linux/io.h>
#include <linux/mm.h>
-#include <asm/mtrr.h>
-#include <asm/msr.h>
-#include <asm/io.h>
+
#include <asm/processor-cyrix.h>
#include <asm/processor-flags.h>
+#include <asm/mtrr.h>
+#include <asm/msr.h>
+
#include "mtrr.h"
static void
cyrix_get_arr(unsigned int reg, unsigned long *base,
unsigned long *size, mtrr_type * type)
{
- unsigned long flags;
unsigned char arr, ccr3, rcr, shift;
+ unsigned long flags;
arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */
- /* Save flags and disable interrupts */
local_irq_save(flags);
ccr3 = getCx86(CX86_CCR3);
setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
- ((unsigned char *) base)[3] = getCx86(arr);
- ((unsigned char *) base)[2] = getCx86(arr + 1);
- ((unsigned char *) base)[1] = getCx86(arr + 2);
+ ((unsigned char *)base)[3] = getCx86(arr);
+ ((unsigned char *)base)[2] = getCx86(arr + 1);
+ ((unsigned char *)base)[1] = getCx86(arr + 2);
rcr = getCx86(CX86_RCR_BASE + reg);
- setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
+ setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
- /* Enable interrupts if it was enabled previously */
local_irq_restore(flags);
+
shift = ((unsigned char *) base)[1] & 0x0f;
*base >>= PAGE_SHIFT;
- /* Power of two, at least 4K on ARR0-ARR6, 256K on ARR7
+ /*
+ * Power of two, at least 4K on ARR0-ARR6, 256K on ARR7
* Note: shift==0xf means 4G, this is unsupported.
*/
if (shift)
@@ -76,17 +78,20 @@ cyrix_get_arr(unsigned int reg, unsigned long *base,
}
}
+/*
+ * cyrix_get_free_region - get a free ARR.
+ *
+ * @base: the starting (base) address of the region.
+ * @size: the size (in bytes) of the region.
+ *
+ * Returns: the index of the region on success, else -1 on error.
+*/
static int
cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg)
-/* [SUMMARY] Get a free ARR.
- <base> The starting (base) address of the region.
- <size> The size (in bytes) of the region.
- [RETURNS] The index of the region on success, else -1 on error.
-*/
{
- int i;
- mtrr_type ltype;
unsigned long lbase, lsize;
+ mtrr_type ltype;
+ int i;
switch (replace_reg) {
case 7:
@@ -107,14 +112,17 @@ cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg)
cyrix_get_arr(7, &lbase, &lsize, &ltype);
if (lsize == 0)
return 7;
- /* Else try ARR0-ARR6 first */
+ /* Else try ARR0-ARR6 first */
} else {
for (i = 0; i < 7; i++) {
cyrix_get_arr(i, &lbase, &lsize, &ltype);
if (lsize == 0)
return i;
}
- /* ARR0-ARR6 isn't free, try ARR7 but its size must be at least 256K */
+ /*
+ * ARR0-ARR6 isn't free
+ * try ARR7 but its size must be at least 256K
+ */
cyrix_get_arr(i, &lbase, &lsize, &ltype);
if ((lsize == 0) && (size >= 0x40))
return i;
@@ -122,21 +130,22 @@ cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg)
return -ENOSPC;
}
-static u32 cr4 = 0;
-static u32 ccr3;
+static u32 cr4, ccr3;
static void prepare_set(void)
{
u32 cr0;
/* Save value of CR4 and clear Page Global Enable (bit 7) */
- if ( cpu_has_pge ) {
+ if (cpu_has_pge) {
cr4 = read_cr4();
write_cr4(cr4 & ~X86_CR4_PGE);
}
- /* Disable and flush caches. Note that wbinvd flushes the TLBs as
- a side-effect */
+ /*
+ * Disable and flush caches.
+ * Note that wbinvd flushes the TLBs as a side-effect
+ */
cr0 = read_cr0() | X86_CR0_CD;
wbinvd();
write_cr0(cr0);
@@ -147,22 +156,21 @@ static void prepare_set(void)
/* Cyrix ARRs - everything else was excluded at the top */
setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10);
-
}
static void post_set(void)
{
- /* Flush caches and TLBs */
+ /* Flush caches and TLBs */
wbinvd();
/* Cyrix ARRs - everything else was excluded at the top */
setCx86(CX86_CCR3, ccr3);
-
- /* Enable caches */
+
+ /* Enable caches */
write_cr0(read_cr0() & 0xbfffffff);
- /* Restore value of CR4 */
- if ( cpu_has_pge )
+ /* Restore value of CR4 */
+ if (cpu_has_pge)
write_cr4(cr4);
}
@@ -178,7 +186,8 @@ static void cyrix_set_arr(unsigned int reg, unsigned long base,
size >>= 6;
size &= 0x7fff; /* make sure arr_size <= 14 */
- for (arr_size = 0; size; arr_size++, size >>= 1) ;
+ for (arr_size = 0; size; arr_size++, size >>= 1)
+ ;
if (reg < 7) {
switch (type) {
@@ -215,18 +224,18 @@ static void cyrix_set_arr(unsigned int reg, unsigned long base,
prepare_set();
base <<= PAGE_SHIFT;
- setCx86(arr, ((unsigned char *) &base)[3]);
- setCx86(arr + 1, ((unsigned char *) &base)[2]);
- setCx86(arr + 2, (((unsigned char *) &base)[1]) | arr_size);
+ setCx86(arr + 0, ((unsigned char *)&base)[3]);
+ setCx86(arr + 1, ((unsigned char *)&base)[2]);
+ setCx86(arr + 2, (((unsigned char *)&base)[1]) | arr_size);
setCx86(CX86_RCR_BASE + reg, arr_type);
post_set();
}
typedef struct {
- unsigned long base;
- unsigned long size;
- mtrr_type type;
+ unsigned long base;
+ unsigned long size;
+ mtrr_type type;
} arr_state_t;
static arr_state_t arr_state[8] = {
@@ -247,16 +256,17 @@ static void cyrix_set_all(void)
setCx86(CX86_CCR0 + i, ccr_state[i]);
for (; i < 7; i++)
setCx86(CX86_CCR4 + i, ccr_state[i]);
- for (i = 0; i < 8; i++)
- cyrix_set_arr(i, arr_state[i].base,
+
+ for (i = 0; i < 8; i++) {
+ cyrix_set_arr(i, arr_state[i].base,
arr_state[i].size, arr_state[i].type);
+ }
post_set();
}
static struct mtrr_ops cyrix_mtrr_ops = {
.vendor = X86_VENDOR_CYRIX,
-// .init = cyrix_arr_init,
.set_all = cyrix_set_all,
.set = cyrix_set_arr,
.get = cyrix_get_arr,
@@ -270,5 +280,3 @@ int __init cyrix_init_mtrr(void)
set_mtrr_ops(&cyrix_mtrr_ops);
return 0;
}
-
-//arch_initcall(cyrix_init_mtrr);
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 0543f69f0b27..55da0c5f68dd 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -1,28 +1,34 @@
-/* This only handles 32bit MTRR on 32bit hosts. This is strictly wrong
- because MTRRs can span upto 40 bits (36bits on most modern x86) */
+/*
+ * This only handles 32bit MTRR on 32bit hosts. This is strictly wrong
+ * because MTRRs can span upto 40 bits (36bits on most modern x86)
+ */
+#define DEBUG
+
+#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
+#include <linux/io.h>
#include <linux/mm.h>
-#include <linux/module.h>
-#include <asm/io.h>
-#include <asm/mtrr.h>
-#include <asm/msr.h>
-#include <asm/system.h>
-#include <asm/cpufeature.h>
+
#include <asm/processor-flags.h>
+#include <asm/cpufeature.h>
#include <asm/tlbflush.h>
+#include <asm/system.h>
+#include <asm/mtrr.h>
+#include <asm/msr.h>
#include <asm/pat.h>
+
#include "mtrr.h"
struct fixed_range_block {
- int base_msr; /* start address of an MTRR block */
- int ranges; /* number of MTRRs in this block */
+ int base_msr; /* start address of an MTRR block */
+ int ranges; /* number of MTRRs in this block */
};
static struct fixed_range_block fixed_range_blocks[] = {
- { MSR_MTRRfix64K_00000, 1 }, /* one 64k MTRR */
- { MSR_MTRRfix16K_80000, 2 }, /* two 16k MTRRs */
- { MSR_MTRRfix4K_C0000, 8 }, /* eight 4k MTRRs */
+ { MSR_MTRRfix64K_00000, 1 }, /* one 64k MTRR */
+ { MSR_MTRRfix16K_80000, 2 }, /* two 16k MTRRs */
+ { MSR_MTRRfix4K_C0000, 8 }, /* eight 4k MTRRs */
{}
};
@@ -30,10 +36,10 @@ static unsigned long smp_changes_mask;
static int mtrr_state_set;
u64 mtrr_tom2;
-struct mtrr_state_type mtrr_state = {};
+struct mtrr_state_type mtrr_state;
EXPORT_SYMBOL_GPL(mtrr_state);
-/**
+/*
* BIOS is expected to clear MtrrFixDramModEn bit, see for example
* "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD
* Opteron Processors" (26094 Rev. 3.30 February 2006), section
@@ -104,9 +110,8 @@ u8 mtrr_type_lookup(u64 start, u64 end)
* Look of multiple ranges matching this address and pick type
* as per MTRR precedence
*/
- if (!(mtrr_state.enabled & 2)) {
+ if (!(mtrr_state.enabled & 2))
return mtrr_state.def_type;
- }
prev_match = 0xFF;
for (i = 0; i < num_var_ranges; ++i) {
@@ -125,9 +130,8 @@ u8 mtrr_type_lookup(u64 start, u64 end)
if (start_state != end_state)
return 0xFE;
- if ((start & mask) != (base & mask)) {
+ if ((start & mask) != (base & mask))
continue;
- }
curr_match = mtrr_state.var_ranges[i].base_lo & 0xff;
if (prev_match == 0xFF) {
@@ -148,9 +152,8 @@ u8 mtrr_type_lookup(u64 start, u64 end)
curr_match = MTRR_TYPE_WRTHROUGH;
}
- if (prev_match != curr_match) {
+ if (prev_match != curr_match)
return MTRR_TYPE_UNCACHABLE;
- }
}
if (mtrr_tom2) {
@@ -164,7 +167,7 @@ u8 mtrr_type_lookup(u64 start, u64 end)
return mtrr_state.def_type;
}
-/* Get the MSR pair relating to a var range */
+/* Get the MSR pair relating to a var range */
static void
get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr)
{
@@ -172,7 +175,7 @@ get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr)
rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
}
-/* fill the MSR pair relating to a var range */
+/* Fill the MSR pair relating to a var range */
void fill_mtrr_var_range(unsigned int index,
u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi)
{
@@ -186,10 +189,9 @@ void fill_mtrr_var_range(unsigned int index,
vr[index].mask_hi = mask_hi;
}
-static void
-get_fixed_ranges(mtrr_type * frs)
+static void get_fixed_ranges(mtrr_type *frs)
{
- unsigned int *p = (unsigned int *) frs;
+ unsigned int *p = (unsigned int *)frs;
int i;
k8_check_syscfg_dram_mod_en();
@@ -217,22 +219,22 @@ static void __init print_fixed_last(void)
if (!last_fixed_end)
return;
- printk(KERN_DEBUG " %05X-%05X %s\n", last_fixed_start,
- last_fixed_end - 1, mtrr_attrib_to_str(last_fixed_type));
+ pr_debug(" %05X-%05X %s\n", last_fixed_start,
+ last_fixed_end - 1, mtrr_attrib_to_str(last_fixed_type));
last_fixed_end = 0;
}
static void __init update_fixed_last(unsigned base, unsigned end,
- mtrr_type type)
+ mtrr_type type)
{
last_fixed_start = base;
last_fixed_end = end;
last_fixed_type = type;
}
-static void __init print_fixed(unsigned base, unsigned step,
- const mtrr_type *types)
+static void __init
+print_fixed(unsigned base, unsigned step, const mtrr_type *types)
{
unsigned i;
@@ -259,54 +261,55 @@ static void __init print_mtrr_state(void)
unsigned int i;
int high_width;
- printk(KERN_DEBUG "MTRR default type: %s\n",
- mtrr_attrib_to_str(mtrr_state.def_type));
+ pr_debug("MTRR default type: %s\n",
+ mtrr_attrib_to_str(mtrr_state.def_type));
if (mtrr_state.have_fixed) {
- printk(KERN_DEBUG "MTRR fixed ranges %sabled:\n",
- mtrr_state.enabled & 1 ? "en" : "dis");
+ pr_debug("MTRR fixed ranges %sabled:\n",
+ mtrr_state.enabled & 1 ? "en" : "dis");
print_fixed(0x00000, 0x10000, mtrr_state.fixed_ranges + 0);
for (i = 0; i < 2; ++i)
- print_fixed(0x80000 + i * 0x20000, 0x04000, mtrr_state.fixed_ranges + (i + 1) * 8);
+ print_fixed(0x80000 + i * 0x20000, 0x04000,
+ mtrr_state.fixed_ranges + (i + 1) * 8);
for (i = 0; i < 8; ++i)
- print_fixed(0xC0000 + i * 0x08000, 0x01000, mtrr_state.fixed_ranges + (i + 3) * 8);
+ print_fixed(0xC0000 + i * 0x08000, 0x01000,
+ mtrr_state.fixed_ranges + (i + 3) * 8);
/* tail */
print_fixed_last();
}
- printk(KERN_DEBUG "MTRR variable ranges %sabled:\n",
- mtrr_state.enabled & 2 ? "en" : "dis");
+ pr_debug("MTRR variable ranges %sabled:\n",
+ mtrr_state.enabled & 2 ? "en" : "dis");
if (size_or_mask & 0xffffffffUL)
high_width = ffs(size_or_mask & 0xffffffffUL) - 1;
else
high_width = ffs(size_or_mask>>32) + 32 - 1;
high_width = (high_width - (32 - PAGE_SHIFT) + 3) / 4;
+
for (i = 0; i < num_var_ranges; ++i) {
if (mtrr_state.var_ranges[i].mask_lo & (1 << 11))
- printk(KERN_DEBUG " %u base %0*X%05X000 mask %0*X%05X000 %s\n",
- i,
- high_width,
- mtrr_state.var_ranges[i].base_hi,
- mtrr_state.var_ranges[i].base_lo >> 12,
- high_width,
- mtrr_state.var_ranges[i].mask_hi,
- mtrr_state.var_ranges[i].mask_lo >> 12,
- mtrr_attrib_to_str(mtrr_state.var_ranges[i].base_lo & 0xff));
+ pr_debug(" %u base %0*X%05X000 mask %0*X%05X000 %s\n",
+ i,
+ high_width,
+ mtrr_state.var_ranges[i].base_hi,
+ mtrr_state.var_ranges[i].base_lo >> 12,
+ high_width,
+ mtrr_state.var_ranges[i].mask_hi,
+ mtrr_state.var_ranges[i].mask_lo >> 12,
+ mtrr_attrib_to_str(mtrr_state.var_ranges[i].base_lo & 0xff));
else
- printk(KERN_DEBUG " %u disabled\n", i);
- }
- if (mtrr_tom2) {
- printk(KERN_DEBUG "TOM2: %016llx aka %lldM\n",
- mtrr_tom2, mtrr_tom2>>20);
+ pr_debug(" %u disabled\n", i);
}
+ if (mtrr_tom2)
+ pr_debug("TOM2: %016llx aka %lldM\n", mtrr_tom2, mtrr_tom2>>20);
}
-/* Grab all of the MTRR state for this CPU into *state */
+/* Grab all of the MTRR state for this CPU into *state */
void __init get_mtrr_state(void)
{
- unsigned int i;
struct mtrr_var_range *vrs;
- unsigned lo, dummy;
unsigned long flags;
+ unsigned lo, dummy;
+ unsigned int i;
vrs = mtrr_state.var_ranges;
@@ -324,6 +327,7 @@ void __init get_mtrr_state(void)
if (amd_special_default_mtrr()) {
unsigned low, high;
+
/* TOP_MEM2 */
rdmsr(MSR_K8_TOP_MEM2, low, high);
mtrr_tom2 = high;
@@ -344,10 +348,9 @@ void __init get_mtrr_state(void)
post_set();
local_irq_restore(flags);
-
}
-/* Some BIOS's are fucked and don't set all MTRRs the same! */
+/* Some BIOS's are messed up and don't set all MTRRs the same! */
void __init mtrr_state_warn(void)
{
unsigned long mask = smp_changes_mask;
@@ -355,28 +358,33 @@ void __init mtrr_state_warn(void)
if (!mask)
return;
if (mask & MTRR_CHANGE_MASK_FIXED)
- printk(KERN_WARNING "mtrr: your CPUs had inconsistent fixed MTRR settings\n");
+ pr_warning("mtrr: your CPUs had inconsistent fixed MTRR settings\n");
if (mask & MTRR_CHANGE_MASK_VARIABLE)
- printk(KERN_WARNING "mtrr: your CPUs had inconsistent variable MTRR settings\n");
+ pr_warning("mtrr: your CPUs had inconsistent variable MTRR settings\n");
if (mask & MTRR_CHANGE_MASK_DEFTYPE)
- printk(KERN_WARNING "mtrr: your CPUs had inconsistent MTRRdefType settings\n");
+ pr_warning("mtrr: your CPUs had inconsistent MTRRdefType settings\n");
+
printk(KERN_INFO "mtrr: probably your BIOS does not setup all CPUs.\n");
printk(KERN_INFO "mtrr: corrected configuration.\n");
}
-/* Doesn't attempt to pass an error out to MTRR users
- because it's quite complicated in some cases and probably not
- worth it because the best error handling is to ignore it. */
+/*
+ * Doesn't attempt to pass an error out to MTRR users
+ * because it's quite complicated in some cases and probably not
+ * worth it because the best error handling is to ignore it.
+ */
void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b)
{
- if (wrmsr_safe(msr, a, b) < 0)
+ if (wrmsr_safe(msr, a, b) < 0) {
printk(KERN_ERR
"MTRR: CPU %u: Writing MSR %x to %x:%x failed\n",
smp_processor_id(), msr, a, b);
+ }
}
/**
- * set_fixed_range - checks & updates a fixed-range MTRR if it differs from the value it should have
+ * set_fixed_range - checks & updates a fixed-range MTRR if it
+ * differs from the value it should have
* @msr: MSR address of the MTTR which should be checked and updated
* @changed: pointer which indicates whether the MTRR needed to be changed
* @msrwords: pointer to the MSR values which the MSR should have
@@ -401,20 +409,23 @@ static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords)
*
* Returns: The index of the region on success, else negative on error.
*/
-int generic_get_free_region(unsigned long base, unsigned long size, int replace_reg)
+int
+generic_get_free_region(unsigned long base, unsigned long size, int replace_reg)
{
- int i, max;
- mtrr_type ltype;
unsigned long lbase, lsize;
+ mtrr_type ltype;
+ int i, max;
max = num_var_ranges;
if (replace_reg >= 0 && replace_reg < max)
return replace_reg;
+
for (i = 0; i < max; ++i) {
mtrr_if->get(i, &lbase, &lsize, &ltype);
if (lsize == 0)
return i;
}
+
return -ENOSPC;
}
@@ -434,7 +445,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi);
if ((mask_lo & 0x800) == 0) {
- /* Invalid (i.e. free) range */
+ /* Invalid (i.e. free) range */
*base = 0;
*size = 0;
*type = 0;
@@ -471,27 +482,31 @@ out_put_cpu:
}
/**
- * set_fixed_ranges - checks & updates the fixed-range MTRRs if they differ from the saved set
+ * set_fixed_ranges - checks & updates the fixed-range MTRRs if they
+ * differ from the saved set
* @frs: pointer to fixed-range MTRR values, saved by get_fixed_ranges()
*/
-static int set_fixed_ranges(mtrr_type * frs)
+static int set_fixed_ranges(mtrr_type *frs)
{
- unsigned long long *saved = (unsigned long long *) frs;
+ unsigned long long *saved = (unsigned long long *)frs;
bool changed = false;
- int block=-1, range;
+ int block = -1, range;
k8_check_syscfg_dram_mod_en();
- while (fixed_range_blocks[++block].ranges)
- for (range=0; range < fixed_range_blocks[block].ranges; range++)
- set_fixed_range(fixed_range_blocks[block].base_msr + range,
- &changed, (unsigned int *) saved++);
+ while (fixed_range_blocks[++block].ranges) {
+ for (range = 0; range < fixed_range_blocks[block].ranges; range++)
+ set_fixed_range(fixed_range_blocks[block].base_msr + range,
+ &changed, (unsigned int *)saved++);
+ }
return changed;
}
-/* Set the MSR pair relating to a var range. Returns TRUE if
- changes are made */
+/*
+ * Set the MSR pair relating to a var range.
+ * Returns true if changes are made.
+ */
static bool set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr)
{
unsigned int lo, hi;
@@ -501,6 +516,7 @@ static bool set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr)
if ((vr->base_lo & 0xfffff0ffUL) != (lo & 0xfffff0ffUL)
|| (vr->base_hi & (size_and_mask >> (32 - PAGE_SHIFT))) !=
(hi & (size_and_mask >> (32 - PAGE_SHIFT)))) {
+
mtrr_wrmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi);
changed = true;
}
@@ -526,21 +542,26 @@ static u32 deftype_lo, deftype_hi;
*/
static unsigned long set_mtrr_state(void)
{
- unsigned int i;
unsigned long change_mask = 0;
+ unsigned int i;
- for (i = 0; i < num_var_ranges; i++)
+ for (i = 0; i < num_var_ranges; i++) {
if (set_mtrr_var_ranges(i, &mtrr_state.var_ranges[i]))
change_mask |= MTRR_CHANGE_MASK_VARIABLE;
+ }
if (mtrr_state.have_fixed && set_fixed_ranges(mtrr_state.fixed_ranges))
change_mask |= MTRR_CHANGE_MASK_FIXED;
- /* Set_mtrr_restore restores the old value of MTRRdefType,
- so to set it we fiddle with the saved value */
+ /*
+ * Set_mtrr_restore restores the old value of MTRRdefType,
+ * so to set it we fiddle with the saved value:
+ */
if ((deftype_lo & 0xff) != mtrr_state.def_type
|| ((deftype_lo & 0xc00) >> 10) != mtrr_state.enabled) {
- deftype_lo = (deftype_lo & ~0xcff) | mtrr_state.def_type | (mtrr_state.enabled << 10);
+
+ deftype_lo = (deftype_lo & ~0xcff) | mtrr_state.def_type |
+ (mtrr_state.enabled << 10);
change_mask |= MTRR_CHANGE_MASK_DEFTYPE;
}
@@ -548,33 +569,36 @@ static unsigned long set_mtrr_state(void)
}
-static unsigned long cr4 = 0;
+static unsigned long cr4;
static DEFINE_SPINLOCK(set_atomicity_lock);
/*
- * Since we are disabling the cache don't allow any interrupts - they
- * would run extremely slow and would only increase the pain. The caller must
- * ensure that local interrupts are disabled and are reenabled after post_set()
- * has been called.
+ * Since we are disabling the cache don't allow any interrupts,
+ * they would run extremely slow and would only increase the pain.
+ *
+ * The caller must ensure that local interrupts are disabled and
+ * are reenabled after post_set() has been called.
*/
-
static void prepare_set(void) __acquires(set_atomicity_lock)
{
unsigned long cr0;
- /* Note that this is not ideal, since the cache is only flushed/disabled
- for this CPU while the MTRRs are changed, but changing this requires
- more invasive changes to the way the kernel boots */
+ /*
+ * Note that this is not ideal
+ * since the cache is only flushed/disabled for this CPU while the
+ * MTRRs are changed, but changing this requires more invasive
+ * changes to the way the kernel boots
+ */
spin_lock(&set_atomicity_lock);
- /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
+ /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
cr0 = read_cr0() | X86_CR0_CD;
write_cr0(cr0);
wbinvd();
- /* Save value of CR4 and clear Page Global Enable (bit 7) */
- if ( cpu_has_pge ) {
+ /* Save value of CR4 and clear Page Global Enable (bit 7) */
+ if (cpu_has_pge) {
cr4 = read_cr4();
write_cr4(cr4 & ~X86_CR4_PGE);
}
@@ -582,26 +606,26 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
/* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
__flush_tlb();
- /* Save MTRR state */
+ /* Save MTRR state */
rdmsr(MSR_MTRRdefType, deftype_lo, deftype_hi);
- /* Disable MTRRs, and set the default type to uncached */
+ /* Disable MTRRs, and set the default type to uncached */
mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi);
}
static void post_set(void) __releases(set_atomicity_lock)
{
- /* Flush TLBs (no need to flush caches - they are disabled) */
+ /* Flush TLBs (no need to flush caches - they are disabled) */
__flush_tlb();
/* Intel (P6) standard MTRRs */
mtrr_wrmsr(MSR_MTRRdefType, deftype_lo, deftype_hi);
-
- /* Enable caches */
+
+ /* Enable caches */
write_cr0(read_cr0() & 0xbfffffff);
- /* Restore value of CR4 */
- if ( cpu_has_pge )
+ /* Restore value of CR4 */
+ if (cpu_has_pge)
write_cr4(cr4);
spin_unlock(&set_atomicity_lock);
}
@@ -623,24 +647,27 @@ static void generic_set_all(void)
post_set();
local_irq_restore(flags);
- /* Use the atomic bitops to update the global mask */
+ /* Use the atomic bitops to update the global mask */
for (count = 0; count < sizeof mask * 8; ++count) {
if (mask & 0x01)
set_bit(count, &smp_changes_mask);
mask >>= 1;
}
-
+
}
+/**
+ * generic_set_mtrr - set variable MTRR register on the local CPU.
+ *
+ * @reg: The register to set.
+ * @base: The base address of the region.
+ * @size: The size of the region. If this is 0 the region is disabled.
+ * @type: The type of the region.
+ *
+ * Returns nothing.
+ */
static void generic_set_mtrr(unsigned int reg, unsigned long base,
unsigned long size, mtrr_type type)
-/* [SUMMARY] Set variable MTRR register on the local CPU.
- <reg> The register to set.
- <base> The base address of the region.
- <size> The size of the region. If this is 0 the region is disabled.
- <type> The type of the region.
- [RETURNS] Nothing.
-*/
{
unsigned long flags;
struct mtrr_var_range *vr;
@@ -651,8 +678,10 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base,
prepare_set();
if (size == 0) {
- /* The invalid bit is kept in the mask, so we simply clear the
- relevant mask register to disable a range. */
+ /*
+ * The invalid bit is kept in the mask, so we simply
+ * clear the relevant mask register to disable a range.
+ */
mtrr_wrmsr(MTRRphysMask_MSR(reg), 0, 0);
memset(vr, 0, sizeof(struct mtrr_var_range));
} else {
@@ -669,46 +698,50 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base,
local_irq_restore(flags);
}
-int generic_validate_add_page(unsigned long base, unsigned long size, unsigned int type)
+int generic_validate_add_page(unsigned long base, unsigned long size,
+ unsigned int type)
{
unsigned long lbase, last;
- /* For Intel PPro stepping <= 7, must be 4 MiB aligned
- and not touch 0x70000000->0x7003FFFF */
+ /*
+ * For Intel PPro stepping <= 7
+ * must be 4 MiB aligned and not touch 0x70000000 -> 0x7003FFFF
+ */
if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 &&
boot_cpu_data.x86_model == 1 &&
boot_cpu_data.x86_mask <= 7) {
if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) {
- printk(KERN_WARNING "mtrr: base(0x%lx000) is not 4 MiB aligned\n", base);
+ pr_warning("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base);
return -EINVAL;
}
if (!(base + size < 0x70000 || base > 0x7003F) &&
(type == MTRR_TYPE_WRCOMB
|| type == MTRR_TYPE_WRBACK)) {
- printk(KERN_WARNING "mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n");
+ pr_warning("mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n");
return -EINVAL;
}
}
- /* Check upper bits of base and last are equal and lower bits are 0
- for base and 1 for last */
+ /*
+ * Check upper bits of base and last are equal and lower bits are 0
+ * for base and 1 for last
+ */
last = base + size - 1;
for (lbase = base; !(lbase & 1) && (last & 1);
- lbase = lbase >> 1, last = last >> 1) ;
+ lbase = lbase >> 1, last = last >> 1)
+ ;
if (lbase != last) {
- printk(KERN_WARNING "mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n",
- base, size);
+ pr_warning("mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n", base, size);
return -EINVAL;
}
return 0;
}
-
static int generic_have_wrcomb(void)
{
unsigned long config, dummy;
rdmsr(MSR_MTRRcap, config, dummy);
- return (config & (1 << 10));
+ return config & (1 << 10);
}
int positive_have_wrcomb(void)
@@ -716,14 +749,15 @@ int positive_have_wrcomb(void)
return 1;
}
-/* generic structure...
+/*
+ * Generic structure...
*/
struct mtrr_ops generic_mtrr_ops = {
- .use_intel_if = 1,
- .set_all = generic_set_all,
- .get = generic_get_mtrr,
- .get_free_region = generic_get_free_region,
- .set = generic_set_mtrr,
- .validate_add_page = generic_validate_add_page,
- .have_wrcomb = generic_have_wrcomb,
+ .use_intel_if = 1,
+ .set_all = generic_set_all,
+ .get = generic_get_mtrr,
+ .get_free_region = generic_get_free_region,
+ .set = generic_set_mtrr,
+ .validate_add_page = generic_validate_add_page,
+ .have_wrcomb = generic_have_wrcomb,
};
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index fb73a52913a4..08b6ea4c62b4 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -1,27 +1,28 @@
-#include <linux/init.h>
-#include <linux/proc_fs.h>
#include <linux/capability.h>
-#include <linux/ctype.h>
-#include <linux/module.h>
#include <linux/seq_file.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
+#include <linux/proc_fs.h>
+#include <linux/module.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
#define LINE_SIZE 80
#include <asm/mtrr.h>
+
#include "mtrr.h"
#define FILE_FCOUNT(f) (((struct seq_file *)((f)->private_data))->private)
static const char *const mtrr_strings[MTRR_NUM_TYPES] =
{
- "uncachable", /* 0 */
- "write-combining", /* 1 */
- "?", /* 2 */
- "?", /* 3 */
- "write-through", /* 4 */
- "write-protect", /* 5 */
- "write-back", /* 6 */
+ "uncachable", /* 0 */
+ "write-combining", /* 1 */
+ "?", /* 2 */
+ "?", /* 3 */
+ "write-through", /* 4 */
+ "write-protect", /* 5 */
+ "write-back", /* 6 */
};
const char *mtrr_attrib_to_str(int x)
@@ -35,8 +36,8 @@ static int
mtrr_file_add(unsigned long base, unsigned long size,
unsigned int type, bool increment, struct file *file, int page)
{
+ unsigned int *fcount = FILE_FCOUNT(file);
int reg, max;
- unsigned int *fcount = FILE_FCOUNT(file);
max = num_var_ranges;
if (fcount == NULL) {
@@ -61,8 +62,8 @@ static int
mtrr_file_del(unsigned long base, unsigned long size,
struct file *file, int page)
{
- int reg;
unsigned int *fcount = FILE_FCOUNT(file);
+ int reg;
if (!page) {
if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1)))
@@ -81,13 +82,14 @@ mtrr_file_del(unsigned long base, unsigned long size,
return reg;
}
-/* RED-PEN: seq_file can seek now. this is ignored. */
+/*
+ * seq_file can seek but we ignore it.
+ *
+ * Format of control line:
+ * "base=%Lx size=%Lx type=%s" or "disable=%d"
+ */
static ssize_t
mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
-/* Format of control line:
- "base=%Lx size=%Lx type=%s" OR:
- "disable=%d"
-*/
{
int i, err;
unsigned long reg;
@@ -100,15 +102,18 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
return -EPERM;
if (!len)
return -EINVAL;
+
memset(line, 0, LINE_SIZE);
if (len > LINE_SIZE)
len = LINE_SIZE;
if (copy_from_user(line, buf, len - 1))
return -EFAULT;
+
linelen = strlen(line);
ptr = line + linelen - 1;
if (linelen && *ptr == '\n')
*ptr = '\0';
+
if (!strncmp(line, "disable=", 8)) {
reg = simple_strtoul(line + 8, &ptr, 0);
err = mtrr_del_page(reg, 0, 0);
@@ -116,28 +121,35 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
return err;
return len;
}
+
if (strncmp(line, "base=", 5))
return -EINVAL;
+
base = simple_strtoull(line + 5, &ptr, 0);
- for (; isspace(*ptr); ++ptr) ;
+ for (; isspace(*ptr); ++ptr)
+ ;
+
if (strncmp(ptr, "size=", 5))
return -EINVAL;
+
size = simple_strtoull(ptr + 5, &ptr, 0);
if ((base & 0xfff) || (size & 0xfff))
return -EINVAL;
- for (; isspace(*ptr); ++ptr) ;
+ for (; isspace(*ptr); ++ptr)
+ ;
+
if (strncmp(ptr, "type=", 5))
return -EINVAL;
ptr += 5;
- for (; isspace(*ptr); ++ptr) ;
+ for (; isspace(*ptr); ++ptr)
+ ;
+
for (i = 0; i < MTRR_NUM_TYPES; ++i) {
if (strcmp(ptr, mtrr_strings[i]))
continue;
base >>= PAGE_SHIFT;
size >>= PAGE_SHIFT;
- err =
- mtrr_add_page((unsigned long) base, (unsigned long) size, i,
- true);
+ err = mtrr_add_page((unsigned long)base, (unsigned long)size, i, true);
if (err < 0)
return err;
return len;
@@ -181,7 +193,9 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
case MTRRIOC32_SET_PAGE_ENTRY:
case MTRRIOC32_DEL_PAGE_ENTRY:
case MTRRIOC32_KILL_PAGE_ENTRY: {
- struct mtrr_sentry32 __user *s32 = (struct mtrr_sentry32 __user *)__arg;
+ struct mtrr_sentry32 __user *s32;
+
+ s32 = (struct mtrr_sentry32 __user *)__arg;
err = get_user(sentry.base, &s32->base);
err |= get_user(sentry.size, &s32->size);
err |= get_user(sentry.type, &s32->type);
@@ -191,7 +205,9 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
}
case MTRRIOC32_GET_ENTRY:
case MTRRIOC32_GET_PAGE_ENTRY: {
- struct mtrr_gentry32 __user *g32 = (struct mtrr_gentry32 __user *)__arg;
+ struct mtrr_gentry32 __user *g32;
+
+ g32 = (struct mtrr_gentry32 __user *)__arg;
err = get_user(gentry.regnum, &g32->regnum);
err |= get_user(gentry.base, &g32->base);
err |= get_user(gentry.size, &g32->size);
@@ -314,7 +330,7 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
if (err)
return err;
- switch(cmd) {
+ switch (cmd) {
case MTRRIOC_GET_ENTRY:
case MTRRIOC_GET_PAGE_ENTRY:
if (copy_to_user(arg, &gentry, sizeof gentry))
@@ -323,7 +339,9 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
#ifdef CONFIG_COMPAT
case MTRRIOC32_GET_ENTRY:
case MTRRIOC32_GET_PAGE_ENTRY: {
- struct mtrr_gentry32 __user *g32 = (struct mtrr_gentry32 __user *)__arg;
+ struct mtrr_gentry32 __user *g32;
+
+ g32 = (struct mtrr_gentry32 __user *)__arg;
err = put_user(gentry.base, &g32->base);
err |= put_user(gentry.size, &g32->size);
err |= put_user(gentry.regnum, &g32->regnum);
@@ -335,11 +353,10 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
return err;
}
-static int
-mtrr_close(struct inode *ino, struct file *file)
+static int mtrr_close(struct inode *ino, struct file *file)
{
- int i, max;
unsigned int *fcount = FILE_FCOUNT(file);
+ int i, max;
if (fcount != NULL) {
max = num_var_ranges;
@@ -359,22 +376,22 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset);
static int mtrr_open(struct inode *inode, struct file *file)
{
- if (!mtrr_if)
+ if (!mtrr_if)
return -EIO;
- if (!mtrr_if->get)
- return -ENXIO;
+ if (!mtrr_if->get)
+ return -ENXIO;
return single_open(file, mtrr_seq_show, NULL);
}
static const struct file_operations mtrr_fops = {
- .owner = THIS_MODULE,
- .open = mtrr_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .write = mtrr_write,
- .unlocked_ioctl = mtrr_ioctl,
- .compat_ioctl = mtrr_ioctl,
- .release = mtrr_close,
+ .owner = THIS_MODULE,
+ .open = mtrr_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .write = mtrr_write,
+ .unlocked_ioctl = mtrr_ioctl,
+ .compat_ioctl = mtrr_ioctl,
+ .release = mtrr_close,
};
static int mtrr_seq_show(struct seq_file *seq, void *offset)
@@ -388,23 +405,24 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset)
max = num_var_ranges;
for (i = 0; i < max; i++) {
mtrr_if->get(i, &base, &size, &type);
- if (size == 0)
+ if (size == 0) {
mtrr_usage_table[i] = 0;
- else {
- if (size < (0x100000 >> PAGE_SHIFT)) {
- /* less than 1MB */
- factor = 'K';
- size <<= PAGE_SHIFT - 10;
- } else {
- factor = 'M';
- size >>= 20 - PAGE_SHIFT;
- }
- /* RED-PEN: base can be > 32bit */
- len += seq_printf(seq,
- "reg%02i: base=0x%06lx000 (%5luMB), size=%5lu%cB, count=%d: %s\n",
- i, base, base >> (20 - PAGE_SHIFT), size, factor,
- mtrr_usage_table[i], mtrr_attrib_to_str(type));
+ continue;
}
+ if (size < (0x100000 >> PAGE_SHIFT)) {
+ /* less than 1MB */
+ factor = 'K';
+ size <<= PAGE_SHIFT - 10;
+ } else {
+ factor = 'M';
+ size >>= 20 - PAGE_SHIFT;
+ }
+ /* Base can be > 32bit */
+ len += seq_printf(seq, "reg%02i: base=0x%06lx000 "
+ "(%5luMB), size=%5lu%cB, count=%d: %s\n",
+ i, base, base >> (20 - PAGE_SHIFT), size,
+ factor, mtrr_usage_table[i],
+ mtrr_attrib_to_str(type));
}
return 0;
}
@@ -422,6 +440,5 @@ static int __init mtrr_if_init(void)
proc_create("mtrr", S_IWUSR | S_IRUGO, NULL, &mtrr_fops);
return 0;
}
-
arch_initcall(mtrr_if_init);
#endif /* CONFIG_PROC_FS */
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 8fc248b5aeaf..7af0f88a4163 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -25,43 +25,48 @@
Operating System Writer's Guide" (Intel document number 242692),
section 11.11.7
- This was cleaned and made readable by Patrick Mochel <mochel@osdl.org>
- on 6-7 March 2002.
- Source: Intel Architecture Software Developers Manual, Volume 3:
+ This was cleaned and made readable by Patrick Mochel <mochel@osdl.org>
+ on 6-7 March 2002.
+ Source: Intel Architecture Software Developers Manual, Volume 3:
System Programming Guide; Section 9.11. (1997 edition - PPro).
*/
+#define DEBUG
+
+#include <linux/types.h> /* FIXME: kvm_para.h needs this */
+
+#include <linux/kvm_para.h>
+#include <linux/uaccess.h>
#include <linux/module.h>
+#include <linux/mutex.h>
#include <linux/init.h>
+#include <linux/sort.h>
+#include <linux/cpu.h>
#include <linux/pci.h>
#include <linux/smp.h>
-#include <linux/cpu.h>
-#include <linux/mutex.h>
-#include <linux/sort.h>
+#include <asm/processor.h>
#include <asm/e820.h>
#include <asm/mtrr.h>
-#include <asm/uaccess.h>
-#include <asm/processor.h>
#include <asm/msr.h>
-#include <asm/kvm_para.h>
+
#include "mtrr.h"
-u32 num_var_ranges = 0;
+u32 num_var_ranges;
unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
static DEFINE_MUTEX(mtrr_mutex);
u64 size_or_mask, size_and_mask;
-static struct mtrr_ops * mtrr_ops[X86_VENDOR_NUM] = {};
+static struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM];
-struct mtrr_ops * mtrr_if = NULL;
+struct mtrr_ops *mtrr_if;
static void set_mtrr(unsigned int reg, unsigned long base,
unsigned long size, mtrr_type type);
-void set_mtrr_ops(struct mtrr_ops * ops)
+void set_mtrr_ops(struct mtrr_ops *ops)
{
if (ops->vendor && ops->vendor < X86_VENDOR_NUM)
mtrr_ops[ops->vendor] = ops;
@@ -72,30 +77,36 @@ static int have_wrcomb(void)
{
struct pci_dev *dev;
u8 rev;
-
- if ((dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL)) != NULL) {
- /* ServerWorks LE chipsets < rev 6 have problems with write-combining
- Don't allow it and leave room for other chipsets to be tagged */
+
+ dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL);
+ if (dev != NULL) {
+ /*
+ * ServerWorks LE chipsets < rev 6 have problems with
+ * write-combining. Don't allow it and leave room for other
+ * chipsets to be tagged
+ */
if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS &&
dev->device == PCI_DEVICE_ID_SERVERWORKS_LE) {
pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev);
if (rev <= 5) {
- printk(KERN_INFO "mtrr: Serverworks LE rev < 6 detected. Write-combining disabled.\n");
+ pr_info("mtrr: Serverworks LE rev < 6 detected. Write-combining disabled.\n");
pci_dev_put(dev);
return 0;
}
}
- /* Intel 450NX errata # 23. Non ascending cacheline evictions to
- write combining memory may resulting in data corruption */
+ /*
+ * Intel 450NX errata # 23. Non ascending cacheline evictions to
+ * write combining memory may resulting in data corruption
+ */
if (dev->vendor == PCI_VENDOR_ID_INTEL &&
dev->device == PCI_DEVICE_ID_INTEL_82451NX) {
- printk(KERN_INFO "mtrr: Intel 450NX MMC detected. Write-combining disabled.\n");
+ pr_info("mtrr: Intel 450NX MMC detected. Write-combining disabled.\n");
pci_dev_put(dev);
return 0;
}
pci_dev_put(dev);
- }
- return (mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0);
+ }
+ return mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0;
}
/* This function returns the number of variable MTRRs */
@@ -103,12 +114,13 @@ static void __init set_num_var_ranges(void)
{
unsigned long config = 0, dummy;
- if (use_intel()) {
+ if (use_intel())
rdmsr(MSR_MTRRcap, config, dummy);
- } else if (is_cpu(AMD))
+ else if (is_cpu(AMD))
config = 2;
else if (is_cpu(CYRIX) || is_cpu(CENTAUR))
config = 8;
+
num_var_ranges = config & 0xff;
}
@@ -130,10 +142,12 @@ struct set_mtrr_data {
mtrr_type smp_type;
};
+/**
+ * ipi_handler - Synchronisation handler. Executed by "other" CPUs.
+ *
+ * Returns nothing.
+ */
static void ipi_handler(void *info)
-/* [SUMMARY] Synchronisation handler. Executed by "other" CPUs.
- [RETURNS] Nothing.
-*/
{
#ifdef CONFIG_SMP
struct set_mtrr_data *data = info;
@@ -142,18 +156,19 @@ static void ipi_handler(void *info)
local_irq_save(flags);
atomic_dec(&data->count);
- while(!atomic_read(&data->gate))
+ while (!atomic_read(&data->gate))
cpu_relax();
/* The master has cleared me to execute */
- if (data->smp_reg != ~0U)
- mtrr_if->set(data->smp_reg, data->smp_base,
+ if (data->smp_reg != ~0U) {
+ mtrr_if->set(data->smp_reg, data->smp_base,
data->smp_size, data->smp_type);
- else
+ } else {
mtrr_if->set_all();
+ }
atomic_dec(&data->count);
- while(atomic_read(&data->gate))
+ while (atomic_read(&data->gate))
cpu_relax();
atomic_dec(&data->count);
@@ -161,7 +176,8 @@ static void ipi_handler(void *info)
#endif
}
-static inline int types_compatible(mtrr_type type1, mtrr_type type2) {
+static inline int types_compatible(mtrr_type type1, mtrr_type type2)
+{
return type1 == MTRR_TYPE_UNCACHABLE ||
type2 == MTRR_TYPE_UNCACHABLE ||
(type1 == MTRR_TYPE_WRTHROUGH && type2 == MTRR_TYPE_WRBACK) ||
@@ -176,10 +192,10 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2) {
* @type: mtrr type
*
* This is kinda tricky, but fortunately, Intel spelled it out for us cleanly:
- *
+ *
* 1. Send IPI to do the following:
* 2. Disable Interrupts
- * 3. Wait for all procs to do so
+ * 3. Wait for all procs to do so
* 4. Enter no-fill cache mode
* 5. Flush caches
* 6. Clear PGE bit
@@ -189,26 +205,27 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2) {
* 10. Enable all range registers
* 11. Flush all TLBs and caches again
* 12. Enter normal cache mode and reenable caching
- * 13. Set PGE
+ * 13. Set PGE
* 14. Wait for buddies to catch up
* 15. Enable interrupts.
- *
+ *
* What does that mean for us? Well, first we set data.count to the number
* of CPUs. As each CPU disables interrupts, it'll decrement it once. We wait
* until it hits 0 and proceed. We set the data.gate flag and reset data.count.
- * Meanwhile, they are waiting for that flag to be set. Once it's set, each
- * CPU goes through the transition of updating MTRRs. The CPU vendors may each do it
- * differently, so we call mtrr_if->set() callback and let them take care of it.
- * When they're done, they again decrement data->count and wait for data.gate to
- * be reset.
- * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag.
+ * Meanwhile, they are waiting for that flag to be set. Once it's set, each
+ * CPU goes through the transition of updating MTRRs.
+ * The CPU vendors may each do it differently,
+ * so we call mtrr_if->set() callback and let them take care of it.
+ * When they're done, they again decrement data->count and wait for data.gate
+ * to be reset.
+ * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag
* Everyone then enables interrupts and we all continue on.
*
* Note that the mechanism is the same for UP systems, too; all the SMP stuff
* becomes nops.
*/
-static void set_mtrr(unsigned int reg, unsigned long base,
- unsigned long size, mtrr_type type)
+static void
+set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type)
{
struct set_mtrr_data data;
unsigned long flags;
@@ -218,121 +235,122 @@ static void set_mtrr(unsigned int reg, unsigned long base,
data.smp_size = size;
data.smp_type = type;
atomic_set(&data.count, num_booting_cpus() - 1);
- /* make sure data.count is visible before unleashing other CPUs */
+
+ /* Make sure data.count is visible before unleashing other CPUs */
smp_wmb();
- atomic_set(&data.gate,0);
+ atomic_set(&data.gate, 0);
- /* Start the ball rolling on other CPUs */
+ /* Start the ball rolling on other CPUs */
if (smp_call_function(ipi_handler, &data, 0) != 0)
panic("mtrr: timed out waiting for other CPUs\n");
local_irq_save(flags);
- while(atomic_read(&data.count))
+ while (atomic_read(&data.count))
cpu_relax();
- /* ok, reset count and toggle gate */
+ /* Ok, reset count and toggle gate */
atomic_set(&data.count, num_booting_cpus() - 1);
smp_wmb();
- atomic_set(&data.gate,1);
+ atomic_set(&data.gate, 1);
- /* do our MTRR business */
+ /* Do our MTRR business */
- /* HACK!
+ /*
+ * HACK!
* We use this same function to initialize the mtrrs on boot.
* The state of the boot cpu's mtrrs has been saved, and we want
- * to replicate across all the APs.
+ * to replicate across all the APs.
* If we're doing that @reg is set to something special...
*/
- if (reg != ~0U)
- mtrr_if->set(reg,base,size,type);
+ if (reg != ~0U)
+ mtrr_if->set(reg, base, size, type);
- /* wait for the others */
- while(atomic_read(&data.count))
+ /* Wait for the others */
+ while (atomic_read(&data.count))
cpu_relax();
atomic_set(&data.count, num_booting_cpus() - 1);
smp_wmb();
- atomic_set(&data.gate,0);
+ atomic_set(&data.gate, 0);
/*
* Wait here for everyone to have seen the gate change
* So we're the last ones to touch 'data'
*/
- while(atomic_read(&data.count))
+ while (atomic_read(&data.count))
cpu_relax();
local_irq_restore(flags);
}
/**
- * mtrr_add_page - Add a memory type region
- * @base: Physical base address of region in pages (in units of 4 kB!)
- * @size: Physical size of region in pages (4 kB)
- * @type: Type of MTRR desired
- * @increment: If this is true do usage counting on the region
+ * mtrr_add_page - Add a memory type region
+ * @base: Physical base address of region in pages (in units of 4 kB!)
+ * @size: Physical size of region in pages (4 kB)
+ * @type: Type of MTRR desired
+ * @increment: If this is true do usage counting on the region
*
- * Memory type region registers control the caching on newer Intel and
- * non Intel processors. This function allows drivers to request an
- * MTRR is added. The details and hardware specifics of each processor's
- * implementation are hidden from the caller, but nevertheless the
- * caller should expect to need to provide a power of two size on an
- * equivalent power of two boundary.
+ * Memory type region registers control the caching on newer Intel and
+ * non Intel processors. This function allows drivers to request an
+ * MTRR is added. The details and hardware specifics of each processor's
+ * implementation are hidden from the caller, but nevertheless the
+ * caller should expect to need to provide a power of two size on an
+ * equivalent power of two boundary.
*
- * If the region cannot be added either because all regions are in use
- * or the CPU cannot support it a negative value is returned. On success
- * the register number for this entry is returned, but should be treated
- * as a cookie only.
+ * If the region cannot be added either because all regions are in use
+ * or the CPU cannot support it a negative value is returned. On success
+ * the register number for this entry is returned, but should be treated
+ * as a cookie only.
*
- * On a multiprocessor machine the changes are made to all processors.
- * This is required on x86 by the Intel processors.
+ * On a multiprocessor machine the changes are made to all processors.
+ * This is required on x86 by the Intel processors.
*
- * The available types are
+ * The available types are
*
- * %MTRR_TYPE_UNCACHABLE - No caching
+ * %MTRR_TYPE_UNCACHABLE - No caching
*
- * %MTRR_TYPE_WRBACK - Write data back in bursts whenever
+ * %MTRR_TYPE_WRBACK - Write data back in bursts whenever
*
- * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
+ * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
*
- * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
+ * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
*
- * BUGS: Needs a quiet flag for the cases where drivers do not mind
- * failures and do not wish system log messages to be sent.
+ * BUGS: Needs a quiet flag for the cases where drivers do not mind
+ * failures and do not wish system log messages to be sent.
*/
-
-int mtrr_add_page(unsigned long base, unsigned long size,
+int mtrr_add_page(unsigned long base, unsigned long size,
unsigned int type, bool increment)
{
+ unsigned long lbase, lsize;
int i, replace, error;
mtrr_type ltype;
- unsigned long lbase, lsize;
if (!mtrr_if)
return -ENXIO;
-
- if ((error = mtrr_if->validate_add_page(base,size,type)))
+
+ error = mtrr_if->validate_add_page(base, size, type);
+ if (error)
return error;
if (type >= MTRR_NUM_TYPES) {
- printk(KERN_WARNING "mtrr: type: %u invalid\n", type);
+ pr_warning("mtrr: type: %u invalid\n", type);
return -EINVAL;
}
- /* If the type is WC, check that this processor supports it */
+ /* If the type is WC, check that this processor supports it */
if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) {
- printk(KERN_WARNING
- "mtrr: your processor doesn't support write-combining\n");
+ pr_warning("mtrr: your processor doesn't support write-combining\n");
return -ENOSYS;
}
if (!size) {
- printk(KERN_WARNING "mtrr: zero sized request\n");
+ pr_warning("mtrr: zero sized request\n");
return -EINVAL;
}
if (base & size_or_mask || size & size_or_mask) {
- printk(KERN_WARNING "mtrr: base or size exceeds the MTRR width\n");
+ pr_warning("mtrr: base or size exceeds the MTRR width\n");
return -EINVAL;
}
@@ -341,36 +359,40 @@ int mtrr_add_page(unsigned long base, unsigned long size,
/* No CPU hotplug when we change MTRR entries */
get_online_cpus();
- /* Search for existing MTRR */
+
+ /* Search for existing MTRR */
mutex_lock(&mtrr_mutex);
for (i = 0; i < num_var_ranges; ++i) {
mtrr_if->get(i, &lbase, &lsize, &ltype);
- if (!lsize || base > lbase + lsize - 1 || base + size - 1 < lbase)
+ if (!lsize || base > lbase + lsize - 1 ||
+ base + size - 1 < lbase)
continue;
- /* At this point we know there is some kind of overlap/enclosure */
+ /*
+ * At this point we know there is some kind of
+ * overlap/enclosure
+ */
if (base < lbase || base + size - 1 > lbase + lsize - 1) {
- if (base <= lbase && base + size - 1 >= lbase + lsize - 1) {
+ if (base <= lbase &&
+ base + size - 1 >= lbase + lsize - 1) {
/* New region encloses an existing region */
if (type == ltype) {
replace = replace == -1 ? i : -2;
continue;
- }
- else if (types_compatible(type, ltype))
+ } else if (types_compatible(type, ltype))
continue;
}
- printk(KERN_WARNING
- "mtrr: 0x%lx000,0x%lx000 overlaps existing"
- " 0x%lx000,0x%lx000\n", base, size, lbase,
- lsize);
+ pr_warning("mtrr: 0x%lx000,0x%lx000 overlaps existing"
+ " 0x%lx000,0x%lx000\n", base, size, lbase,
+ lsize);
goto out;
}
- /* New region is enclosed by an existing region */
+ /* New region is enclosed by an existing region */
if (ltype != type) {
if (types_compatible(type, ltype))
continue;
- printk (KERN_WARNING "mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n",
- base, size, mtrr_attrib_to_str(ltype),
- mtrr_attrib_to_str(type));
+ pr_warning("mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n",
+ base, size, mtrr_attrib_to_str(ltype),
+ mtrr_attrib_to_str(type));
goto out;
}
if (increment)
@@ -378,7 +400,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
error = i;
goto out;
}
- /* Search for an empty MTRR */
+ /* Search for an empty MTRR */
i = mtrr_if->get_free_region(base, size, replace);
if (i >= 0) {
set_mtrr(i, base, size, type);
@@ -393,8 +415,9 @@ int mtrr_add_page(unsigned long base, unsigned long size,
mtrr_usage_table[replace] = 0;
}
}
- } else
- printk(KERN_INFO "mtrr: no more MTRRs available\n");
+ } else {
+ pr_info("mtrr: no more MTRRs available\n");
+ }
error = i;
out:
mutex_unlock(&mtrr_mutex);
@@ -405,10 +428,8 @@ int mtrr_add_page(unsigned long base, unsigned long size,
static int mtrr_check(unsigned long base, unsigned long size)
{
if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
- printk(KERN_WARNING
- "mtrr: size and base must be multiples of 4 kiB\n");
- printk(KERN_DEBUG
- "mtrr: size: 0x%lx base: 0x%lx\n", size, base);
+ pr_warning("mtrr: size and base must be multiples of 4 kiB\n");
+ pr_debug("mtrr: size: 0x%lx base: 0x%lx\n", size, base);
dump_stack();
return -1;
}
@@ -416,66 +437,64 @@ static int mtrr_check(unsigned long base, unsigned long size)
}
/**
- * mtrr_add - Add a memory type region
- * @base: Physical base address of region
- * @size: Physical size of region
- * @type: Type of MTRR desired
- * @increment: If this is true do usage counting on the region
+ * mtrr_add - Add a memory type region
+ * @base: Physical base address of region
+ * @size: Physical size of region
+ * @type: Type of MTRR desired
+ * @increment: If this is true do usage counting on the region
*
- * Memory type region registers control the caching on newer Intel and
- * non Intel processors. This function allows drivers to request an
- * MTRR is added. The details and hardware specifics of each processor's
- * implementation are hidden from the caller, but nevertheless the
- * caller should expect to need to provide a power of two size on an
- * equivalent power of two boundary.
+ * Memory type region registers control the caching on newer Intel and
+ * non Intel processors. This function allows drivers to request an
+ * MTRR is added. The details and hardware specifics of each processor's
+ * implementation are hidden from the caller, but nevertheless the
+ * caller should expect to need to provide a power of two size on an
+ * equivalent power of two boundary.
*
- * If the region cannot be added either because all regions are in use
- * or the CPU cannot support it a negative value is returned. On success
- * the register number for this entry is returned, but should be treated
- * as a cookie only.
+ * If the region cannot be added either because all regions are in use
+ * or the CPU cannot support it a negative value is returned. On success
+ * the register number for this entry is returned, but should be treated
+ * as a cookie only.
*
- * On a multiprocessor machine the changes are made to all processors.
- * This is required on x86 by the Intel processors.
+ * On a multiprocessor machine the changes are made to all processors.
+ * This is required on x86 by the Intel processors.
*
- * The available types are
+ * The available types are
*
- * %MTRR_TYPE_UNCACHABLE - No caching
+ * %MTRR_TYPE_UNCACHABLE - No caching
*
- * %MTRR_TYPE_WRBACK - Write data back in bursts whenever
+ * %MTRR_TYPE_WRBACK - Write data back in bursts whenever
*
- * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
+ * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
*
- * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
+ * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
*
- * BUGS: Needs a quiet flag for the cases where drivers do not mind
- * failures and do not wish system log messages to be sent.
+ * BUGS: Needs a quiet flag for the cases where drivers do not mind
+ * failures and do not wish system log messages to be sent.
*/
-
-int
-mtrr_add(unsigned long base, unsigned long size, unsigned int type,
- bool increment)
+int mtrr_add(unsigned long base, unsigned long size, unsigned int type,
+ bool increment)
{
if (mtrr_check(base, size))
return -EINVAL;
return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type,
increment);
}
+EXPORT_SYMBOL(mtrr_add);
/**
- * mtrr_del_page - delete a memory type region
- * @reg: Register returned by mtrr_add
- * @base: Physical base address
- * @size: Size of region
+ * mtrr_del_page - delete a memory type region
+ * @reg: Register returned by mtrr_add
+ * @base: Physical base address
+ * @size: Size of region
*
- * If register is supplied then base and size are ignored. This is
- * how drivers should call it.
+ * If register is supplied then base and size are ignored. This is
+ * how drivers should call it.
*
- * Releases an MTRR region. If the usage count drops to zero the
- * register is freed and the region returns to default state.
- * On success the register is returned, on failure a negative error
- * code.
+ * Releases an MTRR region. If the usage count drops to zero the
+ * register is freed and the region returns to default state.
+ * On success the register is returned, on failure a negative error
+ * code.
*/
-
int mtrr_del_page(int reg, unsigned long base, unsigned long size)
{
int i, max;
@@ -500,22 +519,22 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
}
}
if (reg < 0) {
- printk(KERN_DEBUG "mtrr: no MTRR for %lx000,%lx000 found\n", base,
- size);
+ pr_debug("mtrr: no MTRR for %lx000,%lx000 found\n",
+ base, size);
goto out;
}
}
if (reg >= max) {
- printk(KERN_WARNING "mtrr: register: %d too big\n", reg);
+ pr_warning("mtrr: register: %d too big\n", reg);
goto out;
}
mtrr_if->get(reg, &lbase, &lsize, &ltype);
if (lsize < 1) {
- printk(KERN_WARNING "mtrr: MTRR %d not used\n", reg);
+ pr_warning("mtrr: MTRR %d not used\n", reg);
goto out;
}
if (mtrr_usage_table[reg] < 1) {
- printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg);
+ pr_warning("mtrr: reg: %d has count=0\n", reg);
goto out;
}
if (--mtrr_usage_table[reg] < 1)
@@ -526,33 +545,31 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
put_online_cpus();
return error;
}
+
/**
- * mtrr_del - delete a memory type region
- * @reg: Register returned by mtrr_add
- * @base: Physical base address
- * @size: Size of region
+ * mtrr_del - delete a memory type region
+ * @reg: Register returned by mtrr_add
+ * @base: Physical base address
+ * @size: Size of region
*
- * If register is supplied then base and size are ignored. This is
- * how drivers should call it.
+ * If register is supplied then base and size are ignored. This is
+ * how drivers should call it.
*
- * Releases an MTRR region. If the usage count drops to zero the
- * register is freed and the region returns to default state.
- * On success the register is returned, on failure a negative error
- * code.
+ * Releases an MTRR region. If the usage count drops to zero the
+ * register is freed and the region returns to default state.
+ * On success the register is returned, on failure a negative error
+ * code.
*/
-
-int
-mtrr_del(int reg, unsigned long base, unsigned long size)
+int mtrr_del(int reg, unsigned long base, unsigned long size)
{
if (mtrr_check(base, size))
return -EINVAL;
return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT);
}
-
-EXPORT_SYMBOL(mtrr_add);
EXPORT_SYMBOL(mtrr_del);
-/* HACK ALERT!
+/*
+ * HACK ALERT!
* These should be called implicitly, but we can't yet until all the initcall
* stuff is done...
*/
@@ -576,29 +593,28 @@ struct mtrr_value {
static struct mtrr_value mtrr_value[MTRR_MAX_VAR_RANGES];
-static int mtrr_save(struct sys_device * sysdev, pm_message_t state)
+static int mtrr_save(struct sys_device *sysdev, pm_message_t state)
{
int i;
for (i = 0; i < num_var_ranges; i++) {
- mtrr_if->get(i,
- &mtrr_value[i].lbase,
- &mtrr_value[i].lsize,
- &mtrr_value[i].ltype);
+ mtrr_if->get(i, &mtrr_value[i].lbase,
+ &mtrr_value[i].lsize,
+ &mtrr_value[i].ltype);
}
return 0;
}
-static int mtrr_restore(struct sys_device * sysdev)
+static int mtrr_restore(struct sys_device *sysdev)
{
int i;
for (i = 0; i < num_var_ranges; i++) {
- if (mtrr_value[i].lsize)
- set_mtrr(i,
- mtrr_value[i].lbase,
- mtrr_value[i].lsize,
- mtrr_value[i].ltype);
+ if (mtrr_value[i].lsize) {
+ set_mtrr(i, mtrr_value[i].lbase,
+ mtrr_value[i].lsize,
+ mtrr_value[i].ltype);
+ }
}
return 0;
}
@@ -615,26 +631,29 @@ int __initdata changed_by_mtrr_cleanup;
/**
* mtrr_bp_init - initialize mtrrs on the boot CPU
*
- * This needs to be called early; before any of the other CPUs are
+ * This needs to be called early; before any of the other CPUs are
* initialized (i.e. before smp_init()).
- *
+ *
*/
void __init mtrr_bp_init(void)
{
u32 phys_addr;
+
init_ifs();
phys_addr = 32;
if (cpu_has_mtrr) {
mtrr_if = &generic_mtrr_ops;
- size_or_mask = 0xff000000; /* 36 bits */
+ size_or_mask = 0xff000000; /* 36 bits */
size_and_mask = 0x00f00000;
phys_addr = 36;
- /* This is an AMD specific MSR, but we assume(hope?) that
- Intel will implement it to when they extend the address
- bus of the Xeon. */
+ /*
+ * This is an AMD specific MSR, but we assume(hope?) that
+ * Intel will implement it to when they extend the address
+ * bus of the Xeon.
+ */
if (cpuid_eax(0x80000000) >= 0x80000008) {
phys_addr = cpuid_eax(0x80000008) & 0xff;
/* CPUID workaround for Intel 0F33/0F34 CPU */
@@ -649,9 +668,11 @@ void __init mtrr_bp_init(void)
size_and_mask = ~size_or_mask & 0xfffff00000ULL;
} else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR &&
boot_cpu_data.x86 == 6) {
- /* VIA C* family have Intel style MTRRs, but
- don't support PAE */
- size_or_mask = 0xfff00000; /* 32 bits */
+ /*
+ * VIA C* family have Intel style MTRRs,
+ * but don't support PAE
+ */
+ size_or_mask = 0xfff00000; /* 32 bits */
size_and_mask = 0;
phys_addr = 32;
}
@@ -694,7 +715,6 @@ void __init mtrr_bp_init(void)
changed_by_mtrr_cleanup = 1;
mtrr_if->set_all();
}
-
}
}
}
@@ -706,12 +726,17 @@ void mtrr_ap_init(void)
if (!mtrr_if || !use_intel())
return;
/*
- * Ideally we should hold mtrr_mutex here to avoid mtrr entries changed,
- * but this routine will be called in cpu boot time, holding the lock
- * breaks it. This routine is called in two cases: 1.very earily time
- * of software resume, when there absolutely isn't mtrr entry changes;
- * 2.cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug lock to
- * prevent mtrr entry changes
+ * Ideally we should hold mtrr_mutex here to avoid mtrr entries
+ * changed, but this routine will be called in cpu boot time,
+ * holding the lock breaks it.
+ *
+ * This routine is called in two cases:
+ *
+ * 1. very earily time of software resume, when there absolutely
+ * isn't mtrr entry changes;
+ *
+ * 2. cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug
+ * lock to prevent mtrr entry changes
*/
local_irq_save(flags);
@@ -732,19 +757,23 @@ static int __init mtrr_init_finialize(void)
{
if (!mtrr_if)
return 0;
+
if (use_intel()) {
if (!changed_by_mtrr_cleanup)
mtrr_state_warn();
- } else {
- /* The CPUs haven't MTRR and seem to not support SMP. They have
- * specific drivers, we use a tricky method to support
- * suspend/resume for them.
- * TBD: is there any system with such CPU which supports
- * suspend/resume? if no, we should remove the code.
- */
- sysdev_driver_register(&cpu_sysdev_class,
- &mtrr_sysdev_driver);
+ return 0;
}
+
+ /*
+ * The CPU has no MTRR and seems to not support SMP. They have
+ * specific drivers, we use a tricky method to support
+ * suspend/resume for them.
+ *
+ * TBD: is there any system with such CPU which supports
+ * suspend/resume? If no, we should remove the code.
+ */
+ sysdev_driver_register(&cpu_sysdev_class, &mtrr_sysdev_driver);
+
return 0;
}
subsys_initcall(mtrr_init_finialize);
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 7538b767f206..a501dee9a87a 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -1,5 +1,5 @@
/*
- * local mtrr defines.
+ * local MTRR defines.
*/
#include <linux/types.h>
@@ -14,13 +14,12 @@ extern unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
struct mtrr_ops {
u32 vendor;
u32 use_intel_if;
-// void (*init)(void);
void (*set)(unsigned int reg, unsigned long base,
unsigned long size, mtrr_type type);
void (*set_all)(void);
void (*get)(unsigned int reg, unsigned long *base,
- unsigned long *size, mtrr_type * type);
+ unsigned long *size, mtrr_type *type);
int (*get_free_region)(unsigned long base, unsigned long size,
int replace_reg);
int (*validate_add_page)(unsigned long base, unsigned long size,
@@ -39,11 +38,11 @@ extern int positive_have_wrcomb(void);
/* library functions for processor-specific routines */
struct set_mtrr_context {
- unsigned long flags;
- unsigned long cr4val;
- u32 deftype_lo;
- u32 deftype_hi;
- u32 ccr3;
+ unsigned long flags;
+ unsigned long cr4val;
+ u32 deftype_lo;
+ u32 deftype_hi;
+ u32 ccr3;
};
void set_mtrr_done(struct set_mtrr_context *ctxt);
@@ -54,10 +53,10 @@ void fill_mtrr_var_range(unsigned int index,
u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi);
void get_mtrr_state(void);
-extern void set_mtrr_ops(struct mtrr_ops * ops);
+extern void set_mtrr_ops(struct mtrr_ops *ops);
extern u64 size_or_mask, size_and_mask;
-extern struct mtrr_ops * mtrr_if;
+extern struct mtrr_ops *mtrr_if;
#define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd)
#define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1)
diff --git a/arch/x86/kernel/cpu/mtrr/state.c b/arch/x86/kernel/cpu/mtrr/state.c
index 1f5fb1588d1f..dfc80b4e6b0d 100644
--- a/arch/x86/kernel/cpu/mtrr/state.c
+++ b/arch/x86/kernel/cpu/mtrr/state.c
@@ -1,24 +1,25 @@
-#include <linux/mm.h>
#include <linux/init.h>
-#include <asm/io.h>
-#include <asm/mtrr.h>
-#include <asm/msr.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+
#include <asm/processor-cyrix.h>
#include <asm/processor-flags.h>
-#include "mtrr.h"
+#include <asm/mtrr.h>
+#include <asm/msr.h>
+#include "mtrr.h"
-/* Put the processor into a state where MTRRs can be safely set */
+/* Put the processor into a state where MTRRs can be safely set */
void set_mtrr_prepare_save(struct set_mtrr_context *ctxt)
{
unsigned int cr0;
- /* Disable interrupts locally */
+ /* Disable interrupts locally */
local_irq_save(ctxt->flags);
if (use_intel() || is_cpu(CYRIX)) {
- /* Save value of CR4 and clear Page Global Enable (bit 7) */
+ /* Save value of CR4 and clear Page Global Enable (bit 7) */
if (cpu_has_pge) {
ctxt->cr4val = read_cr4();
write_cr4(ctxt->cr4val & ~X86_CR4_PGE);
@@ -33,50 +34,61 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt)
write_cr0(cr0);
wbinvd();
- if (use_intel())
- /* Save MTRR state */
+ if (use_intel()) {
+ /* Save MTRR state */
rdmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi);
- else
- /* Cyrix ARRs - everything else were excluded at the top */
+ } else {
+ /*
+ * Cyrix ARRs -
+ * everything else were excluded at the top
+ */
ctxt->ccr3 = getCx86(CX86_CCR3);
+ }
}
}
void set_mtrr_cache_disable(struct set_mtrr_context *ctxt)
{
- if (use_intel())
- /* Disable MTRRs, and set the default type to uncached */
+ if (use_intel()) {
+ /* Disable MTRRs, and set the default type to uncached */
mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo & 0xf300UL,
ctxt->deftype_hi);
- else if (is_cpu(CYRIX))
- /* Cyrix ARRs - everything else were excluded at the top */
- setCx86(CX86_CCR3, (ctxt->ccr3 & 0x0f) | 0x10);
+ } else {
+ if (is_cpu(CYRIX)) {
+ /* Cyrix ARRs - everything else were excluded at the top */
+ setCx86(CX86_CCR3, (ctxt->ccr3 & 0x0f) | 0x10);
+ }
+ }
}
-/* Restore the processor after a set_mtrr_prepare */
+/* Restore the processor after a set_mtrr_prepare */
void set_mtrr_done(struct set_mtrr_context *ctxt)
{
if (use_intel() || is_cpu(CYRIX)) {
- /* Flush caches and TLBs */
+ /* Flush caches and TLBs */
wbinvd();
- /* Restore MTRRdefType */
- if (use_intel())
+ /* Restore MTRRdefType */
+ if (use_intel()) {
/* Intel (P6) standard MTRRs */
- mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi);
- else
- /* Cyrix ARRs - everything else was excluded at the top */
+ mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo,
+ ctxt->deftype_hi);
+ } else {
+ /*
+ * Cyrix ARRs -
+ * everything else was excluded at the top
+ */
setCx86(CX86_CCR3, ctxt->ccr3);
+ }
- /* Enable caches */
+ /* Enable caches */
write_cr0(read_cr0() & 0xbfffffff);
- /* Restore value of CR4 */
+ /* Restore value of CR4 */
if (cpu_has_pge)
write_cr4(ctxt->cr4val);
}
- /* Re-enable interrupts locally (if enabled previously) */
+ /* Re-enable interrupts locally (if enabled previously) */
local_irq_restore(ctxt->flags);
}
-
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index a7aa8f900954..b237c181aa41 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -6,6 +6,7 @@
* Copyright (C) 2009 Jaswinder Singh Rajput
* Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
* Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
*
* For licencing details see kernel-base/COPYING
*/
@@ -20,6 +21,7 @@
#include <linux/sched.h>
#include <linux/uaccess.h>
#include <linux/highmem.h>
+#include <linux/cpu.h>
#include <asm/apic.h>
#include <asm/stacktrace.h>
@@ -27,12 +29,52 @@
static u64 perf_counter_mask __read_mostly;
+/* The maximal number of PEBS counters: */
+#define MAX_PEBS_COUNTERS 4
+
+/* The size of a BTS record in bytes: */
+#define BTS_RECORD_SIZE 24
+
+/* The size of a per-cpu BTS buffer in bytes: */
+#define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 1024)
+
+/* The BTS overflow threshold in bytes from the end of the buffer: */
+#define BTS_OVFL_TH (BTS_RECORD_SIZE * 64)
+
+
+/*
+ * Bits in the debugctlmsr controlling branch tracing.
+ */
+#define X86_DEBUGCTL_TR (1 << 6)
+#define X86_DEBUGCTL_BTS (1 << 7)
+#define X86_DEBUGCTL_BTINT (1 << 8)
+#define X86_DEBUGCTL_BTS_OFF_OS (1 << 9)
+#define X86_DEBUGCTL_BTS_OFF_USR (1 << 10)
+
+/*
+ * A debug store configuration.
+ *
+ * We only support architectures that use 64bit fields.
+ */
+struct debug_store {
+ u64 bts_buffer_base;
+ u64 bts_index;
+ u64 bts_absolute_maximum;
+ u64 bts_interrupt_threshold;
+ u64 pebs_buffer_base;
+ u64 pebs_index;
+ u64 pebs_absolute_maximum;
+ u64 pebs_interrupt_threshold;
+ u64 pebs_counter_reset[MAX_PEBS_COUNTERS];
+};
+
struct cpu_hw_counters {
struct perf_counter *counters[X86_PMC_IDX_MAX];
unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
unsigned long interrupts;
int enabled;
+ struct debug_store *ds;
};
/*
@@ -57,6 +99,8 @@ struct x86_pmu {
u64 counter_mask;
u64 max_period;
u64 intel_ctrl;
+ void (*enable_bts)(u64 config);
+ void (*disable_bts)(void);
};
static struct x86_pmu x86_pmu __read_mostly;
@@ -576,6 +620,9 @@ x86_perf_counter_update(struct perf_counter *counter,
u64 prev_raw_count, new_raw_count;
s64 delta;
+ if (idx == X86_PMC_IDX_FIXED_BTS)
+ return 0;
+
/*
* Careful: an NMI might modify the previous counter value.
*
@@ -659,10 +706,109 @@ static void release_pmc_hardware(void)
enable_lapic_nmi_watchdog();
}
+static inline bool bts_available(void)
+{
+ return x86_pmu.enable_bts != NULL;
+}
+
+static inline void init_debug_store_on_cpu(int cpu)
+{
+ struct debug_store *ds = per_cpu(cpu_hw_counters, cpu).ds;
+
+ if (!ds)
+ return;
+
+ wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
+ (u32)((u64)(long)ds), (u32)((u64)(long)ds >> 32));
+}
+
+static inline void fini_debug_store_on_cpu(int cpu)
+{
+ if (!per_cpu(cpu_hw_counters, cpu).ds)
+ return;
+
+ wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
+}
+
+static void release_bts_hardware(void)
+{
+ int cpu;
+
+ if (!bts_available())
+ return;
+
+ get_online_cpus();
+
+ for_each_online_cpu(cpu)
+ fini_debug_store_on_cpu(cpu);
+
+ for_each_possible_cpu(cpu) {
+ struct debug_store *ds = per_cpu(cpu_hw_counters, cpu).ds;
+
+ if (!ds)
+ continue;
+
+ per_cpu(cpu_hw_counters, cpu).ds = NULL;
+
+ kfree((void *)(long)ds->bts_buffer_base);
+ kfree(ds);
+ }
+
+ put_online_cpus();
+}
+
+static int reserve_bts_hardware(void)
+{
+ int cpu, err = 0;
+
+ if (!bts_available())
+ return -EOPNOTSUPP;
+
+ get_online_cpus();
+
+ for_each_possible_cpu(cpu) {
+ struct debug_store *ds;
+ void *buffer;
+
+ err = -ENOMEM;
+ buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
+ if (unlikely(!buffer))
+ break;
+
+ ds = kzalloc(sizeof(*ds), GFP_KERNEL);
+ if (unlikely(!ds)) {
+ kfree(buffer);
+ break;
+ }
+
+ ds->bts_buffer_base = (u64)(long)buffer;
+ ds->bts_index = ds->bts_buffer_base;
+ ds->bts_absolute_maximum =
+ ds->bts_buffer_base + BTS_BUFFER_SIZE;
+ ds->bts_interrupt_threshold =
+ ds->bts_absolute_maximum - BTS_OVFL_TH;
+
+ per_cpu(cpu_hw_counters, cpu).ds = ds;
+ err = 0;
+ }
+
+ if (err)
+ release_bts_hardware();
+ else {
+ for_each_online_cpu(cpu)
+ init_debug_store_on_cpu(cpu);
+ }
+
+ put_online_cpus();
+
+ return err;
+}
+
static void hw_perf_counter_destroy(struct perf_counter *counter)
{
if (atomic_dec_and_mutex_lock(&active_counters, &pmc_reserve_mutex)) {
release_pmc_hardware();
+ release_bts_hardware();
mutex_unlock(&pmc_reserve_mutex);
}
}
@@ -705,6 +851,42 @@ set_ext_hw_attr(struct hw_perf_counter *hwc, struct perf_counter_attr *attr)
return 0;
}
+static void intel_pmu_enable_bts(u64 config)
+{
+ unsigned long debugctlmsr;
+
+ debugctlmsr = get_debugctlmsr();
+
+ debugctlmsr |= X86_DEBUGCTL_TR;
+ debugctlmsr |= X86_DEBUGCTL_BTS;
+ debugctlmsr |= X86_DEBUGCTL_BTINT;
+
+ if (!(config & ARCH_PERFMON_EVENTSEL_OS))
+ debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS;
+
+ if (!(config & ARCH_PERFMON_EVENTSEL_USR))
+ debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR;
+
+ update_debugctlmsr(debugctlmsr);
+}
+
+static void intel_pmu_disable_bts(void)
+{
+ struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+ unsigned long debugctlmsr;
+
+ if (!cpuc->ds)
+ return;
+
+ debugctlmsr = get_debugctlmsr();
+
+ debugctlmsr &=
+ ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT |
+ X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR);
+
+ update_debugctlmsr(debugctlmsr);
+}
+
/*
* Setup the hardware configuration for a given attr_type
*/
@@ -721,9 +903,13 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
err = 0;
if (!atomic_inc_not_zero(&active_counters)) {
mutex_lock(&pmc_reserve_mutex);
- if (atomic_read(&active_counters) == 0 && !reserve_pmc_hardware())
- err = -EBUSY;
- else
+ if (atomic_read(&active_counters) == 0) {
+ if (!reserve_pmc_hardware())
+ err = -EBUSY;
+ else
+ reserve_bts_hardware();
+ }
+ if (!err)
atomic_inc(&active_counters);
mutex_unlock(&pmc_reserve_mutex);
}
@@ -801,7 +987,18 @@ static void p6_pmu_disable_all(void)
static void intel_pmu_disable_all(void)
{
+ struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+
+ if (!cpuc->enabled)
+ return;
+
+ cpuc->enabled = 0;
+ barrier();
+
wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+
+ if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
+ intel_pmu_disable_bts();
}
static void amd_pmu_disable_all(void)
@@ -859,7 +1056,25 @@ static void p6_pmu_enable_all(void)
static void intel_pmu_enable_all(void)
{
+ struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+
+ if (cpuc->enabled)
+ return;
+
+ cpuc->enabled = 1;
+ barrier();
+
wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
+
+ if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
+ struct perf_counter *counter =
+ cpuc->counters[X86_PMC_IDX_FIXED_BTS];
+
+ if (WARN_ON_ONCE(!counter))
+ return;
+
+ intel_pmu_enable_bts(counter->hw.config);
+ }
}
static void amd_pmu_enable_all(void)
@@ -946,6 +1161,11 @@ p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
static inline void
intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
{
+ if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
+ intel_pmu_disable_bts();
+ return;
+ }
+
if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
intel_pmu_disable_fixed(hwc, idx);
return;
@@ -974,6 +1194,9 @@ x86_perf_counter_set_period(struct perf_counter *counter,
s64 period = hwc->sample_period;
int err, ret = 0;
+ if (idx == X86_PMC_IDX_FIXED_BTS)
+ return 0;
+
/*
* If we are way outside a reasoable range then just skip forward:
*/
@@ -1056,6 +1279,14 @@ static void p6_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
{
+ if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
+ if (!__get_cpu_var(cpu_hw_counters).enabled)
+ return;
+
+ intel_pmu_enable_bts(hwc->config);
+ return;
+ }
+
if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
intel_pmu_enable_fixed(hwc, idx);
return;
@@ -1077,11 +1308,16 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
{
unsigned int event;
+ event = hwc->config & ARCH_PERFMON_EVENT_MASK;
+
+ if (unlikely((event ==
+ x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
+ (hwc->sample_period == 1)))
+ return X86_PMC_IDX_FIXED_BTS;
+
if (!x86_pmu.num_counters_fixed)
return -1;
- event = hwc->config & ARCH_PERFMON_EVENT_MASK;
-
if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
return X86_PMC_IDX_FIXED_INSTRUCTIONS;
if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES)))
@@ -1102,7 +1338,25 @@ static int x86_pmu_enable(struct perf_counter *counter)
int idx;
idx = fixed_mode_idx(counter, hwc);
- if (idx >= 0) {
+ if (idx == X86_PMC_IDX_FIXED_BTS) {
+ /*
+ * Try to use BTS for branch tracing. If that is not
+ * available, try to get a generic counter.
+ */
+ if (unlikely(!cpuc->ds))
+ goto try_generic;
+
+ /*
+ * Try to get the fixed counter, if that is already taken
+ * then try to get a generic counter:
+ */
+ if (test_and_set_bit(idx, cpuc->used_mask))
+ goto try_generic;
+
+ hwc->config_base = 0;
+ hwc->counter_base = 0;
+ hwc->idx = idx;
+ } else if (idx >= 0) {
/*
* Try to get the fixed counter, if that is already taken
* then try to get a generic counter:
@@ -1213,6 +1467,45 @@ void perf_counter_print_debug(void)
local_irq_restore(flags);
}
+static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc,
+ struct perf_sample_data *data)
+{
+ struct debug_store *ds = cpuc->ds;
+ struct bts_record {
+ u64 from;
+ u64 to;
+ u64 flags;
+ };
+ struct perf_counter *counter = cpuc->counters[X86_PMC_IDX_FIXED_BTS];
+ unsigned long orig_ip = data->regs->ip;
+ u64 at;
+
+ if (!counter)
+ return;
+
+ if (!ds)
+ return;
+
+ for (at = ds->bts_buffer_base;
+ at < ds->bts_index;
+ at += sizeof(struct bts_record)) {
+ struct bts_record *rec = (struct bts_record *)(long)at;
+
+ data->regs->ip = rec->from;
+ data->addr = rec->to;
+
+ perf_counter_output(counter, 1, data);
+ }
+
+ ds->bts_index = ds->bts_buffer_base;
+
+ data->regs->ip = orig_ip;
+ data->addr = 0;
+
+ /* There's new data available. */
+ counter->pending_kill = POLL_IN;
+}
+
static void x86_pmu_disable(struct perf_counter *counter)
{
struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
@@ -1237,6 +1530,15 @@ static void x86_pmu_disable(struct perf_counter *counter)
* that we are disabling:
*/
x86_perf_counter_update(counter, hwc, idx);
+
+ /* Drain the remaining BTS records. */
+ if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
+ struct perf_sample_data data;
+ struct pt_regs regs;
+
+ data.regs = &regs;
+ intel_pmu_drain_bts_buffer(cpuc, &data);
+ }
cpuc->counters[idx] = NULL;
clear_bit(idx, cpuc->used_mask);
@@ -1264,6 +1566,7 @@ static int intel_pmu_save_and_restart(struct perf_counter *counter)
static void intel_pmu_reset(void)
{
+ struct debug_store *ds = __get_cpu_var(cpu_hw_counters).ds;
unsigned long flags;
int idx;
@@ -1281,6 +1584,8 @@ static void intel_pmu_reset(void)
for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
}
+ if (ds)
+ ds->bts_index = ds->bts_buffer_base;
local_irq_restore(flags);
}
@@ -1346,6 +1651,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
cpuc = &__get_cpu_var(cpu_hw_counters);
perf_disable();
+ intel_pmu_drain_bts_buffer(cpuc, &data);
status = intel_pmu_get_status();
if (!status) {
perf_enable();
@@ -1547,6 +1853,8 @@ static struct x86_pmu intel_pmu = {
* the generic counter period:
*/
.max_period = (1ULL << 31) - 1,
+ .enable_bts = intel_pmu_enable_bts,
+ .disable_bts = intel_pmu_disable_bts,
};
static struct x86_pmu amd_pmu = {
@@ -1936,3 +2244,8 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
return entry;
}
+
+void hw_perf_counter_setup_online(int cpu)
+{
+ init_debug_store_on_cpu(cpu);
+}
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index e60ed740d2b3..392bea43b890 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -68,16 +68,16 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
/* returns the bit offset of the performance counter register */
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_AMD:
- return (msr - MSR_K7_PERFCTR0);
+ return msr - MSR_K7_PERFCTR0;
case X86_VENDOR_INTEL:
if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
- return (msr - MSR_ARCH_PERFMON_PERFCTR0);
+ return msr - MSR_ARCH_PERFMON_PERFCTR0;
switch (boot_cpu_data.x86) {
case 6:
- return (msr - MSR_P6_PERFCTR0);
+ return msr - MSR_P6_PERFCTR0;
case 15:
- return (msr - MSR_P4_BPU_PERFCTR0);
+ return msr - MSR_P4_BPU_PERFCTR0;
}
}
return 0;
@@ -92,16 +92,16 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
/* returns the bit offset of the event selection register */
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_AMD:
- return (msr - MSR_K7_EVNTSEL0);
+ return msr - MSR_K7_EVNTSEL0;
case X86_VENDOR_INTEL:
if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
- return (msr - MSR_ARCH_PERFMON_EVENTSEL0);
+ return msr - MSR_ARCH_PERFMON_EVENTSEL0;
switch (boot_cpu_data.x86) {
case 6:
- return (msr - MSR_P6_EVNTSEL0);
+ return msr - MSR_P6_EVNTSEL0;
case 15:
- return (msr - MSR_P4_BSU_ESCR0);
+ return msr - MSR_P4_BSU_ESCR0;
}
}
return 0;
@@ -113,7 +113,7 @@ int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
{
BUG_ON(counter > NMI_MAX_COUNTER_BITS);
- return (!test_bit(counter, perfctr_nmi_owner));
+ return !test_bit(counter, perfctr_nmi_owner);
}
/* checks the an msr for availability */
@@ -124,7 +124,7 @@ int avail_to_resrv_perfctr_nmi(unsigned int msr)
counter = nmi_perfctr_msr_to_bit(msr);
BUG_ON(counter > NMI_MAX_COUNTER_BITS);
- return (!test_bit(counter, perfctr_nmi_owner));
+ return !test_bit(counter, perfctr_nmi_owner);
}
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
@@ -237,7 +237,7 @@ static unsigned int adjust_for_32bit_ctr(unsigned int hz)
*/
counter_val = (u64)cpu_khz * 1000;
do_div(counter_val, retval);
- if (counter_val > 0x7fffffffULL) {
+ if (counter_val > 0x7fffffffULL) {
u64 count = (u64)cpu_khz * 1000;
do_div(count, 0x7fffffffUL);
retval = count + 1;
@@ -251,7 +251,7 @@ static void write_watchdog_counter(unsigned int perfctr_msr,
u64 count = (u64)cpu_khz * 1000;
do_div(count, nmi_hz);
- if(descr)
+ if (descr)
pr_debug("setting %s to -0x%08Lx\n", descr, count);
wrmsrl(perfctr_msr, 0 - count);
}
@@ -262,7 +262,7 @@ static void write_watchdog_counter32(unsigned int perfctr_msr,
u64 count = (u64)cpu_khz * 1000;
do_div(count, nmi_hz);
- if(descr)
+ if (descr)
pr_debug("setting %s to -0x%08Lx\n", descr, count);
wrmsr(perfctr_msr, (u32)(-count), 0);
}
@@ -296,7 +296,7 @@ static int setup_k7_watchdog(unsigned nmi_hz)
/* setup the timer */
wrmsr(evntsel_msr, evntsel, 0);
- write_watchdog_counter(perfctr_msr, "K7_PERFCTR0",nmi_hz);
+ write_watchdog_counter(perfctr_msr, "K7_PERFCTR0", nmi_hz);
/* initialize the wd struct before enabling */
wd->perfctr_msr = perfctr_msr;
@@ -387,7 +387,7 @@ static int setup_p6_watchdog(unsigned nmi_hz)
/* setup the timer */
wrmsr(evntsel_msr, evntsel, 0);
nmi_hz = adjust_for_32bit_ctr(nmi_hz);
- write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0",nmi_hz);
+ write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0", nmi_hz);
/* initialize the wd struct before enabling */
wd->perfctr_msr = perfctr_msr;
@@ -415,7 +415,7 @@ static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
apic_write(APIC_LVTPC, APIC_DM_NMI);
/* P6/ARCH_PERFMON has 32 bit counter write */
- write_watchdog_counter32(wd->perfctr_msr, NULL,nmi_hz);
+ write_watchdog_counter32(wd->perfctr_msr, NULL, nmi_hz);
}
static const struct wd_ops p6_wd_ops = {
@@ -490,9 +490,9 @@ static int setup_p4_watchdog(unsigned nmi_hz)
if (smp_num_siblings == 2) {
unsigned int ebx, apicid;
- ebx = cpuid_ebx(1);
- apicid = (ebx >> 24) & 0xff;
- ht_num = apicid & 1;
+ ebx = cpuid_ebx(1);
+ apicid = (ebx >> 24) & 0xff;
+ ht_num = apicid & 1;
} else
#endif
ht_num = 0;
@@ -544,7 +544,7 @@ static int setup_p4_watchdog(unsigned nmi_hz)
}
evntsel = P4_ESCR_EVENT_SELECT(0x3F)
- | P4_ESCR_OS
+ | P4_ESCR_OS
| P4_ESCR_USR;
cccr_val |= P4_CCCR_THRESHOLD(15)
@@ -612,7 +612,7 @@ static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
{
unsigned dummy;
/*
- * P4 quirks:
+ * P4 quirks:
* - An overflown perfctr will assert its interrupt
* until the OVF flag in its CCCR is cleared.
* - LVTPC is masked on interrupt and must be
@@ -662,7 +662,8 @@ static int setup_intel_arch_watchdog(unsigned nmi_hz)
* NOTE: Corresponding bit = 0 in ebx indicates event present.
*/
cpuid(10, &(eax.full), &ebx, &unused, &unused);
- if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
+ if ((eax.split.mask_length <
+ (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
return 0;
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index d5e30397246b..62ac8cb6ba27 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -116,11 +116,9 @@ static int show_cpuinfo(struct seq_file *m, void *v)
seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize);
#endif
seq_printf(m, "clflush size\t: %u\n", c->x86_clflush_size);
-#ifdef CONFIG_X86_64
seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment);
seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n",
c->x86_phys_bits, c->x86_virt_bits);
-#endif
seq_printf(m, "power management:");
for (i = 0; i < 32; i++) {
@@ -128,7 +126,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
if (i < ARRAY_SIZE(x86_power_flags) &&
x86_power_flags[i])
seq_printf(m, "%s%s",
- x86_power_flags[i][0]?" ":"",
+ x86_power_flags[i][0] ? " " : "",
x86_power_flags[i]);
else
seq_printf(m, " [%d]", i);
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 284c399e3234..bc24f514ec93 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -49,17 +49,17 @@ static inline int __vmware_platform(void)
static unsigned long __vmware_get_tsc_khz(void)
{
- uint64_t tsc_hz;
- uint32_t eax, ebx, ecx, edx;
+ uint64_t tsc_hz;
+ uint32_t eax, ebx, ecx, edx;
- VMWARE_PORT(GETHZ, eax, ebx, ecx, edx);
+ VMWARE_PORT(GETHZ, eax, ebx, ecx, edx);
- if (ebx == UINT_MAX)
- return 0;
- tsc_hz = eax | (((uint64_t)ebx) << 32);
- do_div(tsc_hz, 1000);
- BUG_ON(tsc_hz >> 32);
- return tsc_hz;
+ if (ebx == UINT_MAX)
+ return 0;
+ tsc_hz = eax | (((uint64_t)ebx) << 32);
+ do_div(tsc_hz, 1000);
+ BUG_ON(tsc_hz >> 32);
+ return tsc_hz;
}
/*
diff --git a/arch/x86/kernel/doublefault_32.c b/arch/x86/kernel/doublefault_32.c
index b4f14c6c09d9..37250fe490b1 100644
--- a/arch/x86/kernel/doublefault_32.c
+++ b/arch/x86/kernel/doublefault_32.c
@@ -27,9 +27,7 @@ static void doublefault_fn(void)
if (ptr_ok(gdt)) {
gdt += GDT_ENTRY_TSS << 3;
- tss = *(u16 *)(gdt+2);
- tss += *(u8 *)(gdt+4) << 16;
- tss += *(u8 *)(gdt+7) << 24;
+ tss = get_desc_base((struct desc_struct *)gdt);
printk(KERN_EMERG "double fault, tss at %08lx\n", tss);
if (ptr_ok(tss)) {
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 48bfe1386038..ef42a038f1a6 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -509,15 +509,15 @@ enum bts_field {
bts_escape = ((unsigned long)-1 & ~bts_qual_mask)
};
-static inline unsigned long bts_get(const char *base, enum bts_field field)
+static inline unsigned long bts_get(const char *base, unsigned long field)
{
base += (ds_cfg.sizeof_ptr_field * field);
return *(unsigned long *)base;
}
-static inline void bts_set(char *base, enum bts_field field, unsigned long val)
+static inline void bts_set(char *base, unsigned long field, unsigned long val)
{
- base += (ds_cfg.sizeof_ptr_field * field);;
+ base += (ds_cfg.sizeof_ptr_field * field);
(*(unsigned long *)base) = val;
}
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index c8405718a4c3..2d8a371d4339 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -15,7 +15,6 @@
#include <linux/bug.h>
#include <linux/nmi.h>
#include <linux/sysfs.h>
-#include <linux/ftrace.h>
#include <asm/stacktrace.h>
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index d94e1ea3b9fe..8e9663413b7f 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -417,10 +417,6 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
unsigned long return_hooker = (unsigned long)
&return_to_handler;
- /* Nmi's are currently unsupported */
- if (unlikely(in_nmi()))
- return;
-
if (unlikely(atomic_read(&current->tracing_graph_pause)))
return;
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index b0cdde6932f5..74656d1d4e30 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -104,7 +104,7 @@ static int show_other_interrupts(struct seq_file *p, int prec)
seq_printf(p, " Threshold APIC interrupts\n");
# endif
#endif
-#ifdef CONFIG_X86_NEW_MCE
+#ifdef CONFIG_X86_MCE
seq_printf(p, "%*s: ", prec, "MCE");
for_each_online_cpu(j)
seq_printf(p, "%10u ", per_cpu(mce_exception_count, j));
@@ -200,7 +200,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
sum += irq_stats(cpu)->irq_threshold_count;
# endif
#endif
-#ifdef CONFIG_X86_NEW_MCE
+#ifdef CONFIG_X86_MCE
sum += per_cpu(mce_exception_count, cpu);
sum += per_cpu(mce_poll_count, cpu);
#endif
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 92b7703d3d58..ccf8ab54f31a 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -190,7 +190,7 @@ static void __init apic_intr_init(void)
#ifdef CONFIG_X86_MCE_THRESHOLD
alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
#endif
-#if defined(CONFIG_X86_NEW_MCE) && defined(CONFIG_X86_LOCAL_APIC)
+#if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_LOCAL_APIC)
alloc_intr_gate(MCE_SELF_VECTOR, mce_self_interrupt);
#endif
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index ae13e34f7248..2cf12fd32c83 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -147,7 +147,7 @@ again:
return NULL;
addr = page_to_phys(page);
- if (!is_buffer_dma_capable(dma_mask, addr, size)) {
+ if (addr + size > dma_mask) {
__free_pages(page, get_order(size));
if (dma_mask < DMA_BIT_MASK(32) && !(flag & GFP_DMA)) {
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index d2e56b8f48e7..98a827ee9ed7 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -190,14 +190,13 @@ static void iommu_full(struct device *dev, size_t size, int dir)
static inline int
need_iommu(struct device *dev, unsigned long addr, size_t size)
{
- return force_iommu ||
- !is_buffer_dma_capable(*dev->dma_mask, addr, size);
+ return force_iommu || !dma_capable(dev, addr, size);
}
static inline int
nonforced_iommu(struct device *dev, unsigned long addr, size_t size)
{
- return !is_buffer_dma_capable(*dev->dma_mask, addr, size);
+ return !dma_capable(dev, addr, size);
}
/* Map a single continuous physical area into the IOMMU.
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index 71d412a09f30..c0a4222bf62b 100644
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -14,7 +14,7 @@
static int
check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size)
{
- if (hwdev && !is_buffer_dma_capable(*hwdev->dma_mask, bus, size)) {
+ if (hwdev && !dma_capable(hwdev, bus, size)) {
if (*hwdev->dma_mask >= DMA_BIT_MASK(32))
printk(KERN_ERR
"nommu_%s: overflow %Lx+%zu of device mask %Lx\n",
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index 1e66b18f45cb..aaa6b7839f1e 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -13,31 +13,6 @@
int swiotlb __read_mostly;
-void * __init swiotlb_alloc_boot(size_t size, unsigned long nslabs)
-{
- return alloc_bootmem_low_pages(size);
-}
-
-void *swiotlb_alloc(unsigned order, unsigned long nslabs)
-{
- return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order);
-}
-
-dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr)
-{
- return paddr;
-}
-
-phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr)
-{
- return baddr;
-}
-
-int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size)
-{
- return 0;
-}
-
static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
dma_addr_t *dma_handle, gfp_t flags)
{
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 59f4524984af..4cf79567cdab 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -61,9 +61,6 @@
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
-DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
-EXPORT_PER_CPU_SYMBOL(current_task);
-
/*
* Return saved PC of a blocked thread.
*/
@@ -350,14 +347,21 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*next = &next_p->thread;
int cpu = smp_processor_id();
struct tss_struct *tss = &per_cpu(init_tss, cpu);
+ bool preload_fpu;
/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
- __unlazy_fpu(prev_p);
+ /*
+ * If the task has used fpu the last 5 timeslices, just do a full
+ * restore of the math state immediately to avoid the trap; the
+ * chances of needing FPU soon are obviously high now
+ */
+ preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
+ __unlazy_fpu(prev_p);
/* we're going to use this soon, after a few expensive things */
- if (next_p->fpu_counter > 5)
+ if (preload_fpu)
prefetch(next->xstate);
/*
@@ -398,6 +402,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
__switch_to_xtra(prev_p, next_p, tss);
+ /* If we're going to preload the fpu context, make sure clts
+ is run while we're batching the cpu state updates. */
+ if (preload_fpu)
+ clts();
+
/*
* Leave lazy mode, flushing any hypercalls made here.
* This must be done before restoring TLS segments so
@@ -407,15 +416,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*/
arch_end_context_switch(next_p);
- /* If the task has used fpu the last 5 timeslices, just do a full
- * restore of the math state immediately to avoid the trap; the
- * chances of needing FPU soon are obviously high now
- *
- * tsk_used_math() checks prevent calling math_state_restore(),
- * which can sleep in the case of !tsk_used_math()
- */
- if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
- math_state_restore();
+ if (preload_fpu)
+ __math_state_restore();
/*
* Restore %gs if needed (which is common)
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index ebefb5407b9d..ad535b683170 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -55,9 +55,6 @@
asmlinkage extern void ret_from_fork(void);
-DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
-EXPORT_PER_CPU_SYMBOL(current_task);
-
DEFINE_PER_CPU(unsigned long, old_rsp);
static DEFINE_PER_CPU(unsigned char, is_idle);
@@ -386,9 +383,17 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
int cpu = smp_processor_id();
struct tss_struct *tss = &per_cpu(init_tss, cpu);
unsigned fsindex, gsindex;
+ bool preload_fpu;
+
+ /*
+ * If the task has used fpu the last 5 timeslices, just do a full
+ * restore of the math state immediately to avoid the trap; the
+ * chances of needing FPU soon are obviously high now
+ */
+ preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
/* we're going to use this soon, after a few expensive things */
- if (next_p->fpu_counter > 5)
+ if (preload_fpu)
prefetch(next->xstate);
/*
@@ -419,6 +424,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
load_TLS(next, cpu);
+ /* Must be after DS reload */
+ unlazy_fpu(prev_p);
+
+ /* Make sure cpu is ready for new context */
+ if (preload_fpu)
+ clts();
+
/*
* Leave lazy mode, flushing any hypercalls made here.
* This must be done before restoring TLS segments so
@@ -459,9 +471,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
prev->gsindex = gsindex;
- /* Must be after DS reload */
- unlazy_fpu(prev_p);
-
/*
* Switch the PDA and FPU contexts.
*/
@@ -480,15 +489,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
__switch_to_xtra(prev_p, next_p, tss);
- /* If the task has used fpu the last 5 timeslices, just do a full
- * restore of the math state immediately to avoid the trap; the
- * chances of needing FPU soon are obviously high now
- *
- * tsk_used_math() checks prevent calling math_state_restore(),
- * which can sleep in the case of !tsk_used_math()
+ /*
+ * Preload the FPU context, now that we've determined that the
+ * task is likely to be using it.
*/
- if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
- math_state_restore();
+ if (preload_fpu)
+ __math_state_restore();
return prev_p;
}
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 4c578751e94e..cc26ad4c3070 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -856,7 +856,7 @@ static void do_signal(struct pt_regs *regs)
void
do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
{
-#ifdef CONFIG_X86_NEW_MCE
+#ifdef CONFIG_X86_MCE
/* notify userspace of pending MCEs */
if (thread_info_flags & _TIF_MCE_NOTIFY)
mce_notify_process();
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index e8b9863ef8c4..3149032ff107 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -4,6 +4,7 @@
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/ptrace.h>
+#include <asm/desc.h>
unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs)
{
@@ -23,7 +24,7 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re
* and APM bios ones we just ignore here.
*/
if ((seg & SEGMENT_TI_MASK) == SEGMENT_LDT) {
- u32 *desc;
+ struct desc_struct *desc;
unsigned long base;
seg &= ~7UL;
@@ -33,12 +34,10 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re
addr = -1L; /* bogus selector, access would fault */
else {
desc = child->mm->context.ldt + seg;
- base = ((desc[0] >> 16) |
- ((desc[1] & 0xff) << 16) |
- (desc[1] & 0xff000000));
+ base = get_desc_base(desc);
/* 16-bit code segment? */
- if (!((desc[1] >> 22) & 1))
+ if (!desc->d)
addr &= 0xffff;
addr += base;
}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 5204332f475d..83264922a878 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -76,7 +76,7 @@ char ignore_fpu_irq;
* F0 0F bug workaround.. We have a special link segment
* for this.
*/
-gate_desc idt_table[256]
+gate_desc idt_table[NR_VECTORS]
__attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, };
#endif
@@ -786,33 +786,34 @@ do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
#endif
}
-#ifdef CONFIG_X86_32
-unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp)
+asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
{
- struct desc_struct *gdt = get_cpu_gdt_table(smp_processor_id());
- unsigned long base = (kesp - uesp) & -THREAD_SIZE;
- unsigned long new_kesp = kesp - base;
- unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT;
- __u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS];
-
- /* Set up base for espfix segment */
- desc &= 0x00f0ff0000000000ULL;
- desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) |
- ((((__u64)base) << 32) & 0xff00000000000000ULL) |
- ((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) |
- (lim_pages & 0xffff);
- *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc;
-
- return new_kesp;
}
-#endif
-asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
+asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void)
{
}
-asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void)
+/*
+ * __math_state_restore assumes that cr0.TS is already clear and the
+ * fpu state is all ready for use. Used during context switch.
+ */
+void __math_state_restore(void)
{
+ struct thread_info *thread = current_thread_info();
+ struct task_struct *tsk = thread->task;
+
+ /*
+ * Paranoid restore. send a SIGSEGV if we fail to restore the state.
+ */
+ if (unlikely(restore_fpu_checking(tsk))) {
+ stts();
+ force_sig(SIGSEGV, tsk);
+ return;
+ }
+
+ thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
+ tsk->fpu_counter++;
}
/*
@@ -846,17 +847,8 @@ asmlinkage void math_state_restore(void)
}
clts(); /* Allow maths ops (or we recurse) */
- /*
- * Paranoid restore. send a SIGSEGV if we fail to restore the state.
- */
- if (unlikely(restore_fpu_checking(tsk))) {
- stts();
- force_sig(SIGSEGV, tsk);
- return;
- }
- thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
- tsk->fpu_counter++;
+ __math_state_restore();
}
EXPORT_SYMBOL_GPL(math_state_restore);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index bfae139182ff..775a020990a5 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -285,26 +285,25 @@ check_v8086_mode(struct pt_regs *regs, unsigned long address,
tsk->thread.screen_bitmap |= 1 << bit;
}
-static void dump_pagetable(unsigned long address)
+static bool low_pfn(unsigned long pfn)
{
- __typeof__(pte_val(__pte(0))) page;
+ return pfn < max_low_pfn;
+}
- page = read_cr3();
- page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT];
+static void dump_pagetable(unsigned long address)
+{
+ pgd_t *base = __va(read_cr3());
+ pgd_t *pgd = &base[pgd_index(address)];
+ pmd_t *pmd;
+ pte_t *pte;
#ifdef CONFIG_X86_PAE
- printk("*pdpt = %016Lx ", page);
- if ((page >> PAGE_SHIFT) < max_low_pfn
- && page & _PAGE_PRESENT) {
- page &= PAGE_MASK;
- page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT)
- & (PTRS_PER_PMD - 1)];
- printk(KERN_CONT "*pde = %016Lx ", page);
- page &= ~_PAGE_NX;
- }
-#else
- printk("*pde = %08lx ", page);
+ printk("*pdpt = %016Lx ", pgd_val(*pgd));
+ if (!low_pfn(pgd_val(*pgd) >> PAGE_SHIFT) || !pgd_present(*pgd))
+ goto out;
#endif
+ pmd = pmd_offset(pud_offset(pgd, address), address);
+ printk(KERN_CONT "*pde = %0*Lx ", sizeof(*pmd) * 2, (u64)pmd_val(*pmd));
/*
* We must not directly access the pte in the highpte
@@ -312,16 +311,12 @@ static void dump_pagetable(unsigned long address)
* And let's rather not kmap-atomic the pte, just in case
* it's allocated already:
*/
- if ((page >> PAGE_SHIFT) < max_low_pfn
- && (page & _PAGE_PRESENT)
- && !(page & _PAGE_PSE)) {
-
- page &= PAGE_MASK;
- page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT)
- & (PTRS_PER_PTE - 1)];
- printk("*pte = %0*Lx ", sizeof(page)*2, (u64)page);
- }
+ if (!low_pfn(pmd_pfn(*pmd)) || !pmd_present(*pmd) || pmd_large(*pmd))
+ goto out;
+ pte = pte_offset_kernel(pmd, address);
+ printk("*pte = %0*Lx ", sizeof(*pte) * 2, (u64)pte_val(*pte));
+out:
printk("\n");
}
@@ -450,16 +445,12 @@ static int bad_address(void *p)
static void dump_pagetable(unsigned long address)
{
- pgd_t *pgd;
+ pgd_t *base = __va(read_cr3() & PHYSICAL_PAGE_MASK);
+ pgd_t *pgd = base + pgd_index(address);
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
- pgd = (pgd_t *)read_cr3();
-
- pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK);
-
- pgd += pgd_index(address);
if (bad_address(pgd))
goto bad;
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index 2112ed55e7ea..1617958a3805 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -24,7 +24,7 @@ void kunmap(struct page *page)
* no global lock is needed and because the kmap code must perform a global TLB
* invalidation when the kmap pool wraps.
*
- * However when holding an atomic kmap is is not legal to sleep, so atomic
+ * However when holding an atomic kmap it is not legal to sleep, so atomic
* kmaps are appropriate for short, tight code paths only.
*/
void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 89b9a5cd63da..cb88b1a0bd5f 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -1,11 +1,14 @@
/**
* @file nmi_int.c
*
- * @remark Copyright 2002-2008 OProfile authors
+ * @remark Copyright 2002-2009 OProfile authors
* @remark Read the file COPYING
*
* @author John Levon <levon@movementarian.org>
* @author Robert Richter <robert.richter@amd.com>
+ * @author Barry Kasindorf <barry.kasindorf@amd.com>
+ * @author Jason Yeh <jason.yeh@amd.com>
+ * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
*/
#include <linux/init.h>
@@ -24,13 +27,35 @@
#include "op_counter.h"
#include "op_x86_model.h"
-static struct op_x86_model_spec const *model;
+static struct op_x86_model_spec *model;
static DEFINE_PER_CPU(struct op_msrs, cpu_msrs);
static DEFINE_PER_CPU(unsigned long, saved_lvtpc);
/* 0 == registered but off, 1 == registered and on */
static int nmi_enabled = 0;
+struct op_counter_config counter_config[OP_MAX_COUNTER];
+
+/* common functions */
+
+u64 op_x86_get_ctrl(struct op_x86_model_spec const *model,
+ struct op_counter_config *counter_config)
+{
+ u64 val = 0;
+ u16 event = (u16)counter_config->event;
+
+ val |= ARCH_PERFMON_EVENTSEL_INT;
+ val |= counter_config->user ? ARCH_PERFMON_EVENTSEL_USR : 0;
+ val |= counter_config->kernel ? ARCH_PERFMON_EVENTSEL_OS : 0;
+ val |= (counter_config->unit_mask & 0xFF) << 8;
+ event &= model->event_mask ? model->event_mask : 0xFF;
+ val |= event & 0xFF;
+ val |= (event & 0x0F00) << 24;
+
+ return val;
+}
+
+
static int profile_exceptions_notify(struct notifier_block *self,
unsigned long val, void *data)
{
@@ -52,36 +77,214 @@ static int profile_exceptions_notify(struct notifier_block *self,
static void nmi_cpu_save_registers(struct op_msrs *msrs)
{
- unsigned int const nr_ctrs = model->num_counters;
- unsigned int const nr_ctrls = model->num_controls;
struct op_msr *counters = msrs->counters;
struct op_msr *controls = msrs->controls;
unsigned int i;
- for (i = 0; i < nr_ctrs; ++i) {
- if (counters[i].addr) {
- rdmsr(counters[i].addr,
- counters[i].saved.low,
- counters[i].saved.high);
- }
+ for (i = 0; i < model->num_counters; ++i) {
+ if (counters[i].addr)
+ rdmsrl(counters[i].addr, counters[i].saved);
+ }
+
+ for (i = 0; i < model->num_controls; ++i) {
+ if (controls[i].addr)
+ rdmsrl(controls[i].addr, controls[i].saved);
+ }
+}
+
+static void nmi_cpu_start(void *dummy)
+{
+ struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
+ model->start(msrs);
+}
+
+static int nmi_start(void)
+{
+ on_each_cpu(nmi_cpu_start, NULL, 1);
+ return 0;
+}
+
+static void nmi_cpu_stop(void *dummy)
+{
+ struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
+ model->stop(msrs);
+}
+
+static void nmi_stop(void)
+{
+ on_each_cpu(nmi_cpu_stop, NULL, 1);
+}
+
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+
+static DEFINE_PER_CPU(int, switch_index);
+
+static inline int has_mux(void)
+{
+ return !!model->switch_ctrl;
+}
+
+inline int op_x86_phys_to_virt(int phys)
+{
+ return __get_cpu_var(switch_index) + phys;
+}
+
+inline int op_x86_virt_to_phys(int virt)
+{
+ return virt % model->num_counters;
+}
+
+static void nmi_shutdown_mux(void)
+{
+ int i;
+
+ if (!has_mux())
+ return;
+
+ for_each_possible_cpu(i) {
+ kfree(per_cpu(cpu_msrs, i).multiplex);
+ per_cpu(cpu_msrs, i).multiplex = NULL;
+ per_cpu(switch_index, i) = 0;
}
+}
+
+static int nmi_setup_mux(void)
+{
+ size_t multiplex_size =
+ sizeof(struct op_msr) * model->num_virt_counters;
+ int i;
+
+ if (!has_mux())
+ return 1;
+
+ for_each_possible_cpu(i) {
+ per_cpu(cpu_msrs, i).multiplex =
+ kmalloc(multiplex_size, GFP_KERNEL);
+ if (!per_cpu(cpu_msrs, i).multiplex)
+ return 0;
+ }
+
+ return 1;
+}
+
+static void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs)
+{
+ int i;
+ struct op_msr *multiplex = msrs->multiplex;
+
+ if (!has_mux())
+ return;
- for (i = 0; i < nr_ctrls; ++i) {
- if (controls[i].addr) {
- rdmsr(controls[i].addr,
- controls[i].saved.low,
- controls[i].saved.high);
+ for (i = 0; i < model->num_virt_counters; ++i) {
+ if (counter_config[i].enabled) {
+ multiplex[i].saved = -(u64)counter_config[i].count;
+ } else {
+ multiplex[i].addr = 0;
+ multiplex[i].saved = 0;
}
}
+
+ per_cpu(switch_index, cpu) = 0;
+}
+
+static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs)
+{
+ struct op_msr *multiplex = msrs->multiplex;
+ int i;
+
+ for (i = 0; i < model->num_counters; ++i) {
+ int virt = op_x86_phys_to_virt(i);
+ if (multiplex[virt].addr)
+ rdmsrl(multiplex[virt].addr, multiplex[virt].saved);
+ }
+}
+
+static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs)
+{
+ struct op_msr *multiplex = msrs->multiplex;
+ int i;
+
+ for (i = 0; i < model->num_counters; ++i) {
+ int virt = op_x86_phys_to_virt(i);
+ if (multiplex[virt].addr)
+ wrmsrl(multiplex[virt].addr, multiplex[virt].saved);
+ }
}
-static void nmi_save_registers(void *dummy)
+static void nmi_cpu_switch(void *dummy)
{
int cpu = smp_processor_id();
+ int si = per_cpu(switch_index, cpu);
struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
- nmi_cpu_save_registers(msrs);
+
+ nmi_cpu_stop(NULL);
+ nmi_cpu_save_mpx_registers(msrs);
+
+ /* move to next set */
+ si += model->num_counters;
+ if ((si > model->num_virt_counters) || (counter_config[si].count == 0))
+ per_cpu(switch_index, cpu) = 0;
+ else
+ per_cpu(switch_index, cpu) = si;
+
+ model->switch_ctrl(model, msrs);
+ nmi_cpu_restore_mpx_registers(msrs);
+
+ nmi_cpu_start(NULL);
+}
+
+
+/*
+ * Quick check to see if multiplexing is necessary.
+ * The check should be sufficient since counters are used
+ * in ordre.
+ */
+static int nmi_multiplex_on(void)
+{
+ return counter_config[model->num_counters].count ? 0 : -EINVAL;
+}
+
+static int nmi_switch_event(void)
+{
+ if (!has_mux())
+ return -ENOSYS; /* not implemented */
+ if (nmi_multiplex_on() < 0)
+ return -EINVAL; /* not necessary */
+
+ on_each_cpu(nmi_cpu_switch, NULL, 1);
+
+ return 0;
+}
+
+static inline void mux_init(struct oprofile_operations *ops)
+{
+ if (has_mux())
+ ops->switch_events = nmi_switch_event;
+}
+
+static void mux_clone(int cpu)
+{
+ if (!has_mux())
+ return;
+
+ memcpy(per_cpu(cpu_msrs, cpu).multiplex,
+ per_cpu(cpu_msrs, 0).multiplex,
+ sizeof(struct op_msr) * model->num_virt_counters);
}
+#else
+
+inline int op_x86_phys_to_virt(int phys) { return phys; }
+inline int op_x86_virt_to_phys(int virt) { return virt; }
+static inline void nmi_shutdown_mux(void) { }
+static inline int nmi_setup_mux(void) { return 1; }
+static inline void
+nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) { }
+static inline void mux_init(struct oprofile_operations *ops) { }
+static void mux_clone(int cpu) { }
+
+#endif
+
static void free_msrs(void)
{
int i;
@@ -95,38 +298,32 @@ static void free_msrs(void)
static int allocate_msrs(void)
{
- int success = 1;
size_t controls_size = sizeof(struct op_msr) * model->num_controls;
size_t counters_size = sizeof(struct op_msr) * model->num_counters;
int i;
for_each_possible_cpu(i) {
per_cpu(cpu_msrs, i).counters = kmalloc(counters_size,
- GFP_KERNEL);
- if (!per_cpu(cpu_msrs, i).counters) {
- success = 0;
- break;
- }
+ GFP_KERNEL);
+ if (!per_cpu(cpu_msrs, i).counters)
+ return 0;
per_cpu(cpu_msrs, i).controls = kmalloc(controls_size,
- GFP_KERNEL);
- if (!per_cpu(cpu_msrs, i).controls) {
- success = 0;
- break;
- }
+ GFP_KERNEL);
+ if (!per_cpu(cpu_msrs, i).controls)
+ return 0;
}
- if (!success)
- free_msrs();
-
- return success;
+ return 1;
}
static void nmi_cpu_setup(void *dummy)
{
int cpu = smp_processor_id();
struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
+ nmi_cpu_save_registers(msrs);
spin_lock(&oprofilefs_lock);
- model->setup_ctrs(msrs);
+ model->setup_ctrs(model, msrs);
+ nmi_cpu_setup_mux(cpu, msrs);
spin_unlock(&oprofilefs_lock);
per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC);
apic_write(APIC_LVTPC, APIC_DM_NMI);
@@ -144,11 +341,15 @@ static int nmi_setup(void)
int cpu;
if (!allocate_msrs())
- return -ENOMEM;
+ err = -ENOMEM;
+ else if (!nmi_setup_mux())
+ err = -ENOMEM;
+ else
+ err = register_die_notifier(&profile_exceptions_nb);
- err = register_die_notifier(&profile_exceptions_nb);
if (err) {
free_msrs();
+ nmi_shutdown_mux();
return err;
}
@@ -159,45 +360,38 @@ static int nmi_setup(void)
/* Assume saved/restored counters are the same on all CPUs */
model->fill_in_addresses(&per_cpu(cpu_msrs, 0));
for_each_possible_cpu(cpu) {
- if (cpu != 0) {
- memcpy(per_cpu(cpu_msrs, cpu).counters,
- per_cpu(cpu_msrs, 0).counters,
- sizeof(struct op_msr) * model->num_counters);
-
- memcpy(per_cpu(cpu_msrs, cpu).controls,
- per_cpu(cpu_msrs, 0).controls,
- sizeof(struct op_msr) * model->num_controls);
- }
+ if (!cpu)
+ continue;
+
+ memcpy(per_cpu(cpu_msrs, cpu).counters,
+ per_cpu(cpu_msrs, 0).counters,
+ sizeof(struct op_msr) * model->num_counters);
+
+ memcpy(per_cpu(cpu_msrs, cpu).controls,
+ per_cpu(cpu_msrs, 0).controls,
+ sizeof(struct op_msr) * model->num_controls);
+ mux_clone(cpu);
}
- on_each_cpu(nmi_save_registers, NULL, 1);
on_each_cpu(nmi_cpu_setup, NULL, 1);
nmi_enabled = 1;
return 0;
}
-static void nmi_restore_registers(struct op_msrs *msrs)
+static void nmi_cpu_restore_registers(struct op_msrs *msrs)
{
- unsigned int const nr_ctrs = model->num_counters;
- unsigned int const nr_ctrls = model->num_controls;
struct op_msr *counters = msrs->counters;
struct op_msr *controls = msrs->controls;
unsigned int i;
- for (i = 0; i < nr_ctrls; ++i) {
- if (controls[i].addr) {
- wrmsr(controls[i].addr,
- controls[i].saved.low,
- controls[i].saved.high);
- }
+ for (i = 0; i < model->num_controls; ++i) {
+ if (controls[i].addr)
+ wrmsrl(controls[i].addr, controls[i].saved);
}
- for (i = 0; i < nr_ctrs; ++i) {
- if (counters[i].addr) {
- wrmsr(counters[i].addr,
- counters[i].saved.low,
- counters[i].saved.high);
- }
+ for (i = 0; i < model->num_counters; ++i) {
+ if (counters[i].addr)
+ wrmsrl(counters[i].addr, counters[i].saved);
}
}
@@ -205,7 +399,7 @@ static void nmi_cpu_shutdown(void *dummy)
{
unsigned int v;
int cpu = smp_processor_id();
- struct op_msrs *msrs = &__get_cpu_var(cpu_msrs);
+ struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
/* restoring APIC_LVTPC can trigger an apic error because the delivery
* mode and vector nr combination can be illegal. That's by design: on
@@ -216,7 +410,7 @@ static void nmi_cpu_shutdown(void *dummy)
apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu));
apic_write(APIC_LVTERR, v);
- nmi_restore_registers(msrs);
+ nmi_cpu_restore_registers(msrs);
}
static void nmi_shutdown(void)
@@ -226,42 +420,18 @@ static void nmi_shutdown(void)
nmi_enabled = 0;
on_each_cpu(nmi_cpu_shutdown, NULL, 1);
unregister_die_notifier(&profile_exceptions_nb);
+ nmi_shutdown_mux();
msrs = &get_cpu_var(cpu_msrs);
model->shutdown(msrs);
free_msrs();
put_cpu_var(cpu_msrs);
}
-static void nmi_cpu_start(void *dummy)
-{
- struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
- model->start(msrs);
-}
-
-static int nmi_start(void)
-{
- on_each_cpu(nmi_cpu_start, NULL, 1);
- return 0;
-}
-
-static void nmi_cpu_stop(void *dummy)
-{
- struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
- model->stop(msrs);
-}
-
-static void nmi_stop(void)
-{
- on_each_cpu(nmi_cpu_stop, NULL, 1);
-}
-
-struct op_counter_config counter_config[OP_MAX_COUNTER];
-
static int nmi_create_files(struct super_block *sb, struct dentry *root)
{
unsigned int i;
- for (i = 0; i < model->num_counters; ++i) {
+ for (i = 0; i < model->num_virt_counters; ++i) {
struct dentry *dir;
char buf[4];
@@ -270,7 +440,7 @@ static int nmi_create_files(struct super_block *sb, struct dentry *root)
* NOTE: assumes 1:1 mapping here (that counters are organized
* sequentially in their struct assignment).
*/
- if (unlikely(!avail_to_resrv_perfctr_nmi_bit(i)))
+ if (!avail_to_resrv_perfctr_nmi_bit(op_x86_virt_to_phys(i)))
continue;
snprintf(buf, sizeof(buf), "%d", i);
@@ -402,6 +572,7 @@ module_param_call(cpu_type, force_cpu_type, NULL, NULL, 0);
static int __init ppro_init(char **cpu_type)
{
__u8 cpu_model = boot_cpu_data.x86_model;
+ struct op_x86_model_spec *spec = &op_ppro_spec; /* default */
if (force_arch_perfmon && cpu_has_arch_perfmon)
return 0;
@@ -428,7 +599,7 @@ static int __init ppro_init(char **cpu_type)
*cpu_type = "i386/core_2";
break;
case 26:
- arch_perfmon_setup_counters();
+ spec = &op_arch_perfmon_spec;
*cpu_type = "i386/core_i7";
break;
case 28:
@@ -439,17 +610,7 @@ static int __init ppro_init(char **cpu_type)
return 0;
}
- model = &op_ppro_spec;
- return 1;
-}
-
-static int __init arch_perfmon_init(char **cpu_type)
-{
- if (!cpu_has_arch_perfmon)
- return 0;
- *cpu_type = "i386/arch_perfmon";
- model = &op_arch_perfmon_spec;
- arch_perfmon_setup_counters();
+ model = spec;
return 1;
}
@@ -471,27 +632,26 @@ int __init op_nmi_init(struct oprofile_operations *ops)
/* Needs to be at least an Athlon (or hammer in 32bit mode) */
switch (family) {
- default:
- return -ENODEV;
case 6:
- model = &op_amd_spec;
cpu_type = "i386/athlon";
break;
case 0xf:
- model = &op_amd_spec;
- /* Actually it could be i386/hammer too, but give
- user space an consistent name. */
+ /*
+ * Actually it could be i386/hammer too, but
+ * give user space an consistent name.
+ */
cpu_type = "x86-64/hammer";
break;
case 0x10:
- model = &op_amd_spec;
cpu_type = "x86-64/family10";
break;
case 0x11:
- model = &op_amd_spec;
cpu_type = "x86-64/family11h";
break;
+ default:
+ return -ENODEV;
}
+ model = &op_amd_spec;
break;
case X86_VENDOR_INTEL:
@@ -510,8 +670,15 @@ int __init op_nmi_init(struct oprofile_operations *ops)
break;
}
- if (!cpu_type && !arch_perfmon_init(&cpu_type))
+ if (cpu_type)
+ break;
+
+ if (!cpu_has_arch_perfmon)
return -ENODEV;
+
+ /* use arch perfmon as fallback */
+ cpu_type = "i386/arch_perfmon";
+ model = &op_arch_perfmon_spec;
break;
default:
@@ -522,18 +689,23 @@ int __init op_nmi_init(struct oprofile_operations *ops)
register_cpu_notifier(&oprofile_cpu_nb);
#endif
/* default values, can be overwritten by model */
- ops->create_files = nmi_create_files;
- ops->setup = nmi_setup;
- ops->shutdown = nmi_shutdown;
- ops->start = nmi_start;
- ops->stop = nmi_stop;
- ops->cpu_type = cpu_type;
+ ops->create_files = nmi_create_files;
+ ops->setup = nmi_setup;
+ ops->shutdown = nmi_shutdown;
+ ops->start = nmi_start;
+ ops->stop = nmi_stop;
+ ops->cpu_type = cpu_type;
if (model->init)
ret = model->init(ops);
if (ret)
return ret;
+ if (!model->num_virt_counters)
+ model->num_virt_counters = model->num_counters;
+
+ mux_init(ops);
+
init_sysfs();
using_nmi = 1;
printk(KERN_INFO "oprofile: using NMI interrupt.\n");
diff --git a/arch/x86/oprofile/op_counter.h b/arch/x86/oprofile/op_counter.h
index 91b6a116165e..e28398df0df2 100644
--- a/arch/x86/oprofile/op_counter.h
+++ b/arch/x86/oprofile/op_counter.h
@@ -10,7 +10,7 @@
#ifndef OP_COUNTER_H
#define OP_COUNTER_H
-#define OP_MAX_COUNTER 8
+#define OP_MAX_COUNTER 32
/* Per-perfctr configuration as set via
* oprofilefs.
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 8fdf06e4edf9..39686c29f03a 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -9,12 +9,15 @@
* @author Philippe Elie
* @author Graydon Hoare
* @author Robert Richter <robert.richter@amd.com>
- * @author Barry Kasindorf
+ * @author Barry Kasindorf <barry.kasindorf@amd.com>
+ * @author Jason Yeh <jason.yeh@amd.com>
+ * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
*/
#include <linux/oprofile.h>
#include <linux/device.h>
#include <linux/pci.h>
+#include <linux/percpu.h>
#include <asm/ptrace.h>
#include <asm/msr.h>
@@ -25,43 +28,36 @@
#define NUM_COUNTERS 4
#define NUM_CONTROLS 4
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+#define NUM_VIRT_COUNTERS 32
+#define NUM_VIRT_CONTROLS 32
+#else
+#define NUM_VIRT_COUNTERS NUM_COUNTERS
+#define NUM_VIRT_CONTROLS NUM_CONTROLS
+#endif
+
+#define OP_EVENT_MASK 0x0FFF
+#define OP_CTR_OVERFLOW (1ULL<<31)
-#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0)
-#define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0)
-#define CTR_WRITE(l, msrs, c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1); } while (0)
-#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
-
-#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0)
-#define CTRL_READ(l, h, msrs, c) do {rdmsr(msrs->controls[(c)].addr, (l), (h)); } while (0)
-#define CTRL_WRITE(l, h, msrs, c) do {wrmsr(msrs->controls[(c)].addr, (l), (h)); } while (0)
-#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
-#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
-#define CTRL_CLEAR_LO(x) (x &= (1<<21))
-#define CTRL_CLEAR_HI(x) (x &= 0xfffffcf0)
-#define CTRL_SET_ENABLE(val) (val |= 1<<20)
-#define CTRL_SET_USR(val, u) (val |= ((u & 1) << 16))
-#define CTRL_SET_KERN(val, k) (val |= ((k & 1) << 17))
-#define CTRL_SET_UM(val, m) (val |= (m << 8))
-#define CTRL_SET_EVENT_LOW(val, e) (val |= (e & 0xff))
-#define CTRL_SET_EVENT_HIGH(val, e) (val |= ((e >> 8) & 0xf))
-#define CTRL_SET_HOST_ONLY(val, h) (val |= ((h & 1) << 9))
-#define CTRL_SET_GUEST_ONLY(val, h) (val |= ((h & 1) << 8))
-
-static unsigned long reset_value[NUM_COUNTERS];
+#define MSR_AMD_EVENTSEL_RESERVED ((0xFFFFFCF0ULL<<32)|(1ULL<<21))
+
+static unsigned long reset_value[NUM_VIRT_COUNTERS];
#ifdef CONFIG_OPROFILE_IBS
/* IbsFetchCtl bits/masks */
-#define IBS_FETCH_HIGH_VALID_BIT (1UL << 17) /* bit 49 */
-#define IBS_FETCH_HIGH_ENABLE (1UL << 16) /* bit 48 */
-#define IBS_FETCH_LOW_MAX_CNT_MASK 0x0000FFFFUL /* MaxCnt mask */
+#define IBS_FETCH_RAND_EN (1ULL<<57)
+#define IBS_FETCH_VAL (1ULL<<49)
+#define IBS_FETCH_ENABLE (1ULL<<48)
+#define IBS_FETCH_CNT_MASK 0xFFFF0000ULL
/*IbsOpCtl bits */
-#define IBS_OP_LOW_VALID_BIT (1ULL<<18) /* bit 18 */
-#define IBS_OP_LOW_ENABLE (1ULL<<17) /* bit 17 */
+#define IBS_OP_CNT_CTL (1ULL<<19)
+#define IBS_OP_VAL (1ULL<<18)
+#define IBS_OP_ENABLE (1ULL<<17)
-#define IBS_FETCH_SIZE 6
-#define IBS_OP_SIZE 12
+#define IBS_FETCH_SIZE 6
+#define IBS_OP_SIZE 12
static int has_ibs; /* AMD Family10h and later */
@@ -78,6 +74,45 @@ static struct op_ibs_config ibs_config;
#endif
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+
+static void op_mux_fill_in_addresses(struct op_msrs * const msrs)
+{
+ int i;
+
+ for (i = 0; i < NUM_VIRT_COUNTERS; i++) {
+ int hw_counter = op_x86_virt_to_phys(i);
+ if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
+ msrs->multiplex[i].addr = MSR_K7_PERFCTR0 + hw_counter;
+ else
+ msrs->multiplex[i].addr = 0;
+ }
+}
+
+static void op_mux_switch_ctrl(struct op_x86_model_spec const *model,
+ struct op_msrs const * const msrs)
+{
+ u64 val;
+ int i;
+
+ /* enable active counters */
+ for (i = 0; i < NUM_COUNTERS; ++i) {
+ int virt = op_x86_phys_to_virt(i);
+ if (!counter_config[virt].enabled)
+ continue;
+ rdmsrl(msrs->controls[i].addr, val);
+ val &= model->reserved;
+ val |= op_x86_get_ctrl(model, &counter_config[virt]);
+ wrmsrl(msrs->controls[i].addr, val);
+ }
+}
+
+#else
+
+static inline void op_mux_fill_in_addresses(struct op_msrs * const msrs) { }
+
+#endif
+
/* functions for op_amd_spec */
static void op_amd_fill_in_addresses(struct op_msrs * const msrs)
@@ -97,150 +132,174 @@ static void op_amd_fill_in_addresses(struct op_msrs * const msrs)
else
msrs->controls[i].addr = 0;
}
-}
+ op_mux_fill_in_addresses(msrs);
+}
-static void op_amd_setup_ctrs(struct op_msrs const * const msrs)
+static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
+ struct op_msrs const * const msrs)
{
- unsigned int low, high;
+ u64 val;
int i;
+ /* setup reset_value */
+ for (i = 0; i < NUM_VIRT_COUNTERS; ++i) {
+ if (counter_config[i].enabled)
+ reset_value[i] = counter_config[i].count;
+ else
+ reset_value[i] = 0;
+ }
+
/* clear all counters */
- for (i = 0 ; i < NUM_CONTROLS; ++i) {
- if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
+ for (i = 0; i < NUM_CONTROLS; ++i) {
+ if (unlikely(!msrs->controls[i].addr))
continue;
- CTRL_READ(low, high, msrs, i);
- CTRL_CLEAR_LO(low);
- CTRL_CLEAR_HI(high);
- CTRL_WRITE(low, high, msrs, i);
+ rdmsrl(msrs->controls[i].addr, val);
+ val &= model->reserved;
+ wrmsrl(msrs->controls[i].addr, val);
}
/* avoid a false detection of ctr overflows in NMI handler */
for (i = 0; i < NUM_COUNTERS; ++i) {
- if (unlikely(!CTR_IS_RESERVED(msrs, i)))
+ if (unlikely(!msrs->counters[i].addr))
continue;
- CTR_WRITE(1, msrs, i);
+ wrmsrl(msrs->counters[i].addr, -1LL);
}
/* enable active counters */
for (i = 0; i < NUM_COUNTERS; ++i) {
- if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) {
- reset_value[i] = counter_config[i].count;
+ int virt = op_x86_phys_to_virt(i);
+ if (!counter_config[virt].enabled)
+ continue;
+ if (!msrs->counters[i].addr)
+ continue;
- CTR_WRITE(counter_config[i].count, msrs, i);
-
- CTRL_READ(low, high, msrs, i);
- CTRL_CLEAR_LO(low);
- CTRL_CLEAR_HI(high);
- CTRL_SET_ENABLE(low);
- CTRL_SET_USR(low, counter_config[i].user);
- CTRL_SET_KERN(low, counter_config[i].kernel);
- CTRL_SET_UM(low, counter_config[i].unit_mask);
- CTRL_SET_EVENT_LOW(low, counter_config[i].event);
- CTRL_SET_EVENT_HIGH(high, counter_config[i].event);
- CTRL_SET_HOST_ONLY(high, 0);
- CTRL_SET_GUEST_ONLY(high, 0);
-
- CTRL_WRITE(low, high, msrs, i);
- } else {
- reset_value[i] = 0;
- }
+ /* setup counter registers */
+ wrmsrl(msrs->counters[i].addr, -(u64)reset_value[virt]);
+
+ /* setup control registers */
+ rdmsrl(msrs->controls[i].addr, val);
+ val &= model->reserved;
+ val |= op_x86_get_ctrl(model, &counter_config[virt]);
+ wrmsrl(msrs->controls[i].addr, val);
}
}
#ifdef CONFIG_OPROFILE_IBS
-static inline int
+static inline void
op_amd_handle_ibs(struct pt_regs * const regs,
struct op_msrs const * const msrs)
{
- u32 low, high;
- u64 msr;
+ u64 val, ctl;
struct op_entry entry;
if (!has_ibs)
- return 1;
+ return;
if (ibs_config.fetch_enabled) {
- rdmsr(MSR_AMD64_IBSFETCHCTL, low, high);
- if (high & IBS_FETCH_HIGH_VALID_BIT) {
- rdmsrl(MSR_AMD64_IBSFETCHLINAD, msr);
- oprofile_write_reserve(&entry, regs, msr,
+ rdmsrl(MSR_AMD64_IBSFETCHCTL, ctl);
+ if (ctl & IBS_FETCH_VAL) {
+ rdmsrl(MSR_AMD64_IBSFETCHLINAD, val);
+ oprofile_write_reserve(&entry, regs, val,
IBS_FETCH_CODE, IBS_FETCH_SIZE);
- oprofile_add_data(&entry, (u32)msr);
- oprofile_add_data(&entry, (u32)(msr >> 32));
- oprofile_add_data(&entry, low);
- oprofile_add_data(&entry, high);
- rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, msr);
- oprofile_add_data(&entry, (u32)msr);
- oprofile_add_data(&entry, (u32)(msr >> 32));
+ oprofile_add_data64(&entry, val);
+ oprofile_add_data64(&entry, ctl);
+ rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, val);
+ oprofile_add_data64(&entry, val);
oprofile_write_commit(&entry);
/* reenable the IRQ */
- high &= ~IBS_FETCH_HIGH_VALID_BIT;
- high |= IBS_FETCH_HIGH_ENABLE;
- low &= IBS_FETCH_LOW_MAX_CNT_MASK;
- wrmsr(MSR_AMD64_IBSFETCHCTL, low, high);
+ ctl &= ~(IBS_FETCH_VAL | IBS_FETCH_CNT_MASK);
+ ctl |= IBS_FETCH_ENABLE;
+ wrmsrl(MSR_AMD64_IBSFETCHCTL, ctl);
}
}
if (ibs_config.op_enabled) {
- rdmsr(MSR_AMD64_IBSOPCTL, low, high);
- if (low & IBS_OP_LOW_VALID_BIT) {
- rdmsrl(MSR_AMD64_IBSOPRIP, msr);
- oprofile_write_reserve(&entry, regs, msr,
+ rdmsrl(MSR_AMD64_IBSOPCTL, ctl);
+ if (ctl & IBS_OP_VAL) {
+ rdmsrl(MSR_AMD64_IBSOPRIP, val);
+ oprofile_write_reserve(&entry, regs, val,
IBS_OP_CODE, IBS_OP_SIZE);
- oprofile_add_data(&entry, (u32)msr);
- oprofile_add_data(&entry, (u32)(msr >> 32));
- rdmsrl(MSR_AMD64_IBSOPDATA, msr);
- oprofile_add_data(&entry, (u32)msr);
- oprofile_add_data(&entry, (u32)(msr >> 32));
- rdmsrl(MSR_AMD64_IBSOPDATA2, msr);
- oprofile_add_data(&entry, (u32)msr);
- oprofile_add_data(&entry, (u32)(msr >> 32));
- rdmsrl(MSR_AMD64_IBSOPDATA3, msr);
- oprofile_add_data(&entry, (u32)msr);
- oprofile_add_data(&entry, (u32)(msr >> 32));
- rdmsrl(MSR_AMD64_IBSDCLINAD, msr);
- oprofile_add_data(&entry, (u32)msr);
- oprofile_add_data(&entry, (u32)(msr >> 32));
- rdmsrl(MSR_AMD64_IBSDCPHYSAD, msr);
- oprofile_add_data(&entry, (u32)msr);
- oprofile_add_data(&entry, (u32)(msr >> 32));
+ oprofile_add_data64(&entry, val);
+ rdmsrl(MSR_AMD64_IBSOPDATA, val);
+ oprofile_add_data64(&entry, val);
+ rdmsrl(MSR_AMD64_IBSOPDATA2, val);
+ oprofile_add_data64(&entry, val);
+ rdmsrl(MSR_AMD64_IBSOPDATA3, val);
+ oprofile_add_data64(&entry, val);
+ rdmsrl(MSR_AMD64_IBSDCLINAD, val);
+ oprofile_add_data64(&entry, val);
+ rdmsrl(MSR_AMD64_IBSDCPHYSAD, val);
+ oprofile_add_data64(&entry, val);
oprofile_write_commit(&entry);
/* reenable the IRQ */
- high = 0;
- low &= ~IBS_OP_LOW_VALID_BIT;
- low |= IBS_OP_LOW_ENABLE;
- wrmsr(MSR_AMD64_IBSOPCTL, low, high);
+ ctl &= ~IBS_OP_VAL & 0xFFFFFFFF;
+ ctl |= IBS_OP_ENABLE;
+ wrmsrl(MSR_AMD64_IBSOPCTL, ctl);
}
}
+}
- return 1;
+static inline void op_amd_start_ibs(void)
+{
+ u64 val;
+ if (has_ibs && ibs_config.fetch_enabled) {
+ val = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF;
+ val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0;
+ val |= IBS_FETCH_ENABLE;
+ wrmsrl(MSR_AMD64_IBSFETCHCTL, val);
+ }
+
+ if (has_ibs && ibs_config.op_enabled) {
+ val = (ibs_config.max_cnt_op >> 4) & 0xFFFF;
+ val |= ibs_config.dispatched_ops ? IBS_OP_CNT_CTL : 0;
+ val |= IBS_OP_ENABLE;
+ wrmsrl(MSR_AMD64_IBSOPCTL, val);
+ }
+}
+
+static void op_amd_stop_ibs(void)
+{
+ if (has_ibs && ibs_config.fetch_enabled)
+ /* clear max count and enable */
+ wrmsrl(MSR_AMD64_IBSFETCHCTL, 0);
+
+ if (has_ibs && ibs_config.op_enabled)
+ /* clear max count and enable */
+ wrmsrl(MSR_AMD64_IBSOPCTL, 0);
}
+#else
+
+static inline void op_amd_handle_ibs(struct pt_regs * const regs,
+ struct op_msrs const * const msrs) { }
+static inline void op_amd_start_ibs(void) { }
+static inline void op_amd_stop_ibs(void) { }
+
#endif
static int op_amd_check_ctrs(struct pt_regs * const regs,
struct op_msrs const * const msrs)
{
- unsigned int low, high;
+ u64 val;
int i;
- for (i = 0 ; i < NUM_COUNTERS; ++i) {
- if (!reset_value[i])
+ for (i = 0; i < NUM_COUNTERS; ++i) {
+ int virt = op_x86_phys_to_virt(i);
+ if (!reset_value[virt])
continue;
- CTR_READ(low, high, msrs, i);
- if (CTR_OVERFLOWED(low)) {
- oprofile_add_sample(regs, i);
- CTR_WRITE(reset_value[i], msrs, i);
- }
+ rdmsrl(msrs->counters[i].addr, val);
+ /* bit is clear if overflowed: */
+ if (val & OP_CTR_OVERFLOW)
+ continue;
+ oprofile_add_sample(regs, virt);
+ wrmsrl(msrs->counters[i].addr, -(u64)reset_value[virt]);
}
-#ifdef CONFIG_OPROFILE_IBS
op_amd_handle_ibs(regs, msrs);
-#endif
/* See op_model_ppro.c */
return 1;
@@ -248,79 +307,50 @@ static int op_amd_check_ctrs(struct pt_regs * const regs,
static void op_amd_start(struct op_msrs const * const msrs)
{
- unsigned int low, high;
+ u64 val;
int i;
- for (i = 0 ; i < NUM_COUNTERS ; ++i) {
- if (reset_value[i]) {
- CTRL_READ(low, high, msrs, i);
- CTRL_SET_ACTIVE(low);
- CTRL_WRITE(low, high, msrs, i);
- }
- }
-#ifdef CONFIG_OPROFILE_IBS
- if (has_ibs && ibs_config.fetch_enabled) {
- low = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF;
- high = ((ibs_config.rand_en & 0x1) << 25) /* bit 57 */
- + IBS_FETCH_HIGH_ENABLE;
- wrmsr(MSR_AMD64_IBSFETCHCTL, low, high);
+ for (i = 0; i < NUM_COUNTERS; ++i) {
+ if (!reset_value[op_x86_phys_to_virt(i)])
+ continue;
+ rdmsrl(msrs->controls[i].addr, val);
+ val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+ wrmsrl(msrs->controls[i].addr, val);
}
- if (has_ibs && ibs_config.op_enabled) {
- low = ((ibs_config.max_cnt_op >> 4) & 0xFFFF)
- + ((ibs_config.dispatched_ops & 0x1) << 19) /* bit 19 */
- + IBS_OP_LOW_ENABLE;
- high = 0;
- wrmsr(MSR_AMD64_IBSOPCTL, low, high);
- }
-#endif
+ op_amd_start_ibs();
}
-
static void op_amd_stop(struct op_msrs const * const msrs)
{
- unsigned int low, high;
+ u64 val;
int i;
/*
* Subtle: stop on all counters to avoid race with setting our
* pm callback
*/
- for (i = 0 ; i < NUM_COUNTERS ; ++i) {
- if (!reset_value[i])
+ for (i = 0; i < NUM_COUNTERS; ++i) {
+ if (!reset_value[op_x86_phys_to_virt(i)])
continue;
- CTRL_READ(low, high, msrs, i);
- CTRL_SET_INACTIVE(low);
- CTRL_WRITE(low, high, msrs, i);
- }
-
-#ifdef CONFIG_OPROFILE_IBS
- if (has_ibs && ibs_config.fetch_enabled) {
- /* clear max count and enable */
- low = 0;
- high = 0;
- wrmsr(MSR_AMD64_IBSFETCHCTL, low, high);
+ rdmsrl(msrs->controls[i].addr, val);
+ val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
+ wrmsrl(msrs->controls[i].addr, val);
}
- if (has_ibs && ibs_config.op_enabled) {
- /* clear max count and enable */
- low = 0;
- high = 0;
- wrmsr(MSR_AMD64_IBSOPCTL, low, high);
- }
-#endif
+ op_amd_stop_ibs();
}
static void op_amd_shutdown(struct op_msrs const * const msrs)
{
int i;
- for (i = 0 ; i < NUM_COUNTERS ; ++i) {
- if (CTR_IS_RESERVED(msrs, i))
+ for (i = 0; i < NUM_COUNTERS; ++i) {
+ if (msrs->counters[i].addr)
release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
}
- for (i = 0 ; i < NUM_CONTROLS ; ++i) {
- if (CTRL_IS_RESERVED(msrs, i))
+ for (i = 0; i < NUM_CONTROLS; ++i) {
+ if (msrs->controls[i].addr)
release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
}
}
@@ -490,15 +520,21 @@ static void op_amd_exit(void) {}
#endif /* CONFIG_OPROFILE_IBS */
-struct op_x86_model_spec const op_amd_spec = {
- .init = op_amd_init,
- .exit = op_amd_exit,
+struct op_x86_model_spec op_amd_spec = {
.num_counters = NUM_COUNTERS,
.num_controls = NUM_CONTROLS,
+ .num_virt_counters = NUM_VIRT_COUNTERS,
+ .reserved = MSR_AMD_EVENTSEL_RESERVED,
+ .event_mask = OP_EVENT_MASK,
+ .init = op_amd_init,
+ .exit = op_amd_exit,
.fill_in_addresses = &op_amd_fill_in_addresses,
.setup_ctrs = &op_amd_setup_ctrs,
.check_ctrs = &op_amd_check_ctrs,
.start = &op_amd_start,
.stop = &op_amd_stop,
- .shutdown = &op_amd_shutdown
+ .shutdown = &op_amd_shutdown,
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+ .switch_ctrl = &op_mux_switch_ctrl,
+#endif
};
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c
index 819b131fd752..ac6b354becdf 100644
--- a/arch/x86/oprofile/op_model_p4.c
+++ b/arch/x86/oprofile/op_model_p4.c
@@ -32,6 +32,8 @@
#define NUM_CCCRS_HT2 9
#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2)
+#define OP_CTR_OVERFLOW (1ULL<<31)
+
static unsigned int num_counters = NUM_COUNTERS_NON_HT;
static unsigned int num_controls = NUM_CONTROLS_NON_HT;
@@ -350,8 +352,6 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1))
#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25))
#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9))
-#define ESCR_READ(escr, high, ev, i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high)); } while (0)
-#define ESCR_WRITE(escr, high, ev, i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high)); } while (0)
#define CCCR_RESERVED_BITS 0x38030FFF
#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
@@ -361,17 +361,9 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27))
#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12))
#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12))
-#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0)
-#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0)
#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))
-#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0)
-#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0)
-#define CTR_READ(l, h, i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h)); } while (0)
-#define CTR_WRITE(l, i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1); } while (0)
-#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))
-
/* this assigns a "stagger" to the current CPU, which is used throughout
the code in this module as an extra array offset, to select the "even"
@@ -515,7 +507,7 @@ static void pmc_setup_one_p4_counter(unsigned int ctr)
if (ev->bindings[i].virt_counter & counter_bit) {
/* modify ESCR */
- ESCR_READ(escr, high, ev, i);
+ rdmsr(ev->bindings[i].escr_address, escr, high);
ESCR_CLEAR(escr);
if (stag == 0) {
ESCR_SET_USR_0(escr, counter_config[ctr].user);
@@ -526,10 +518,11 @@ static void pmc_setup_one_p4_counter(unsigned int ctr)
}
ESCR_SET_EVENT_SELECT(escr, ev->event_select);
ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);
- ESCR_WRITE(escr, high, ev, i);
+ wrmsr(ev->bindings[i].escr_address, escr, high);
/* modify CCCR */
- CCCR_READ(cccr, high, VIRT_CTR(stag, ctr));
+ rdmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address,
+ cccr, high);
CCCR_CLEAR(cccr);
CCCR_SET_REQUIRED_BITS(cccr);
CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
@@ -537,7 +530,8 @@ static void pmc_setup_one_p4_counter(unsigned int ctr)
CCCR_SET_PMI_OVF_0(cccr);
else
CCCR_SET_PMI_OVF_1(cccr);
- CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr));
+ wrmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address,
+ cccr, high);
return;
}
}
@@ -548,7 +542,8 @@ static void pmc_setup_one_p4_counter(unsigned int ctr)
}
-static void p4_setup_ctrs(struct op_msrs const * const msrs)
+static void p4_setup_ctrs(struct op_x86_model_spec const *model,
+ struct op_msrs const * const msrs)
{
unsigned int i;
unsigned int low, high;
@@ -563,8 +558,8 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs)
}
/* clear the cccrs we will use */
- for (i = 0 ; i < num_counters ; i++) {
- if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
+ for (i = 0; i < num_counters; i++) {
+ if (unlikely(!msrs->controls[i].addr))
continue;
rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
CCCR_CLEAR(low);
@@ -574,17 +569,18 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs)
/* clear all escrs (including those outside our concern) */
for (i = num_counters; i < num_controls; i++) {
- if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
+ if (unlikely(!msrs->controls[i].addr))
continue;
wrmsr(msrs->controls[i].addr, 0, 0);
}
/* setup all counters */
- for (i = 0 ; i < num_counters ; ++i) {
- if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs, i))) {
+ for (i = 0; i < num_counters; ++i) {
+ if (counter_config[i].enabled && msrs->controls[i].addr) {
reset_value[i] = counter_config[i].count;
pmc_setup_one_p4_counter(i);
- CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i));
+ wrmsrl(p4_counters[VIRT_CTR(stag, i)].counter_address,
+ -(u64)counter_config[i].count);
} else {
reset_value[i] = 0;
}
@@ -624,14 +620,16 @@ static int p4_check_ctrs(struct pt_regs * const regs,
real = VIRT_CTR(stag, i);
- CCCR_READ(low, high, real);
- CTR_READ(ctr, high, real);
- if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) {
+ rdmsr(p4_counters[real].cccr_address, low, high);
+ rdmsr(p4_counters[real].counter_address, ctr, high);
+ if (CCCR_OVF_P(low) || !(ctr & OP_CTR_OVERFLOW)) {
oprofile_add_sample(regs, i);
- CTR_WRITE(reset_value[i], real);
+ wrmsrl(p4_counters[real].counter_address,
+ -(u64)reset_value[i]);
CCCR_CLEAR_OVF(low);
- CCCR_WRITE(low, high, real);
- CTR_WRITE(reset_value[i], real);
+ wrmsr(p4_counters[real].cccr_address, low, high);
+ wrmsrl(p4_counters[real].counter_address,
+ -(u64)reset_value[i]);
}
}
@@ -653,9 +651,9 @@ static void p4_start(struct op_msrs const * const msrs)
for (i = 0; i < num_counters; ++i) {
if (!reset_value[i])
continue;
- CCCR_READ(low, high, VIRT_CTR(stag, i));
+ rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
CCCR_SET_ENABLE(low);
- CCCR_WRITE(low, high, VIRT_CTR(stag, i));
+ wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
}
}
@@ -670,9 +668,9 @@ static void p4_stop(struct op_msrs const * const msrs)
for (i = 0; i < num_counters; ++i) {
if (!reset_value[i])
continue;
- CCCR_READ(low, high, VIRT_CTR(stag, i));
+ rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
CCCR_SET_DISABLE(low);
- CCCR_WRITE(low, high, VIRT_CTR(stag, i));
+ wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
}
}
@@ -680,8 +678,8 @@ static void p4_shutdown(struct op_msrs const * const msrs)
{
int i;
- for (i = 0 ; i < num_counters ; ++i) {
- if (CTR_IS_RESERVED(msrs, i))
+ for (i = 0; i < num_counters; ++i) {
+ if (msrs->counters[i].addr)
release_perfctr_nmi(msrs->counters[i].addr);
}
/*
@@ -689,15 +687,15 @@ static void p4_shutdown(struct op_msrs const * const msrs)
* conjunction with the counter registers (hence the starting offset).
* This saves a few bits.
*/
- for (i = num_counters ; i < num_controls ; ++i) {
- if (CTRL_IS_RESERVED(msrs, i))
+ for (i = num_counters; i < num_controls; ++i) {
+ if (msrs->controls[i].addr)
release_evntsel_nmi(msrs->controls[i].addr);
}
}
#ifdef CONFIG_SMP
-struct op_x86_model_spec const op_p4_ht2_spec = {
+struct op_x86_model_spec op_p4_ht2_spec = {
.num_counters = NUM_COUNTERS_HT2,
.num_controls = NUM_CONTROLS_HT2,
.fill_in_addresses = &p4_fill_in_addresses,
@@ -709,7 +707,7 @@ struct op_x86_model_spec const op_p4_ht2_spec = {
};
#endif
-struct op_x86_model_spec const op_p4_spec = {
+struct op_x86_model_spec op_p4_spec = {
.num_counters = NUM_COUNTERS_NON_HT,
.num_controls = NUM_CONTROLS_NON_HT,
.fill_in_addresses = &p4_fill_in_addresses,
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 4da7230b3d17..4899215999de 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -10,6 +10,7 @@
* @author Philippe Elie
* @author Graydon Hoare
* @author Andi Kleen
+ * @author Robert Richter <robert.richter@amd.com>
*/
#include <linux/oprofile.h>
@@ -18,7 +19,6 @@
#include <asm/msr.h>
#include <asm/apic.h>
#include <asm/nmi.h>
-#include <asm/perf_counter.h>
#include "op_x86_model.h"
#include "op_counter.h"
@@ -26,20 +26,7 @@
static int num_counters = 2;
static int counter_width = 32;
-#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0)
-#define CTR_OVERFLOWED(n) (!((n) & (1ULL<<(counter_width-1))))
-
-#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0)
-#define CTRL_READ(l, h, msrs, c) do {rdmsr((msrs->controls[(c)].addr), (l), (h)); } while (0)
-#define CTRL_WRITE(l, h, msrs, c) do {wrmsr((msrs->controls[(c)].addr), (l), (h)); } while (0)
-#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
-#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
-#define CTRL_CLEAR(x) (x &= (1<<21))
-#define CTRL_SET_ENABLE(val) (val |= 1<<20)
-#define CTRL_SET_USR(val, u) (val |= ((u & 1) << 16))
-#define CTRL_SET_KERN(val, k) (val |= ((k & 1) << 17))
-#define CTRL_SET_UM(val, m) (val |= (m << 8))
-#define CTRL_SET_EVENT(val, e) (val |= e)
+#define MSR_PPRO_EVENTSEL_RESERVED ((0xFFFFFFFFULL<<32)|(1ULL<<21))
static u64 *reset_value;
@@ -63,9 +50,10 @@ static void ppro_fill_in_addresses(struct op_msrs * const msrs)
}
-static void ppro_setup_ctrs(struct op_msrs const * const msrs)
+static void ppro_setup_ctrs(struct op_x86_model_spec const *model,
+ struct op_msrs const * const msrs)
{
- unsigned int low, high;
+ u64 val;
int i;
if (!reset_value) {
@@ -93,36 +81,30 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)
}
/* clear all counters */
- for (i = 0 ; i < num_counters; ++i) {
- if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
+ for (i = 0; i < num_counters; ++i) {
+ if (unlikely(!msrs->controls[i].addr))
continue;
- CTRL_READ(low, high, msrs, i);
- CTRL_CLEAR(low);
- CTRL_WRITE(low, high, msrs, i);
+ rdmsrl(msrs->controls[i].addr, val);
+ val &= model->reserved;
+ wrmsrl(msrs->controls[i].addr, val);
}
/* avoid a false detection of ctr overflows in NMI handler */
for (i = 0; i < num_counters; ++i) {
- if (unlikely(!CTR_IS_RESERVED(msrs, i)))
+ if (unlikely(!msrs->counters[i].addr))
continue;
wrmsrl(msrs->counters[i].addr, -1LL);
}
/* enable active counters */
for (i = 0; i < num_counters; ++i) {
- if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) {
+ if (counter_config[i].enabled && msrs->counters[i].addr) {
reset_value[i] = counter_config[i].count;
-
wrmsrl(msrs->counters[i].addr, -reset_value[i]);
-
- CTRL_READ(low, high, msrs, i);
- CTRL_CLEAR(low);
- CTRL_SET_ENABLE(low);
- CTRL_SET_USR(low, counter_config[i].user);
- CTRL_SET_KERN(low, counter_config[i].kernel);
- CTRL_SET_UM(low, counter_config[i].unit_mask);
- CTRL_SET_EVENT(low, counter_config[i].event);
- CTRL_WRITE(low, high, msrs, i);
+ rdmsrl(msrs->controls[i].addr, val);
+ val &= model->reserved;
+ val |= op_x86_get_ctrl(model, &counter_config[i]);
+ wrmsrl(msrs->controls[i].addr, val);
} else {
reset_value[i] = 0;
}
@@ -143,14 +125,14 @@ static int ppro_check_ctrs(struct pt_regs * const regs,
if (unlikely(!reset_value))
goto out;
- for (i = 0 ; i < num_counters; ++i) {
+ for (i = 0; i < num_counters; ++i) {
if (!reset_value[i])
continue;
rdmsrl(msrs->counters[i].addr, val);
- if (CTR_OVERFLOWED(val)) {
- oprofile_add_sample(regs, i);
- wrmsrl(msrs->counters[i].addr, -reset_value[i]);
- }
+ if (val & (1ULL << (counter_width - 1)))
+ continue;
+ oprofile_add_sample(regs, i);
+ wrmsrl(msrs->counters[i].addr, -reset_value[i]);
}
out:
@@ -171,16 +153,16 @@ out:
static void ppro_start(struct op_msrs const * const msrs)
{
- unsigned int low, high;
+ u64 val;
int i;
if (!reset_value)
return;
for (i = 0; i < num_counters; ++i) {
if (reset_value[i]) {
- CTRL_READ(low, high, msrs, i);
- CTRL_SET_ACTIVE(low);
- CTRL_WRITE(low, high, msrs, i);
+ rdmsrl(msrs->controls[i].addr, val);
+ val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+ wrmsrl(msrs->controls[i].addr, val);
}
}
}
@@ -188,7 +170,7 @@ static void ppro_start(struct op_msrs const * const msrs)
static void ppro_stop(struct op_msrs const * const msrs)
{
- unsigned int low, high;
+ u64 val;
int i;
if (!reset_value)
@@ -196,9 +178,9 @@ static void ppro_stop(struct op_msrs const * const msrs)
for (i = 0; i < num_counters; ++i) {
if (!reset_value[i])
continue;
- CTRL_READ(low, high, msrs, i);
- CTRL_SET_INACTIVE(low);
- CTRL_WRITE(low, high, msrs, i);
+ rdmsrl(msrs->controls[i].addr, val);
+ val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
+ wrmsrl(msrs->controls[i].addr, val);
}
}
@@ -206,12 +188,12 @@ static void ppro_shutdown(struct op_msrs const * const msrs)
{
int i;
- for (i = 0 ; i < num_counters ; ++i) {
- if (CTR_IS_RESERVED(msrs, i))
+ for (i = 0; i < num_counters; ++i) {
+ if (msrs->counters[i].addr)
release_perfctr_nmi(MSR_P6_PERFCTR0 + i);
}
- for (i = 0 ; i < num_counters ; ++i) {
- if (CTRL_IS_RESERVED(msrs, i))
+ for (i = 0; i < num_counters; ++i) {
+ if (msrs->controls[i].addr)
release_evntsel_nmi(MSR_P6_EVNTSEL0 + i);
}
if (reset_value) {
@@ -222,8 +204,9 @@ static void ppro_shutdown(struct op_msrs const * const msrs)
struct op_x86_model_spec op_ppro_spec = {
- .num_counters = 2, /* can be overriden */
- .num_controls = 2, /* dito */
+ .num_counters = 2,
+ .num_controls = 2,
+ .reserved = MSR_PPRO_EVENTSEL_RESERVED,
.fill_in_addresses = &ppro_fill_in_addresses,
.setup_ctrs = &ppro_setup_ctrs,
.check_ctrs = &ppro_check_ctrs,
@@ -241,7 +224,7 @@ struct op_x86_model_spec op_ppro_spec = {
* the specific CPU.
*/
-void arch_perfmon_setup_counters(void)
+static void arch_perfmon_setup_counters(void)
{
union cpuid10_eax eax;
@@ -259,11 +242,17 @@ void arch_perfmon_setup_counters(void)
op_arch_perfmon_spec.num_counters = num_counters;
op_arch_perfmon_spec.num_controls = num_counters;
- op_ppro_spec.num_counters = num_counters;
- op_ppro_spec.num_controls = num_counters;
+}
+
+static int arch_perfmon_init(struct oprofile_operations *ignore)
+{
+ arch_perfmon_setup_counters();
+ return 0;
}
struct op_x86_model_spec op_arch_perfmon_spec = {
+ .reserved = MSR_PPRO_EVENTSEL_RESERVED,
+ .init = &arch_perfmon_init,
/* num_counters/num_controls filled in at runtime */
.fill_in_addresses = &ppro_fill_in_addresses,
/* user space does the cpuid check for available events */
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
index 825e79064d64..b83776180c7f 100644
--- a/arch/x86/oprofile/op_x86_model.h
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -6,51 +6,66 @@
* @remark Read the file COPYING
*
* @author Graydon Hoare
+ * @author Robert Richter <robert.richter@amd.com>
*/
#ifndef OP_X86_MODEL_H
#define OP_X86_MODEL_H
-struct op_saved_msr {
- unsigned int high;
- unsigned int low;
-};
+#include <asm/types.h>
+#include <asm/perf_counter.h>
struct op_msr {
- unsigned long addr;
- struct op_saved_msr saved;
+ unsigned long addr;
+ u64 saved;
};
struct op_msrs {
struct op_msr *counters;
struct op_msr *controls;
+ struct op_msr *multiplex;
};
struct pt_regs;
+struct oprofile_operations;
+
/* The model vtable abstracts the differences between
* various x86 CPU models' perfctr support.
*/
struct op_x86_model_spec {
- int (*init)(struct oprofile_operations *ops);
- void (*exit)(void);
- unsigned int num_counters;
- unsigned int num_controls;
- void (*fill_in_addresses)(struct op_msrs * const msrs);
- void (*setup_ctrs)(struct op_msrs const * const msrs);
- int (*check_ctrs)(struct pt_regs * const regs,
- struct op_msrs const * const msrs);
- void (*start)(struct op_msrs const * const msrs);
- void (*stop)(struct op_msrs const * const msrs);
- void (*shutdown)(struct op_msrs const * const msrs);
+ unsigned int num_counters;
+ unsigned int num_controls;
+ unsigned int num_virt_counters;
+ u64 reserved;
+ u16 event_mask;
+ int (*init)(struct oprofile_operations *ops);
+ void (*exit)(void);
+ void (*fill_in_addresses)(struct op_msrs * const msrs);
+ void (*setup_ctrs)(struct op_x86_model_spec const *model,
+ struct op_msrs const * const msrs);
+ int (*check_ctrs)(struct pt_regs * const regs,
+ struct op_msrs const * const msrs);
+ void (*start)(struct op_msrs const * const msrs);
+ void (*stop)(struct op_msrs const * const msrs);
+ void (*shutdown)(struct op_msrs const * const msrs);
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+ void (*switch_ctrl)(struct op_x86_model_spec const *model,
+ struct op_msrs const * const msrs);
+#endif
};
+struct op_counter_config;
+
+extern u64 op_x86_get_ctrl(struct op_x86_model_spec const *model,
+ struct op_counter_config *counter_config);
+extern int op_x86_phys_to_virt(int phys);
+extern int op_x86_virt_to_phys(int virt);
+
extern struct op_x86_model_spec op_ppro_spec;
-extern struct op_x86_model_spec const op_p4_spec;
-extern struct op_x86_model_spec const op_p4_ht2_spec;
-extern struct op_x86_model_spec const op_amd_spec;
+extern struct op_x86_model_spec op_p4_spec;
+extern struct op_x86_model_spec op_p4_ht2_spec;
+extern struct op_x86_model_spec op_amd_spec;
extern struct op_x86_model_spec op_arch_perfmon_spec;
-extern void arch_perfmon_setup_counters(void);
-
#endif /* OP_X86_MODEL_H */