summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig83
-rw-r--r--arch/x86/Kconfig.debug24
-rw-r--r--arch/x86/boot/tty.c2
-rw-r--r--arch/x86/boot/video-vga.c4
-rw-r--r--arch/x86/boot/video.c2
-rw-r--r--arch/x86/ia32/ia32_signal.c68
-rw-r--r--arch/x86/include/asm/apic.h1
-rw-r--r--arch/x86/include/asm/bigsmp/apic.h2
-rw-r--r--arch/x86/include/asm/bitops.h10
-rw-r--r--arch/x86/include/asm/byteorder.h74
-rw-r--r--arch/x86/include/asm/cpufeature.h4
-rw-r--r--arch/x86/include/asm/ds.h6
-rw-r--r--arch/x86/include/asm/dwarf2.h97
-rw-r--r--arch/x86/include/asm/elf.h2
-rw-r--r--arch/x86/include/asm/emergency-restart.h4
-rw-r--r--arch/x86/include/asm/es7000/apic.h79
-rw-r--r--arch/x86/include/asm/es7000/wakecpu.h41
-rw-r--r--arch/x86/include/asm/ftrace.h34
-rw-r--r--arch/x86/include/asm/genapic_32.h19
-rw-r--r--arch/x86/include/asm/genapic_64.h2
-rw-r--r--arch/x86/include/asm/hw_irq.h4
-rw-r--r--arch/x86/include/asm/hypervisor.h26
-rw-r--r--arch/x86/include/asm/io_apic.h10
-rw-r--r--arch/x86/include/asm/irq.h4
-rw-r--r--arch/x86/include/asm/irq_regs_32.h2
-rw-r--r--arch/x86/include/asm/kexec.h31
-rw-r--r--arch/x86/include/asm/linkage.h60
-rw-r--r--arch/x86/include/asm/mach-default/mach_apic.h2
-rw-r--r--arch/x86/include/asm/mach-default/mach_wakecpu.h24
-rw-r--r--arch/x86/include/asm/mach-default/smpboot_hooks.h8
-rw-r--r--arch/x86/include/asm/mach-generic/mach_apic.h1
-rw-r--r--arch/x86/include/asm/mach-generic/mach_wakecpu.h12
-rw-r--r--arch/x86/include/asm/msr.h8
-rw-r--r--arch/x86/include/asm/numaq/wakecpu.h24
-rw-r--r--arch/x86/include/asm/pci.h2
-rw-r--r--arch/x86/include/asm/pci_64.h14
-rw-r--r--arch/x86/include/asm/percpu.h83
-rw-r--r--arch/x86/include/asm/processor.h4
-rw-r--r--arch/x86/include/asm/ptrace.h2
-rw-r--r--arch/x86/include/asm/reboot.h5
-rw-r--r--arch/x86/include/asm/setup.h7
-rw-r--r--arch/x86/include/asm/system.h2
-rw-r--r--arch/x86/include/asm/thread_info.h2
-rw-r--r--arch/x86/include/asm/tsc.h8
-rw-r--r--arch/x86/include/asm/uaccess.h2
-rw-r--r--arch/x86/include/asm/uaccess_32.h8
-rw-r--r--arch/x86/include/asm/uaccess_64.h6
-rw-r--r--arch/x86/include/asm/uv/bios.h34
-rw-r--r--arch/x86/include/asm/uv/uv_hub.h87
-rw-r--r--arch/x86/include/asm/vmware.h27
-rw-r--r--arch/x86/kernel/Makefile13
-rw-r--r--arch/x86/kernel/apic.c123
-rw-r--r--arch/x86/kernel/bios_uv.c60
-rw-r--r--arch/x86/kernel/check.c161
-rw-r--r--arch/x86/kernel/cpu/Makefile1
-rw-r--r--arch/x86/kernel/cpu/common.c2
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.c18
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.h17
-rw-r--r--arch/x86/kernel/cpu/hypervisor.c58
-rw-r--r--arch/x86/kernel/cpu/intel.c3
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c346
-rw-r--r--arch/x86/kernel/cpu/vmware.c112
-rw-r--r--arch/x86/kernel/crash.c70
-rw-r--r--arch/x86/kernel/ds.c97
-rw-r--r--arch/x86/kernel/dumpstack.c319
-rw-r--r--arch/x86/kernel/dumpstack.h39
-rw-r--r--arch/x86/kernel/dumpstack_32.c302
-rw-r--r--arch/x86/kernel/dumpstack_64.c282
-rw-r--r--arch/x86/kernel/entry_32.S81
-rw-r--r--arch/x86/kernel/entry_64.S1126
-rw-r--r--arch/x86/kernel/es7000_32.c62
-rw-r--r--arch/x86/kernel/ftrace.c312
-rw-r--r--arch/x86/kernel/genapic_64.c4
-rw-r--r--arch/x86/kernel/genx2apic_uv_x.c108
-rw-r--r--arch/x86/kernel/hpet.c2
-rw-r--r--arch/x86/kernel/i387.c2
-rw-r--r--arch/x86/kernel/io_apic.c48
-rw-r--r--arch/x86/kernel/irq_64.c24
-rw-r--r--arch/x86/kernel/irqinit_32.c2
-rw-r--r--arch/x86/kernel/irqinit_64.c66
-rw-r--r--arch/x86/kernel/kvmclock.c2
-rw-r--r--arch/x86/kernel/machine_kexec_32.c104
-rw-r--r--arch/x86/kernel/nmi.c58
-rw-r--r--arch/x86/kernel/numaq_32.c10
-rw-r--r--arch/x86/kernel/pci-calgary_64.c2
-rw-r--r--arch/x86/kernel/process.c16
-rw-r--r--arch/x86/kernel/ptrace.c9
-rw-r--r--arch/x86/kernel/reboot.c126
-rw-r--r--arch/x86/kernel/relocate_kernel_32.S115
-rw-r--r--arch/x86/kernel/setup.c167
-rw-r--r--arch/x86/kernel/signal_32.c161
-rw-r--r--arch/x86/kernel/signal_64.c138
-rw-r--r--arch/x86/kernel/smp.c13
-rw-r--r--arch/x86/kernel/smpboot.c17
-rw-r--r--arch/x86/kernel/stacktrace.c64
-rw-r--r--arch/x86/kernel/time_64.c2
-rw-r--r--arch/x86/kernel/tlb_uv.c4
-rw-r--r--arch/x86/kernel/tsc.c42
-rw-r--r--arch/x86/kernel/tsc_sync.c8
-rw-r--r--arch/x86/kernel/vsyscall_64.c12
-rw-r--r--arch/x86/kernel/xsave.c2
-rw-r--r--arch/x86/lguest/boot.c3
-rw-r--r--arch/x86/lib/usercopy_32.c8
-rw-r--r--arch/x86/lib/usercopy_64.c4
-rw-r--r--arch/x86/mach-generic/bigsmp.c1
-rw-r--r--arch/x86/mach-generic/default.c1
-rw-r--r--arch/x86/mach-generic/es7000.c14
-rw-r--r--arch/x86/mach-generic/probe.c16
-rw-r--r--arch/x86/mach-generic/summit.c1
-rw-r--r--arch/x86/mm/Makefile3
-rw-r--r--arch/x86/mm/fault.c13
-rw-r--r--arch/x86/mm/init_32.c4
-rw-r--r--arch/x86/mm/init_64.c2
-rw-r--r--arch/x86/oprofile/op_model_ppro.c2
-rw-r--r--arch/x86/pci/common.c17
-rw-r--r--arch/x86/pci/direct.c4
-rw-r--r--arch/x86/pci/fixup.c25
-rw-r--r--arch/x86/pci/pci.h1
-rw-r--r--arch/x86/vdso/vclock_gettime.c3
-rw-r--r--arch/x86/vdso/vdso32-setup.c2
-rw-r--r--arch/x86/vdso/vma.c2
-rw-r--r--arch/x86/xen/mmu.c21
-rw-r--r--arch/x86/xen/smp.c2
-rw-r--r--arch/x86/xen/xen-ops.h2
124 files changed, 3752 insertions, 2398 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4b50d8172574..54e052c2562f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -29,11 +29,14 @@ config X86
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_DYNAMIC_FTRACE
select HAVE_FUNCTION_TRACER
+ select HAVE_FUNCTION_RET_TRACER if X86_32
+ select HAVE_FUNCTION_TRACE_MCOUNT_TEST
select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
select HAVE_ARCH_KGDB if !X86_VOYAGER
select HAVE_ARCH_TRACEHOOK
select HAVE_GENERIC_DMA_COHERENT if X86_32
select HAVE_EFFICIENT_UNALIGNED_ACCESS
+ select USER_STACKTRACE_SUPPORT
config ARCH_DEFCONFIG
string
@@ -242,21 +245,13 @@ config X86_FIND_SMP_CONFIG
def_bool y
depends on X86_MPPARSE || X86_VOYAGER
-if ACPI
config X86_MPPARSE
- def_bool y
- bool "Enable MPS table"
+ bool "Enable MPS table" if ACPI
+ default y
depends on X86_LOCAL_APIC
help
For old smp systems that do not have proper acpi support. Newer systems
(esp with 64bit cpus) with acpi support, MADT and DSDT will override it
-endif
-
-if !ACPI
-config X86_MPPARSE
- def_bool y
- depends on X86_LOCAL_APIC
-endif
choice
prompt "Subarchitecture Type"
@@ -367,10 +362,10 @@ config X86_RDC321X
as R-8610-(G).
If you don't have one of these chips, you should say N here.
-config SCHED_NO_NO_OMIT_FRAME_POINTER
+config SCHED_OMIT_FRAME_POINTER
def_bool y
prompt "Single-depth WCHAN output"
- depends on X86_32
+ depends on X86
help
Calculate simpler /proc/<PID>/wchan values. If this option
is disabled then wchan values will recurse back to the
@@ -465,10 +460,6 @@ config X86_CYCLONE_TIMER
def_bool y
depends on X86_GENERICARCH
-config ES7000_CLUSTERED_APIC
- def_bool y
- depends on SMP && X86_ES7000 && MPENTIUMIII
-
source "arch/x86/Kconfig.cpu"
config HPET_TIMER
@@ -569,7 +560,7 @@ config AMD_IOMMU
# need this always selected by IOMMU for the VIA workaround
config SWIOTLB
- bool
+ def_bool y if X86_64
help
Support for software bounce buffers used on x86-64 systems
which don't have a hardware IOMMU (e.g. the current generation
@@ -660,6 +651,30 @@ config X86_VISWS_APIC
def_bool y
depends on X86_32 && X86_VISWS
+config X86_REROUTE_FOR_BROKEN_BOOT_IRQS
+ bool "Reroute for broken boot IRQs"
+ default n
+ depends on X86_IO_APIC
+ help
+ This option enables a workaround that fixes a source of
+ spurious interrupts. This is recommended when threaded
+ interrupt handling is used on systems where the generation of
+ superfluous "boot interrupts" cannot be disabled.
+
+ Some chipsets generate a legacy INTx "boot IRQ" when the IRQ
+ entry in the chipset's IO-APIC is masked (as, e.g. the RT
+ kernel does during interrupt handling). On chipsets where this
+ boot IRQ generation cannot be disabled, this workaround keeps
+ the original IRQ line masked so that only the equivalent "boot
+ IRQ" is delivered to the CPUs. The workaround also tells the
+ kernel to set up the IRQ handler on the boot IRQ line. In this
+ way only one interrupt is delivered to the kernel. Otherwise
+ the spurious second interrupt may cause the kernel to bring
+ down (vital) interrupt lines.
+
+ Only affects "broken" chipsets. Interrupt sharing may be
+ increased on these systems.
+
config X86_MCE
bool "Machine Check Exception"
depends on !X86_VOYAGER
@@ -956,24 +971,37 @@ config X86_PAE
config ARCH_PHYS_ADDR_T_64BIT
def_bool X86_64 || X86_PAE
+config DIRECT_GBPAGES
+ bool "Enable 1GB pages for kernel pagetables" if EMBEDDED
+ default y
+ depends on X86_64
+ help
+ Allow the kernel linear mapping to use 1GB pages on CPUs that
+ support it. This can improve the kernel's performance a tiny bit by
+ reducing TLB pressure. If in doubt, say "Y".
+
# Common NUMA Features
config NUMA
- bool "Numa Memory Allocation and Scheduler Support (EXPERIMENTAL)"
+ bool "Numa Memory Allocation and Scheduler Support"
depends on SMP
depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && EXPERIMENTAL)
default n if X86_PC
default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP)
help
Enable NUMA (Non Uniform Memory Access) support.
+
The kernel will try to allocate memory used by a CPU on the
local memory controller of the CPU and add some more
NUMA awareness to the kernel.
- For 32-bit this is currently highly experimental and should be only
- used for kernel development. It might also cause boot failures.
- For 64-bit this is recommended on all multiprocessor Opteron systems.
- If the system is EM64T, you should say N unless your system is
- EM64T NUMA.
+ For 64-bit this is recommended if the system is Intel Core i7
+ (or later), AMD Opteron, or EM64T NUMA.
+
+ For 32-bit this is only needed on (rare) 32-bit-only platforms
+ that support NUMA topologies, such as NUMAQ / Summit, or if you
+ boot a 32-bit kernel on a 64-bit NUMA platform.
+
+ Otherwise, you should say N.
comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI"
depends on X86_32 && X86_SUMMIT && (!HIGHMEM64G || !ACPI)
@@ -1072,6 +1100,11 @@ config ARCH_MEMORY_PROBE
def_bool X86_64
depends on MEMORY_HOTPLUG
+config ILLEGAL_POINTER_VALUE
+ hex
+ default 0 if X86_32
+ default 0xdead000000000000 if X86_64
+
source "mm/Kconfig"
config HIGHPTE
@@ -1490,6 +1523,10 @@ config ARCH_ENABLE_MEMORY_HOTPLUG
def_bool y
depends on X86_64 || (X86_32 && HIGHMEM)
+config ARCH_ENABLE_MEMORY_HOTREMOVE
+ def_bool y
+ depends on MEMORY_HOTPLUG
+
config HAVE_ARCH_EARLY_PFN_TO_NID
def_bool X86_64
depends on NUMA
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 95fe606cb9a3..28f111461ca8 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -114,18 +114,6 @@ config DEBUG_RODATA
data. This is recommended so that we can catch kernel bugs sooner.
If in doubt, say "Y".
-config DIRECT_GBPAGES
- bool "Enable gbpages-mapped kernel pagetables"
- depends on DEBUG_KERNEL && EXPERIMENTAL && X86_64
- help
- Enable gigabyte pages support (if the CPU supports it). This can
- improve the kernel's performance a tiny bit by reducing TLB
- pressure.
-
- This is experimental code.
-
- If in doubt, say "N".
-
config DEBUG_RODATA_TEST
bool "Testcase for the DEBUG_RODATA feature"
depends on DEBUG_RODATA
@@ -187,14 +175,10 @@ config IOMMU_LEAK
Add a simple leak tracer to the IOMMU code. This is useful when you
are debugging a buggy device driver that leaks IOMMU mappings.
-config MMIOTRACE_HOOKS
- bool
-
config MMIOTRACE
bool "Memory mapped IO tracing"
depends on DEBUG_KERNEL && PCI
select TRACING
- select MMIOTRACE_HOOKS
help
Mmiotrace traces Memory Mapped I/O access and is meant for
debugging and reverse engineering. It is called from the ioremap
@@ -308,10 +292,10 @@ config OPTIMIZE_INLINING
developers have marked 'inline'. Doing so takes away freedom from gcc to
do what it thinks is best, which is desirable for the gcc 3.x series of
compilers. The gcc 4.x series have a rewritten inlining algorithm and
- disabling this option will generate a smaller kernel there. Hopefully
- this algorithm is so good that allowing gcc4 to make the decision can
- become the default in the future, until then this option is there to
- test gcc for this.
+ enabling this option will generate a smaller kernel there. Hopefully
+ this algorithm is so good that allowing gcc 4.x and above to make the
+ decision will become the default in the future. Until then this option
+ is there to test gcc for this.
If unsure, say N.
diff --git a/arch/x86/boot/tty.c b/arch/x86/boot/tty.c
index 0be77b39328a..7e8e8b25f5f6 100644
--- a/arch/x86/boot/tty.c
+++ b/arch/x86/boot/tty.c
@@ -74,7 +74,7 @@ static int kbd_pending(void)
{
u8 pending;
asm volatile("int $0x16; setnz %0"
- : "=rm" (pending)
+ : "=qm" (pending)
: "a" (0x0100));
return pending;
}
diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c
index b939cb476dec..5d4742ed4aa2 100644
--- a/arch/x86/boot/video-vga.c
+++ b/arch/x86/boot/video-vga.c
@@ -34,7 +34,7 @@ static struct mode_info cga_modes[] = {
{ VIDEO_80x25, 80, 25, 0 },
};
-__videocard video_vga;
+static __videocard video_vga;
/* Set basic 80x25 mode */
static u8 vga_set_basic_mode(void)
@@ -259,7 +259,7 @@ static int vga_probe(void)
return mode_count[adapter];
}
-__videocard video_vga = {
+static __videocard video_vga = {
.card_name = "VGA",
.probe = vga_probe,
.set_mode = vga_set_mode,
diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c
index 83598b23093a..3bef2c1febe9 100644
--- a/arch/x86/boot/video.c
+++ b/arch/x86/boot/video.c
@@ -226,7 +226,7 @@ static unsigned int mode_menu(void)
#ifdef CONFIG_VIDEO_RETAIN
/* Save screen content to the heap */
-struct saved_screen {
+static struct saved_screen {
int x, y;
int curx, cury;
u16 *data;
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 4bc02b23674b..9ddf2fa0129d 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -197,23 +197,28 @@ struct rt_sigframe
/* fp state follows here */
};
-#define COPY(x) { \
- unsigned int reg; \
- err |= __get_user(reg, &sc->x); \
- regs->x = reg; \
+#define COPY(x) { \
+ err |= __get_user(regs->x, &sc->x); \
}
-#define RELOAD_SEG(seg,mask) \
- { unsigned int cur; \
- unsigned short pre; \
- err |= __get_user(pre, &sc->seg); \
- savesegment(seg, cur); \
- pre |= mask; \
- if (pre != cur) loadsegment(seg, pre); }
+#define COPY_SEG_CPL3(seg) { \
+ unsigned short tmp; \
+ err |= __get_user(tmp, &sc->seg); \
+ regs->seg = tmp | 3; \
+}
+
+#define RELOAD_SEG(seg) { \
+ unsigned int cur, pre; \
+ err |= __get_user(pre, &sc->seg); \
+ savesegment(seg, cur); \
+ pre |= 3; \
+ if (pre != cur) \
+ loadsegment(seg, pre); \
+}
static int ia32_restore_sigcontext(struct pt_regs *regs,
struct sigcontext_ia32 __user *sc,
- unsigned int *peax)
+ unsigned int *pax)
{
unsigned int tmpflags, gs, oldgs, err = 0;
void __user *buf;
@@ -240,18 +245,16 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
if (gs != oldgs)
load_gs_index(gs);
- RELOAD_SEG(fs, 3);
- RELOAD_SEG(ds, 3);
- RELOAD_SEG(es, 3);
+ RELOAD_SEG(fs);
+ RELOAD_SEG(ds);
+ RELOAD_SEG(es);
COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
COPY(dx); COPY(cx); COPY(ip);
/* Don't touch extended registers */
- err |= __get_user(regs->cs, &sc->cs);
- regs->cs |= 3;
- err |= __get_user(regs->ss, &sc->ss);
- regs->ss |= 3;
+ COPY_SEG_CPL3(cs);
+ COPY_SEG_CPL3(ss);
err |= __get_user(tmpflags, &sc->flags);
regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
@@ -262,9 +265,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
buf = compat_ptr(tmp);
err |= restore_i387_xstate_ia32(buf);
- err |= __get_user(tmp, &sc->ax);
- *peax = tmp;
-
+ err |= __get_user(*pax, &sc->ax);
return err;
}
@@ -359,20 +360,15 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc,
err |= __put_user(regs->dx, &sc->dx);
err |= __put_user(regs->cx, &sc->cx);
err |= __put_user(regs->ax, &sc->ax);
- err |= __put_user(regs->cs, &sc->cs);
- err |= __put_user(regs->ss, &sc->ss);
err |= __put_user(current->thread.trap_no, &sc->trapno);
err |= __put_user(current->thread.error_code, &sc->err);
err |= __put_user(regs->ip, &sc->ip);
+ err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs);
err |= __put_user(regs->flags, &sc->flags);
err |= __put_user(regs->sp, &sc->sp_at_signal);
+ err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss);
- tmp = save_i387_xstate_ia32(fpstate);
- if (tmp < 0)
- err = -EFAULT;
- else
- err |= __put_user(ptr_to_compat(tmp ? fpstate : NULL),
- &sc->fpstate);
+ err |= __put_user(ptr_to_compat(fpstate), &sc->fpstate);
/* non-iBCS2 extensions.. */
err |= __put_user(mask, &sc->oldmask);
@@ -408,6 +404,8 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
if (used_math()) {
sp = sp - sig_xstate_ia32_size;
*fpstate = (struct _fpstate_ia32 *) sp;
+ if (save_i387_xstate_ia32(*fpstate) < 0)
+ return (void __user *) -1L;
}
sp -= frame_size;
@@ -430,12 +428,10 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
u16 poplmovl;
u32 val;
u16 int80;
- u16 pad;
} __attribute__((packed)) code = {
0xb858, /* popl %eax ; movl $...,%eax */
__NR_ia32_sigreturn,
0x80cd, /* int $0x80 */
- 0,
};
frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
@@ -511,8 +507,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
u8 movl;
u32 val;
u16 int80;
- u16 pad;
- u8 pad2;
+ u8 pad;
} __attribute__((packed)) code = {
0xb8,
__NR_ia32_rt_sigreturn,
@@ -572,11 +567,6 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
regs->dx = (unsigned long) &frame->info;
regs->cx = (unsigned long) &frame->uc;
- /* Make -mregparm=3 work */
- regs->ax = sig;
- regs->dx = (unsigned long) &frame->info;
- regs->cx = (unsigned long) &frame->uc;
-
loadsegment(ds, __USER32_DS);
loadsegment(es, __USER32_DS);
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 3b1510b4fc57..25caa0738af5 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -193,6 +193,7 @@ extern u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask);
static inline void lapic_shutdown(void) { }
#define local_apic_timer_c2_ok 1
static inline void init_apic_mappings(void) { }
+static inline void disable_local_APIC(void) { }
#endif /* !CONFIG_X86_LOCAL_APIC */
diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h
index 1d9543b9d358..ce547f24a1cd 100644
--- a/arch/x86/include/asm/bigsmp/apic.h
+++ b/arch/x86/include/asm/bigsmp/apic.h
@@ -24,8 +24,6 @@ static inline cpumask_t target_cpus(void)
#define INT_DELIVERY_MODE (dest_Fixed)
#define INT_DEST_MODE (0) /* phys delivery to target proc */
#define NO_BALANCE_IRQ (0)
-#define WAKE_SECONDARY_VIA_INIT
-
static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid)
{
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 360010322711..9fa9dcdf344b 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -168,7 +168,15 @@ static inline void __change_bit(int nr, volatile unsigned long *addr)
*/
static inline void change_bit(int nr, volatile unsigned long *addr)
{
- asm volatile(LOCK_PREFIX "btc %1,%0" : ADDR : "Ir" (nr));
+ if (IS_IMMEDIATE(nr)) {
+ asm volatile(LOCK_PREFIX "xorb %1,%0"
+ : CONST_MASK_ADDR(nr, addr)
+ : "iq" ((u8)CONST_MASK(nr)));
+ } else {
+ asm volatile(LOCK_PREFIX "btc %1,%0"
+ : BITOP_ADDR(addr)
+ : "Ir" (nr));
+ }
}
/**
diff --git a/arch/x86/include/asm/byteorder.h b/arch/x86/include/asm/byteorder.h
index e02ae2d89acf..f110ad417df3 100644
--- a/arch/x86/include/asm/byteorder.h
+++ b/arch/x86/include/asm/byteorder.h
@@ -4,26 +4,33 @@
#include <asm/types.h>
#include <linux/compiler.h>
-#ifdef __GNUC__
+#define __LITTLE_ENDIAN
-#ifdef __i386__
-
-static inline __attribute_const__ __u32 ___arch__swab32(__u32 x)
+static inline __attribute_const__ __u32 __arch_swab32(__u32 val)
{
-#ifdef CONFIG_X86_BSWAP
- asm("bswap %0" : "=r" (x) : "0" (x));
-#else
+#ifdef __i386__
+# ifdef CONFIG_X86_BSWAP
+ asm("bswap %0" : "=r" (val) : "0" (val));
+# else
asm("xchgb %b0,%h0\n\t" /* swap lower bytes */
"rorl $16,%0\n\t" /* swap words */
"xchgb %b0,%h0" /* swap higher bytes */
- : "=q" (x)
- : "0" (x));
+ : "=q" (val)
+ : "0" (val));
+# endif
+
+#else /* __i386__ */
+ asm("bswapl %0"
+ : "=r" (val)
+ : "0" (val));
#endif
- return x;
+ return val;
}
+#define __arch_swab32 __arch_swab32
-static inline __attribute_const__ __u64 ___arch__swab64(__u64 val)
+static inline __attribute_const__ __u64 __arch_swab64(__u64 val)
{
+#ifdef __i386__
union {
struct {
__u32 a;
@@ -32,50 +39,27 @@ static inline __attribute_const__ __u64 ___arch__swab64(__u64 val)
__u64 u;
} v;
v.u = val;
-#ifdef CONFIG_X86_BSWAP
+# ifdef CONFIG_X86_BSWAP
asm("bswapl %0 ; bswapl %1 ; xchgl %0,%1"
: "=r" (v.s.a), "=r" (v.s.b)
: "0" (v.s.a), "1" (v.s.b));
-#else
- v.s.a = ___arch__swab32(v.s.a);
- v.s.b = ___arch__swab32(v.s.b);
+# else
+ v.s.a = __arch_swab32(v.s.a);
+ v.s.b = __arch_swab32(v.s.b);
asm("xchgl %0,%1"
: "=r" (v.s.a), "=r" (v.s.b)
: "0" (v.s.a), "1" (v.s.b));
-#endif
+# endif
return v.u;
-}
-
#else /* __i386__ */
-
-static inline __attribute_const__ __u64 ___arch__swab64(__u64 x)
-{
asm("bswapq %0"
- : "=r" (x)
- : "0" (x));
- return x;
-}
-
-static inline __attribute_const__ __u32 ___arch__swab32(__u32 x)
-{
- asm("bswapl %0"
- : "=r" (x)
- : "0" (x));
- return x;
-}
-
+ : "=r" (val)
+ : "0" (val));
+ return val;
#endif
+}
+#define __arch_swab64 __arch_swab64
-/* Do not define swab16. Gcc is smart enough to recognize "C" version and
- convert it into rotation or exhange. */
-
-#define __arch__swab64(x) ___arch__swab64(x)
-#define __arch__swab32(x) ___arch__swab32(x)
-
-#define __BYTEORDER_HAS_U64__
-
-#endif /* __GNUC__ */
-
-#include <linux/byteorder/little_endian.h>
+#include <linux/byteorder.h>
#endif /* _ASM_X86_BYTEORDER_H */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index cfdf8c2c5c31..5bce8ed02b44 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -80,7 +80,6 @@
#define X86_FEATURE_UP (3*32+ 9) /* smp kernel running on up */
#define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* "" FXSAVE leaks FOP/FIP/FOP */
#define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */
-#define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */
#define X86_FEATURE_PEBS (3*32+12) /* Precise-Event Based Sampling */
#define X86_FEATURE_BTS (3*32+13) /* Branch Trace Store */
#define X86_FEATURE_SYSCALL32 (3*32+14) /* "" syscall in ia32 userspace */
@@ -92,6 +91,7 @@
#define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */
#define X86_FEATURE_AMDC1E (3*32+21) /* AMD C1E detected */
#define X86_FEATURE_XTOPOLOGY (3*32+22) /* cpu topology enum extensions */
+#define X86_FEATURE_TSC_RELIABLE (3*32+23) /* TSC is known to be reliable */
/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
#define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */
@@ -117,6 +117,7 @@
#define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
#define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */
#define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */
+#define X86_FEATURE_HYPERVISOR (4*32+31) /* Running on a hypervisor */
/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
#define X86_FEATURE_XSTORE (5*32+ 2) /* "rng" RNG present (xstore) */
@@ -237,6 +238,7 @@ extern const char * const x86_power_flags[32];
#define cpu_has_xmm4_2 boot_cpu_has(X86_FEATURE_XMM4_2)
#define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC)
#define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE)
+#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR)
#if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64)
# define cpu_has_invlpg 1
diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h
index 72c5a190bf48..a95008457ea4 100644
--- a/arch/x86/include/asm/ds.h
+++ b/arch/x86/include/asm/ds.h
@@ -23,12 +23,13 @@
#ifndef _ASM_X86_DS_H
#define _ASM_X86_DS_H
-#ifdef CONFIG_X86_DS
#include <linux/types.h>
#include <linux/init.h>
+#ifdef CONFIG_X86_DS
+
struct task_struct;
/*
@@ -232,7 +233,8 @@ extern void ds_free(struct ds_context *context);
#else /* CONFIG_X86_DS */
-#define ds_init_intel(config) do {} while (0)
+struct cpuinfo_x86;
+static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {}
#endif /* CONFIG_X86_DS */
#endif /* _ASM_X86_DS_H */
diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h
index 804b6e6be929..3afc5e87cfdd 100644
--- a/arch/x86/include/asm/dwarf2.h
+++ b/arch/x86/include/asm/dwarf2.h
@@ -6,56 +6,91 @@
#endif
/*
- Macros for dwarf2 CFI unwind table entries.
- See "as.info" for details on these pseudo ops. Unfortunately
- they are only supported in very new binutils, so define them
- away for older version.
+ * Macros for dwarf2 CFI unwind table entries.
+ * See "as.info" for details on these pseudo ops. Unfortunately
+ * they are only supported in very new binutils, so define them
+ * away for older version.
*/
#ifdef CONFIG_AS_CFI
-#define CFI_STARTPROC .cfi_startproc
-#define CFI_ENDPROC .cfi_endproc
-#define CFI_DEF_CFA .cfi_def_cfa
-#define CFI_DEF_CFA_REGISTER .cfi_def_cfa_register
-#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset
-#define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset
-#define CFI_OFFSET .cfi_offset
-#define CFI_REL_OFFSET .cfi_rel_offset
-#define CFI_REGISTER .cfi_register
-#define CFI_RESTORE .cfi_restore
-#define CFI_REMEMBER_STATE .cfi_remember_state
-#define CFI_RESTORE_STATE .cfi_restore_state
-#define CFI_UNDEFINED .cfi_undefined
+#define CFI_STARTPROC .cfi_startproc
+#define CFI_ENDPROC .cfi_endproc
+#define CFI_DEF_CFA .cfi_def_cfa
+#define CFI_DEF_CFA_REGISTER .cfi_def_cfa_register
+#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset
+#define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset
+#define CFI_OFFSET .cfi_offset
+#define CFI_REL_OFFSET .cfi_rel_offset
+#define CFI_REGISTER .cfi_register
+#define CFI_RESTORE .cfi_restore
+#define CFI_REMEMBER_STATE .cfi_remember_state
+#define CFI_RESTORE_STATE .cfi_restore_state
+#define CFI_UNDEFINED .cfi_undefined
#ifdef CONFIG_AS_CFI_SIGNAL_FRAME
-#define CFI_SIGNAL_FRAME .cfi_signal_frame
+#define CFI_SIGNAL_FRAME .cfi_signal_frame
#else
#define CFI_SIGNAL_FRAME
#endif
#else
-/* Due to the structure of pre-exisiting code, don't use assembler line
- comment character # to ignore the arguments. Instead, use a dummy macro. */
+/*
+ * Due to the structure of pre-exisiting code, don't use assembler line
+ * comment character # to ignore the arguments. Instead, use a dummy macro.
+ */
.macro cfi_ignore a=0, b=0, c=0, d=0
.endm
-#define CFI_STARTPROC cfi_ignore
-#define CFI_ENDPROC cfi_ignore
-#define CFI_DEF_CFA cfi_ignore
+#define CFI_STARTPROC cfi_ignore
+#define CFI_ENDPROC cfi_ignore
+#define CFI_DEF_CFA cfi_ignore
#define CFI_DEF_CFA_REGISTER cfi_ignore
#define CFI_DEF_CFA_OFFSET cfi_ignore
#define CFI_ADJUST_CFA_OFFSET cfi_ignore
-#define CFI_OFFSET cfi_ignore
-#define CFI_REL_OFFSET cfi_ignore
-#define CFI_REGISTER cfi_ignore
-#define CFI_RESTORE cfi_ignore
-#define CFI_REMEMBER_STATE cfi_ignore
-#define CFI_RESTORE_STATE cfi_ignore
-#define CFI_UNDEFINED cfi_ignore
-#define CFI_SIGNAL_FRAME cfi_ignore
+#define CFI_OFFSET cfi_ignore
+#define CFI_REL_OFFSET cfi_ignore
+#define CFI_REGISTER cfi_ignore
+#define CFI_RESTORE cfi_ignore
+#define CFI_REMEMBER_STATE cfi_ignore
+#define CFI_RESTORE_STATE cfi_ignore
+#define CFI_UNDEFINED cfi_ignore
+#define CFI_SIGNAL_FRAME cfi_ignore
#endif
+/*
+ * An attempt to make CFI annotations more or less
+ * correct and shorter. It is implied that you know
+ * what you're doing if you use them.
+ */
+#ifdef __ASSEMBLY__
+#ifdef CONFIG_X86_64
+ .macro pushq_cfi reg
+ pushq \reg
+ CFI_ADJUST_CFA_OFFSET 8
+ .endm
+
+ .macro popq_cfi reg
+ popq \reg
+ CFI_ADJUST_CFA_OFFSET -8
+ .endm
+
+ .macro movq_cfi reg offset=0
+ movq %\reg, \offset(%rsp)
+ CFI_REL_OFFSET \reg, \offset
+ .endm
+
+ .macro movq_cfi_restore offset reg
+ movq \offset(%rsp), %\reg
+ CFI_RESTORE \reg
+ .endm
+#else /*!CONFIG_X86_64*/
+
+ /* 32bit defenitions are missed yet */
+
+#endif /*!CONFIG_X86_64*/
+#endif /*__ASSEMBLY__*/
+
#endif /* _ASM_X86_DWARF2_H */
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 40ca1bea7916..f51a3ddde01a 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -325,7 +325,7 @@ struct linux_binprm;
#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
extern int arch_setup_additional_pages(struct linux_binprm *bprm,
- int executable_stack);
+ int uses_interp);
extern int syscall32_setup_pages(struct linux_binprm *, int exstack);
#define compat_arch_setup_additional_pages syscall32_setup_pages
diff --git a/arch/x86/include/asm/emergency-restart.h b/arch/x86/include/asm/emergency-restart.h
index 94826cf87455..cc70c1c78ca4 100644
--- a/arch/x86/include/asm/emergency-restart.h
+++ b/arch/x86/include/asm/emergency-restart.h
@@ -8,7 +8,9 @@ enum reboot_type {
BOOT_BIOS = 'b',
#endif
BOOT_ACPI = 'a',
- BOOT_EFI = 'e'
+ BOOT_EFI = 'e',
+ BOOT_CF9 = 'p',
+ BOOT_CF9_COND = 'q',
};
extern enum reboot_type reboot_type;
diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h
index 380f0b4f17ed..e24ef876915f 100644
--- a/arch/x86/include/asm/es7000/apic.h
+++ b/arch/x86/include/asm/es7000/apic.h
@@ -9,31 +9,27 @@ static inline int apic_id_registered(void)
return (1);
}
-static inline cpumask_t target_cpus(void)
+static inline cpumask_t target_cpus_cluster(void)
{
-#if defined CONFIG_ES7000_CLUSTERED_APIC
return CPU_MASK_ALL;
-#else
+}
+
+static inline cpumask_t target_cpus(void)
+{
return cpumask_of_cpu(smp_processor_id());
-#endif
}
-#if defined CONFIG_ES7000_CLUSTERED_APIC
-#define APIC_DFR_VALUE (APIC_DFR_CLUSTER)
-#define INT_DELIVERY_MODE (dest_LowestPrio)
-#define INT_DEST_MODE (1) /* logical delivery broadcast to all procs */
-#define NO_BALANCE_IRQ (1)
-#undef WAKE_SECONDARY_VIA_INIT
-#define WAKE_SECONDARY_VIA_MIP
-#else
+#define APIC_DFR_VALUE_CLUSTER (APIC_DFR_CLUSTER)
+#define INT_DELIVERY_MODE_CLUSTER (dest_LowestPrio)
+#define INT_DEST_MODE_CLUSTER (1) /* logical delivery broadcast to all procs */
+#define NO_BALANCE_IRQ_CLUSTER (1)
+
#define APIC_DFR_VALUE (APIC_DFR_FLAT)
#define INT_DELIVERY_MODE (dest_Fixed)
#define INT_DEST_MODE (0) /* phys delivery to target procs */
#define NO_BALANCE_IRQ (0)
#undef APIC_DEST_LOGICAL
#define APIC_DEST_LOGICAL 0x0
-#define WAKE_SECONDARY_VIA_INIT
-#endif
static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid)
{
@@ -60,6 +56,16 @@ static inline unsigned long calculate_ldr(int cpu)
* an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
* document number 292116). So here it goes...
*/
+static inline void init_apic_ldr_cluster(void)
+{
+ unsigned long val;
+ int cpu = smp_processor_id();
+
+ apic_write(APIC_DFR, APIC_DFR_VALUE_CLUSTER);
+ val = calculate_ldr(cpu);
+ apic_write(APIC_LDR, val);
+}
+
static inline void init_apic_ldr(void)
{
unsigned long val;
@@ -70,10 +76,6 @@ static inline void init_apic_ldr(void)
apic_write(APIC_LDR, val);
}
-#ifndef CONFIG_X86_GENERICARCH
-extern void enable_apic_mode(void);
-#endif
-
extern int apic_version [MAX_APICS];
static inline void setup_apic_routing(void)
{
@@ -144,7 +146,7 @@ static inline int check_phys_apicid_present(int cpu_physical_apicid)
return (1);
}
-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
+static inline unsigned int cpu_mask_to_apicid_cluster(cpumask_t cpumask)
{
int num_bits_set;
int cpus_found = 0;
@@ -154,11 +156,7 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
num_bits_set = cpus_weight(cpumask);
/* Return id to all */
if (num_bits_set == NR_CPUS)
-#if defined CONFIG_ES7000_CLUSTERED_APIC
return 0xFF;
-#else
- return cpu_to_logical_apicid(0);
-#endif
/*
* The cpus in the mask must all be on the apic cluster. If are not
* on the same apicid cluster return default value of TARGET_CPUS.
@@ -171,11 +169,40 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
if (apicid_cluster(apicid) !=
apicid_cluster(new_apicid)){
printk ("%s: Not a valid mask!\n", __func__);
-#if defined CONFIG_ES7000_CLUSTERED_APIC
return 0xFF;
-#else
+ }
+ apicid = new_apicid;
+ cpus_found++;
+ }
+ cpu++;
+ }
+ return apicid;
+}
+
+static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
+{
+ int num_bits_set;
+ int cpus_found = 0;
+ int cpu;
+ int apicid;
+
+ num_bits_set = cpus_weight(cpumask);
+ /* Return id to all */
+ if (num_bits_set == NR_CPUS)
+ return cpu_to_logical_apicid(0);
+ /*
+ * The cpus in the mask must all be on the apic cluster. If are not
+ * on the same apicid cluster return default value of TARGET_CPUS.
+ */
+ cpu = first_cpu(cpumask);
+ apicid = cpu_to_logical_apicid(cpu);
+ while (cpus_found < num_bits_set) {
+ if (cpu_isset(cpu, cpumask)) {
+ int new_apicid = cpu_to_logical_apicid(cpu);
+ if (apicid_cluster(apicid) !=
+ apicid_cluster(new_apicid)){
+ printk ("%s: Not a valid mask!\n", __func__);
return cpu_to_logical_apicid(0);
-#endif
}
apicid = new_apicid;
cpus_found++;
diff --git a/arch/x86/include/asm/es7000/wakecpu.h b/arch/x86/include/asm/es7000/wakecpu.h
index 398493461913..78f0daaee436 100644
--- a/arch/x86/include/asm/es7000/wakecpu.h
+++ b/arch/x86/include/asm/es7000/wakecpu.h
@@ -1,36 +1,12 @@
#ifndef __ASM_ES7000_WAKECPU_H
#define __ASM_ES7000_WAKECPU_H
-/*
- * This file copes with machines that wakeup secondary CPUs by the
- * INIT, INIT, STARTUP sequence.
- */
-
-#ifdef CONFIG_ES7000_CLUSTERED_APIC
-#define WAKE_SECONDARY_VIA_MIP
-#else
-#define WAKE_SECONDARY_VIA_INIT
-#endif
-
-#ifdef WAKE_SECONDARY_VIA_MIP
-extern int es7000_start_cpu(int cpu, unsigned long eip);
-static inline int
-wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
-{
- int boot_error = 0;
- boot_error = es7000_start_cpu(phys_apicid, start_eip);
- return boot_error;
-}
-#endif
-
-#define TRAMPOLINE_LOW phys_to_virt(0x467)
-#define TRAMPOLINE_HIGH phys_to_virt(0x469)
-
-#define boot_cpu_apicid boot_cpu_physical_apicid
+#define TRAMPOLINE_PHYS_LOW 0x467
+#define TRAMPOLINE_PHYS_HIGH 0x469
static inline void wait_for_init_deassert(atomic_t *deassert)
{
-#ifdef WAKE_SECONDARY_VIA_INIT
+#ifndef CONFIG_ES7000_CLUSTERED_APIC
while (!atomic_read(deassert))
cpu_relax();
#endif
@@ -50,9 +26,12 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low)
{
}
-#define inquire_remote_apic(apicid) do { \
- if (apic_verbosity >= APIC_DEBUG) \
- __inquire_remote_apic(apicid); \
- } while (0)
+extern void __inquire_remote_apic(int apicid);
+
+static inline void inquire_remote_apic(int apicid)
+{
+ if (apic_verbosity >= APIC_DEBUG)
+ __inquire_remote_apic(apicid);
+}
#endif /* __ASM_MACH_WAKECPU_H */
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 9e8bc29b8b17..754a3e082f94 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -17,8 +17,40 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
*/
return addr - 1;
}
-#endif
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+struct dyn_arch_ftrace {
+ /* No extra data needed for x86 */
+};
+
+#endif /* CONFIG_DYNAMIC_FTRACE */
+#endif /* __ASSEMBLY__ */
#endif /* CONFIG_FUNCTION_TRACER */
+#ifdef CONFIG_FUNCTION_RET_TRACER
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Stack of return addresses for functions
+ * of a thread.
+ * Used in struct thread_info
+ */
+struct ftrace_ret_stack {
+ unsigned long ret;
+ unsigned long func;
+ unsigned long long calltime;
+};
+
+/*
+ * Primary handler of a function return.
+ * It relays on ftrace_return_to_handler.
+ * Defined in entry32.S
+ */
+extern void return_to_handler(void);
+
+#endif /* __ASSEMBLY__ */
+#endif /* CONFIG_FUNCTION_RET_TRACER */
+
#endif /* _ASM_X86_FTRACE_H */
diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h
index 5cbd4fcc06fd..0ac17d33a8c7 100644
--- a/arch/x86/include/asm/genapic_32.h
+++ b/arch/x86/include/asm/genapic_32.h
@@ -2,6 +2,7 @@
#define _ASM_X86_GENAPIC_32_H
#include <asm/mpspec.h>
+#include <asm/atomic.h>
/*
* Generic APIC driver interface.
@@ -65,6 +66,14 @@ struct genapic {
void (*send_IPI_allbutself)(int vector);
void (*send_IPI_all)(int vector);
#endif
+ int (*wakeup_cpu)(int apicid, unsigned long start_eip);
+ int trampoline_phys_low;
+ int trampoline_phys_high;
+ void (*wait_for_init_deassert)(atomic_t *deassert);
+ void (*smp_callin_clear_local_apic)(void);
+ void (*store_NMI_vector)(unsigned short *high, unsigned short *low);
+ void (*restore_NMI_vector)(unsigned short *high, unsigned short *low);
+ void (*inquire_remote_apic)(int apicid);
};
#define APICFUNC(x) .x = x,
@@ -105,16 +114,24 @@ struct genapic {
APICFUNC(get_apic_id) \
.apic_id_mask = APIC_ID_MASK, \
APICFUNC(cpu_mask_to_apicid) \
- APICFUNC(vector_allocation_domain) \
+ APICFUNC(vector_allocation_domain) \
APICFUNC(acpi_madt_oem_check) \
IPIFUNC(send_IPI_mask) \
IPIFUNC(send_IPI_allbutself) \
IPIFUNC(send_IPI_all) \
APICFUNC(enable_apic_mode) \
APICFUNC(phys_pkg_id) \
+ .trampoline_phys_low = TRAMPOLINE_PHYS_LOW, \
+ .trampoline_phys_high = TRAMPOLINE_PHYS_HIGH, \
+ APICFUNC(wait_for_init_deassert) \
+ APICFUNC(smp_callin_clear_local_apic) \
+ APICFUNC(store_NMI_vector) \
+ APICFUNC(restore_NMI_vector) \
+ APICFUNC(inquire_remote_apic) \
}
extern struct genapic *genapic;
+extern void es7000_update_genapic_to_cluster(void);
enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
#define get_uv_system_type() UV_NONE
diff --git a/arch/x86/include/asm/genapic_64.h b/arch/x86/include/asm/genapic_64.h
index 13c4e96199ea..2cae011668b7 100644
--- a/arch/x86/include/asm/genapic_64.h
+++ b/arch/x86/include/asm/genapic_64.h
@@ -32,6 +32,8 @@ struct genapic {
unsigned int (*get_apic_id)(unsigned long x);
unsigned long (*set_apic_id)(unsigned int id);
unsigned long apic_id_mask;
+ /* wakeup_secondary_cpu */
+ int (*wakeup_cpu)(int apicid, unsigned long start_eip);
};
extern struct genapic *genapic;
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index b97aecb0b61d..8de644b6b959 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -109,9 +109,7 @@ extern asmlinkage void smp_invalidate_interrupt(struct pt_regs *);
#endif
#endif
-#ifdef CONFIG_X86_32
-extern void (*const interrupt[NR_VECTORS])(void);
-#endif
+extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void);
typedef int vector_irq_t[NR_VECTORS];
DECLARE_PER_CPU(vector_irq_t, vector_irq);
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
new file mode 100644
index 000000000000..369f5c5d09a1
--- /dev/null
+++ b/arch/x86/include/asm/hypervisor.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2008, VMware, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+#ifndef ASM_X86__HYPERVISOR_H
+#define ASM_X86__HYPERVISOR_H
+
+extern unsigned long get_hypervisor_tsc_freq(void);
+extern void init_hypervisor(struct cpuinfo_x86 *c);
+
+#endif
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 6afd9933a7dd..e475e009ae5d 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -156,11 +156,21 @@ extern int sis_apic_bug;
/* 1 if "noapic" boot option passed */
extern int skip_ioapic_setup;
+/* 1 if "noapic" boot option passed */
+extern int noioapicquirk;
+
+/* -1 if "noapic" boot option passed */
+extern int noioapicreroute;
+
/* 1 if the timer IRQ uses the '8259A Virtual Wire' mode */
extern int timer_through_8259;
static inline void disable_ioapic_setup(void)
{
+#ifdef CONFIG_PCI
+ noioapicquirk = 1;
+ noioapicreroute = -1;
+#endif
skip_ioapic_setup = 1;
}
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index bae0eda95486..28e409fc73f3 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -31,10 +31,6 @@ static inline int irq_canonicalize(int irq)
# endif
#endif
-#ifdef CONFIG_IRQBALANCE
-extern int irqbalance_disable(char *str);
-#endif
-
#ifdef CONFIG_HOTPLUG_CPU
#include <linux/cpumask.h>
extern void fixup_irqs(cpumask_t map);
diff --git a/arch/x86/include/asm/irq_regs_32.h b/arch/x86/include/asm/irq_regs_32.h
index af2f02d27fc7..86afd7473457 100644
--- a/arch/x86/include/asm/irq_regs_32.h
+++ b/arch/x86/include/asm/irq_regs_32.h
@@ -9,6 +9,8 @@
#include <asm/percpu.h>
+#define ARCH_HAS_OWN_IRQ_REGS
+
DECLARE_PER_CPU(struct pt_regs *, irq_regs);
static inline struct pt_regs *get_irq_regs(void)
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index a1f22771a15a..c61d8b2ab8b9 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -5,21 +5,8 @@
# define PA_CONTROL_PAGE 0
# define VA_CONTROL_PAGE 1
# define PA_PGD 2
-# define VA_PGD 3
-# define PA_PTE_0 4
-# define VA_PTE_0 5
-# define PA_PTE_1 6
-# define VA_PTE_1 7
-# define PA_SWAP_PAGE 8
-# ifdef CONFIG_X86_PAE
-# define PA_PMD_0 9
-# define VA_PMD_0 10
-# define PA_PMD_1 11
-# define VA_PMD_1 12
-# define PAGES_NR 13
-# else
-# define PAGES_NR 9
-# endif
+# define PA_SWAP_PAGE 3
+# define PAGES_NR 4
#else
# define PA_CONTROL_PAGE 0
# define VA_CONTROL_PAGE 1
@@ -170,6 +157,20 @@ relocate_kernel(unsigned long indirection_page,
unsigned long start_address) ATTRIB_NORET;
#endif
+#ifdef CONFIG_X86_32
+#define ARCH_HAS_KIMAGE_ARCH
+
+struct kimage_arch {
+ pgd_t *pgd;
+#ifdef CONFIG_X86_PAE
+ pmd_t *pmd0;
+ pmd_t *pmd1;
+#endif
+ pte_t *pte0;
+ pte_t *pte1;
+};
+#endif
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_KEXEC_H */
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index f61ee8f937e4..5d98d0b68ffc 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -57,5 +57,65 @@
#define __ALIGN_STR ".align 16,0x90"
#endif
+/*
+ * to check ENTRY_X86/END_X86 and
+ * KPROBE_ENTRY_X86/KPROBE_END_X86
+ * unbalanced-missed-mixed appearance
+ */
+#define __set_entry_x86 .set ENTRY_X86_IN, 0
+#define __unset_entry_x86 .set ENTRY_X86_IN, 1
+#define __set_kprobe_x86 .set KPROBE_X86_IN, 0
+#define __unset_kprobe_x86 .set KPROBE_X86_IN, 1
+
+#define __macro_err_x86 .error "ENTRY_X86/KPROBE_X86 unbalanced,missed,mixed"
+
+#define __check_entry_x86 \
+ .ifdef ENTRY_X86_IN; \
+ .ifeq ENTRY_X86_IN; \
+ __macro_err_x86; \
+ .abort; \
+ .endif; \
+ .endif
+
+#define __check_kprobe_x86 \
+ .ifdef KPROBE_X86_IN; \
+ .ifeq KPROBE_X86_IN; \
+ __macro_err_x86; \
+ .abort; \
+ .endif; \
+ .endif
+
+#define __check_entry_kprobe_x86 \
+ __check_entry_x86; \
+ __check_kprobe_x86
+
+#define ENTRY_KPROBE_FINAL_X86 __check_entry_kprobe_x86
+
+#define ENTRY_X86(name) \
+ __check_entry_kprobe_x86; \
+ __set_entry_x86; \
+ .globl name; \
+ __ALIGN; \
+ name:
+
+#define END_X86(name) \
+ __unset_entry_x86; \
+ __check_entry_kprobe_x86; \
+ .size name, .-name
+
+#define KPROBE_ENTRY_X86(name) \
+ __check_entry_kprobe_x86; \
+ __set_kprobe_x86; \
+ .pushsection .kprobes.text, "ax"; \
+ .globl name; \
+ __ALIGN; \
+ name:
+
+#define KPROBE_END_X86(name) \
+ __unset_kprobe_x86; \
+ __check_entry_kprobe_x86; \
+ .size name, .-name; \
+ .popsection
+
#endif /* _ASM_X86_LINKAGE_H */
diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h
index ff3a6c236c00..6cb3a467e067 100644
--- a/arch/x86/include/asm/mach-default/mach_apic.h
+++ b/arch/x86/include/asm/mach-default/mach_apic.h
@@ -32,11 +32,13 @@ static inline cpumask_t target_cpus(void)
#define vector_allocation_domain (genapic->vector_allocation_domain)
#define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID)))
#define send_IPI_self (genapic->send_IPI_self)
+#define wakeup_secondary_cpu (genapic->wakeup_cpu)
extern void setup_apic_routing(void);
#else
#define INT_DELIVERY_MODE dest_LowestPrio
#define INT_DEST_MODE 1 /* logical delivery broadcast to all procs */
#define TARGET_CPUS (target_cpus())
+#define wakeup_secondary_cpu wakeup_secondary_cpu_via_init
/*
* Set up the logical destination ID.
*
diff --git a/arch/x86/include/asm/mach-default/mach_wakecpu.h b/arch/x86/include/asm/mach-default/mach_wakecpu.h
index 9d80db91e992..ceb013660146 100644
--- a/arch/x86/include/asm/mach-default/mach_wakecpu.h
+++ b/arch/x86/include/asm/mach-default/mach_wakecpu.h
@@ -1,17 +1,8 @@
#ifndef _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H
#define _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H
-/*
- * This file copes with machines that wakeup secondary CPUs by the
- * INIT, INIT, STARTUP sequence.
- */
-
-#define WAKE_SECONDARY_VIA_INIT
-
-#define TRAMPOLINE_LOW phys_to_virt(0x467)
-#define TRAMPOLINE_HIGH phys_to_virt(0x469)
-
-#define boot_cpu_apicid boot_cpu_physical_apicid
+#define TRAMPOLINE_PHYS_LOW (0x467)
+#define TRAMPOLINE_PHYS_HIGH (0x469)
static inline void wait_for_init_deassert(atomic_t *deassert)
{
@@ -33,9 +24,12 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low)
{
}
-#define inquire_remote_apic(apicid) do { \
- if (apic_verbosity >= APIC_DEBUG) \
- __inquire_remote_apic(apicid); \
- } while (0)
+extern void __inquire_remote_apic(int apicid);
+
+static inline void inquire_remote_apic(int apicid)
+{
+ if (apic_verbosity >= APIC_DEBUG)
+ __inquire_remote_apic(apicid);
+}
#endif /* _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H */
diff --git a/arch/x86/include/asm/mach-default/smpboot_hooks.h b/arch/x86/include/asm/mach-default/smpboot_hooks.h
index dbab36d64d48..23bf52103b89 100644
--- a/arch/x86/include/asm/mach-default/smpboot_hooks.h
+++ b/arch/x86/include/asm/mach-default/smpboot_hooks.h
@@ -13,9 +13,11 @@ static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
CMOS_WRITE(0xa, 0xf);
local_flush_tlb();
pr_debug("1.\n");
- *((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4;
+ *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) =
+ start_eip >> 4;
pr_debug("2.\n");
- *((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf;
+ *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) =
+ start_eip & 0xf;
pr_debug("3.\n");
}
@@ -32,7 +34,7 @@ static inline void smpboot_restore_warm_reset_vector(void)
*/
CMOS_WRITE(0, 0xf);
- *((volatile long *) phys_to_virt(0x467)) = 0;
+ *((volatile long *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0;
}
static inline void __init smpboot_setup_io_apic(void)
diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h
index 5180bd7478fb..e430f47df667 100644
--- a/arch/x86/include/asm/mach-generic/mach_apic.h
+++ b/arch/x86/include/asm/mach-generic/mach_apic.h
@@ -27,6 +27,7 @@
#define vector_allocation_domain (genapic->vector_allocation_domain)
#define enable_apic_mode (genapic->enable_apic_mode)
#define phys_pkg_id (genapic->phys_pkg_id)
+#define wakeup_secondary_cpu (genapic->wakeup_cpu)
extern void generic_bigsmp_probe(void);
diff --git a/arch/x86/include/asm/mach-generic/mach_wakecpu.h b/arch/x86/include/asm/mach-generic/mach_wakecpu.h
new file mode 100644
index 000000000000..1ab16b168c8a
--- /dev/null
+++ b/arch/x86/include/asm/mach-generic/mach_wakecpu.h
@@ -0,0 +1,12 @@
+#ifndef _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H
+#define _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H
+
+#define TRAMPOLINE_PHYS_LOW (genapic->trampoline_phys_low)
+#define TRAMPOLINE_PHYS_HIGH (genapic->trampoline_phys_high)
+#define wait_for_init_deassert (genapic->wait_for_init_deassert)
+#define smp_callin_clear_local_apic (genapic->smp_callin_clear_local_apic)
+#define store_NMI_vector (genapic->store_NMI_vector)
+#define restore_NMI_vector (genapic->restore_NMI_vector)
+#define inquire_remote_apic (genapic->inquire_remote_apic)
+
+#endif /* _ASM_X86_MACH_GENERIC_MACH_APIC_H */
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index c2a812ebde89..397efa375581 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -22,10 +22,10 @@ static inline unsigned long long native_read_tscp(unsigned int *aux)
}
/*
- * i386 calling convention returns 64-bit value in edx:eax, while
- * x86_64 returns at rax. Also, the "A" constraint does not really
- * mean rdx:rax in x86_64, so we need specialized behaviour for each
- * architecture
+ * both i386 and x86_64 returns 64-bit value in edx:eax, but gcc's "A"
+ * constraint has different meanings. For i386, "A" means exactly
+ * edx:eax, while for x86_64 it doesn't mean rdx:rax or edx:eax. Instead,
+ * it means rax *or* rdx.
*/
#ifdef CONFIG_X86_64
#define DECLARE_ARGS(val, low, high) unsigned low, high
diff --git a/arch/x86/include/asm/numaq/wakecpu.h b/arch/x86/include/asm/numaq/wakecpu.h
index c577bda5b1c5..6f499df8eddb 100644
--- a/arch/x86/include/asm/numaq/wakecpu.h
+++ b/arch/x86/include/asm/numaq/wakecpu.h
@@ -3,12 +3,8 @@
/* This file copes with machines that wakeup secondary CPUs by NMIs */
-#define WAKE_SECONDARY_VIA_NMI
-
-#define TRAMPOLINE_LOW phys_to_virt(0x8)
-#define TRAMPOLINE_HIGH phys_to_virt(0xa)
-
-#define boot_cpu_apicid boot_cpu_logical_apicid
+#define TRAMPOLINE_PHYS_LOW (0x8)
+#define TRAMPOLINE_PHYS_HIGH (0xa)
/* We don't do anything here because we use NMI's to boot instead */
static inline void wait_for_init_deassert(atomic_t *deassert)
@@ -27,17 +23,23 @@ static inline void smp_callin_clear_local_apic(void)
static inline void store_NMI_vector(unsigned short *high, unsigned short *low)
{
printk("Storing NMI vector\n");
- *high = *((volatile unsigned short *) TRAMPOLINE_HIGH);
- *low = *((volatile unsigned short *) TRAMPOLINE_LOW);
+ *high =
+ *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH));
+ *low =
+ *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW));
}
static inline void restore_NMI_vector(unsigned short *high, unsigned short *low)
{
printk("Restoring NMI vector\n");
- *((volatile unsigned short *) TRAMPOLINE_HIGH) = *high;
- *((volatile unsigned short *) TRAMPOLINE_LOW) = *low;
+ *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) =
+ *high;
+ *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) =
+ *low;
}
-#define inquire_remote_apic(apicid) {}
+static inline void inquire_remote_apic(int apicid)
+{
+}
#endif /* __ASM_NUMAQ_WAKECPU_H */
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index 875b38edf193..647781298e7e 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -19,6 +19,8 @@ struct pci_sysdata {
};
extern int pci_routeirq;
+extern int noioapicquirk;
+extern int noioapicreroute;
/* scan a bus after allocating a pci_sysdata for it */
extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops,
diff --git a/arch/x86/include/asm/pci_64.h b/arch/x86/include/asm/pci_64.h
index 5b28995d664e..d02d936840a3 100644
--- a/arch/x86/include/asm/pci_64.h
+++ b/arch/x86/include/asm/pci_64.h
@@ -34,8 +34,6 @@ extern void pci_iommu_alloc(void);
*/
#define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys)
-#if defined(CONFIG_GART_IOMMU) || defined(CONFIG_CALGARY_IOMMU)
-
#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \
dma_addr_t ADDR_NAME;
#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) \
@@ -49,18 +47,6 @@ extern void pci_iommu_alloc(void);
#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \
(((PTR)->LEN_NAME) = (VAL))
-#else
-/* No IOMMU */
-
-#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)
-#define DECLARE_PCI_UNMAP_LEN(LEN_NAME)
-#define pci_unmap_addr(PTR, ADDR_NAME) (0)
-#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) do { } while (0)
-#define pci_unmap_len(PTR, LEN_NAME) (0)
-#define pci_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0)
-
-#endif
-
#endif /* __KERNEL__ */
#endif /* _ASM_X86_PCI_64_H */
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index ece72053ba63..313b3d692595 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -134,6 +134,11 @@ do { \
: "+m" (var) \
: "ri" ((T__)val)); \
break; \
+ case 8: \
+ asm(op "q %1,"__percpu_seg"%0" \
+ : "+m" (var) \
+ : "ri" ((T__)val)); \
+ break; \
default: __bad_percpu_size(); \
} \
} while (0)
@@ -157,16 +162,84 @@ do { \
: "=r" (ret__) \
: "m" (var)); \
break; \
+ case 8: \
+ asm(op "q "__percpu_seg"%1,%0" \
+ : "=r" (ret__) \
+ : "m" (var)); \
+ break; \
default: __bad_percpu_size(); \
} \
ret__; \
})
-#define x86_read_percpu(var) percpu_from_op("mov", per_cpu__##var)
-#define x86_write_percpu(var, val) percpu_to_op("mov", per_cpu__##var, val)
-#define x86_add_percpu(var, val) percpu_to_op("add", per_cpu__##var, val)
-#define x86_sub_percpu(var, val) percpu_to_op("sub", per_cpu__##var, val)
-#define x86_or_percpu(var, val) percpu_to_op("or", per_cpu__##var, val)
+#define percpu_addr_op(op, var) \
+({ \
+ switch (sizeof(var)) { \
+ case 1: \
+ asm(op "b "__percpu_seg"%0" \
+ : : "m"(var)); \
+ break; \
+ case 2: \
+ asm(op "w "__percpu_seg"%0" \
+ : : "m"(var)); \
+ break; \
+ case 4: \
+ asm(op "l "__percpu_seg"%0" \
+ : : "m"(var)); \
+ break; \
+ case 8: \
+ asm(op "q "__percpu_seg"%0" \
+ : : "m"(var)); \
+ break; \
+ default: __bad_percpu_size(); \
+ } \
+})
+
+#define percpu_cmpxchg_op(var, old, new) \
+({ \
+ typeof(var) prev; \
+ switch (sizeof(var)) { \
+ case 1: \
+ asm("cmpxchgb %b1, "__percpu_seg"%2" \
+ : "=a"(prev) \
+ : "q"(new), "m"(var), "0"(old) \
+ : "memory"); \
+ break; \
+ case 2: \
+ asm("cmpxchgw %w1, "__percpu_seg"%2" \
+ : "=a"(prev) \
+ : "r"(new), "m"(var), "0"(old) \
+ : "memory"); \
+ break; \
+ case 4: \
+ asm("cmpxchgl %k1, "__percpu_seg"%2" \
+ : "=a"(prev) \
+ : "r"(new), "m"(var), "0"(old) \
+ : "memory"); \
+ break; \
+ case 8: \
+ asm("cmpxchgq %1, "__percpu_seg"%2" \
+ : "=a"(prev) \
+ : "r"(new), "m"(var), "0"(old) \
+ : "memory"); \
+ break; \
+ default: \
+ __bad_percpu_size(); \
+ } \
+ return prev; \
+})
+
+#define x86_read_percpu(var) percpu_from_op("mov", per_cpu_var(var))
+#define x86_write_percpu(var, val) percpu_to_op("mov", per_cpu_var(var), val)
+#define x86_add_percpu(var, val) percpu_to_op("add", per_cpu_var(var), val)
+#define x86_sub_percpu(var, val) percpu_to_op("sub", per_cpu_var(var), val)
+#define x86_inc_percpu(var) percpu_addr_op("inc", per_cpu_var(var))
+#define x86_dec_percpu(var) percpu_addr_op("dec", per_cpu_var(var))
+#define x86_or_percpu(var, val) percpu_to_op("or", per_cpu_var(var), val)
+#define x86_xchg_percpu(var, val) percpu_to_op("xchg", per_cpu_var(var), val)
+#define x86_cmpxchg_percpu(var, old, new) \
+ percpu_cmpxchg_op(per_cpu_var(var), old, new)
+
#endif /* !__ASSEMBLY__ */
#endif /* !CONFIG_X86_64 */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 5ca01e383269..a570eafa4755 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -110,6 +110,7 @@ struct cpuinfo_x86 {
/* Index into per_cpu list: */
u16 cpu_index;
#endif
+ unsigned int x86_hyper_vendor;
} __attribute__((__aligned__(SMP_CACHE_BYTES)));
#define X86_VENDOR_INTEL 0
@@ -123,6 +124,9 @@ struct cpuinfo_x86 {
#define X86_VENDOR_UNKNOWN 0xff
+#define X86_HYPER_VENDOR_NONE 0
+#define X86_HYPER_VENDOR_VMWARE 1
+
/*
* capabilities of CPUs
*/
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index d1531c8480b7..eefb0594b058 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -271,8 +271,6 @@ extern int do_get_thread_area(struct task_struct *p, int idx,
extern int do_set_thread_area(struct task_struct *p, int idx,
struct user_desc __user *info, int can_allocate);
-#define __ARCH_WANT_COMPAT_SYS_PTRACE
-
#endif /* __KERNEL__ */
#endif /* !__ASSEMBLY__ */
diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h
index df7710354f85..562d4fd31ba8 100644
--- a/arch/x86/include/asm/reboot.h
+++ b/arch/x86/include/asm/reboot.h
@@ -1,6 +1,8 @@
#ifndef _ASM_X86_REBOOT_H
#define _ASM_X86_REBOOT_H
+#include <linux/kdebug.h>
+
struct pt_regs;
struct machine_ops {
@@ -18,4 +20,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs);
void native_machine_shutdown(void);
void machine_real_restart(const unsigned char *code, int length);
+typedef void (*nmi_shootdown_cb)(int, struct die_args*);
+void nmi_shootdown_cpus(nmi_shootdown_cb callback);
+
#endif /* _ASM_X86_REBOOT_H */
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index f12d37237465..4fcd53fd5f43 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -8,6 +8,10 @@
/* Interrupt control for vSMPowered x86_64 systems */
void vsmp_init(void);
+
+void setup_bios_corruption_check(void);
+
+
#ifdef CONFIG_X86_VISWS
extern void visws_early_detect(void);
extern int is_visws_box(void);
@@ -16,6 +20,8 @@ static inline void visws_early_detect(void) { }
static inline int is_visws_box(void) { return 0; }
#endif
+extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip);
+extern int wakeup_secondary_cpu_via_init(int apicid, unsigned long start_eip);
/*
* Any setup quirks to be performed?
*/
@@ -39,6 +45,7 @@ struct x86_quirks {
void (*smp_read_mpc_oem)(struct mp_config_oemtable *oemtable,
unsigned short oemsize);
int (*setup_ioapic_ids)(void);
+ int (*update_genapic)(void);
};
extern struct x86_quirks *x86_quirks;
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index c1fe6c77a1a2..f2025b0c69d4 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -318,6 +318,8 @@ extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
void default_idle(void);
+void stop_this_cpu(void *dummy);
+
/*
* Force strict CPU ordering.
* And yes, this is required on UP too when we're talking
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index e44d379faad2..0921b4018c11 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -20,6 +20,8 @@
struct task_struct;
struct exec_domain;
#include <asm/processor.h>
+#include <asm/ftrace.h>
+#include <asm/atomic.h>
struct thread_info {
struct task_struct *task; /* main task structure */
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 9cd83a8e40d5..38ae163cc91b 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -34,8 +34,6 @@ static inline cycles_t get_cycles(void)
static __always_inline cycles_t vget_cycles(void)
{
- cycles_t cycles;
-
/*
* We only do VDSOs on TSC capable CPUs, so this shouldnt
* access boot_cpu_data (which is not VDSO-safe):
@@ -44,11 +42,7 @@ static __always_inline cycles_t vget_cycles(void)
if (!cpu_has_tsc)
return 0;
#endif
- rdtsc_barrier();
- cycles = (cycles_t)__native_read_tsc();
- rdtsc_barrier();
-
- return cycles;
+ return (cycles_t)__native_read_tsc();
}
extern void tsc_init(void);
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 35c54921b2e4..99192bb55a53 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -157,6 +157,7 @@ extern int __get_user_bad(void);
int __ret_gu; \
unsigned long __val_gu; \
__chk_user_ptr(ptr); \
+ might_fault(); \
switch (sizeof(*(ptr))) { \
case 1: \
__get_user_x(1, __ret_gu, __val_gu, ptr); \
@@ -241,6 +242,7 @@ extern void __put_user_8(void);
int __ret_pu; \
__typeof__(*(ptr)) __pu_val; \
__chk_user_ptr(ptr); \
+ might_fault(); \
__pu_val = x; \
switch (sizeof(*(ptr))) { \
case 1: \
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index d095a3aeea1b..5e06259e90e5 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -82,8 +82,8 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n)
static __always_inline unsigned long __must_check
__copy_to_user(void __user *to, const void *from, unsigned long n)
{
- might_sleep();
- return __copy_to_user_inatomic(to, from, n);
+ might_fault();
+ return __copy_to_user_inatomic(to, from, n);
}
static __always_inline unsigned long
@@ -137,7 +137,7 @@ __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n)
static __always_inline unsigned long
__copy_from_user(void *to, const void __user *from, unsigned long n)
{
- might_sleep();
+ might_fault();
if (__builtin_constant_p(n)) {
unsigned long ret;
@@ -159,7 +159,7 @@ __copy_from_user(void *to, const void __user *from, unsigned long n)
static __always_inline unsigned long __copy_from_user_nocache(void *to,
const void __user *from, unsigned long n)
{
- might_sleep();
+ might_fault();
if (__builtin_constant_p(n)) {
unsigned long ret;
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index f8cfd00db450..84210c479fca 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -29,6 +29,8 @@ static __always_inline __must_check
int __copy_from_user(void *dst, const void __user *src, unsigned size)
{
int ret = 0;
+
+ might_fault();
if (!__builtin_constant_p(size))
return copy_user_generic(dst, (__force void *)src, size);
switch (size) {
@@ -71,6 +73,8 @@ static __always_inline __must_check
int __copy_to_user(void __user *dst, const void *src, unsigned size)
{
int ret = 0;
+
+ might_fault();
if (!__builtin_constant_p(size))
return copy_user_generic((__force void *)dst, src, size);
switch (size) {
@@ -113,6 +117,8 @@ static __always_inline __must_check
int __copy_in_user(void __user *dst, const void __user *src, unsigned size)
{
int ret = 0;
+
+ might_fault();
if (!__builtin_constant_p(size))
return copy_user_generic((__force void *)dst,
(__force void *)src, size);
diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h
index d931d3b7e6f7..da1c4e8e78fc 100644
--- a/arch/x86/include/asm/uv/bios.h
+++ b/arch/x86/include/asm/uv/bios.h
@@ -32,13 +32,18 @@
enum uv_bios_cmd {
UV_BIOS_COMMON,
UV_BIOS_GET_SN_INFO,
- UV_BIOS_FREQ_BASE
+ UV_BIOS_FREQ_BASE,
+ UV_BIOS_WATCHLIST_ALLOC,
+ UV_BIOS_WATCHLIST_FREE,
+ UV_BIOS_MEMPROTECT,
+ UV_BIOS_GET_PARTITION_ADDR
};
/*
* Status values returned from a BIOS call.
*/
enum {
+ BIOS_STATUS_MORE_PASSES = 1,
BIOS_STATUS_SUCCESS = 0,
BIOS_STATUS_UNIMPLEMENTED = -ENOSYS,
BIOS_STATUS_EINVAL = -EINVAL,
@@ -71,6 +76,21 @@ union partition_info_u {
};
};
+union uv_watchlist_u {
+ u64 val;
+ struct {
+ u64 blade : 16,
+ size : 32,
+ filler : 16;
+ };
+};
+
+enum uv_memprotect {
+ UV_MEMPROT_RESTRICT_ACCESS,
+ UV_MEMPROT_ALLOW_AMO,
+ UV_MEMPROT_ALLOW_RW
+};
+
/*
* bios calls have 6 parameters
*/
@@ -80,14 +100,20 @@ extern s64 uv_bios_call_reentrant(enum uv_bios_cmd, u64, u64, u64, u64, u64);
extern s64 uv_bios_get_sn_info(int, int *, long *, long *, long *);
extern s64 uv_bios_freq_base(u64, u64 *);
+extern int uv_bios_mq_watchlist_alloc(int, void *, unsigned int,
+ unsigned long *);
+extern int uv_bios_mq_watchlist_free(int, int);
+extern s64 uv_bios_change_memprotect(u64, u64, enum uv_memprotect);
+extern s64 uv_bios_reserved_page_pa(u64, u64 *, u64 *, u64 *);
extern void uv_bios_init(void);
+extern unsigned long sn_rtc_cycles_per_second;
extern int uv_type;
extern long sn_partition_id;
-extern long uv_coherency_id;
-extern long uv_region_size;
-#define partition_coherence_id() (uv_coherency_id)
+extern long sn_coherency_id;
+extern long sn_region_size;
+#define partition_coherence_id() (sn_coherency_id)
extern struct kobject *sgi_uv_kobj; /* /sys/firmware/sgi_uv */
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 7a5782610b2b..52aa943c634f 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -113,25 +113,37 @@
*/
#define UV_MAX_NASID_VALUE (UV_MAX_NUMALINK_NODES * 2)
+struct uv_scir_s {
+ struct timer_list timer;
+ unsigned long offset;
+ unsigned long last;
+ unsigned long idle_on;
+ unsigned long idle_off;
+ unsigned char state;
+ unsigned char enabled;
+};
+
/*
* The following defines attributes of the HUB chip. These attributes are
* frequently referenced and are kept in the per-cpu data areas of each cpu.
* They are kept together in a struct to minimize cache misses.
*/
struct uv_hub_info_s {
- unsigned long global_mmr_base;
- unsigned long gpa_mask;
- unsigned long gnode_upper;
- unsigned long lowmem_remap_top;
- unsigned long lowmem_remap_base;
- unsigned short pnode;
- unsigned short pnode_mask;
- unsigned short coherency_domain_number;
- unsigned short numa_blade_id;
- unsigned char blade_processor_id;
- unsigned char m_val;
- unsigned char n_val;
+ unsigned long global_mmr_base;
+ unsigned long gpa_mask;
+ unsigned long gnode_upper;
+ unsigned long lowmem_remap_top;
+ unsigned long lowmem_remap_base;
+ unsigned short pnode;
+ unsigned short pnode_mask;
+ unsigned short coherency_domain_number;
+ unsigned short numa_blade_id;
+ unsigned char blade_processor_id;
+ unsigned char m_val;
+ unsigned char n_val;
+ struct uv_scir_s scir;
};
+
DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
#define uv_hub_info (&__get_cpu_var(__uv_hub_info))
#define uv_cpu_hub_info(cpu) (&per_cpu(__uv_hub_info, cpu))
@@ -163,6 +175,30 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
#define UV_APIC_PNODE_SHIFT 6
+/* Local Bus from cpu's perspective */
+#define LOCAL_BUS_BASE 0x1c00000
+#define LOCAL_BUS_SIZE (4 * 1024 * 1024)
+
+/*
+ * System Controller Interface Reg
+ *
+ * Note there are NO leds on a UV system. This register is only
+ * used by the system controller to monitor system-wide operation.
+ * There are 64 regs per node. With Nahelem cpus (2 cores per node,
+ * 8 cpus per core, 2 threads per cpu) there are 32 cpu threads on
+ * a node.
+ *
+ * The window is located at top of ACPI MMR space
+ */
+#define SCIR_WINDOW_COUNT 64
+#define SCIR_LOCAL_MMR_BASE (LOCAL_BUS_BASE + \
+ LOCAL_BUS_SIZE - \
+ SCIR_WINDOW_COUNT)
+
+#define SCIR_CPU_HEARTBEAT 0x01 /* timer interrupt */
+#define SCIR_CPU_ACTIVITY 0x02 /* not idle */
+#define SCIR_CPU_HB_INTERVAL (HZ) /* once per second */
+
/*
* Macros for converting between kernel virtual addresses, socket local physical
* addresses, and UV global physical addresses.
@@ -277,6 +313,16 @@ static inline void uv_write_local_mmr(unsigned long offset, unsigned long val)
*uv_local_mmr_address(offset) = val;
}
+static inline unsigned char uv_read_local_mmr8(unsigned long offset)
+{
+ return *((unsigned char *)uv_local_mmr_address(offset));
+}
+
+static inline void uv_write_local_mmr8(unsigned long offset, unsigned char val)
+{
+ *((unsigned char *)uv_local_mmr_address(offset)) = val;
+}
+
/*
* Structures and definitions for converting between cpu, node, pnode, and blade
* numbers.
@@ -351,5 +397,20 @@ static inline int uv_num_possible_blades(void)
return uv_possible_blades;
}
-#endif /* _ASM_X86_UV_UV_HUB_H */
+/* Update SCIR state */
+static inline void uv_set_scir_bits(unsigned char value)
+{
+ if (uv_hub_info->scir.state != value) {
+ uv_hub_info->scir.state = value;
+ uv_write_local_mmr8(uv_hub_info->scir.offset, value);
+ }
+}
+static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value)
+{
+ if (uv_cpu_hub_info(cpu)->scir.state != value) {
+ uv_cpu_hub_info(cpu)->scir.state = value;
+ uv_write_local_mmr8(uv_cpu_hub_info(cpu)->scir.offset, value);
+ }
+}
+#endif /* _ASM_X86_UV_UV_HUB_H */
diff --git a/arch/x86/include/asm/vmware.h b/arch/x86/include/asm/vmware.h
new file mode 100644
index 000000000000..c11b7e100d83
--- /dev/null
+++ b/arch/x86/include/asm/vmware.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2008, VMware, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+#ifndef ASM_X86__VMWARE_H
+#define ASM_X86__VMWARE_H
+
+extern unsigned long vmware_get_tsc_khz(void);
+extern int vmware_platform(void);
+extern void vmware_set_feature_bits(struct cpuinfo_x86 *c);
+
+#endif
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index f2527488ca81..01272a466fd9 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -12,6 +12,12 @@ CFLAGS_REMOVE_tsc.o = -pg
CFLAGS_REMOVE_rtc.o = -pg
CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
CFLAGS_REMOVE_ftrace.o = -pg
+CFLAGS_REMOVE_early_printk.o = -pg
+endif
+
+ifdef CONFIG_FUNCTION_RET_TRACER
+# Don't trace __switch_to() but let it for function tracer
+CFLAGS_REMOVE_process_32.o = -pg
endif
#
@@ -26,7 +32,7 @@ CFLAGS_paravirt.o := $(nostackp)
obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o
obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
-obj-y += time_$(BITS).o ioport.o ldt.o
+obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o
obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o
obj-$(CONFIG_X86_VISWS) += visws_quirks.o
obj-$(CONFIG_X86_32) += probe_roms_32.o
@@ -42,7 +48,7 @@ obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
obj-y += process.o
obj-y += i387.o xsave.o
obj-y += ptrace.o
-obj-y += ds.o
+obj-$(CONFIG_X86_DS) += ds.o
obj-$(CONFIG_X86_32) += tls.o
obj-$(CONFIG_IA32_EMULATION) += tls.o
obj-y += step.o
@@ -66,6 +72,7 @@ obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
obj-$(CONFIG_X86_IO_APIC) += io_apic.o
obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
+obj-$(CONFIG_FUNCTION_RET_TRACER) += ftrace.o
obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
@@ -106,6 +113,8 @@ microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o
microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o
obj-$(CONFIG_MICROCODE) += microcode.o
+obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o
+
###
# 64 bit specific files
ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 04a7f960bbc0..fc26ff390f14 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -441,6 +441,7 @@ static void lapic_timer_setup(enum clock_event_mode mode,
v = apic_read(APIC_LVTT);
v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
apic_write(APIC_LVTT, v);
+ apic_write(APIC_TMICT, 0xffffffff);
break;
case CLOCK_EVT_MODE_RESUME:
/* Nothing to do here */
@@ -559,13 +560,13 @@ static int __init calibrate_by_pmtimer(long deltapm, long *delta)
} else {
res = (((u64)deltapm) * mult) >> 22;
do_div(res, 1000000);
- printk(KERN_WARNING "APIC calibration not consistent "
+ pr_warning("APIC calibration not consistent "
"with PM Timer: %ldms instead of 100ms\n",
(long)res);
/* Correct the lapic counter value */
res = (((u64)(*delta)) * pm_100ms);
do_div(res, deltapm);
- printk(KERN_INFO "APIC delta adjusted to PM-Timer: "
+ pr_info("APIC delta adjusted to PM-Timer: "
"%lu (%ld)\n", (unsigned long)res, *delta);
*delta = (long)res;
}
@@ -645,8 +646,7 @@ static int __init calibrate_APIC_clock(void)
*/
if (calibration_result < (1000000 / HZ)) {
local_irq_enable();
- printk(KERN_WARNING
- "APIC frequency too slow, disabling apic timer\n");
+ pr_warning("APIC frequency too slow, disabling apic timer\n");
return -1;
}
@@ -672,13 +672,9 @@ static int __init calibrate_APIC_clock(void)
while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
cpu_relax();
- local_irq_disable();
-
/* Stop the lapic timer */
lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt);
- local_irq_enable();
-
/* Jiffies delta */
deltaj = lapic_cal_j2 - lapic_cal_j1;
apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj);
@@ -692,8 +688,7 @@ static int __init calibrate_APIC_clock(void)
local_irq_enable();
if (levt->features & CLOCK_EVT_FEAT_DUMMY) {
- printk(KERN_WARNING
- "APIC timer disabled due to verification failure.\n");
+ pr_warning("APIC timer disabled due to verification failure.\n");
return -1;
}
@@ -714,7 +709,7 @@ void __init setup_boot_APIC_clock(void)
* broadcast mechanism is used. On UP systems simply ignore it.
*/
if (disable_apic_timer) {
- printk(KERN_INFO "Disabling APIC timer\n");
+ pr_info("Disabling APIC timer\n");
/* No broadcast on UP ! */
if (num_possible_cpus() > 1) {
lapic_clockevent.mult = 1;
@@ -741,7 +736,7 @@ void __init setup_boot_APIC_clock(void)
if (nmi_watchdog != NMI_IO_APIC)
lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
else
- printk(KERN_WARNING "APIC timer registered as dummy,"
+ pr_warning("APIC timer registered as dummy,"
" due to nmi_watchdog=%d!\n", nmi_watchdog);
/* Setup the lapic or request the broadcast */
@@ -773,8 +768,7 @@ static void local_apic_timer_interrupt(void)
* spurious.
*/
if (!evt->event_handler) {
- printk(KERN_WARNING
- "Spurious LAPIC timer interrupt on cpu %d\n", cpu);
+ pr_warning("Spurious LAPIC timer interrupt on cpu %d\n", cpu);
/* Switch it off */
lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt);
return;
@@ -1093,7 +1087,7 @@ static void __cpuinit lapic_setup_esr(void)
unsigned int oldvalue, value, maxlvt;
if (!lapic_is_integrated()) {
- printk(KERN_INFO "No ESR for 82489DX.\n");
+ pr_info("No ESR for 82489DX.\n");
return;
}
@@ -1104,7 +1098,7 @@ static void __cpuinit lapic_setup_esr(void)
* ESR disabled - we can't do anything useful with the
* errors anyway - mbligh
*/
- printk(KERN_INFO "Leaving ESR disabled.\n");
+ pr_info("Leaving ESR disabled.\n");
return;
}
@@ -1298,7 +1292,7 @@ void check_x2apic(void)
rdmsr(MSR_IA32_APICBASE, msr, msr2);
if (msr & X2APIC_ENABLE) {
- printk("x2apic enabled by BIOS, switching to x2apic ops\n");
+ pr_info("x2apic enabled by BIOS, switching to x2apic ops\n");
x2apic_preenabled = x2apic = 1;
apic_ops = &x2apic_ops;
}
@@ -1310,12 +1304,12 @@ void enable_x2apic(void)
rdmsr(MSR_IA32_APICBASE, msr, msr2);
if (!(msr & X2APIC_ENABLE)) {
- printk("Enabling x2apic\n");
+ pr_info("Enabling x2apic\n");
wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
}
}
-void enable_IR_x2apic(void)
+void __init enable_IR_x2apic(void)
{
#ifdef CONFIG_INTR_REMAP
int ret;
@@ -1325,9 +1319,8 @@ void enable_IR_x2apic(void)
return;
if (!x2apic_preenabled && disable_x2apic) {
- printk(KERN_INFO
- "Skipped enabling x2apic and Interrupt-remapping "
- "because of nox2apic\n");
+ pr_info("Skipped enabling x2apic and Interrupt-remapping "
+ "because of nox2apic\n");
return;
}
@@ -1335,22 +1328,19 @@ void enable_IR_x2apic(void)
panic("Bios already enabled x2apic, can't enforce nox2apic");
if (!x2apic_preenabled && skip_ioapic_setup) {
- printk(KERN_INFO
- "Skipped enabling x2apic and Interrupt-remapping "
- "because of skipping io-apic setup\n");
+ pr_info("Skipped enabling x2apic and Interrupt-remapping "
+ "because of skipping io-apic setup\n");
return;
}
ret = dmar_table_init();
if (ret) {
- printk(KERN_INFO
- "dmar_table_init() failed with %d:\n", ret);
+ pr_info("dmar_table_init() failed with %d:\n", ret);
if (x2apic_preenabled)
panic("x2apic enabled by bios. But IR enabling failed");
else
- printk(KERN_INFO
- "Not enabling x2apic,Intr-remapping\n");
+ pr_info("Not enabling x2apic,Intr-remapping\n");
return;
}
@@ -1359,7 +1349,7 @@ void enable_IR_x2apic(void)
ret = save_mask_IO_APIC_setup();
if (ret) {
- printk(KERN_INFO "Saving IO-APIC state failed: %d\n", ret);
+ pr_info("Saving IO-APIC state failed: %d\n", ret);
goto end;
}
@@ -1394,14 +1384,11 @@ end:
if (!ret) {
if (!x2apic_preenabled)
- printk(KERN_INFO
- "Enabled x2apic and interrupt-remapping\n");
+ pr_info("Enabled x2apic and interrupt-remapping\n");
else
- printk(KERN_INFO
- "Enabled Interrupt-remapping\n");
+ pr_info("Enabled Interrupt-remapping\n");
} else
- printk(KERN_ERR
- "Failed to enable Interrupt-remapping and x2apic\n");
+ pr_err("Failed to enable Interrupt-remapping and x2apic\n");
#else
if (!cpu_has_x2apic)
return;
@@ -1410,8 +1397,8 @@ end:
panic("x2apic enabled prior OS handover,"
" enable CONFIG_INTR_REMAP");
- printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping "
- " and x2apic\n");
+ pr_info("Enable CONFIG_INTR_REMAP for enabling intr-remapping "
+ " and x2apic\n");
#endif
return;
@@ -1428,7 +1415,7 @@ end:
static int __init detect_init_APIC(void)
{
if (!cpu_has_apic) {
- printk(KERN_INFO "No local APIC present\n");
+ pr_info("No local APIC present\n");
return -1;
}
@@ -1469,8 +1456,8 @@ static int __init detect_init_APIC(void)
* "lapic" specified.
*/
if (!force_enable_local_apic) {
- printk(KERN_INFO "Local APIC disabled by BIOS -- "
- "you can enable it with \"lapic\"\n");
+ pr_info("Local APIC disabled by BIOS -- "
+ "you can enable it with \"lapic\"\n");
return -1;
}
/*
@@ -1480,8 +1467,7 @@ static int __init detect_init_APIC(void)
*/
rdmsr(MSR_IA32_APICBASE, l, h);
if (!(l & MSR_IA32_APICBASE_ENABLE)) {
- printk(KERN_INFO
- "Local APIC disabled by BIOS -- reenabling.\n");
+ pr_info("Local APIC disabled by BIOS -- reenabling.\n");
l &= ~MSR_IA32_APICBASE_BASE;
l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
wrmsr(MSR_IA32_APICBASE, l, h);
@@ -1494,7 +1480,7 @@ static int __init detect_init_APIC(void)
*/
features = cpuid_edx(1);
if (!(features & (1 << X86_FEATURE_APIC))) {
- printk(KERN_WARNING "Could not enable APIC!\n");
+ pr_warning("Could not enable APIC!\n");
return -1;
}
set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
@@ -1505,14 +1491,14 @@ static int __init detect_init_APIC(void)
if (l & MSR_IA32_APICBASE_ENABLE)
mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
- printk(KERN_INFO "Found and enabled local APIC!\n");
+ pr_info("Found and enabled local APIC!\n");
apic_pm_activate();
return 0;
no_apic:
- printk(KERN_INFO "No local APIC present or hardware disabled\n");
+ pr_info("No local APIC present or hardware disabled\n");
return -1;
}
#endif
@@ -1588,12 +1574,12 @@ int __init APIC_init_uniprocessor(void)
{
#ifdef CONFIG_X86_64
if (disable_apic) {
- printk(KERN_INFO "Apic disabled\n");
+ pr_info("Apic disabled\n");
return -1;
}
if (!cpu_has_apic) {
disable_apic = 1;
- printk(KERN_INFO "Apic disabled by BIOS\n");
+ pr_info("Apic disabled by BIOS\n");
return -1;
}
#else
@@ -1605,8 +1591,8 @@ int __init APIC_init_uniprocessor(void)
*/
if (!cpu_has_apic &&
APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
- printk(KERN_ERR "BIOS bug, local APIC 0x%x not detected!...\n",
- boot_cpu_physical_apicid);
+ pr_err("BIOS bug, local APIC 0x%x not detected!...\n",
+ boot_cpu_physical_apicid);
clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
return -1;
}
@@ -1699,8 +1685,8 @@ void smp_spurious_interrupt(struct pt_regs *regs)
add_pda(irq_spurious_count, 1);
#else
/* see sw-dev-man vol 3, chapter 7.4.13.5 */
- printk(KERN_INFO "spurious APIC interrupt on CPU#%d, "
- "should never happen.\n", smp_processor_id());
+ pr_info("spurious APIC interrupt on CPU#%d, "
+ "should never happen.\n", smp_processor_id());
__get_cpu_var(irq_stat).irq_spurious_count++;
#endif
irq_exit();
@@ -1724,17 +1710,18 @@ void smp_error_interrupt(struct pt_regs *regs)
ack_APIC_irq();
atomic_inc(&irq_err_count);
- /* Here is what the APIC error bits mean:
- 0: Send CS error
- 1: Receive CS error
- 2: Send accept error
- 3: Receive accept error
- 4: Reserved
- 5: Send illegal vector
- 6: Received illegal vector
- 7: Illegal register address
- */
- printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n",
+ /*
+ * Here is what the APIC error bits mean:
+ * 0: Send CS error
+ * 1: Receive CS error
+ * 2: Send accept error
+ * 3: Receive accept error
+ * 4: Reserved
+ * 5: Send illegal vector
+ * 6: Received illegal vector
+ * 7: Illegal register address
+ */
+ pr_debug("APIC error on CPU%d: %02x(%02x)\n",
smp_processor_id(), v , v1);
irq_exit();
}
@@ -1838,15 +1825,15 @@ void __cpuinit generic_processor_info(int apicid, int version)
* Validate version
*/
if (version == 0x0) {
- printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! "
- "fixing up to 0x10. (tell your hw vendor)\n",
- version);
+ pr_warning("BIOS bug, APIC version is 0 for CPU#%d! "
+ "fixing up to 0x10. (tell your hw vendor)\n",
+ version);
version = 0x10;
}
apic_version[apicid] = version;
if (num_processors >= NR_CPUS) {
- printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
+ pr_warning("WARNING: NR_CPUS limit of %i reached."
" Processor ignored.\n", NR_CPUS);
return;
}
@@ -2209,7 +2196,7 @@ static int __init apic_set_verbosity(char *arg)
else if (strcmp("verbose", arg) == 0)
apic_verbosity = APIC_VERBOSE;
else {
- printk(KERN_WARNING "APIC Verbosity level %s not recognised"
+ pr_warning("APIC Verbosity level %s not recognised"
" use apic=verbose or apic=debug\n", arg);
return -EINVAL;
}
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c
index f0dfe6f17e7e..d22d0f1bbea0 100644
--- a/arch/x86/kernel/bios_uv.c
+++ b/arch/x86/kernel/bios_uv.c
@@ -69,10 +69,10 @@ s64 uv_bios_call_reentrant(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
long sn_partition_id;
EXPORT_SYMBOL_GPL(sn_partition_id);
-long uv_coherency_id;
-EXPORT_SYMBOL_GPL(uv_coherency_id);
-long uv_region_size;
-EXPORT_SYMBOL_GPL(uv_region_size);
+long sn_coherency_id;
+EXPORT_SYMBOL_GPL(sn_coherency_id);
+long sn_region_size;
+EXPORT_SYMBOL_GPL(sn_region_size);
int uv_type;
@@ -100,6 +100,58 @@ s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher,
return ret;
}
+int
+uv_bios_mq_watchlist_alloc(int blade, void *mq, unsigned int mq_size,
+ unsigned long *intr_mmr_offset)
+{
+ union uv_watchlist_u size_blade;
+ unsigned long addr;
+ u64 watchlist;
+ s64 ret;
+
+ addr = (unsigned long)mq;
+ size_blade.size = mq_size;
+ size_blade.blade = blade;
+
+ /*
+ * bios returns watchlist number or negative error number.
+ */
+ ret = (int)uv_bios_call_irqsave(UV_BIOS_WATCHLIST_ALLOC, addr,
+ size_blade.val, (u64)intr_mmr_offset,
+ (u64)&watchlist, 0);
+ if (ret < BIOS_STATUS_SUCCESS)
+ return ret;
+
+ return watchlist;
+}
+EXPORT_SYMBOL_GPL(uv_bios_mq_watchlist_alloc);
+
+int
+uv_bios_mq_watchlist_free(int blade, int watchlist_num)
+{
+ return (int)uv_bios_call_irqsave(UV_BIOS_WATCHLIST_FREE,
+ blade, watchlist_num, 0, 0, 0);
+}
+EXPORT_SYMBOL_GPL(uv_bios_mq_watchlist_free);
+
+s64
+uv_bios_change_memprotect(u64 paddr, u64 len, enum uv_memprotect perms)
+{
+ return uv_bios_call_irqsave(UV_BIOS_MEMPROTECT, paddr, len,
+ perms, 0, 0);
+}
+EXPORT_SYMBOL_GPL(uv_bios_change_memprotect);
+
+s64
+uv_bios_reserved_page_pa(u64 buf, u64 *cookie, u64 *addr, u64 *len)
+{
+ s64 ret;
+
+ ret = uv_bios_call_irqsave(UV_BIOS_GET_PARTITION_ADDR, (u64)cookie,
+ (u64)addr, buf, (u64)len, 0);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(uv_bios_reserved_page_pa);
s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second)
{
diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c
new file mode 100644
index 000000000000..2ac0ab71412a
--- /dev/null
+++ b/arch/x86/kernel/check.c
@@ -0,0 +1,161 @@
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/kthread.h>
+#include <linux/workqueue.h>
+#include <asm/e820.h>
+#include <asm/proto.h>
+
+/*
+ * Some BIOSes seem to corrupt the low 64k of memory during events
+ * like suspend/resume and unplugging an HDMI cable. Reserve all
+ * remaining free memory in that area and fill it with a distinct
+ * pattern.
+ */
+#define MAX_SCAN_AREAS 8
+
+static int __read_mostly memory_corruption_check = -1;
+
+static unsigned __read_mostly corruption_check_size = 64*1024;
+static unsigned __read_mostly corruption_check_period = 60; /* seconds */
+
+static struct e820entry scan_areas[MAX_SCAN_AREAS];
+static int num_scan_areas;
+
+
+static __init int set_corruption_check(char *arg)
+{
+ char *end;
+
+ memory_corruption_check = simple_strtol(arg, &end, 10);
+
+ return (*end == 0) ? 0 : -EINVAL;
+}
+early_param("memory_corruption_check", set_corruption_check);
+
+static __init int set_corruption_check_period(char *arg)
+{
+ char *end;
+
+ corruption_check_period = simple_strtoul(arg, &end, 10);
+
+ return (*end == 0) ? 0 : -EINVAL;
+}
+early_param("memory_corruption_check_period", set_corruption_check_period);
+
+static __init int set_corruption_check_size(char *arg)
+{
+ char *end;
+ unsigned size;
+
+ size = memparse(arg, &end);
+
+ if (*end == '\0')
+ corruption_check_size = size;
+
+ return (size == corruption_check_size) ? 0 : -EINVAL;
+}
+early_param("memory_corruption_check_size", set_corruption_check_size);
+
+
+void __init setup_bios_corruption_check(void)
+{
+ u64 addr = PAGE_SIZE; /* assume first page is reserved anyway */
+
+ if (memory_corruption_check == -1) {
+ memory_corruption_check =
+#ifdef CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK
+ 1
+#else
+ 0
+#endif
+ ;
+ }
+
+ if (corruption_check_size == 0)
+ memory_corruption_check = 0;
+
+ if (!memory_corruption_check)
+ return;
+
+ corruption_check_size = round_up(corruption_check_size, PAGE_SIZE);
+
+ while (addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) {
+ u64 size;
+ addr = find_e820_area_size(addr, &size, PAGE_SIZE);
+
+ if (addr == 0)
+ break;
+
+ if ((addr + size) > corruption_check_size)
+ size = corruption_check_size - addr;
+
+ if (size == 0)
+ break;
+
+ e820_update_range(addr, size, E820_RAM, E820_RESERVED);
+ scan_areas[num_scan_areas].addr = addr;
+ scan_areas[num_scan_areas].size = size;
+ num_scan_areas++;
+
+ /* Assume we've already mapped this early memory */
+ memset(__va(addr), 0, size);
+
+ addr += size;
+ }
+
+ printk(KERN_INFO "Scanning %d areas for low memory corruption\n",
+ num_scan_areas);
+ update_e820();
+}
+
+
+void check_for_bios_corruption(void)
+{
+ int i;
+ int corruption = 0;
+
+ if (!memory_corruption_check)
+ return;
+
+ for (i = 0; i < num_scan_areas; i++) {
+ unsigned long *addr = __va(scan_areas[i].addr);
+ unsigned long size = scan_areas[i].size;
+
+ for (; size; addr++, size -= sizeof(unsigned long)) {
+ if (!*addr)
+ continue;
+ printk(KERN_ERR "Corrupted low memory at %p (%lx phys) = %08lx\n",
+ addr, __pa(addr), *addr);
+ corruption = 1;
+ *addr = 0;
+ }
+ }
+
+ WARN_ONCE(corruption, KERN_ERR "Memory corruption detected in low memory\n");
+}
+
+static void check_corruption(struct work_struct *dummy);
+static DECLARE_DELAYED_WORK(bios_check_work, check_corruption);
+
+static void check_corruption(struct work_struct *dummy)
+{
+ check_for_bios_corruption();
+ schedule_delayed_work(&bios_check_work,
+ round_jiffies_relative(corruption_check_period*HZ));
+}
+
+static int start_periodic_check_for_corruption(void)
+{
+ if (!memory_corruption_check || corruption_check_period == 0)
+ return 0;
+
+ printk(KERN_INFO "Scanning for low memory corruption every %d seconds\n",
+ corruption_check_period);
+
+ /* First time we run the checks right away */
+ schedule_delayed_work(&bios_check_work, 0);
+ return 0;
+}
+
+module_init(start_periodic_check_for_corruption);
+
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 82ec6075c057..a5c04e88777e 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -4,6 +4,7 @@
obj-y := intel_cacheinfo.o addon_cpuid_features.o
obj-y += proc.o capflags.o powerflags.o common.o
+obj-y += vmware.o hypervisor.o
obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o
obj-$(CONFIG_X86_64) += bugs_64.o
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index b9c9ea0217a9..b88595c36254 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -36,6 +36,7 @@
#include <asm/proto.h>
#include <asm/sections.h>
#include <asm/setup.h>
+#include <asm/hypervisor.h>
#include "cpu.h"
@@ -703,6 +704,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
detect_ht(c);
#endif
+ init_hypervisor(c);
/*
* On SMP, boot_cpu_data holds the common feature set between
* all CPUs; so make sure that we indicate which features are
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index d3dcd58b87cd..7f05f44b97e9 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -115,9 +115,20 @@ static int query_current_values_with_pending_wait(struct powernow_k8_data *data)
u32 i = 0;
if (cpu_family == CPU_HW_PSTATE) {
- rdmsr(MSR_PSTATE_STATUS, lo, hi);
- i = lo & HW_PSTATE_MASK;
- data->currpstate = i;
+ if (data->currpstate == HW_PSTATE_INVALID) {
+ /* read (initial) hw pstate if not yet set */
+ rdmsr(MSR_PSTATE_STATUS, lo, hi);
+ i = lo & HW_PSTATE_MASK;
+
+ /*
+ * a workaround for family 11h erratum 311 might cause
+ * an "out-of-range Pstate if the core is in Pstate-0
+ */
+ if (i >= data->numps)
+ data->currpstate = HW_PSTATE_0;
+ else
+ data->currpstate = i;
+ }
return 0;
}
do {
@@ -1121,6 +1132,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
}
data->cpu = pol->cpu;
+ data->currpstate = HW_PSTATE_INVALID;
if (powernow_k8_cpu_init_acpi(data)) {
/*
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
index ab48cfed4d96..65cfb5d7f77f 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
@@ -5,6 +5,19 @@
* http://www.gnu.org/licenses/gpl.html
*/
+
+enum pstate {
+ HW_PSTATE_INVALID = 0xff,
+ HW_PSTATE_0 = 0,
+ HW_PSTATE_1 = 1,
+ HW_PSTATE_2 = 2,
+ HW_PSTATE_3 = 3,
+ HW_PSTATE_4 = 4,
+ HW_PSTATE_5 = 5,
+ HW_PSTATE_6 = 6,
+ HW_PSTATE_7 = 7,
+};
+
struct powernow_k8_data {
unsigned int cpu;
@@ -23,7 +36,9 @@ struct powernow_k8_data {
u32 exttype; /* extended interface = 1 */
/* keep track of the current fid / vid or pstate */
- u32 currvid, currfid, currpstate;
+ u32 currvid;
+ u32 currfid;
+ enum pstate currpstate;
/* the powernow_table includes all frequency and vid/fid pairings:
* fid are the lower 8 bits of the index, vid are the upper 8 bits.
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
new file mode 100644
index 000000000000..fb5b86af0b01
--- /dev/null
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -0,0 +1,58 @@
+/*
+ * Common hypervisor code
+ *
+ * Copyright (C) 2008, VMware, Inc.
+ * Author : Alok N Kataria <akataria@vmware.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+
+#include <asm/processor.h>
+#include <asm/vmware.h>
+#include <asm/hypervisor.h>
+
+static inline void __cpuinit
+detect_hypervisor_vendor(struct cpuinfo_x86 *c)
+{
+ if (vmware_platform()) {
+ c->x86_hyper_vendor = X86_HYPER_VENDOR_VMWARE;
+ } else {
+ c->x86_hyper_vendor = X86_HYPER_VENDOR_NONE;
+ }
+}
+
+unsigned long get_hypervisor_tsc_freq(void)
+{
+ if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_VMWARE)
+ return vmware_get_tsc_khz();
+ return 0;
+}
+
+static inline void __cpuinit
+hypervisor_set_feature_bits(struct cpuinfo_x86 *c)
+{
+ if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_VMWARE) {
+ vmware_set_feature_bits(c);
+ return;
+ }
+}
+
+void __cpuinit init_hypervisor(struct cpuinfo_x86 *c)
+{
+ detect_hypervisor_vendor(c);
+ hypervisor_set_feature_bits(c);
+}
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index cce0b6118d55..816f27f289b1 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -307,12 +307,11 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_P4);
if (c->x86 == 6)
set_cpu_cap(c, X86_FEATURE_P3);
+#endif
if (cpu_has_bts)
ptrace_bts_init_intel(c);
-#endif
-
detect_extended_topology(c);
if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) {
/*
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index c78c04821ea1..1159e269e596 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -803,6 +803,7 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
}
static struct res_range __initdata range[RANGE_NUM];
+static int __initdata nr_range;
#ifdef CONFIG_MTRR_SANITIZER
@@ -1206,39 +1207,43 @@ struct mtrr_cleanup_result {
#define PSHIFT (PAGE_SHIFT - 10)
static struct mtrr_cleanup_result __initdata result[NUM_RESULT];
-static struct res_range __initdata range_new[RANGE_NUM];
static unsigned long __initdata min_loss_pfn[RANGE_NUM];
-static int __init mtrr_cleanup(unsigned address_bits)
+static void __init print_out_mtrr_range_state(void)
{
- unsigned long extra_remove_base, extra_remove_size;
- unsigned long base, size, def, dummy;
- mtrr_type type;
- int nr_range, nr_range_new;
- u64 chunk_size, gran_size;
- unsigned long range_sums, range_sums_new;
- int index_good;
- int num_reg_good;
int i;
+ char start_factor = 'K', size_factor = 'K';
+ unsigned long start_base, size_base;
+ mtrr_type type;
- /* extra one for all 0 */
- int num[MTRR_NUM_TYPES + 1];
+ for (i = 0; i < num_var_ranges; i++) {
- if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
- return 0;
- rdmsr(MTRRdefType_MSR, def, dummy);
- def &= 0xff;
- if (def != MTRR_TYPE_UNCACHABLE)
- return 0;
+ size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10);
+ if (!size_base)
+ continue;
- /* get it and store it aside */
- memset(range_state, 0, sizeof(range_state));
- for (i = 0; i < num_var_ranges; i++) {
- mtrr_if->get(i, &base, &size, &type);
- range_state[i].base_pfn = base;
- range_state[i].size_pfn = size;
- range_state[i].type = type;
+ size_base = to_size_factor(size_base, &size_factor),
+ start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10);
+ start_base = to_size_factor(start_base, &start_factor),
+ type = range_state[i].type;
+
+ printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
+ i, start_base, start_factor,
+ size_base, size_factor,
+ (type == MTRR_TYPE_UNCACHABLE) ? "UC" :
+ ((type == MTRR_TYPE_WRPROT) ? "WP" :
+ ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other"))
+ );
}
+}
+
+static int __init mtrr_need_cleanup(void)
+{
+ int i;
+ mtrr_type type;
+ unsigned long size;
+ /* extra one for all 0 */
+ int num[MTRR_NUM_TYPES + 1];
/* check entries number */
memset(num, 0, sizeof(num));
@@ -1263,29 +1268,133 @@ static int __init mtrr_cleanup(unsigned address_bits)
num_var_ranges - num[MTRR_NUM_TYPES])
return 0;
- /* print original var MTRRs at first, for debugging: */
- printk(KERN_DEBUG "original variable MTRRs\n");
- for (i = 0; i < num_var_ranges; i++) {
- char start_factor = 'K', size_factor = 'K';
- unsigned long start_base, size_base;
+ return 1;
+}
- size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10);
- if (!size_base)
- continue;
+static unsigned long __initdata range_sums;
+static void __init mtrr_calc_range_state(u64 chunk_size, u64 gran_size,
+ unsigned long extra_remove_base,
+ unsigned long extra_remove_size,
+ int i)
+{
+ int num_reg;
+ static struct res_range range_new[RANGE_NUM];
+ static int nr_range_new;
+ unsigned long range_sums_new;
+
+ /* convert ranges to var ranges state */
+ num_reg = x86_setup_var_mtrrs(range, nr_range,
+ chunk_size, gran_size);
+
+ /* we got new setting in range_state, check it */
+ memset(range_new, 0, sizeof(range_new));
+ nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
+ extra_remove_base, extra_remove_size);
+ range_sums_new = sum_ranges(range_new, nr_range_new);
+
+ result[i].chunk_sizek = chunk_size >> 10;
+ result[i].gran_sizek = gran_size >> 10;
+ result[i].num_reg = num_reg;
+ if (range_sums < range_sums_new) {
+ result[i].lose_cover_sizek =
+ (range_sums_new - range_sums) << PSHIFT;
+ result[i].bad = 1;
+ } else
+ result[i].lose_cover_sizek =
+ (range_sums - range_sums_new) << PSHIFT;
- size_base = to_size_factor(size_base, &size_factor),
- start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10);
- start_base = to_size_factor(start_base, &start_factor),
- type = range_state[i].type;
+ /* double check it */
+ if (!result[i].bad && !result[i].lose_cover_sizek) {
+ if (nr_range_new != nr_range ||
+ memcmp(range, range_new, sizeof(range)))
+ result[i].bad = 1;
+ }
- printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
- i, start_base, start_factor,
- size_base, size_factor,
- (type == MTRR_TYPE_UNCACHABLE) ? "UC" :
- ((type == MTRR_TYPE_WRPROT) ? "WP" :
- ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other"))
- );
+ if (!result[i].bad && (range_sums - range_sums_new <
+ min_loss_pfn[num_reg])) {
+ min_loss_pfn[num_reg] =
+ range_sums - range_sums_new;
}
+}
+
+static void __init mtrr_print_out_one_result(int i)
+{
+ char gran_factor, chunk_factor, lose_factor;
+ unsigned long gran_base, chunk_base, lose_base;
+
+ gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
+ chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
+ lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
+ printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t",
+ result[i].bad ? "*BAD*" : " ",
+ gran_base, gran_factor, chunk_base, chunk_factor);
+ printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n",
+ result[i].num_reg, result[i].bad ? "-" : "",
+ lose_base, lose_factor);
+}
+
+static int __init mtrr_search_optimal_index(void)
+{
+ int i;
+ int num_reg_good;
+ int index_good;
+
+ if (nr_mtrr_spare_reg >= num_var_ranges)
+ nr_mtrr_spare_reg = num_var_ranges - 1;
+ num_reg_good = -1;
+ for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) {
+ if (!min_loss_pfn[i])
+ num_reg_good = i;
+ }
+
+ index_good = -1;
+ if (num_reg_good != -1) {
+ for (i = 0; i < NUM_RESULT; i++) {
+ if (!result[i].bad &&
+ result[i].num_reg == num_reg_good &&
+ !result[i].lose_cover_sizek) {
+ index_good = i;
+ break;
+ }
+ }
+ }
+
+ return index_good;
+}
+
+
+static int __init mtrr_cleanup(unsigned address_bits)
+{
+ unsigned long extra_remove_base, extra_remove_size;
+ unsigned long base, size, def, dummy;
+ mtrr_type type;
+ u64 chunk_size, gran_size;
+ int index_good;
+ int i;
+
+ if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
+ return 0;
+ rdmsr(MTRRdefType_MSR, def, dummy);
+ def &= 0xff;
+ if (def != MTRR_TYPE_UNCACHABLE)
+ return 0;
+
+ /* get it and store it aside */
+ memset(range_state, 0, sizeof(range_state));
+ for (i = 0; i < num_var_ranges; i++) {
+ mtrr_if->get(i, &base, &size, &type);
+ range_state[i].base_pfn = base;
+ range_state[i].size_pfn = size;
+ range_state[i].type = type;
+ }
+
+ /* check if we need handle it and can handle it */
+ if (!mtrr_need_cleanup())
+ return 0;
+
+ /* print original var MTRRs at first, for debugging: */
+ printk(KERN_DEBUG "original variable MTRRs\n");
+ print_out_mtrr_range_state();
memset(range, 0, sizeof(range));
extra_remove_size = 0;
@@ -1309,176 +1418,64 @@ static int __init mtrr_cleanup(unsigned address_bits)
range_sums >> (20 - PAGE_SHIFT));
if (mtrr_chunk_size && mtrr_gran_size) {
- int num_reg;
- char gran_factor, chunk_factor, lose_factor;
- unsigned long gran_base, chunk_base, lose_base;
-
- debug_print++;
- /* convert ranges to var ranges state */
- num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size,
- mtrr_gran_size);
+ i = 0;
+ mtrr_calc_range_state(mtrr_chunk_size, mtrr_gran_size,
+ extra_remove_base, extra_remove_size, i);
- /* we got new setting in range_state, check it */
- memset(range_new, 0, sizeof(range_new));
- nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
- extra_remove_base,
- extra_remove_size);
- range_sums_new = sum_ranges(range_new, nr_range_new);
+ mtrr_print_out_one_result(i);
- i = 0;
- result[i].chunk_sizek = mtrr_chunk_size >> 10;
- result[i].gran_sizek = mtrr_gran_size >> 10;
- result[i].num_reg = num_reg;
- if (range_sums < range_sums_new) {
- result[i].lose_cover_sizek =
- (range_sums_new - range_sums) << PSHIFT;
- result[i].bad = 1;
- } else
- result[i].lose_cover_sizek =
- (range_sums - range_sums_new) << PSHIFT;
-
- gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
- chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
- lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
- printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t",
- result[i].bad?"*BAD*":" ",
- gran_base, gran_factor, chunk_base, chunk_factor);
- printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n",
- result[i].num_reg, result[i].bad?"-":"",
- lose_base, lose_factor);
if (!result[i].bad) {
set_var_mtrr_all(address_bits);
return 1;
}
printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, "
"will find optimal one\n");
- debug_print--;
- memset(result, 0, sizeof(result[0]));
}
i = 0;
memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn));
memset(result, 0, sizeof(result));
for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) {
- char gran_factor;
- unsigned long gran_base;
-
- if (debug_print)
- gran_base = to_size_factor(gran_size >> 10, &gran_factor);
for (chunk_size = gran_size; chunk_size < (1ULL<<32);
chunk_size <<= 1) {
- int num_reg;
- if (debug_print) {
- char chunk_factor;
- unsigned long chunk_base;
-
- chunk_base = to_size_factor(chunk_size>>10, &chunk_factor),
- printk(KERN_INFO "\n");
- printk(KERN_INFO "gran_size: %ld%c chunk_size: %ld%c \n",
- gran_base, gran_factor, chunk_base, chunk_factor);
- }
if (i >= NUM_RESULT)
continue;
- /* convert ranges to var ranges state */
- num_reg = x86_setup_var_mtrrs(range, nr_range,
- chunk_size, gran_size);
-
- /* we got new setting in range_state, check it */
- memset(range_new, 0, sizeof(range_new));
- nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
- extra_remove_base, extra_remove_size);
- range_sums_new = sum_ranges(range_new, nr_range_new);
-
- result[i].chunk_sizek = chunk_size >> 10;
- result[i].gran_sizek = gran_size >> 10;
- result[i].num_reg = num_reg;
- if (range_sums < range_sums_new) {
- result[i].lose_cover_sizek =
- (range_sums_new - range_sums) << PSHIFT;
- result[i].bad = 1;
- } else
- result[i].lose_cover_sizek =
- (range_sums - range_sums_new) << PSHIFT;
-
- /* double check it */
- if (!result[i].bad && !result[i].lose_cover_sizek) {
- if (nr_range_new != nr_range ||
- memcmp(range, range_new, sizeof(range)))
- result[i].bad = 1;
+ mtrr_calc_range_state(chunk_size, gran_size,
+ extra_remove_base, extra_remove_size, i);
+ if (debug_print) {
+ mtrr_print_out_one_result(i);
+ printk(KERN_INFO "\n");
}
- if (!result[i].bad && (range_sums - range_sums_new <
- min_loss_pfn[num_reg])) {
- min_loss_pfn[num_reg] =
- range_sums - range_sums_new;
- }
i++;
}
}
- /* print out all */
- for (i = 0; i < NUM_RESULT; i++) {
- char gran_factor, chunk_factor, lose_factor;
- unsigned long gran_base, chunk_base, lose_base;
-
- gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
- chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
- lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
- printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t",
- result[i].bad?"*BAD*":" ",
- gran_base, gran_factor, chunk_base, chunk_factor);
- printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n",
- result[i].num_reg, result[i].bad?"-":"",
- lose_base, lose_factor);
- }
-
/* try to find the optimal index */
- if (nr_mtrr_spare_reg >= num_var_ranges)
- nr_mtrr_spare_reg = num_var_ranges - 1;
- num_reg_good = -1;
- for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) {
- if (!min_loss_pfn[i])
- num_reg_good = i;
- }
-
- index_good = -1;
- if (num_reg_good != -1) {
- for (i = 0; i < NUM_RESULT; i++) {
- if (!result[i].bad &&
- result[i].num_reg == num_reg_good &&
- !result[i].lose_cover_sizek) {
- index_good = i;
- break;
- }
- }
- }
+ index_good = mtrr_search_optimal_index();
if (index_good != -1) {
- char gran_factor, chunk_factor, lose_factor;
- unsigned long gran_base, chunk_base, lose_base;
-
printk(KERN_INFO "Found optimal setting for mtrr clean up\n");
i = index_good;
- gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
- chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
- lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
- printk(KERN_INFO "gran_size: %ld%c \tchunk_size: %ld%c \t",
- gran_base, gran_factor, chunk_base, chunk_factor);
- printk(KERN_CONT "num_reg: %d \tlose RAM: %ld%c\n",
- result[i].num_reg, lose_base, lose_factor);
+ mtrr_print_out_one_result(i);
+
/* convert ranges to var ranges state */
chunk_size = result[i].chunk_sizek;
chunk_size <<= 10;
gran_size = result[i].gran_sizek;
gran_size <<= 10;
- debug_print++;
x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
- debug_print--;
set_var_mtrr_all(address_bits);
+ printk(KERN_DEBUG "New variable MTRRs\n");
+ print_out_mtrr_range_state();
return 1;
+ } else {
+ /* print out all */
+ for (i = 0; i < NUM_RESULT; i++)
+ mtrr_print_out_one_result(i);
}
printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n");
@@ -1562,7 +1559,6 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
{
unsigned long i, base, size, highest_pfn = 0, def, dummy;
mtrr_type type;
- int nr_range;
u64 total_trim_size;
/* extra one for all 0 */
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
new file mode 100644
index 000000000000..284c399e3234
--- /dev/null
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -0,0 +1,112 @@
+/*
+ * VMware Detection code.
+ *
+ * Copyright (C) 2008, VMware, Inc.
+ * Author : Alok N Kataria <akataria@vmware.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+
+#include <linux/dmi.h>
+#include <asm/div64.h>
+#include <asm/vmware.h>
+
+#define CPUID_VMWARE_INFO_LEAF 0x40000000
+#define VMWARE_HYPERVISOR_MAGIC 0x564D5868
+#define VMWARE_HYPERVISOR_PORT 0x5658
+
+#define VMWARE_PORT_CMD_GETVERSION 10
+#define VMWARE_PORT_CMD_GETHZ 45
+
+#define VMWARE_PORT(cmd, eax, ebx, ecx, edx) \
+ __asm__("inl (%%dx)" : \
+ "=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) : \
+ "0"(VMWARE_HYPERVISOR_MAGIC), \
+ "1"(VMWARE_PORT_CMD_##cmd), \
+ "2"(VMWARE_HYPERVISOR_PORT), "3"(UINT_MAX) : \
+ "memory");
+
+static inline int __vmware_platform(void)
+{
+ uint32_t eax, ebx, ecx, edx;
+ VMWARE_PORT(GETVERSION, eax, ebx, ecx, edx);
+ return eax != (uint32_t)-1 && ebx == VMWARE_HYPERVISOR_MAGIC;
+}
+
+static unsigned long __vmware_get_tsc_khz(void)
+{
+ uint64_t tsc_hz;
+ uint32_t eax, ebx, ecx, edx;
+
+ VMWARE_PORT(GETHZ, eax, ebx, ecx, edx);
+
+ if (ebx == UINT_MAX)
+ return 0;
+ tsc_hz = eax | (((uint64_t)ebx) << 32);
+ do_div(tsc_hz, 1000);
+ BUG_ON(tsc_hz >> 32);
+ return tsc_hz;
+}
+
+/*
+ * While checking the dmi string infomation, just checking the product
+ * serial key should be enough, as this will always have a VMware
+ * specific string when running under VMware hypervisor.
+ */
+int vmware_platform(void)
+{
+ if (cpu_has_hypervisor) {
+ unsigned int eax, ebx, ecx, edx;
+ char hyper_vendor_id[13];
+
+ cpuid(CPUID_VMWARE_INFO_LEAF, &eax, &ebx, &ecx, &edx);
+ memcpy(hyper_vendor_id + 0, &ebx, 4);
+ memcpy(hyper_vendor_id + 4, &ecx, 4);
+ memcpy(hyper_vendor_id + 8, &edx, 4);
+ hyper_vendor_id[12] = '\0';
+ if (!strcmp(hyper_vendor_id, "VMwareVMware"))
+ return 1;
+ } else if (dmi_available && dmi_name_in_serial("VMware") &&
+ __vmware_platform())
+ return 1;
+
+ return 0;
+}
+
+unsigned long vmware_get_tsc_khz(void)
+{
+ BUG_ON(!vmware_platform());
+ return __vmware_get_tsc_khz();
+}
+
+/*
+ * VMware hypervisor takes care of exporting a reliable TSC to the guest.
+ * Still, due to timing difference when running on virtual cpus, the TSC can
+ * be marked as unstable in some cases. For example, the TSC sync check at
+ * bootup can fail due to a marginal offset between vcpus' TSCs (though the
+ * TSCs do not drift from each other). Also, the ACPI PM timer clocksource
+ * is not suitable as a watchdog when running on a hypervisor because the
+ * kernel may miss a wrap of the counter if the vcpu is descheduled for a
+ * long time. To skip these checks at runtime we set these capability bits,
+ * so that the kernel could just trust the hypervisor with providing a
+ * reliable virtual TSC that is suitable for timekeeping.
+ */
+void __cpuinit vmware_set_feature_bits(struct cpuinfo_x86 *c)
+{
+ set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+ set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE);
+}
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 268553817909..d84a852e4cd7 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -29,34 +29,17 @@
#include <mach_ipi.h>
-/* This keeps a track of which one is crashing cpu. */
-static int crashing_cpu;
#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
-static atomic_t waiting_for_crash_ipi;
-static int crash_nmi_callback(struct notifier_block *self,
- unsigned long val, void *data)
+static void kdump_nmi_callback(int cpu, struct die_args *args)
{
struct pt_regs *regs;
#ifdef CONFIG_X86_32
struct pt_regs fixed_regs;
#endif
- int cpu;
- if (val != DIE_NMI_IPI)
- return NOTIFY_OK;
-
- regs = ((struct die_args *)data)->regs;
- cpu = raw_smp_processor_id();
-
- /* Don't do anything if this handler is invoked on crashing cpu.
- * Otherwise, system will completely hang. Crashing cpu can get
- * an NMI if system was initially booted with nmi_watchdog parameter.
- */
- if (cpu == crashing_cpu)
- return NOTIFY_STOP;
- local_irq_disable();
+ regs = args->regs;
#ifdef CONFIG_X86_32
if (!user_mode_vm(regs)) {
@@ -65,54 +48,19 @@ static int crash_nmi_callback(struct notifier_block *self,
}
#endif
crash_save_cpu(regs, cpu);
- disable_local_APIC();
- atomic_dec(&waiting_for_crash_ipi);
- /* Assume hlt works */
- halt();
- for (;;)
- cpu_relax();
-
- return 1;
-}
-static void smp_send_nmi_allbutself(void)
-{
- cpumask_t mask = cpu_online_map;
- cpu_clear(safe_smp_processor_id(), mask);
- if (!cpus_empty(mask))
- send_IPI_mask(mask, NMI_VECTOR);
+ disable_local_APIC();
}
-static struct notifier_block crash_nmi_nb = {
- .notifier_call = crash_nmi_callback,
-};
-
-static void nmi_shootdown_cpus(void)
+static void kdump_nmi_shootdown_cpus(void)
{
- unsigned long msecs;
-
- atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
- /* Would it be better to replace the trap vector here? */
- if (register_die_notifier(&crash_nmi_nb))
- return; /* return what? */
- /* Ensure the new callback function is set before sending
- * out the NMI
- */
- wmb();
+ nmi_shootdown_cpus(kdump_nmi_callback);
- smp_send_nmi_allbutself();
-
- msecs = 1000; /* Wait at most a second for the other cpus to stop */
- while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
- mdelay(1);
- msecs--;
- }
-
- /* Leave the nmi callback set */
disable_local_APIC();
}
+
#else
-static void nmi_shootdown_cpus(void)
+static void kdump_nmi_shootdown_cpus(void)
{
/* There are no cpus to shootdown */
}
@@ -131,9 +79,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
/* The kernel is broken so disable interrupts */
local_irq_disable();
- /* Make a note of crashing cpu. Will be used in NMI callback.*/
- crashing_cpu = safe_smp_processor_id();
- nmi_shootdown_cpus();
+ kdump_nmi_shootdown_cpus();
lapic_shutdown();
#if defined(CONFIG_X86_IO_APIC)
disable_IO_APIC();
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index d1a121443bde..d6938d9351cf 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -21,8 +21,6 @@
*/
-#ifdef CONFIG_X86_DS
-
#include <asm/ds.h>
#include <linux/errno.h>
@@ -211,14 +209,15 @@ static DEFINE_PER_CPU(struct ds_context *, system_context);
static inline struct ds_context *ds_get_context(struct task_struct *task)
{
struct ds_context *context;
+ unsigned long irq;
- spin_lock(&ds_lock);
+ spin_lock_irqsave(&ds_lock, irq);
context = (task ? task->thread.ds_ctx : this_system_context);
if (context)
context->count++;
- spin_unlock(&ds_lock);
+ spin_unlock_irqrestore(&ds_lock, irq);
return context;
}
@@ -226,55 +225,46 @@ static inline struct ds_context *ds_get_context(struct task_struct *task)
/*
* Same as ds_get_context, but allocates the context and it's DS
* structure, if necessary; returns NULL; if out of memory.
- *
- * pre: requires ds_lock to be held
*/
static inline struct ds_context *ds_alloc_context(struct task_struct *task)
{
struct ds_context **p_context =
(task ? &task->thread.ds_ctx : &this_system_context);
struct ds_context *context = *p_context;
+ unsigned long irq;
if (!context) {
- spin_unlock(&ds_lock);
-
context = kzalloc(sizeof(*context), GFP_KERNEL);
-
- if (!context) {
- spin_lock(&ds_lock);
+ if (!context)
return NULL;
- }
context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL);
if (!context->ds) {
kfree(context);
- spin_lock(&ds_lock);
return NULL;
}
- spin_lock(&ds_lock);
- /*
- * Check for race - another CPU could have allocated
- * it meanwhile:
- */
+ spin_lock_irqsave(&ds_lock, irq);
+
if (*p_context) {
kfree(context->ds);
kfree(context);
- return *p_context;
- }
- *p_context = context;
+ context = *p_context;
+ } else {
+ *p_context = context;
- context->this = p_context;
- context->task = task;
+ context->this = p_context;
+ context->task = task;
- if (task)
- set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
+ if (task)
+ set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
- if (!task || (task == current))
- wrmsr(MSR_IA32_DS_AREA, (unsigned long)context->ds, 0);
-
- get_tracer(task);
+ if (!task || (task == current))
+ wrmsrl(MSR_IA32_DS_AREA,
+ (unsigned long)context->ds);
+ }
+ spin_unlock_irqrestore(&ds_lock, irq);
}
context->count++;
@@ -288,10 +278,12 @@ static inline struct ds_context *ds_alloc_context(struct task_struct *task)
*/
static inline void ds_put_context(struct ds_context *context)
{
+ unsigned long irq;
+
if (!context)
return;
- spin_lock(&ds_lock);
+ spin_lock_irqsave(&ds_lock, irq);
if (--context->count)
goto out;
@@ -313,7 +305,7 @@ static inline void ds_put_context(struct ds_context *context)
kfree(context->ds);
kfree(context);
out:
- spin_unlock(&ds_lock);
+ spin_unlock_irqrestore(&ds_lock, irq);
}
@@ -384,6 +376,7 @@ static int ds_request(struct task_struct *task, void *base, size_t size,
struct ds_context *context;
unsigned long buffer, adj;
const unsigned long alignment = (1 << 3);
+ unsigned long irq;
int error = 0;
if (!ds_cfg.sizeof_ds)
@@ -398,26 +391,27 @@ static int ds_request(struct task_struct *task, void *base, size_t size,
return -EOPNOTSUPP;
- spin_lock(&ds_lock);
-
- error = -ENOMEM;
context = ds_alloc_context(task);
if (!context)
- goto out_unlock;
+ return -ENOMEM;
+
+ spin_lock_irqsave(&ds_lock, irq);
error = -EPERM;
if (!check_tracer(task))
goto out_unlock;
+ get_tracer(task);
+
error = -EALREADY;
if (context->owner[qual] == current)
- goto out_unlock;
+ goto out_put_tracer;
error = -EPERM;
if (context->owner[qual] != NULL)
- goto out_unlock;
+ goto out_put_tracer;
context->owner[qual] = current;
- spin_unlock(&ds_lock);
+ spin_unlock_irqrestore(&ds_lock, irq);
error = -ENOMEM;
@@ -465,10 +459,17 @@ static int ds_request(struct task_struct *task, void *base, size_t size,
out_release:
context->owner[qual] = NULL;
ds_put_context(context);
+ put_tracer(task);
+ return error;
+
+ out_put_tracer:
+ spin_unlock_irqrestore(&ds_lock, irq);
+ ds_put_context(context);
+ put_tracer(task);
return error;
out_unlock:
- spin_unlock(&ds_lock);
+ spin_unlock_irqrestore(&ds_lock, irq);
ds_put_context(context);
return error;
}
@@ -818,13 +819,21 @@ static const struct ds_configuration ds_cfg_var = {
.sizeof_ds = sizeof(long) * 12,
.sizeof_field = sizeof(long),
.sizeof_rec[ds_bts] = sizeof(long) * 3,
+#ifdef __i386__
.sizeof_rec[ds_pebs] = sizeof(long) * 10
+#else
+ .sizeof_rec[ds_pebs] = sizeof(long) * 18
+#endif
};
static const struct ds_configuration ds_cfg_64 = {
.sizeof_ds = 8 * 12,
.sizeof_field = 8,
.sizeof_rec[ds_bts] = 8 * 3,
+#ifdef __i386__
.sizeof_rec[ds_pebs] = 8 * 10
+#else
+ .sizeof_rec[ds_pebs] = 8 * 18
+#endif
};
static inline void
@@ -838,17 +847,16 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
switch (c->x86) {
case 0x6:
switch (c->x86_model) {
+ case 0 ... 0xC:
+ /* sorry, don't know about them */
+ break;
case 0xD:
case 0xE: /* Pentium M */
ds_configure(&ds_cfg_var);
break;
- case 0xF: /* Core2 */
- case 0x1C: /* Atom */
+ default: /* Core2, Atom, ... */
ds_configure(&ds_cfg_64);
break;
- default:
- /* sorry, don't know about them */
- break;
}
break;
case 0xF:
@@ -878,4 +886,3 @@ void ds_free(struct ds_context *context)
while (leftovers--)
ds_put_context(context);
}
-#endif /* CONFIG_X86_DS */
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
new file mode 100644
index 000000000000..5962176dfabb
--- /dev/null
+++ b/arch/x86/kernel/dumpstack.c
@@ -0,0 +1,319 @@
+/*
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
+ */
+#include <linux/kallsyms.h>
+#include <linux/kprobes.h>
+#include <linux/uaccess.h>
+#include <linux/utsname.h>
+#include <linux/hardirq.h>
+#include <linux/kdebug.h>
+#include <linux/module.h>
+#include <linux/ptrace.h>
+#include <linux/kexec.h>
+#include <linux/bug.h>
+#include <linux/nmi.h>
+#include <linux/sysfs.h>
+
+#include <asm/stacktrace.h>
+
+#include "dumpstack.h"
+
+int panic_on_unrecovered_nmi;
+unsigned int code_bytes = 64;
+int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
+static int die_counter;
+
+void printk_address(unsigned long address, int reliable)
+{
+ printk(" [<%p>] %s%pS\n", (void *) address,
+ reliable ? "" : "? ", (void *) address);
+}
+
+/*
+ * x86-64 can have up to three kernel stacks:
+ * process stack
+ * interrupt stack
+ * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
+ */
+
+static inline int valid_stack_ptr(struct thread_info *tinfo,
+ void *p, unsigned int size, void *end)
+{
+ void *t = tinfo;
+ if (end) {
+ if (p < end && p >= (end-THREAD_SIZE))
+ return 1;
+ else
+ return 0;
+ }
+ return p > t && p < t + THREAD_SIZE - size;
+}
+
+unsigned long
+print_context_stack(struct thread_info *tinfo,
+ unsigned long *stack, unsigned long bp,
+ const struct stacktrace_ops *ops, void *data,
+ unsigned long *end)
+{
+ struct stack_frame *frame = (struct stack_frame *)bp;
+
+ while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) {
+ unsigned long addr;
+
+ addr = *stack;
+ if (__kernel_text_address(addr)) {
+ if ((unsigned long) stack == bp + sizeof(long)) {
+ ops->address(data, addr, 1);
+ frame = frame->next_frame;
+ bp = (unsigned long) frame;
+ } else {
+ ops->address(data, addr, bp == 0);
+ }
+ }
+ stack++;
+ }
+ return bp;
+}
+
+
+static void
+print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
+{
+ printk(data);
+ print_symbol(msg, symbol);
+ printk("\n");
+}
+
+static void print_trace_warning(void *data, char *msg)
+{
+ printk("%s%s\n", (char *)data, msg);
+}
+
+static int print_trace_stack(void *data, char *name)
+{
+ printk("%s <%s> ", (char *)data, name);
+ return 0;
+}
+
+/*
+ * Print one address/symbol entries per line.
+ */
+static void print_trace_address(void *data, unsigned long addr, int reliable)
+{
+ touch_nmi_watchdog();
+ printk(data);
+ printk_address(addr, reliable);
+}
+
+static const struct stacktrace_ops print_trace_ops = {
+ .warning = print_trace_warning,
+ .warning_symbol = print_trace_warning_symbol,
+ .stack = print_trace_stack,
+ .address = print_trace_address,
+};
+
+void
+show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+ unsigned long *stack, unsigned long bp, char *log_lvl)
+{
+ printk("%sCall Trace:\n", log_lvl);
+ dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
+}
+
+void show_trace(struct task_struct *task, struct pt_regs *regs,
+ unsigned long *stack, unsigned long bp)
+{
+ show_trace_log_lvl(task, regs, stack, bp, "");
+}
+
+void show_stack(struct task_struct *task, unsigned long *sp)
+{
+ show_stack_log_lvl(task, NULL, sp, 0, "");
+}
+
+/*
+ * The architecture-independent dump_stack generator
+ */
+void dump_stack(void)
+{
+ unsigned long bp = 0;
+ unsigned long stack;
+
+#ifdef CONFIG_FRAME_POINTER
+ if (!bp)
+ get_bp(bp);
+#endif
+
+ printk("Pid: %d, comm: %.20s %s %s %.*s\n",
+ current->pid, current->comm, print_tainted(),
+ init_utsname()->release,
+ (int)strcspn(init_utsname()->version, " "),
+ init_utsname()->version);
+ show_trace(NULL, NULL, &stack, bp);
+}
+EXPORT_SYMBOL(dump_stack);
+
+static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
+static int die_owner = -1;
+static unsigned int die_nest_count;
+
+unsigned __kprobes long oops_begin(void)
+{
+ int cpu;
+ unsigned long flags;
+
+ oops_enter();
+
+ /* racy, but better than risking deadlock. */
+ raw_local_irq_save(flags);
+ cpu = smp_processor_id();
+ if (!__raw_spin_trylock(&die_lock)) {
+ if (cpu == die_owner)
+ /* nested oops. should stop eventually */;
+ else
+ __raw_spin_lock(&die_lock);
+ }
+ die_nest_count++;
+ die_owner = cpu;
+ console_verbose();
+ bust_spinlocks(1);
+ return flags;
+}
+
+void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
+{
+ if (regs && kexec_should_crash(current))
+ crash_kexec(regs);
+
+ bust_spinlocks(0);
+ die_owner = -1;
+ add_taint(TAINT_DIE);
+ die_nest_count--;
+ if (!die_nest_count)
+ /* Nest count reaches zero, release the lock. */
+ __raw_spin_unlock(&die_lock);
+ raw_local_irq_restore(flags);
+ oops_exit();
+
+ if (!signr)
+ return;
+ if (in_interrupt())
+ panic("Fatal exception in interrupt");
+ if (panic_on_oops)
+ panic("Fatal exception");
+ do_exit(signr);
+}
+
+int __kprobes __die(const char *str, struct pt_regs *regs, long err)
+{
+#ifdef CONFIG_X86_32
+ unsigned short ss;
+ unsigned long sp;
+#endif
+ printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
+#ifdef CONFIG_PREEMPT
+ printk("PREEMPT ");
+#endif
+#ifdef CONFIG_SMP
+ printk("SMP ");
+#endif
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ printk("DEBUG_PAGEALLOC");
+#endif
+ printk("\n");
+ sysfs_printk_last_file();
+ if (notify_die(DIE_OOPS, str, regs, err,
+ current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
+ return 1;
+
+ show_registers(regs);
+#ifdef CONFIG_X86_32
+ sp = (unsigned long) (&regs->sp);
+ savesegment(ss, ss);
+ if (user_mode(regs)) {
+ sp = regs->sp;
+ ss = regs->ss & 0xffff;
+ }
+ printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
+ print_symbol("%s", regs->ip);
+ printk(" SS:ESP %04x:%08lx\n", ss, sp);
+#else
+ /* Executive summary in case the oops scrolled away */
+ printk(KERN_ALERT "RIP ");
+ printk_address(regs->ip, 1);
+ printk(" RSP <%016lx>\n", regs->sp);
+#endif
+ return 0;
+}
+
+/*
+ * This is gone through when something in the kernel has done something bad
+ * and is about to be terminated:
+ */
+void die(const char *str, struct pt_regs *regs, long err)
+{
+ unsigned long flags = oops_begin();
+ int sig = SIGSEGV;
+
+ if (!user_mode_vm(regs))
+ report_bug(regs->ip, regs);
+
+ if (__die(str, regs, err))
+ sig = 0;
+ oops_end(flags, regs, sig);
+}
+
+void notrace __kprobes
+die_nmi(char *str, struct pt_regs *regs, int do_panic)
+{
+ unsigned long flags;
+
+ if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
+ return;
+
+ /*
+ * We are in trouble anyway, lets at least try
+ * to get a message out.
+ */
+ flags = oops_begin();
+ printk(KERN_EMERG "%s", str);
+ printk(" on CPU%d, ip %08lx, registers:\n",
+ smp_processor_id(), regs->ip);
+ show_registers(regs);
+ oops_end(flags, regs, 0);
+ if (do_panic || panic_on_oops)
+ panic("Non maskable interrupt");
+ nmi_exit();
+ local_irq_enable();
+ do_exit(SIGBUS);
+}
+
+static int __init oops_setup(char *s)
+{
+ if (!s)
+ return -EINVAL;
+ if (!strcmp(s, "panic"))
+ panic_on_oops = 1;
+ return 0;
+}
+early_param("oops", oops_setup);
+
+static int __init kstack_setup(char *s)
+{
+ if (!s)
+ return -EINVAL;
+ kstack_depth_to_print = simple_strtoul(s, NULL, 0);
+ return 0;
+}
+early_param("kstack", kstack_setup);
+
+static int __init code_bytes_setup(char *s)
+{
+ code_bytes = simple_strtoul(s, NULL, 0);
+ if (code_bytes > 8192)
+ code_bytes = 8192;
+
+ return 1;
+}
+__setup("code_bytes=", code_bytes_setup);
diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h
new file mode 100644
index 000000000000..3119a801c32b
--- /dev/null
+++ b/arch/x86/kernel/dumpstack.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
+ */
+
+#ifndef DUMPSTACK_H
+#define DUMPSTACK_H
+
+#ifdef CONFIG_X86_32
+#define STACKSLOTS_PER_LINE 8
+#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
+#else
+#define STACKSLOTS_PER_LINE 4
+#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
+#endif
+
+extern unsigned long
+print_context_stack(struct thread_info *tinfo,
+ unsigned long *stack, unsigned long bp,
+ const struct stacktrace_ops *ops, void *data,
+ unsigned long *end);
+
+extern void
+show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+ unsigned long *stack, unsigned long bp, char *log_lvl);
+
+extern void
+show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
+ unsigned long *sp, unsigned long bp, char *log_lvl);
+
+extern unsigned int code_bytes;
+extern int kstack_depth_to_print;
+
+/* The form of the top of the frame on the stack */
+struct stack_frame {
+ struct stack_frame *next_frame;
+ unsigned long return_address;
+};
+#endif
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index b3614752197b..7b031b106ec8 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -17,64 +17,7 @@
#include <asm/stacktrace.h>
-#define STACKSLOTS_PER_LINE 8
-#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
-
-int panic_on_unrecovered_nmi;
-int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
-static unsigned int code_bytes = 64;
-static int die_counter;
-
-void printk_address(unsigned long address, int reliable)
-{
- printk(" [<%p>] %s%pS\n", (void *) address,
- reliable ? "" : "? ", (void *) address);
-}
-
-static inline int valid_stack_ptr(struct thread_info *tinfo,
- void *p, unsigned int size, void *end)
-{
- void *t = tinfo;
- if (end) {
- if (p < end && p >= (end-THREAD_SIZE))
- return 1;
- else
- return 0;
- }
- return p > t && p < t + THREAD_SIZE - size;
-}
-
-/* The form of the top of the frame on the stack */
-struct stack_frame {
- struct stack_frame *next_frame;
- unsigned long return_address;
-};
-
-static inline unsigned long
-print_context_stack(struct thread_info *tinfo,
- unsigned long *stack, unsigned long bp,
- const struct stacktrace_ops *ops, void *data,
- unsigned long *end)
-{
- struct stack_frame *frame = (struct stack_frame *)bp;
-
- while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) {
- unsigned long addr;
-
- addr = *stack;
- if (__kernel_text_address(addr)) {
- if ((unsigned long) stack == bp + sizeof(long)) {
- ops->address(data, addr, 1);
- frame = frame->next_frame;
- bp = (unsigned long) frame;
- } else {
- ops->address(data, addr, bp == 0);
- }
- }
- stack++;
- }
- return bp;
-}
+#include "dumpstack.h"
void dump_trace(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack, unsigned long bp,
@@ -119,57 +62,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
}
EXPORT_SYMBOL(dump_trace);
-static void
-print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
-{
- printk(data);
- print_symbol(msg, symbol);
- printk("\n");
-}
-
-static void print_trace_warning(void *data, char *msg)
-{
- printk("%s%s\n", (char *)data, msg);
-}
-
-static int print_trace_stack(void *data, char *name)
-{
- printk("%s <%s> ", (char *)data, name);
- return 0;
-}
-
-/*
- * Print one address/symbol entries per line.
- */
-static void print_trace_address(void *data, unsigned long addr, int reliable)
-{
- touch_nmi_watchdog();
- printk(data);
- printk_address(addr, reliable);
-}
-
-static const struct stacktrace_ops print_trace_ops = {
- .warning = print_trace_warning,
- .warning_symbol = print_trace_warning_symbol,
- .stack = print_trace_stack,
- .address = print_trace_address,
-};
-
-static void
-show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
- unsigned long *stack, unsigned long bp, char *log_lvl)
-{
- printk("%sCall Trace:\n", log_lvl);
- dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
-}
-
-void show_trace(struct task_struct *task, struct pt_regs *regs,
- unsigned long *stack, unsigned long bp)
-{
- show_trace_log_lvl(task, regs, stack, bp, "");
-}
-
-static void
+void
show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *sp, unsigned long bp, char *log_lvl)
{
@@ -196,33 +89,6 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
show_trace_log_lvl(task, regs, sp, bp, log_lvl);
}
-void show_stack(struct task_struct *task, unsigned long *sp)
-{
- show_stack_log_lvl(task, NULL, sp, 0, "");
-}
-
-/*
- * The architecture-independent dump_stack generator
- */
-void dump_stack(void)
-{
- unsigned long bp = 0;
- unsigned long stack;
-
-#ifdef CONFIG_FRAME_POINTER
- if (!bp)
- get_bp(bp);
-#endif
-
- printk("Pid: %d, comm: %.20s %s %s %.*s\n",
- current->pid, current->comm, print_tainted(),
- init_utsname()->release,
- (int)strcspn(init_utsname()->version, " "),
- init_utsname()->version);
- show_trace(NULL, NULL, &stack, bp);
-}
-
-EXPORT_SYMBOL(dump_stack);
void show_registers(struct pt_regs *regs)
{
@@ -283,167 +149,3 @@ int is_valid_bugaddr(unsigned long ip)
return ud2 == 0x0b0f;
}
-static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
-static int die_owner = -1;
-static unsigned int die_nest_count;
-
-unsigned __kprobes long oops_begin(void)
-{
- unsigned long flags;
-
- oops_enter();
-
- if (die_owner != raw_smp_processor_id()) {
- console_verbose();
- raw_local_irq_save(flags);
- __raw_spin_lock(&die_lock);
- die_owner = smp_processor_id();
- die_nest_count = 0;
- bust_spinlocks(1);
- } else {
- raw_local_irq_save(flags);
- }
- die_nest_count++;
- return flags;
-}
-
-void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
-{
- bust_spinlocks(0);
- die_owner = -1;
- add_taint(TAINT_DIE);
- __raw_spin_unlock(&die_lock);
- raw_local_irq_restore(flags);
-
- if (!regs)
- return;
-
- if (kexec_should_crash(current))
- crash_kexec(regs);
- if (in_interrupt())
- panic("Fatal exception in interrupt");
- if (panic_on_oops)
- panic("Fatal exception");
- oops_exit();
- do_exit(signr);
-}
-
-int __kprobes __die(const char *str, struct pt_regs *regs, long err)
-{
- unsigned short ss;
- unsigned long sp;
-
- printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
-#ifdef CONFIG_PREEMPT
- printk("PREEMPT ");
-#endif
-#ifdef CONFIG_SMP
- printk("SMP ");
-#endif
-#ifdef CONFIG_DEBUG_PAGEALLOC
- printk("DEBUG_PAGEALLOC");
-#endif
- printk("\n");
- sysfs_printk_last_file();
- if (notify_die(DIE_OOPS, str, regs, err,
- current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
- return 1;
-
- show_registers(regs);
- /* Executive summary in case the oops scrolled away */
- sp = (unsigned long) (&regs->sp);
- savesegment(ss, ss);
- if (user_mode(regs)) {
- sp = regs->sp;
- ss = regs->ss & 0xffff;
- }
- printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
- print_symbol("%s", regs->ip);
- printk(" SS:ESP %04x:%08lx\n", ss, sp);
- return 0;
-}
-
-/*
- * This is gone through when something in the kernel has done something bad
- * and is about to be terminated:
- */
-void die(const char *str, struct pt_regs *regs, long err)
-{
- unsigned long flags = oops_begin();
-
- if (die_nest_count < 3) {
- report_bug(regs->ip, regs);
-
- if (__die(str, regs, err))
- regs = NULL;
- } else {
- printk(KERN_EMERG "Recursive die() failure, output suppressed\n");
- }
-
- oops_end(flags, regs, SIGSEGV);
-}
-
-static DEFINE_SPINLOCK(nmi_print_lock);
-
-void notrace __kprobes
-die_nmi(char *str, struct pt_regs *regs, int do_panic)
-{
- if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
- return;
-
- spin_lock(&nmi_print_lock);
- /*
- * We are in trouble anyway, lets at least try
- * to get a message out:
- */
- bust_spinlocks(1);
- printk(KERN_EMERG "%s", str);
- printk(" on CPU%d, ip %08lx, registers:\n",
- smp_processor_id(), regs->ip);
- show_registers(regs);
- if (do_panic)
- panic("Non maskable interrupt");
- console_silent();
- spin_unlock(&nmi_print_lock);
-
- /*
- * If we are in kernel we are probably nested up pretty bad
- * and might aswell get out now while we still can:
- */
- if (!user_mode_vm(regs)) {
- current->thread.trap_no = 2;
- crash_kexec(regs);
- }
-
- bust_spinlocks(0);
- do_exit(SIGSEGV);
-}
-
-static int __init oops_setup(char *s)
-{
- if (!s)
- return -EINVAL;
- if (!strcmp(s, "panic"))
- panic_on_oops = 1;
- return 0;
-}
-early_param("oops", oops_setup);
-
-static int __init kstack_setup(char *s)
-{
- if (!s)
- return -EINVAL;
- kstack_depth_to_print = simple_strtoul(s, NULL, 0);
- return 0;
-}
-early_param("kstack", kstack_setup);
-
-static int __init code_bytes_setup(char *s)
-{
- code_bytes = simple_strtoul(s, NULL, 0);
- if (code_bytes > 8192)
- code_bytes = 8192;
-
- return 1;
-}
-__setup("code_bytes=", code_bytes_setup);
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 96a5db7da8a7..33ff10287a5d 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -17,19 +17,7 @@
#include <asm/stacktrace.h>
-#define STACKSLOTS_PER_LINE 4
-#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
-
-int panic_on_unrecovered_nmi;
-int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
-static unsigned int code_bytes = 64;
-static int die_counter;
-
-void printk_address(unsigned long address, int reliable)
-{
- printk(" [<%p>] %s%pS\n", (void *) address,
- reliable ? "" : "? ", (void *) address);
-}
+#include "dumpstack.h"
static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
unsigned *usedp, char **idp)
@@ -113,51 +101,6 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
* severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
*/
-static inline int valid_stack_ptr(struct thread_info *tinfo,
- void *p, unsigned int size, void *end)
-{
- void *t = tinfo;
- if (end) {
- if (p < end && p >= (end-THREAD_SIZE))
- return 1;
- else
- return 0;
- }
- return p > t && p < t + THREAD_SIZE - size;
-}
-
-/* The form of the top of the frame on the stack */
-struct stack_frame {
- struct stack_frame *next_frame;
- unsigned long return_address;
-};
-
-static inline unsigned long
-print_context_stack(struct thread_info *tinfo,
- unsigned long *stack, unsigned long bp,
- const struct stacktrace_ops *ops, void *data,
- unsigned long *end)
-{
- struct stack_frame *frame = (struct stack_frame *)bp;
-
- while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) {
- unsigned long addr;
-
- addr = *stack;
- if (__kernel_text_address(addr)) {
- if ((unsigned long) stack == bp + sizeof(long)) {
- ops->address(data, addr, 1);
- frame = frame->next_frame;
- bp = (unsigned long) frame;
- } else {
- ops->address(data, addr, bp == 0);
- }
- }
- stack++;
- }
- return bp;
-}
-
void dump_trace(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack, unsigned long bp,
const struct stacktrace_ops *ops, void *data)
@@ -248,57 +191,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
}
EXPORT_SYMBOL(dump_trace);
-static void
-print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
-{
- printk(data);
- print_symbol(msg, symbol);
- printk("\n");
-}
-
-static void print_trace_warning(void *data, char *msg)
-{
- printk("%s%s\n", (char *)data, msg);
-}
-
-static int print_trace_stack(void *data, char *name)
-{
- printk("%s <%s> ", (char *)data, name);
- return 0;
-}
-
-/*
- * Print one address/symbol entries per line.
- */
-static void print_trace_address(void *data, unsigned long addr, int reliable)
-{
- touch_nmi_watchdog();
- printk(data);
- printk_address(addr, reliable);
-}
-
-static const struct stacktrace_ops print_trace_ops = {
- .warning = print_trace_warning,
- .warning_symbol = print_trace_warning_symbol,
- .stack = print_trace_stack,
- .address = print_trace_address,
-};
-
-static void
-show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
- unsigned long *stack, unsigned long bp, char *log_lvl)
-{
- printk("%sCall Trace:\n", log_lvl);
- dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
-}
-
-void show_trace(struct task_struct *task, struct pt_regs *regs,
- unsigned long *stack, unsigned long bp)
-{
- show_trace_log_lvl(task, regs, stack, bp, "");
-}
-
-static void
+void
show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *sp, unsigned long bp, char *log_lvl)
{
@@ -342,33 +235,6 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
show_trace_log_lvl(task, regs, sp, bp, log_lvl);
}
-void show_stack(struct task_struct *task, unsigned long *sp)
-{
- show_stack_log_lvl(task, NULL, sp, 0, "");
-}
-
-/*
- * The architecture-independent dump_stack generator
- */
-void dump_stack(void)
-{
- unsigned long bp = 0;
- unsigned long stack;
-
-#ifdef CONFIG_FRAME_POINTER
- if (!bp)
- get_bp(bp);
-#endif
-
- printk("Pid: %d, comm: %.20s %s %s %.*s\n",
- current->pid, current->comm, print_tainted(),
- init_utsname()->release,
- (int)strcspn(init_utsname()->version, " "),
- init_utsname()->version);
- show_trace(NULL, NULL, &stack, bp);
-}
-EXPORT_SYMBOL(dump_stack);
-
void show_registers(struct pt_regs *regs)
{
int i;
@@ -429,147 +295,3 @@ int is_valid_bugaddr(unsigned long ip)
return ud2 == 0x0b0f;
}
-static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
-static int die_owner = -1;
-static unsigned int die_nest_count;
-
-unsigned __kprobes long oops_begin(void)
-{
- int cpu;
- unsigned long flags;
-
- oops_enter();
-
- /* racy, but better than risking deadlock. */
- raw_local_irq_save(flags);
- cpu = smp_processor_id();
- if (!__raw_spin_trylock(&die_lock)) {
- if (cpu == die_owner)
- /* nested oops. should stop eventually */;
- else
- __raw_spin_lock(&die_lock);
- }
- die_nest_count++;
- die_owner = cpu;
- console_verbose();
- bust_spinlocks(1);
- return flags;
-}
-
-void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
-{
- die_owner = -1;
- bust_spinlocks(0);
- die_nest_count--;
- if (!die_nest_count)
- /* Nest count reaches zero, release the lock. */
- __raw_spin_unlock(&die_lock);
- raw_local_irq_restore(flags);
- if (!regs) {
- oops_exit();
- return;
- }
- if (in_interrupt())
- panic("Fatal exception in interrupt");
- if (panic_on_oops)
- panic("Fatal exception");
- oops_exit();
- do_exit(signr);
-}
-
-int __kprobes __die(const char *str, struct pt_regs *regs, long err)
-{
- printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
-#ifdef CONFIG_PREEMPT
- printk("PREEMPT ");
-#endif
-#ifdef CONFIG_SMP
- printk("SMP ");
-#endif
-#ifdef CONFIG_DEBUG_PAGEALLOC
- printk("DEBUG_PAGEALLOC");
-#endif
- printk("\n");
- sysfs_printk_last_file();
- if (notify_die(DIE_OOPS, str, regs, err,
- current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
- return 1;
-
- show_registers(regs);
- add_taint(TAINT_DIE);
- /* Executive summary in case the oops scrolled away */
- printk(KERN_ALERT "RIP ");
- printk_address(regs->ip, 1);
- printk(" RSP <%016lx>\n", regs->sp);
- if (kexec_should_crash(current))
- crash_kexec(regs);
- return 0;
-}
-
-void die(const char *str, struct pt_regs *regs, long err)
-{
- unsigned long flags = oops_begin();
-
- if (!user_mode(regs))
- report_bug(regs->ip, regs);
-
- if (__die(str, regs, err))
- regs = NULL;
- oops_end(flags, regs, SIGSEGV);
-}
-
-notrace __kprobes void
-die_nmi(char *str, struct pt_regs *regs, int do_panic)
-{
- unsigned long flags;
-
- if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
- return;
-
- flags = oops_begin();
- /*
- * We are in trouble anyway, lets at least try
- * to get a message out.
- */
- printk(KERN_EMERG "%s", str);
- printk(" on CPU%d, ip %08lx, registers:\n",
- smp_processor_id(), regs->ip);
- show_registers(regs);
- if (kexec_should_crash(current))
- crash_kexec(regs);
- if (do_panic || panic_on_oops)
- panic("Non maskable interrupt");
- oops_end(flags, NULL, SIGBUS);
- nmi_exit();
- local_irq_enable();
- do_exit(SIGBUS);
-}
-
-static int __init oops_setup(char *s)
-{
- if (!s)
- return -EINVAL;
- if (!strcmp(s, "panic"))
- panic_on_oops = 1;
- return 0;
-}
-early_param("oops", oops_setup);
-
-static int __init kstack_setup(char *s)
-{
- if (!s)
- return -EINVAL;
- kstack_depth_to_print = simple_strtoul(s, NULL, 0);
- return 0;
-}
-early_param("kstack", kstack_setup);
-
-static int __init code_bytes_setup(char *s)
-{
- code_bytes = simple_strtoul(s, NULL, 0);
- if (code_bytes > 8192)
- code_bytes = 8192;
-
- return 1;
-}
-__setup("code_bytes=", code_bytes_setup);
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 28b597ef9ca1..7a934c0dfea6 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -619,28 +619,37 @@ END(syscall_badsys)
27:;
/*
- * Build the entry stubs and pointer table with
- * some assembler magic.
+ * Build the entry stubs and pointer table with some assembler magic.
+ * We pack 7 stubs into a single 32-byte chunk, which will fit in a
+ * single cache line on all modern x86 implementations.
*/
-.section .rodata,"a"
+.section .init.rodata,"a"
ENTRY(interrupt)
.text
-
+ .p2align 5
+ .p2align CONFIG_X86_L1_CACHE_SHIFT
ENTRY(irq_entries_start)
RING0_INT_FRAME
-vector=0
-.rept NR_VECTORS
- ALIGN
- .if vector
+vector=FIRST_EXTERNAL_VECTOR
+.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7
+ .balign 32
+ .rept 7
+ .if vector < NR_VECTORS
+ .if vector <> FIRST_EXTERNAL_VECTOR
CFI_ADJUST_CFA_OFFSET -4
- .endif
-1: pushl $~(vector)
+ .endif
+1: pushl $(~vector+0x80) /* Note: always in signed byte range */
CFI_ADJUST_CFA_OFFSET 4
- jmp common_interrupt
- .previous
+ .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
+ jmp 2f
+ .endif
+ .previous
.long 1b
- .text
+ .text
vector=vector+1
+ .endif
+ .endr
+2: jmp common_interrupt
.endr
END(irq_entries_start)
@@ -652,8 +661,9 @@ END(interrupt)
* the CPU automatically disables interrupts when executing an IRQ vector,
* so IRQ-flags tracing has to follow that:
*/
- ALIGN
+ .p2align CONFIG_X86_L1_CACHE_SHIFT
common_interrupt:
+ addl $-0x80,(%esp) /* Adjust vector into the [-256,-1] range */
SAVE_ALL
TRACE_IRQS_OFF
movl %esp,%eax
@@ -1051,6 +1061,7 @@ ENTRY(kernel_thread_helper)
push %eax
CFI_ADJUST_CFA_OFFSET 4
call do_exit
+ ud2 # padding for call trace
CFI_ENDPROC
ENDPROC(kernel_thread_helper)
@@ -1157,6 +1168,9 @@ ENTRY(mcount)
END(mcount)
ENTRY(ftrace_caller)
+ cmpl $0, function_trace_stop
+ jne ftrace_stub
+
pushl %eax
pushl %ecx
pushl %edx
@@ -1180,8 +1194,15 @@ END(ftrace_caller)
#else /* ! CONFIG_DYNAMIC_FTRACE */
ENTRY(mcount)
+ cmpl $0, function_trace_stop
+ jne ftrace_stub
+
cmpl $ftrace_stub, ftrace_trace_function
jnz trace
+#ifdef CONFIG_FUNCTION_RET_TRACER
+ cmpl $ftrace_stub, ftrace_function_return
+ jnz ftrace_return_caller
+#endif
.globl ftrace_stub
ftrace_stub:
ret
@@ -1200,12 +1221,42 @@ trace:
popl %edx
popl %ecx
popl %eax
-
jmp ftrace_stub
END(mcount)
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FUNCTION_TRACER */
+#ifdef CONFIG_FUNCTION_RET_TRACER
+ENTRY(ftrace_return_caller)
+ cmpl $0, function_trace_stop
+ jne ftrace_stub
+
+ pushl %eax
+ pushl %ecx
+ pushl %edx
+ movl 0xc(%esp), %edx
+ lea 0x4(%ebp), %eax
+ call prepare_ftrace_return
+ popl %edx
+ popl %ecx
+ popl %eax
+ ret
+END(ftrace_return_caller)
+
+.globl return_to_handler
+return_to_handler:
+ pushl $0
+ pushl %eax
+ pushl %ecx
+ pushl %edx
+ call ftrace_return_to_handler
+ movl %eax, 0xc(%esp)
+ popl %edx
+ popl %ecx
+ popl %eax
+ ret
+#endif
+
.section .rodata,"a"
#include "syscall_table_32.S"
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index b86f332c96a6..64688dbc24cc 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -11,15 +11,15 @@
*
* NOTE: This code handles signal-recognition, which happens every time
* after an interrupt and after each system call.
- *
- * Normal syscalls and interrupts don't save a full stack frame, this is
+ *
+ * Normal syscalls and interrupts don't save a full stack frame, this is
* only done for syscall tracing, signals or fork/exec et.al.
- *
- * A note on terminology:
- * - top of stack: Architecture defined interrupt frame from SS to RIP
- * at the top of the kernel process stack.
+ *
+ * A note on terminology:
+ * - top of stack: Architecture defined interrupt frame from SS to RIP
+ * at the top of the kernel process stack.
* - partial stack frame: partially saved registers upto R11.
- * - full stack frame: Like partial stack frame, but all register saved.
+ * - full stack frame: Like partial stack frame, but all register saved.
*
* Some macro usage:
* - CFI macros are used to generate dwarf2 unwind information for better
@@ -60,7 +60,6 @@
#define __AUDIT_ARCH_LE 0x40000000
.code64
-
#ifdef CONFIG_FUNCTION_TRACER
#ifdef CONFIG_DYNAMIC_FTRACE
ENTRY(mcount)
@@ -68,6 +67,8 @@ ENTRY(mcount)
END(mcount)
ENTRY(ftrace_caller)
+ cmpl $0, function_trace_stop
+ jne ftrace_stub
/* taken from glibc */
subq $0x38, %rsp
@@ -103,6 +104,9 @@ END(ftrace_caller)
#else /* ! CONFIG_DYNAMIC_FTRACE */
ENTRY(mcount)
+ cmpl $0, function_trace_stop
+ jne ftrace_stub
+
cmpq $ftrace_stub, ftrace_trace_function
jnz trace
.globl ftrace_stub
@@ -142,7 +146,7 @@ END(mcount)
#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
-#endif
+#endif
#ifdef CONFIG_PARAVIRT
ENTRY(native_usergs_sysret64)
@@ -161,29 +165,29 @@ ENTRY(native_usergs_sysret64)
.endm
/*
- * C code is not supposed to know about undefined top of stack. Every time
- * a C function with an pt_regs argument is called from the SYSCALL based
+ * C code is not supposed to know about undefined top of stack. Every time
+ * a C function with an pt_regs argument is called from the SYSCALL based
* fast path FIXUP_TOP_OF_STACK is needed.
* RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
* manipulation.
- */
-
- /* %rsp:at FRAMEEND */
- .macro FIXUP_TOP_OF_STACK tmp
- movq %gs:pda_oldrsp,\tmp
- movq \tmp,RSP(%rsp)
- movq $__USER_DS,SS(%rsp)
- movq $__USER_CS,CS(%rsp)
- movq $-1,RCX(%rsp)
- movq R11(%rsp),\tmp /* get eflags */
- movq \tmp,EFLAGS(%rsp)
+ */
+
+ /* %rsp:at FRAMEEND */
+ .macro FIXUP_TOP_OF_STACK tmp offset=0
+ movq %gs:pda_oldrsp,\tmp
+ movq \tmp,RSP+\offset(%rsp)
+ movq $__USER_DS,SS+\offset(%rsp)
+ movq $__USER_CS,CS+\offset(%rsp)
+ movq $-1,RCX+\offset(%rsp)
+ movq R11+\offset(%rsp),\tmp /* get eflags */
+ movq \tmp,EFLAGS+\offset(%rsp)
.endm
- .macro RESTORE_TOP_OF_STACK tmp,offset=0
- movq RSP-\offset(%rsp),\tmp
- movq \tmp,%gs:pda_oldrsp
- movq EFLAGS-\offset(%rsp),\tmp
- movq \tmp,R11-\offset(%rsp)
+ .macro RESTORE_TOP_OF_STACK tmp offset=0
+ movq RSP+\offset(%rsp),\tmp
+ movq \tmp,%gs:pda_oldrsp
+ movq EFLAGS+\offset(%rsp),\tmp
+ movq \tmp,R11+\offset(%rsp)
.endm
.macro FAKE_STACK_FRAME child_rip
@@ -213,41 +217,160 @@ ENTRY(native_usergs_sysret64)
CFI_ADJUST_CFA_OFFSET -(6*8)
.endm
- .macro CFI_DEFAULT_STACK start=1
+/*
+ * initial frame state for interrupts (and exceptions without error code)
+ */
+ .macro EMPTY_FRAME start=1 offset=0
.if \start
- CFI_STARTPROC simple
+ CFI_STARTPROC simple
CFI_SIGNAL_FRAME
- CFI_DEF_CFA rsp,SS+8
+ CFI_DEF_CFA rsp,8+\offset
.else
- CFI_DEF_CFA_OFFSET SS+8
+ CFI_DEF_CFA_OFFSET 8+\offset
.endif
- CFI_REL_OFFSET r15,R15
- CFI_REL_OFFSET r14,R14
- CFI_REL_OFFSET r13,R13
- CFI_REL_OFFSET r12,R12
- CFI_REL_OFFSET rbp,RBP
- CFI_REL_OFFSET rbx,RBX
- CFI_REL_OFFSET r11,R11
- CFI_REL_OFFSET r10,R10
- CFI_REL_OFFSET r9,R9
- CFI_REL_OFFSET r8,R8
- CFI_REL_OFFSET rax,RAX
- CFI_REL_OFFSET rcx,RCX
- CFI_REL_OFFSET rdx,RDX
- CFI_REL_OFFSET rsi,RSI
- CFI_REL_OFFSET rdi,RDI
- CFI_REL_OFFSET rip,RIP
- /*CFI_REL_OFFSET cs,CS*/
- /*CFI_REL_OFFSET rflags,EFLAGS*/
- CFI_REL_OFFSET rsp,RSP
- /*CFI_REL_OFFSET ss,SS*/
.endm
+
+/*
+ * initial frame state for interrupts (and exceptions without error code)
+ */
+ .macro INTR_FRAME start=1 offset=0
+ EMPTY_FRAME \start, SS+8+\offset-RIP
+ /*CFI_REL_OFFSET ss, SS+\offset-RIP*/
+ CFI_REL_OFFSET rsp, RSP+\offset-RIP
+ /*CFI_REL_OFFSET rflags, EFLAGS+\offset-RIP*/
+ /*CFI_REL_OFFSET cs, CS+\offset-RIP*/
+ CFI_REL_OFFSET rip, RIP+\offset-RIP
+ .endm
+
+/*
+ * initial frame state for exceptions with error code (and interrupts
+ * with vector already pushed)
+ */
+ .macro XCPT_FRAME start=1 offset=0
+ INTR_FRAME \start, RIP+\offset-ORIG_RAX
+ /*CFI_REL_OFFSET orig_rax, ORIG_RAX-ORIG_RAX*/
+ .endm
+
+/*
+ * frame that enables calling into C.
+ */
+ .macro PARTIAL_FRAME start=1 offset=0
+ XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET
+ CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET
+ CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET
+ CFI_REL_OFFSET rdx, RDX+\offset-ARGOFFSET
+ CFI_REL_OFFSET rcx, RCX+\offset-ARGOFFSET
+ CFI_REL_OFFSET rax, RAX+\offset-ARGOFFSET
+ CFI_REL_OFFSET r8, R8+\offset-ARGOFFSET
+ CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET
+ CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET
+ CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET
+ .endm
+
+/*
+ * frame that enables passing a complete pt_regs to a C function.
+ */
+ .macro DEFAULT_FRAME start=1 offset=0
+ PARTIAL_FRAME \start, R11+\offset-R15
+ CFI_REL_OFFSET rbx, RBX+\offset
+ CFI_REL_OFFSET rbp, RBP+\offset
+ CFI_REL_OFFSET r12, R12+\offset
+ CFI_REL_OFFSET r13, R13+\offset
+ CFI_REL_OFFSET r14, R14+\offset
+ CFI_REL_OFFSET r15, R15+\offset
+ .endm
+
+/* save partial stack frame */
+ENTRY(save_args)
+ XCPT_FRAME
+ cld
+ movq_cfi rdi, RDI+16-ARGOFFSET
+ movq_cfi rsi, RSI+16-ARGOFFSET
+ movq_cfi rdx, RDX+16-ARGOFFSET
+ movq_cfi rcx, RCX+16-ARGOFFSET
+ movq_cfi rax, RAX+16-ARGOFFSET
+ movq_cfi r8, R8+16-ARGOFFSET
+ movq_cfi r9, R9+16-ARGOFFSET
+ movq_cfi r10, R10+16-ARGOFFSET
+ movq_cfi r11, R11+16-ARGOFFSET
+
+ leaq -ARGOFFSET+16(%rsp),%rdi /* arg1 for handler */
+ movq_cfi rbp, 8 /* push %rbp */
+ leaq 8(%rsp), %rbp /* mov %rsp, %ebp */
+ testl $3, CS(%rdi)
+ je 1f
+ SWAPGS
+ /*
+ * irqcount is used to check if a CPU is already on an interrupt stack
+ * or not. While this is essentially redundant with preempt_count it is
+ * a little cheaper to use a separate counter in the PDA (short of
+ * moving irq_enter into assembly, which would be too much work)
+ */
+1: incl %gs:pda_irqcount
+ jne 2f
+ popq_cfi %rax /* move return address... */
+ mov %gs:pda_irqstackptr,%rsp
+ EMPTY_FRAME 0
+ pushq_cfi %rax /* ... to the new stack */
+ /*
+ * We entered an interrupt context - irqs are off:
+ */
+2: TRACE_IRQS_OFF
+ ret
+ CFI_ENDPROC
+END(save_args)
+
+ENTRY(save_rest)
+ PARTIAL_FRAME 1 REST_SKIP+8
+ movq 5*8+16(%rsp), %r11 /* save return address */
+ movq_cfi rbx, RBX+16
+ movq_cfi rbp, RBP+16
+ movq_cfi r12, R12+16
+ movq_cfi r13, R13+16
+ movq_cfi r14, R14+16
+ movq_cfi r15, R15+16
+ movq %r11, 8(%rsp) /* return address */
+ FIXUP_TOP_OF_STACK %r11, 16
+ ret
+ CFI_ENDPROC
+END(save_rest)
+
+/* save complete stack frame */
+ENTRY(save_paranoid)
+ XCPT_FRAME 1 RDI+8
+ cld
+ movq_cfi rdi, RDI+8
+ movq_cfi rsi, RSI+8
+ movq_cfi rdx, RDX+8
+ movq_cfi rcx, RCX+8
+ movq_cfi rax, RAX+8
+ movq_cfi r8, R8+8
+ movq_cfi r9, R9+8
+ movq_cfi r10, R10+8
+ movq_cfi r11, R11+8
+ movq_cfi rbx, RBX+8
+ movq_cfi rbp, RBP+8
+ movq_cfi r12, R12+8
+ movq_cfi r13, R13+8
+ movq_cfi r14, R14+8
+ movq_cfi r15, R15+8
+ movl $1,%ebx
+ movl $MSR_GS_BASE,%ecx
+ rdmsr
+ testl %edx,%edx
+ js 1f /* negative -> in kernel */
+ SWAPGS
+ xorl %ebx,%ebx
+1: ret
+ CFI_ENDPROC
+END(save_paranoid)
+
/*
* A newly forked process directly context switches into this.
- */
-/* rdi: prev */
+ */
+/* rdi: prev */
ENTRY(ret_from_fork)
- CFI_DEFAULT_STACK
+ DEFAULT_FRAME
push kernel_eflags(%rip)
CFI_ADJUST_CFA_OFFSET 8
popf # reset kernel eflags
@@ -255,19 +378,21 @@ ENTRY(ret_from_fork)
call schedule_tail
GET_THREAD_INFO(%rcx)
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
+ CFI_REMEMBER_STATE
jnz rff_trace
-rff_action:
+rff_action:
RESTORE_REST
testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
je int_ret_from_sys_call
testl $_TIF_IA32,TI_flags(%rcx)
jnz int_ret_from_sys_call
- RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
+ RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET
jmp ret_from_sys_call
+ CFI_RESTORE_STATE
rff_trace:
movq %rsp,%rdi
call syscall_trace_leave
- GET_THREAD_INFO(%rcx)
+ GET_THREAD_INFO(%rcx)
jmp rff_action
CFI_ENDPROC
END(ret_from_fork)
@@ -278,20 +403,20 @@ END(ret_from_fork)
* SYSCALL does not save anything on the stack and does not change the
* stack pointer.
*/
-
+
/*
- * Register setup:
+ * Register setup:
* rax system call number
* rdi arg0
- * rcx return address for syscall/sysret, C arg3
+ * rcx return address for syscall/sysret, C arg3
* rsi arg1
- * rdx arg2
+ * rdx arg2
* r10 arg3 (--> moved to rcx for C)
* r8 arg4
* r9 arg5
* r11 eflags for syscall/sysret, temporary for C
- * r12-r15,rbp,rbx saved by C code, not touched.
- *
+ * r12-r15,rbp,rbx saved by C code, not touched.
+ *
* Interrupts are off on entry.
* Only called from user space.
*
@@ -301,7 +426,7 @@ END(ret_from_fork)
* When user can change the frames always force IRET. That is because
* it deals with uncanonical addresses better. SYSRET has trouble
* with them due to bugs in both AMD and Intel CPUs.
- */
+ */
ENTRY(system_call)
CFI_STARTPROC simple
@@ -317,7 +442,7 @@ ENTRY(system_call)
*/
ENTRY(system_call_after_swapgs)
- movq %rsp,%gs:pda_oldrsp
+ movq %rsp,%gs:pda_oldrsp
movq %gs:pda_kernelstack,%rsp
/*
* No need to follow this irqs off/on section - it's straight
@@ -325,7 +450,7 @@ ENTRY(system_call_after_swapgs)
*/
ENABLE_INTERRUPTS(CLBR_NONE)
SAVE_ARGS 8,1
- movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
+ movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
movq %rcx,RIP-ARGOFFSET(%rsp)
CFI_REL_OFFSET rip,RIP-ARGOFFSET
GET_THREAD_INFO(%rcx)
@@ -339,19 +464,19 @@ system_call_fastpath:
movq %rax,RAX-ARGOFFSET(%rsp)
/*
* Syscall return path ending with SYSRET (fast path)
- * Has incomplete stack frame and undefined top of stack.
- */
+ * Has incomplete stack frame and undefined top of stack.
+ */
ret_from_sys_call:
movl $_TIF_ALLWORK_MASK,%edi
/* edi: flagmask */
-sysret_check:
+sysret_check:
LOCKDEP_SYS_EXIT
GET_THREAD_INFO(%rcx)
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
movl TI_flags(%rcx),%edx
andl %edi,%edx
- jnz sysret_careful
+ jnz sysret_careful
CFI_REMEMBER_STATE
/*
* sysretq will re-enable interrupts:
@@ -366,7 +491,7 @@ sysret_check:
CFI_RESTORE_STATE
/* Handle reschedules */
- /* edx: work, edi: workmask */
+ /* edx: work, edi: workmask */
sysret_careful:
bt $TIF_NEED_RESCHED,%edx
jnc sysret_signal
@@ -379,7 +504,7 @@ sysret_careful:
CFI_ADJUST_CFA_OFFSET -8
jmp sysret_check
- /* Handle a signal */
+ /* Handle a signal */
sysret_signal:
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
@@ -388,17 +513,20 @@ sysret_signal:
jc sysret_audit
#endif
/* edx: work flags (arg3) */
- leaq do_notify_resume(%rip),%rax
leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
xorl %esi,%esi # oldset -> arg2
- call ptregscall_common
+ SAVE_REST
+ FIXUP_TOP_OF_STACK %r11
+ call do_notify_resume
+ RESTORE_TOP_OF_STACK %r11
+ RESTORE_REST
movl $_TIF_WORK_MASK,%edi
/* Use IRET because user could have changed frame. This
works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
jmp int_with_check
-
+
badsys:
movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
jmp ret_from_sys_call
@@ -437,7 +565,7 @@ sysret_audit:
#endif /* CONFIG_AUDITSYSCALL */
/* Do syscall tracing */
-tracesys:
+tracesys:
#ifdef CONFIG_AUDITSYSCALL
testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
jz auditsys
@@ -460,8 +588,8 @@ tracesys:
call *sys_call_table(,%rax,8)
movq %rax,RAX-ARGOFFSET(%rsp)
/* Use IRET because user could have changed frame */
-
-/*
+
+/*
* Syscall return path ending with IRET.
* Has correct top of stack, but partial stack frame.
*/
@@ -505,18 +633,18 @@ int_very_careful:
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
SAVE_REST
- /* Check for syscall exit trace */
+ /* Check for syscall exit trace */
testl $_TIF_WORK_SYSCALL_EXIT,%edx
jz int_signal
pushq %rdi
CFI_ADJUST_CFA_OFFSET 8
- leaq 8(%rsp),%rdi # &ptregs -> arg1
+ leaq 8(%rsp),%rdi # &ptregs -> arg1
call syscall_trace_leave
popq %rdi
CFI_ADJUST_CFA_OFFSET -8
andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
jmp int_restore_rest
-
+
int_signal:
testl $_TIF_DO_NOTIFY_MASK,%edx
jz 1f
@@ -531,22 +659,24 @@ int_restore_rest:
jmp int_with_check
CFI_ENDPROC
END(system_call)
-
-/*
+
+/*
* Certain special system calls that need to save a complete full stack frame.
- */
-
+ */
.macro PTREGSCALL label,func,arg
- .globl \label
-\label:
- leaq \func(%rip),%rax
- leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
- jmp ptregscall_common
+ENTRY(\label)
+ PARTIAL_FRAME 1 8 /* offset 8: return address */
+ subq $REST_SKIP, %rsp
+ CFI_ADJUST_CFA_OFFSET REST_SKIP
+ call save_rest
+ DEFAULT_FRAME 0 8 /* offset 8: return address */
+ leaq 8(%rsp), \arg /* pt_regs pointer */
+ call \func
+ jmp ptregscall_common
+ CFI_ENDPROC
END(\label)
.endm
- CFI_STARTPROC
-
PTREGSCALL stub_clone, sys_clone, %r8
PTREGSCALL stub_fork, sys_fork, %rdi
PTREGSCALL stub_vfork, sys_vfork, %rdi
@@ -554,25 +684,18 @@ END(\label)
PTREGSCALL stub_iopl, sys_iopl, %rsi
ENTRY(ptregscall_common)
- popq %r11
- CFI_ADJUST_CFA_OFFSET -8
- CFI_REGISTER rip, r11
- SAVE_REST
- movq %r11, %r15
- CFI_REGISTER rip, r15
- FIXUP_TOP_OF_STACK %r11
- call *%rax
- RESTORE_TOP_OF_STACK %r11
- movq %r15, %r11
- CFI_REGISTER rip, r11
- RESTORE_REST
- pushq %r11
- CFI_ADJUST_CFA_OFFSET 8
- CFI_REL_OFFSET rip, 0
- ret
+ DEFAULT_FRAME 1 8 /* offset 8: return address */
+ RESTORE_TOP_OF_STACK %r11, 8
+ movq_cfi_restore R15+8, r15
+ movq_cfi_restore R14+8, r14
+ movq_cfi_restore R13+8, r13
+ movq_cfi_restore R12+8, r12
+ movq_cfi_restore RBP+8, rbp
+ movq_cfi_restore RBX+8, rbx
+ ret $REST_SKIP /* pop extended registers */
CFI_ENDPROC
END(ptregscall_common)
-
+
ENTRY(stub_execve)
CFI_STARTPROC
popq %r11
@@ -588,11 +711,11 @@ ENTRY(stub_execve)
jmp int_ret_from_sys_call
CFI_ENDPROC
END(stub_execve)
-
+
/*
* sigreturn is special because it needs to restore all registers on return.
* This cannot be done with SYSRET, so use the IRET return path instead.
- */
+ */
ENTRY(stub_rt_sigreturn)
CFI_STARTPROC
addq $8, %rsp
@@ -608,70 +731,70 @@ ENTRY(stub_rt_sigreturn)
END(stub_rt_sigreturn)
/*
- * initial frame state for interrupts and exceptions
+ * Build the entry stubs and pointer table with some assembler magic.
+ * We pack 7 stubs into a single 32-byte chunk, which will fit in a
+ * single cache line on all modern x86 implementations.
*/
- .macro _frame ref
- CFI_STARTPROC simple
- CFI_SIGNAL_FRAME
- CFI_DEF_CFA rsp,SS+8-\ref
- /*CFI_REL_OFFSET ss,SS-\ref*/
- CFI_REL_OFFSET rsp,RSP-\ref
- /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
- /*CFI_REL_OFFSET cs,CS-\ref*/
- CFI_REL_OFFSET rip,RIP-\ref
- .endm
+ .section .init.rodata,"a"
+ENTRY(interrupt)
+ .text
+ .p2align 5
+ .p2align CONFIG_X86_L1_CACHE_SHIFT
+ENTRY(irq_entries_start)
+ INTR_FRAME
+vector=FIRST_EXTERNAL_VECTOR
+.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7
+ .balign 32
+ .rept 7
+ .if vector < NR_VECTORS
+ .if vector <> FIRST_EXTERNAL_VECTOR
+ CFI_ADJUST_CFA_OFFSET -8
+ .endif
+1: pushq $(~vector+0x80) /* Note: always in signed byte range */
+ CFI_ADJUST_CFA_OFFSET 8
+ .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
+ jmp 2f
+ .endif
+ .previous
+ .quad 1b
+ .text
+vector=vector+1
+ .endif
+ .endr
+2: jmp common_interrupt
+.endr
+ CFI_ENDPROC
+END(irq_entries_start)
-/* initial frame state for interrupts (and exceptions without error code) */
-#define INTR_FRAME _frame RIP
-/* initial frame state for exceptions with error code (and interrupts with
- vector already pushed) */
-#define XCPT_FRAME _frame ORIG_RAX
+.previous
+END(interrupt)
+.previous
-/*
+/*
* Interrupt entry/exit.
*
* Interrupt entry points save only callee clobbered registers in fast path.
- *
- * Entry runs with interrupts off.
- */
+ *
+ * Entry runs with interrupts off.
+ */
-/* 0(%rsp): interrupt number */
+/* 0(%rsp): ~(interrupt number) */
.macro interrupt func
- cld
- SAVE_ARGS
- leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
- pushq %rbp
- /*
- * Save rbp twice: One is for marking the stack frame, as usual, and the
- * other, to fill pt_regs properly. This is because bx comes right
- * before the last saved register in that structure, and not bp. If the
- * base pointer were in the place bx is today, this would not be needed.
- */
- movq %rbp, -8(%rsp)
- CFI_ADJUST_CFA_OFFSET 8
- CFI_REL_OFFSET rbp, 0
- movq %rsp,%rbp
- CFI_DEF_CFA_REGISTER rbp
- testl $3,CS(%rdi)
- je 1f
- SWAPGS
- /* irqcount is used to check if a CPU is already on an interrupt
- stack or not. While this is essentially redundant with preempt_count
- it is a little cheaper to use a separate counter in the PDA
- (short of moving irq_enter into assembly, which would be too
- much work) */
-1: incl %gs:pda_irqcount
- cmoveq %gs:pda_irqstackptr,%rsp
- push %rbp # backlink for old unwinder
- /*
- * We entered an interrupt context - irqs are off:
- */
- TRACE_IRQS_OFF
+ subq $10*8, %rsp
+ CFI_ADJUST_CFA_OFFSET 10*8
+ call save_args
+ PARTIAL_FRAME 0
call \func
.endm
-ENTRY(common_interrupt)
+ /*
+ * The interrupt stubs push (~vector+0x80) onto the stack and
+ * then jump to common_interrupt.
+ */
+ .p2align CONFIG_X86_L1_CACHE_SHIFT
+common_interrupt:
XCPT_FRAME
+ addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */
interrupt do_IRQ
/* 0(%rsp): oldrsp-ARGOFFSET */
ret_from_intr:
@@ -685,12 +808,12 @@ exit_intr:
GET_THREAD_INFO(%rcx)
testl $3,CS-ARGOFFSET(%rsp)
je retint_kernel
-
+
/* Interrupt came from user space */
/*
* Has a correct top of stack, but a partial stack frame
* %rcx: thread info. Interrupts off.
- */
+ */
retint_with_reschedule:
movl $_TIF_WORK_MASK,%edi
retint_check:
@@ -763,20 +886,20 @@ retint_careful:
pushq %rdi
CFI_ADJUST_CFA_OFFSET 8
call schedule
- popq %rdi
+ popq %rdi
CFI_ADJUST_CFA_OFFSET -8
GET_THREAD_INFO(%rcx)
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
jmp retint_check
-
+
retint_signal:
testl $_TIF_DO_NOTIFY_MASK,%edx
jz retint_swapgs
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
SAVE_REST
- movq $-1,ORIG_RAX(%rsp)
+ movq $-1,ORIG_RAX(%rsp)
xorl %esi,%esi # oldset
movq %rsp,%rdi # &pt_regs
call do_notify_resume
@@ -798,139 +921,203 @@ ENTRY(retint_kernel)
jnc retint_restore_args
call preempt_schedule_irq
jmp exit_intr
-#endif
+#endif
CFI_ENDPROC
END(common_interrupt)
-
+
/*
* APIC interrupts.
- */
- .macro apicinterrupt num,func
+ */
+.macro apicinterrupt num sym do_sym
+ENTRY(\sym)
INTR_FRAME
pushq $~(\num)
CFI_ADJUST_CFA_OFFSET 8
- interrupt \func
+ interrupt \do_sym
jmp ret_from_intr
CFI_ENDPROC
- .endm
-
-ENTRY(thermal_interrupt)
- apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
-END(thermal_interrupt)
-
-ENTRY(threshold_interrupt)
- apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
-END(threshold_interrupt)
-
-#ifdef CONFIG_SMP
-ENTRY(reschedule_interrupt)
- apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
-END(reschedule_interrupt)
-
- .macro INVALIDATE_ENTRY num
-ENTRY(invalidate_interrupt\num)
- apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
-END(invalidate_interrupt\num)
- .endm
+END(\sym)
+.endm
- INVALIDATE_ENTRY 0
- INVALIDATE_ENTRY 1
- INVALIDATE_ENTRY 2
- INVALIDATE_ENTRY 3
- INVALIDATE_ENTRY 4
- INVALIDATE_ENTRY 5
- INVALIDATE_ENTRY 6
- INVALIDATE_ENTRY 7
-
-ENTRY(call_function_interrupt)
- apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
-END(call_function_interrupt)
-ENTRY(call_function_single_interrupt)
- apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
-END(call_function_single_interrupt)
-ENTRY(irq_move_cleanup_interrupt)
- apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
-END(irq_move_cleanup_interrupt)
+#ifdef CONFIG_SMP
+apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \
+ irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
#endif
-ENTRY(apic_timer_interrupt)
- apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
-END(apic_timer_interrupt)
+apicinterrupt 220 \
+ uv_bau_message_intr1 uv_bau_message_interrupt
+apicinterrupt LOCAL_TIMER_VECTOR \
+ apic_timer_interrupt smp_apic_timer_interrupt
+
+#ifdef CONFIG_SMP
+apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \
+ invalidate_interrupt0 smp_invalidate_interrupt
+apicinterrupt INVALIDATE_TLB_VECTOR_START+1 \
+ invalidate_interrupt1 smp_invalidate_interrupt
+apicinterrupt INVALIDATE_TLB_VECTOR_START+2 \
+ invalidate_interrupt2 smp_invalidate_interrupt
+apicinterrupt INVALIDATE_TLB_VECTOR_START+3 \
+ invalidate_interrupt3 smp_invalidate_interrupt
+apicinterrupt INVALIDATE_TLB_VECTOR_START+4 \
+ invalidate_interrupt4 smp_invalidate_interrupt
+apicinterrupt INVALIDATE_TLB_VECTOR_START+5 \
+ invalidate_interrupt5 smp_invalidate_interrupt
+apicinterrupt INVALIDATE_TLB_VECTOR_START+6 \
+ invalidate_interrupt6 smp_invalidate_interrupt
+apicinterrupt INVALIDATE_TLB_VECTOR_START+7 \
+ invalidate_interrupt7 smp_invalidate_interrupt
+#endif
-ENTRY(uv_bau_message_intr1)
- apicinterrupt 220,uv_bau_message_interrupt
-END(uv_bau_message_intr1)
+apicinterrupt THRESHOLD_APIC_VECTOR \
+ threshold_interrupt mce_threshold_interrupt
+apicinterrupt THERMAL_APIC_VECTOR \
+ thermal_interrupt smp_thermal_interrupt
+
+#ifdef CONFIG_SMP
+apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \
+ call_function_single_interrupt smp_call_function_single_interrupt
+apicinterrupt CALL_FUNCTION_VECTOR \
+ call_function_interrupt smp_call_function_interrupt
+apicinterrupt RESCHEDULE_VECTOR \
+ reschedule_interrupt smp_reschedule_interrupt
+#endif
-ENTRY(error_interrupt)
- apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
-END(error_interrupt)
+apicinterrupt ERROR_APIC_VECTOR \
+ error_interrupt smp_error_interrupt
+apicinterrupt SPURIOUS_APIC_VECTOR \
+ spurious_interrupt smp_spurious_interrupt
-ENTRY(spurious_interrupt)
- apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
-END(spurious_interrupt)
-
/*
* Exception entry points.
- */
- .macro zeroentry sym
+ */
+.macro zeroentry sym do_sym
+ENTRY(\sym)
INTR_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
- pushq $0 /* push error code/oldrax */
+ pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
+ subq $15*8,%rsp
+ CFI_ADJUST_CFA_OFFSET 15*8
+ call error_entry
+ DEFAULT_FRAME 0
+ movq %rsp,%rdi /* pt_regs pointer */
+ xorl %esi,%esi /* no error code */
+ call \do_sym
+ jmp error_exit /* %ebx: no swapgs flag */
+ CFI_ENDPROC
+END(\sym)
+.endm
+
+.macro paranoidzeroentry sym do_sym
+KPROBE_ENTRY(\sym)
+ INTR_FRAME
+ PARAVIRT_ADJUST_EXCEPTION_FRAME
+ pushq $-1 /* ORIG_RAX: no syscall to restart */
CFI_ADJUST_CFA_OFFSET 8
- pushq %rax /* push real oldrax to the rdi slot */
+ subq $15*8, %rsp
+ call save_paranoid
+ TRACE_IRQS_OFF
+ movq %rsp,%rdi /* pt_regs pointer */
+ xorl %esi,%esi /* no error code */
+ call \do_sym
+ jmp paranoid_exit /* %ebx: no swapgs flag */
+ CFI_ENDPROC
+KPROBE_END(\sym)
+.endm
+
+.macro paranoidzeroentry_ist sym do_sym ist
+KPROBE_ENTRY(\sym)
+ INTR_FRAME
+ PARAVIRT_ADJUST_EXCEPTION_FRAME
+ pushq $-1 /* ORIG_RAX: no syscall to restart */
CFI_ADJUST_CFA_OFFSET 8
- CFI_REL_OFFSET rax,0
- leaq \sym(%rip),%rax
- jmp error_entry
+ subq $15*8, %rsp
+ call save_paranoid
+ TRACE_IRQS_OFF
+ movq %rsp,%rdi /* pt_regs pointer */
+ xorl %esi,%esi /* no error code */
+ movq %gs:pda_data_offset, %rbp
+ subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
+ call \do_sym
+ addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
+ jmp paranoid_exit /* %ebx: no swapgs flag */
CFI_ENDPROC
- .endm
+KPROBE_END(\sym)
+.endm
- .macro errorentry sym
+.macro errorentry sym do_sym entry=0
+.if \entry
+KPROBE_ENTRY(\sym)
+.else
+ENTRY(\sym)
+.endif
XCPT_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
- pushq %rax
- CFI_ADJUST_CFA_OFFSET 8
- CFI_REL_OFFSET rax,0
- leaq \sym(%rip),%rax
- jmp error_entry
+ subq $15*8,%rsp
+ CFI_ADJUST_CFA_OFFSET 15*8
+ call error_entry
+ DEFAULT_FRAME 0
+ movq %rsp,%rdi /* pt_regs pointer */
+ movq ORIG_RAX(%rsp),%rsi /* get error code */
+ movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */
+ call \do_sym
+ jmp error_exit /* %ebx: no swapgs flag */
CFI_ENDPROC
- .endm
+.if \entry
+KPROBE_END(\sym)
+.else
+END(\sym)
+.endif
+.endm
/* error code is on the stack already */
- /* handle NMI like exceptions that can happen everywhere */
- .macro paranoidentry sym, ist=0, irqtrace=1
- SAVE_ALL
- cld
- movl $1,%ebx
- movl $MSR_GS_BASE,%ecx
- rdmsr
- testl %edx,%edx
- js 1f
- SWAPGS
- xorl %ebx,%ebx
-1:
- .if \ist
- movq %gs:pda_data_offset, %rbp
- .endif
- .if \irqtrace
- TRACE_IRQS_OFF
- .endif
- movq %rsp,%rdi
- movq ORIG_RAX(%rsp),%rsi
- movq $-1,ORIG_RAX(%rsp)
- .if \ist
- subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
- .endif
- call \sym
- .if \ist
- addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
- .endif
- DISABLE_INTERRUPTS(CLBR_NONE)
- .if \irqtrace
+.macro paranoiderrorentry sym do_sym entry=1
+.if \entry
+KPROBE_ENTRY(\sym)
+.else
+ENTRY(\sym)
+.endif
+ XCPT_FRAME
+ PARAVIRT_ADJUST_EXCEPTION_FRAME
+ subq $15*8,%rsp
+ CFI_ADJUST_CFA_OFFSET 15*8
+ call save_paranoid
+ DEFAULT_FRAME 0
TRACE_IRQS_OFF
- .endif
- .endm
+ movq %rsp,%rdi /* pt_regs pointer */
+ movq ORIG_RAX(%rsp),%rsi /* get error code */
+ movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */
+ call \do_sym
+ jmp paranoid_exit /* %ebx: no swapgs flag */
+ CFI_ENDPROC
+.if \entry
+KPROBE_END(\sym)
+.else
+END(\sym)
+.endif
+.endm
+
+zeroentry divide_error do_divide_error
+paranoidzeroentry_ist debug do_debug DEBUG_STACK
+paranoidzeroentry_ist int3 do_int3 DEBUG_STACK
+zeroentry overflow do_overflow
+zeroentry bounds do_bounds
+zeroentry invalid_op do_invalid_op
+zeroentry device_not_available do_device_not_available
+paranoiderrorentry double_fault do_double_fault 0
+zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun
+errorentry invalid_TSS do_invalid_TSS
+errorentry segment_not_present do_segment_not_present
+paranoiderrorentry stack_segment do_stack_segment
+errorentry general_protection do_general_protection 1
+errorentry page_fault do_page_fault 1
+zeroentry spurious_interrupt_bug do_spurious_interrupt_bug
+zeroentry coprocessor_error do_coprocessor_error
+errorentry alignment_check do_alignment_check
+#ifdef CONFIG_X86_MCE
+paranoidzeroentry machine_check do_machine_check
+#endif
+zeroentry simd_coprocessor_error do_simd_coprocessor_error
/*
* "Paranoid" exit path from exception stack.
@@ -944,178 +1131,156 @@ END(spurious_interrupt)
* is fundamentally NMI-unsafe. (we cannot change the soft and
* hard flags at once, atomically)
*/
- .macro paranoidexit trace=1
+
/* ebx: no swapgs flag */
-paranoid_exit\trace:
+KPROBE_ENTRY(paranoid_exit)
+ INTR_FRAME
+ DISABLE_INTERRUPTS(CLBR_NONE)
+ TRACE_IRQS_OFF
testl %ebx,%ebx /* swapgs needed? */
- jnz paranoid_restore\trace
+ jnz paranoid_restore
testl $3,CS(%rsp)
- jnz paranoid_userspace\trace
-paranoid_swapgs\trace:
- .if \trace
+ jnz paranoid_userspace
+paranoid_swapgs:
TRACE_IRQS_IRETQ 0
- .endif
SWAPGS_UNSAFE_STACK
-paranoid_restore\trace:
+paranoid_restore:
RESTORE_ALL 8
jmp irq_return
-paranoid_userspace\trace:
+paranoid_userspace:
GET_THREAD_INFO(%rcx)
movl TI_flags(%rcx),%ebx
andl $_TIF_WORK_MASK,%ebx
- jz paranoid_swapgs\trace
+ jz paranoid_swapgs
movq %rsp,%rdi /* &pt_regs */
call sync_regs
movq %rax,%rsp /* switch stack for scheduling */
testl $_TIF_NEED_RESCHED,%ebx
- jnz paranoid_schedule\trace
+ jnz paranoid_schedule
movl %ebx,%edx /* arg3: thread flags */
- .if \trace
TRACE_IRQS_ON
- .endif
ENABLE_INTERRUPTS(CLBR_NONE)
xorl %esi,%esi /* arg2: oldset */
movq %rsp,%rdi /* arg1: &pt_regs */
call do_notify_resume
DISABLE_INTERRUPTS(CLBR_NONE)
- .if \trace
TRACE_IRQS_OFF
- .endif
- jmp paranoid_userspace\trace
-paranoid_schedule\trace:
- .if \trace
+ jmp paranoid_userspace
+paranoid_schedule:
TRACE_IRQS_ON
- .endif
ENABLE_INTERRUPTS(CLBR_ANY)
call schedule
DISABLE_INTERRUPTS(CLBR_ANY)
- .if \trace
TRACE_IRQS_OFF
- .endif
- jmp paranoid_userspace\trace
+ jmp paranoid_userspace
CFI_ENDPROC
- .endm
+KPROBE_END(paranoid_exit)
/*
- * Exception entry point. This expects an error code/orig_rax on the stack
- * and the exception handler in %rax.
- */
+ * Exception entry point. This expects an error code/orig_rax on the stack.
+ * returns in "no swapgs flag" in %ebx.
+ */
KPROBE_ENTRY(error_entry)
- _frame RDI
- CFI_REL_OFFSET rax,0
- /* rdi slot contains rax, oldrax contains error code */
- cld
- subq $14*8,%rsp
- CFI_ADJUST_CFA_OFFSET (14*8)
- movq %rsi,13*8(%rsp)
- CFI_REL_OFFSET rsi,RSI
- movq 14*8(%rsp),%rsi /* load rax from rdi slot */
- CFI_REGISTER rax,rsi
- movq %rdx,12*8(%rsp)
- CFI_REL_OFFSET rdx,RDX
- movq %rcx,11*8(%rsp)
- CFI_REL_OFFSET rcx,RCX
- movq %rsi,10*8(%rsp) /* store rax */
- CFI_REL_OFFSET rax,RAX
- movq %r8, 9*8(%rsp)
- CFI_REL_OFFSET r8,R8
- movq %r9, 8*8(%rsp)
- CFI_REL_OFFSET r9,R9
- movq %r10,7*8(%rsp)
- CFI_REL_OFFSET r10,R10
- movq %r11,6*8(%rsp)
- CFI_REL_OFFSET r11,R11
- movq %rbx,5*8(%rsp)
- CFI_REL_OFFSET rbx,RBX
- movq %rbp,4*8(%rsp)
- CFI_REL_OFFSET rbp,RBP
- movq %r12,3*8(%rsp)
- CFI_REL_OFFSET r12,R12
- movq %r13,2*8(%rsp)
- CFI_REL_OFFSET r13,R13
- movq %r14,1*8(%rsp)
- CFI_REL_OFFSET r14,R14
- movq %r15,(%rsp)
- CFI_REL_OFFSET r15,R15
- xorl %ebx,%ebx
- testl $3,CS(%rsp)
- je error_kernelspace
-error_swapgs:
+ XCPT_FRAME
+ CFI_ADJUST_CFA_OFFSET 15*8
+ /* oldrax contains error code */
+ cld
+ movq_cfi rdi, RDI+8
+ movq_cfi rsi, RSI+8
+ movq_cfi rdx, RDX+8
+ movq_cfi rcx, RCX+8
+ movq_cfi rax, RAX+8
+ movq_cfi r8, R8+8
+ movq_cfi r9, R9+8
+ movq_cfi r10, R10+8
+ movq_cfi r11, R11+8
+ movq_cfi rbx, RBX+8
+ movq_cfi rbp, RBP+8
+ movq_cfi r12, R12+8
+ movq_cfi r13, R13+8
+ movq_cfi r14, R14+8
+ movq_cfi r15, R15+8
+ xorl %ebx,%ebx
+ testl $3,CS+8(%rsp)
+ je error_kernelspace
+error_swapgs:
SWAPGS
error_sti:
TRACE_IRQS_OFF
- movq %rdi,RDI(%rsp)
- CFI_REL_OFFSET rdi,RDI
- movq %rsp,%rdi
- movq ORIG_RAX(%rsp),%rsi /* get error code */
- movq $-1,ORIG_RAX(%rsp)
- call *%rax
- /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
-error_exit:
+ ret
+ CFI_ENDPROC
+
+/*
+ * There are two places in the kernel that can potentially fault with
+ * usergs. Handle them here. The exception handlers after iret run with
+ * kernel gs again, so don't set the user space flag. B stepping K8s
+ * sometimes report an truncated RIP for IRET exceptions returning to
+ * compat mode. Check for these here too.
+ */
+error_kernelspace:
+ incl %ebx
+ leaq irq_return(%rip),%rcx
+ cmpq %rcx,RIP+8(%rsp)
+ je error_swapgs
+ movl %ecx,%ecx /* zero extend */
+ cmpq %rcx,RIP+8(%rsp)
+ je error_swapgs
+ cmpq $gs_change,RIP+8(%rsp)
+ je error_swapgs
+ jmp error_sti
+KPROBE_END(error_entry)
+
+
+/* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
+KPROBE_ENTRY(error_exit)
+ DEFAULT_FRAME
movl %ebx,%eax
RESTORE_REST
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
- GET_THREAD_INFO(%rcx)
+ GET_THREAD_INFO(%rcx)
testl %eax,%eax
- jne retint_kernel
+ jne retint_kernel
LOCKDEP_SYS_EXIT_IRQ
- movl TI_flags(%rcx),%edx
- movl $_TIF_WORK_MASK,%edi
- andl %edi,%edx
- jnz retint_careful
+ movl TI_flags(%rcx),%edx
+ movl $_TIF_WORK_MASK,%edi
+ andl %edi,%edx
+ jnz retint_careful
jmp retint_swapgs
CFI_ENDPROC
+KPROBE_END(error_exit)
-error_kernelspace:
- incl %ebx
- /* There are two places in the kernel that can potentially fault with
- usergs. Handle them here. The exception handlers after
- iret run with kernel gs again, so don't set the user space flag.
- B stepping K8s sometimes report an truncated RIP for IRET
- exceptions returning to compat mode. Check for these here too. */
- leaq irq_return(%rip),%rcx
- cmpq %rcx,RIP(%rsp)
- je error_swapgs
- movl %ecx,%ecx /* zero extend */
- cmpq %rcx,RIP(%rsp)
- je error_swapgs
- cmpq $gs_change,RIP(%rsp)
- je error_swapgs
- jmp error_sti
-KPROBE_END(error_entry)
-
/* Reload gs selector with exception handling */
- /* edi: new selector */
+ /* edi: new selector */
ENTRY(native_load_gs_index)
CFI_STARTPROC
pushf
CFI_ADJUST_CFA_OFFSET 8
DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
SWAPGS
-gs_change:
- movl %edi,%gs
+gs_change:
+ movl %edi,%gs
2: mfence /* workaround */
SWAPGS
popf
CFI_ADJUST_CFA_OFFSET -8
ret
CFI_ENDPROC
-ENDPROC(native_load_gs_index)
-
+END(native_load_gs_index)
+
.section __ex_table,"a"
.align 8
.quad gs_change,bad_gs
.previous
.section .fixup,"ax"
/* running with kernelgs */
-bad_gs:
+bad_gs:
SWAPGS /* switch back to user gs */
xorl %eax,%eax
movl %eax,%gs
jmp 2b
- .previous
-
+ .previous
+
/*
* Create a kernel thread.
*
@@ -1138,7 +1303,7 @@ ENTRY(kernel_thread)
xorl %r8d,%r8d
xorl %r9d,%r9d
-
+
# clone now
call do_fork
movq %rax,RAX(%rsp)
@@ -1149,14 +1314,14 @@ ENTRY(kernel_thread)
* so internally to the x86_64 port you can rely on kernel_thread()
* not to reschedule the child before returning, this avoids the need
* of hacks for example to fork off the per-CPU idle tasks.
- * [Hopefully no generic code relies on the reschedule -AK]
+ * [Hopefully no generic code relies on the reschedule -AK]
*/
RESTORE_ALL
UNFAKE_STACK_FRAME
ret
CFI_ENDPROC
-ENDPROC(kernel_thread)
-
+END(kernel_thread)
+
child_rip:
pushq $0 # fake return address
CFI_STARTPROC
@@ -1170,8 +1335,9 @@ child_rip:
# exit
mov %eax, %edi
call do_exit
+ ud2 # padding for call trace
CFI_ENDPROC
-ENDPROC(child_rip)
+END(child_rip)
/*
* execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
@@ -1191,10 +1357,10 @@ ENDPROC(child_rip)
ENTRY(kernel_execve)
CFI_STARTPROC
FAKE_STACK_FRAME $0
- SAVE_ALL
+ SAVE_ALL
movq %rsp,%rcx
call sys_execve
- movq %rax, RAX(%rsp)
+ movq %rax, RAX(%rsp)
RESTORE_REST
testq %rax,%rax
je int_ret_from_sys_call
@@ -1202,130 +1368,63 @@ ENTRY(kernel_execve)
UNFAKE_STACK_FRAME
ret
CFI_ENDPROC
-ENDPROC(kernel_execve)
-
-KPROBE_ENTRY(page_fault)
- errorentry do_page_fault
-KPROBE_END(page_fault)
-
-ENTRY(coprocessor_error)
- zeroentry do_coprocessor_error
-END(coprocessor_error)
-
-ENTRY(simd_coprocessor_error)
- zeroentry do_simd_coprocessor_error
-END(simd_coprocessor_error)
-
-ENTRY(device_not_available)
- zeroentry do_device_not_available
-END(device_not_available)
+END(kernel_execve)
/* runs on exception stack */
-KPROBE_ENTRY(debug)
- INTR_FRAME
- PARAVIRT_ADJUST_EXCEPTION_FRAME
- pushq $0
- CFI_ADJUST_CFA_OFFSET 8
- paranoidentry do_debug, DEBUG_STACK
- paranoidexit
-KPROBE_END(debug)
-
- /* runs on exception stack */
KPROBE_ENTRY(nmi)
INTR_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
- pushq $-1
- CFI_ADJUST_CFA_OFFSET 8
- paranoidentry do_nmi, 0, 0
+ pushq_cfi $-1
+ subq $15*8, %rsp
+ CFI_ADJUST_CFA_OFFSET 15*8
+ call save_paranoid
+ DEFAULT_FRAME 0
+ /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
+ movq %rsp,%rdi
+ movq $-1,%rsi
+ call do_nmi
#ifdef CONFIG_TRACE_IRQFLAGS
- paranoidexit 0
+ /* paranoidexit; without TRACE_IRQS_OFF */
+ /* ebx: no swapgs flag */
+ DISABLE_INTERRUPTS(CLBR_NONE)
+ testl %ebx,%ebx /* swapgs needed? */
+ jnz nmi_restore
+ testl $3,CS(%rsp)
+ jnz nmi_userspace
+nmi_swapgs:
+ SWAPGS_UNSAFE_STACK
+nmi_restore:
+ RESTORE_ALL 8
+ jmp irq_return
+nmi_userspace:
+ GET_THREAD_INFO(%rcx)
+ movl TI_flags(%rcx),%ebx
+ andl $_TIF_WORK_MASK,%ebx
+ jz nmi_swapgs
+ movq %rsp,%rdi /* &pt_regs */
+ call sync_regs
+ movq %rax,%rsp /* switch stack for scheduling */
+ testl $_TIF_NEED_RESCHED,%ebx
+ jnz nmi_schedule
+ movl %ebx,%edx /* arg3: thread flags */
+ ENABLE_INTERRUPTS(CLBR_NONE)
+ xorl %esi,%esi /* arg2: oldset */
+ movq %rsp,%rdi /* arg1: &pt_regs */
+ call do_notify_resume
+ DISABLE_INTERRUPTS(CLBR_NONE)
+ jmp nmi_userspace
+nmi_schedule:
+ ENABLE_INTERRUPTS(CLBR_ANY)
+ call schedule
+ DISABLE_INTERRUPTS(CLBR_ANY)
+ jmp nmi_userspace
+ CFI_ENDPROC
#else
- jmp paranoid_exit1
+ jmp paranoid_exit
CFI_ENDPROC
#endif
KPROBE_END(nmi)
-KPROBE_ENTRY(int3)
- INTR_FRAME
- PARAVIRT_ADJUST_EXCEPTION_FRAME
- pushq $0
- CFI_ADJUST_CFA_OFFSET 8
- paranoidentry do_int3, DEBUG_STACK
- jmp paranoid_exit1
- CFI_ENDPROC
-KPROBE_END(int3)
-
-ENTRY(overflow)
- zeroentry do_overflow
-END(overflow)
-
-ENTRY(bounds)
- zeroentry do_bounds
-END(bounds)
-
-ENTRY(invalid_op)
- zeroentry do_invalid_op
-END(invalid_op)
-
-ENTRY(coprocessor_segment_overrun)
- zeroentry do_coprocessor_segment_overrun
-END(coprocessor_segment_overrun)
-
- /* runs on exception stack */
-ENTRY(double_fault)
- XCPT_FRAME
- PARAVIRT_ADJUST_EXCEPTION_FRAME
- paranoidentry do_double_fault
- jmp paranoid_exit1
- CFI_ENDPROC
-END(double_fault)
-
-ENTRY(invalid_TSS)
- errorentry do_invalid_TSS
-END(invalid_TSS)
-
-ENTRY(segment_not_present)
- errorentry do_segment_not_present
-END(segment_not_present)
-
- /* runs on exception stack */
-ENTRY(stack_segment)
- XCPT_FRAME
- PARAVIRT_ADJUST_EXCEPTION_FRAME
- paranoidentry do_stack_segment
- jmp paranoid_exit1
- CFI_ENDPROC
-END(stack_segment)
-
-KPROBE_ENTRY(general_protection)
- errorentry do_general_protection
-KPROBE_END(general_protection)
-
-ENTRY(alignment_check)
- errorentry do_alignment_check
-END(alignment_check)
-
-ENTRY(divide_error)
- zeroentry do_divide_error
-END(divide_error)
-
-ENTRY(spurious_interrupt_bug)
- zeroentry do_spurious_interrupt_bug
-END(spurious_interrupt_bug)
-
-#ifdef CONFIG_X86_MCE
- /* runs on exception stack */
-ENTRY(machine_check)
- INTR_FRAME
- PARAVIRT_ADJUST_EXCEPTION_FRAME
- pushq $0
- CFI_ADJUST_CFA_OFFSET 8
- paranoidentry do_machine_check
- jmp paranoid_exit1
- CFI_ENDPROC
-END(machine_check)
-#endif
-
/* Call softirq on interrupt stack. Interrupts are off. */
ENTRY(call_softirq)
CFI_STARTPROC
@@ -1344,19 +1443,17 @@ ENTRY(call_softirq)
decl %gs:pda_irqcount
ret
CFI_ENDPROC
-ENDPROC(call_softirq)
+END(call_softirq)
KPROBE_ENTRY(ignore_sysret)
CFI_STARTPROC
mov $-ENOSYS,%eax
sysret
CFI_ENDPROC
-ENDPROC(ignore_sysret)
+KPROBE_END(ignore_sysret)
#ifdef CONFIG_XEN
-ENTRY(xen_hypervisor_callback)
- zeroentry xen_do_hypervisor_callback
-END(xen_hypervisor_callback)
+zeroentry xen_hypervisor_callback xen_do_hypervisor_callback
/*
# A note on the "critical region" in our callback handler.
@@ -1377,7 +1474,7 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
see the correct pointer to the pt_regs */
movq %rdi, %rsp # we don't return, adjust the stack frame
CFI_ENDPROC
- CFI_DEFAULT_STACK
+ DEFAULT_FRAME
11: incl %gs:pda_irqcount
movq %rsp,%rbp
CFI_DEF_CFA_REGISTER rbp
@@ -1405,10 +1502,13 @@ END(do_hypervisor_callback)
# with its current contents: any discrepancy means we in category 1.
*/
ENTRY(xen_failsafe_callback)
- framesz = (RIP-0x30) /* workaround buggy gas */
- _frame framesz
- CFI_REL_OFFSET rcx, 0
- CFI_REL_OFFSET r11, 8
+ INTR_FRAME 1 (6*8)
+ /*CFI_REL_OFFSET gs,GS*/
+ /*CFI_REL_OFFSET fs,FS*/
+ /*CFI_REL_OFFSET es,ES*/
+ /*CFI_REL_OFFSET ds,DS*/
+ CFI_REL_OFFSET r11,8
+ CFI_REL_OFFSET rcx,0
movw %ds,%cx
cmpw %cx,0x10(%rsp)
CFI_REMEMBER_STATE
@@ -1429,12 +1529,9 @@ ENTRY(xen_failsafe_callback)
CFI_RESTORE r11
addq $0x30,%rsp
CFI_ADJUST_CFA_OFFSET -0x30
- pushq $0
- CFI_ADJUST_CFA_OFFSET 8
- pushq %r11
- CFI_ADJUST_CFA_OFFSET 8
- pushq %rcx
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi $0 /* RIP */
+ pushq_cfi %r11
+ pushq_cfi %rcx
jmp general_protection
CFI_RESTORE_STATE
1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
@@ -1444,8 +1541,7 @@ ENTRY(xen_failsafe_callback)
CFI_RESTORE r11
addq $0x30,%rsp
CFI_ADJUST_CFA_OFFSET -0x30
- pushq $0
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi $0
SAVE_ALL
jmp error_exit
CFI_ENDPROC
diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c
index 0aa2c443d600..53699c931ad4 100644
--- a/arch/x86/kernel/es7000_32.c
+++ b/arch/x86/kernel/es7000_32.c
@@ -38,8 +38,11 @@
#include <asm/io.h>
#include <asm/nmi.h>
#include <asm/smp.h>
+#include <asm/atomic.h>
#include <asm/apicdef.h>
#include <mach_mpparse.h>
+#include <asm/genapic.h>
+#include <asm/setup.h>
/*
* ES7000 chipsets
@@ -161,6 +164,43 @@ es7000_rename_gsi(int ioapic, int gsi)
return gsi;
}
+static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip)
+{
+ unsigned long vect = 0, psaival = 0;
+
+ if (psai == NULL)
+ return -1;
+
+ vect = ((unsigned long)__pa(eip)/0x1000) << 16;
+ psaival = (0x1000000 | vect | cpu);
+
+ while (*psai & 0x1000000)
+ ;
+
+ *psai = psaival;
+
+ return 0;
+}
+
+static void noop_wait_for_deassert(atomic_t *deassert_not_used)
+{
+}
+
+static int __init es7000_update_genapic(void)
+{
+ genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip;
+
+ /* MPENTIUMIII */
+ if (boot_cpu_data.x86 == 6 &&
+ (boot_cpu_data.x86_model >= 7 || boot_cpu_data.x86_model <= 11)) {
+ es7000_update_genapic_to_cluster();
+ genapic->wait_for_init_deassert = noop_wait_for_deassert;
+ genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip;
+ }
+
+ return 0;
+}
+
void __init
setup_unisys(void)
{
@@ -176,6 +216,8 @@ setup_unisys(void)
else
es7000_plat = ES7000_CLASSIC;
ioapic_renumber_irq = es7000_rename_gsi;
+
+ x86_quirks->update_genapic = es7000_update_genapic;
}
/*
@@ -317,26 +359,6 @@ es7000_mip_write(struct mip_reg *mip_reg)
return status;
}
-int
-es7000_start_cpu(int cpu, unsigned long eip)
-{
- unsigned long vect = 0, psaival = 0;
-
- if (psai == NULL)
- return -1;
-
- vect = ((unsigned long)__pa(eip)/0x1000) << 16;
- psaival = (0x1000000 | vect | cpu);
-
- while (*psai & 0x1000000)
- ;
-
- *psai = psaival;
-
- return 0;
-
-}
-
void __init
es7000_sw_apic(void)
{
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 50ea0ac8c9bf..bb137f7297ed 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -14,14 +14,17 @@
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/percpu.h>
+#include <linux/sched.h>
#include <linux/init.h>
#include <linux/list.h>
#include <asm/ftrace.h>
+#include <linux/ftrace.h>
#include <asm/nops.h>
+#include <asm/nmi.h>
-static unsigned char ftrace_nop[MCOUNT_INSN_SIZE];
+#ifdef CONFIG_DYNAMIC_FTRACE
union ftrace_code_union {
char code[MCOUNT_INSN_SIZE];
@@ -31,18 +34,12 @@ union ftrace_code_union {
} __attribute__((packed));
};
-
static int ftrace_calc_offset(long ip, long addr)
{
return (int)(addr - ip);
}
-unsigned char *ftrace_nop_replace(void)
-{
- return ftrace_nop;
-}
-
-unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
+static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
{
static union ftrace_code_union calc;
@@ -56,7 +53,143 @@ unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
return calc.code;
}
-int
+/*
+ * Modifying code must take extra care. On an SMP machine, if
+ * the code being modified is also being executed on another CPU
+ * that CPU will have undefined results and possibly take a GPF.
+ * We use kstop_machine to stop other CPUS from exectuing code.
+ * But this does not stop NMIs from happening. We still need
+ * to protect against that. We separate out the modification of
+ * the code to take care of this.
+ *
+ * Two buffers are added: An IP buffer and a "code" buffer.
+ *
+ * 1) Put the instruction pointer into the IP buffer
+ * and the new code into the "code" buffer.
+ * 2) Set a flag that says we are modifying code
+ * 3) Wait for any running NMIs to finish.
+ * 4) Write the code
+ * 5) clear the flag.
+ * 6) Wait for any running NMIs to finish.
+ *
+ * If an NMI is executed, the first thing it does is to call
+ * "ftrace_nmi_enter". This will check if the flag is set to write
+ * and if it is, it will write what is in the IP and "code" buffers.
+ *
+ * The trick is, it does not matter if everyone is writing the same
+ * content to the code location. Also, if a CPU is executing code
+ * it is OK to write to that code location if the contents being written
+ * are the same as what exists.
+ */
+
+static atomic_t in_nmi = ATOMIC_INIT(0);
+static int mod_code_status; /* holds return value of text write */
+static int mod_code_write; /* set when NMI should do the write */
+static void *mod_code_ip; /* holds the IP to write to */
+static void *mod_code_newcode; /* holds the text to write to the IP */
+
+static unsigned nmi_wait_count;
+static atomic_t nmi_update_count = ATOMIC_INIT(0);
+
+int ftrace_arch_read_dyn_info(char *buf, int size)
+{
+ int r;
+
+ r = snprintf(buf, size, "%u %u",
+ nmi_wait_count,
+ atomic_read(&nmi_update_count));
+ return r;
+}
+
+static void ftrace_mod_code(void)
+{
+ /*
+ * Yes, more than one CPU process can be writing to mod_code_status.
+ * (and the code itself)
+ * But if one were to fail, then they all should, and if one were
+ * to succeed, then they all should.
+ */
+ mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
+ MCOUNT_INSN_SIZE);
+
+}
+
+void ftrace_nmi_enter(void)
+{
+ atomic_inc(&in_nmi);
+ /* Must have in_nmi seen before reading write flag */
+ smp_mb();
+ if (mod_code_write) {
+ ftrace_mod_code();
+ atomic_inc(&nmi_update_count);
+ }
+}
+
+void ftrace_nmi_exit(void)
+{
+ /* Finish all executions before clearing in_nmi */
+ smp_wmb();
+ atomic_dec(&in_nmi);
+}
+
+static void wait_for_nmi(void)
+{
+ int waited = 0;
+
+ while (atomic_read(&in_nmi)) {
+ waited = 1;
+ cpu_relax();
+ }
+
+ if (waited)
+ nmi_wait_count++;
+}
+
+static int
+do_ftrace_mod_code(unsigned long ip, void *new_code)
+{
+ mod_code_ip = (void *)ip;
+ mod_code_newcode = new_code;
+
+ /* The buffers need to be visible before we let NMIs write them */
+ smp_wmb();
+
+ mod_code_write = 1;
+
+ /* Make sure write bit is visible before we wait on NMIs */
+ smp_mb();
+
+ wait_for_nmi();
+
+ /* Make sure all running NMIs have finished before we write the code */
+ smp_mb();
+
+ ftrace_mod_code();
+
+ /* Make sure the write happens before clearing the bit */
+ smp_wmb();
+
+ mod_code_write = 0;
+
+ /* make sure NMIs see the cleared bit */
+ smp_mb();
+
+ wait_for_nmi();
+
+ return mod_code_status;
+}
+
+
+
+
+static unsigned char ftrace_nop[MCOUNT_INSN_SIZE];
+
+static unsigned char *ftrace_nop_replace(void)
+{
+ return ftrace_nop;
+}
+
+static int
ftrace_modify_code(unsigned long ip, unsigned char *old_code,
unsigned char *new_code)
{
@@ -81,7 +214,7 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
return -EINVAL;
/* replace the text with the new text */
- if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE))
+ if (do_ftrace_mod_code(ip, new_code))
return -EPERM;
sync_core();
@@ -89,6 +222,29 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
return 0;
}
+int ftrace_make_nop(struct module *mod,
+ struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned char *new, *old;
+ unsigned long ip = rec->ip;
+
+ old = ftrace_call_replace(ip, addr);
+ new = ftrace_nop_replace();
+
+ return ftrace_modify_code(rec->ip, old, new);
+}
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned char *new, *old;
+ unsigned long ip = rec->ip;
+
+ old = ftrace_nop_replace();
+ new = ftrace_call_replace(ip, addr);
+
+ return ftrace_modify_code(rec->ip, old, new);
+}
+
int ftrace_update_ftrace_func(ftrace_func_t func)
{
unsigned long ip = (unsigned long)(&ftrace_call);
@@ -165,3 +321,139 @@ int __init ftrace_dyn_arch_init(void *data)
return 0;
}
+#endif
+
+#ifdef CONFIG_FUNCTION_RET_TRACER
+
+#ifndef CONFIG_DYNAMIC_FTRACE
+
+/*
+ * These functions are picked from those used on
+ * this page for dynamic ftrace. They have been
+ * simplified to ignore all traces in NMI context.
+ */
+static atomic_t in_nmi;
+
+void ftrace_nmi_enter(void)
+{
+ atomic_inc(&in_nmi);
+}
+
+void ftrace_nmi_exit(void)
+{
+ atomic_dec(&in_nmi);
+}
+#endif /* !CONFIG_DYNAMIC_FTRACE */
+
+/* Add a function return address to the trace stack on thread info.*/
+static int push_return_trace(unsigned long ret, unsigned long long time,
+ unsigned long func)
+{
+ int index;
+
+ if (!current->ret_stack)
+ return -EBUSY;
+
+ /* The return trace stack is full */
+ if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
+ atomic_inc(&current->trace_overrun);
+ return -EBUSY;
+ }
+
+ index = ++current->curr_ret_stack;
+ barrier();
+ current->ret_stack[index].ret = ret;
+ current->ret_stack[index].func = func;
+ current->ret_stack[index].calltime = time;
+
+ return 0;
+}
+
+/* Retrieve a function return address to the trace stack on thread info.*/
+static void pop_return_trace(unsigned long *ret, unsigned long long *time,
+ unsigned long *func, unsigned long *overrun)
+{
+ int index;
+
+ index = current->curr_ret_stack;
+ *ret = current->ret_stack[index].ret;
+ *func = current->ret_stack[index].func;
+ *time = current->ret_stack[index].calltime;
+ *overrun = atomic_read(&current->trace_overrun);
+ current->curr_ret_stack--;
+}
+
+/*
+ * Send the trace to the ring-buffer.
+ * @return the original return address.
+ */
+unsigned long ftrace_return_to_handler(void)
+{
+ struct ftrace_retfunc trace;
+ pop_return_trace(&trace.ret, &trace.calltime, &trace.func,
+ &trace.overrun);
+ trace.rettime = cpu_clock(raw_smp_processor_id());
+ ftrace_function_return(&trace);
+
+ return trace.ret;
+}
+
+/*
+ * Hook the return address and push it in the stack of return addrs
+ * in current thread info.
+ */
+void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
+{
+ unsigned long old;
+ unsigned long long calltime;
+ int faulted;
+ unsigned long return_hooker = (unsigned long)
+ &return_to_handler;
+
+ /* Nmi's are currently unsupported */
+ if (atomic_read(&in_nmi))
+ return;
+
+ /*
+ * Protect against fault, even if it shouldn't
+ * happen. This tool is too much intrusive to
+ * ignore such a protection.
+ */
+ asm volatile(
+ "1: movl (%[parent_old]), %[old]\n"
+ "2: movl %[return_hooker], (%[parent_replaced])\n"
+ " movl $0, %[faulted]\n"
+
+ ".section .fixup, \"ax\"\n"
+ "3: movl $1, %[faulted]\n"
+ ".previous\n"
+
+ ".section __ex_table, \"a\"\n"
+ " .long 1b, 3b\n"
+ " .long 2b, 3b\n"
+ ".previous\n"
+
+ : [parent_replaced] "=r" (parent), [old] "=r" (old),
+ [faulted] "=r" (faulted)
+ : [parent_old] "0" (parent), [return_hooker] "r" (return_hooker)
+ : "memory"
+ );
+
+ if (WARN_ON(faulted)) {
+ unregister_ftrace_return();
+ return;
+ }
+
+ if (WARN_ON(!__kernel_text_address(old))) {
+ unregister_ftrace_return();
+ *parent = old;
+ return;
+ }
+
+ calltime = cpu_clock(raw_smp_processor_id());
+
+ if (push_return_trace(old, calltime, self_addr) == -EBUSY)
+ *parent = old;
+}
+
+#endif /* CONFIG_FUNCTION_RET_TRACER */
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index 6c9bfc9e1e95..2bced78b0b8e 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -21,6 +21,7 @@
#include <asm/smp.h>
#include <asm/ipi.h>
#include <asm/genapic.h>
+#include <asm/setup.h>
extern struct genapic apic_flat;
extern struct genapic apic_physflat;
@@ -53,6 +54,9 @@ void __init setup_apic_routing(void)
genapic = &apic_physflat;
printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name);
}
+
+ if (x86_quirks->update_genapic)
+ x86_quirks->update_genapic();
}
/* Same for both flat and physical. */
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 2c7dbdb98278..221299f4509f 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -10,6 +10,7 @@
#include <linux/kernel.h>
#include <linux/threads.h>
+#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/string.h>
#include <linux/ctype.h>
@@ -17,6 +18,9 @@
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/hardirq.h>
+#include <linux/timer.h>
+#include <linux/proc_fs.h>
+#include <asm/current.h>
#include <asm/smp.h>
#include <asm/ipi.h>
#include <asm/genapic.h>
@@ -356,6 +360,103 @@ static __init void uv_rtc_init(void)
}
/*
+ * percpu heartbeat timer
+ */
+static void uv_heartbeat(unsigned long ignored)
+{
+ struct timer_list *timer = &uv_hub_info->scir.timer;
+ unsigned char bits = uv_hub_info->scir.state;
+
+ /* flip heartbeat bit */
+ bits ^= SCIR_CPU_HEARTBEAT;
+
+ /* is this cpu idle? */
+ if (idle_cpu(raw_smp_processor_id()))
+ bits &= ~SCIR_CPU_ACTIVITY;
+ else
+ bits |= SCIR_CPU_ACTIVITY;
+
+ /* update system controller interface reg */
+ uv_set_scir_bits(bits);
+
+ /* enable next timer period */
+ mod_timer(timer, jiffies + SCIR_CPU_HB_INTERVAL);
+}
+
+static void __cpuinit uv_heartbeat_enable(int cpu)
+{
+ if (!uv_cpu_hub_info(cpu)->scir.enabled) {
+ struct timer_list *timer = &uv_cpu_hub_info(cpu)->scir.timer;
+
+ uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY);
+ setup_timer(timer, uv_heartbeat, cpu);
+ timer->expires = jiffies + SCIR_CPU_HB_INTERVAL;
+ add_timer_on(timer, cpu);
+ uv_cpu_hub_info(cpu)->scir.enabled = 1;
+ }
+
+ /* check boot cpu */
+ if (!uv_cpu_hub_info(0)->scir.enabled)
+ uv_heartbeat_enable(0);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static void __cpuinit uv_heartbeat_disable(int cpu)
+{
+ if (uv_cpu_hub_info(cpu)->scir.enabled) {
+ uv_cpu_hub_info(cpu)->scir.enabled = 0;
+ del_timer(&uv_cpu_hub_info(cpu)->scir.timer);
+ }
+ uv_set_cpu_scir_bits(cpu, 0xff);
+}
+
+/*
+ * cpu hotplug notifier
+ */
+static __cpuinit int uv_scir_cpu_notify(struct notifier_block *self,
+ unsigned long action, void *hcpu)
+{
+ long cpu = (long)hcpu;
+
+ switch (action) {
+ case CPU_ONLINE:
+ uv_heartbeat_enable(cpu);
+ break;
+ case CPU_DOWN_PREPARE:
+ uv_heartbeat_disable(cpu);
+ break;
+ default:
+ break;
+ }
+ return NOTIFY_OK;
+}
+
+static __init void uv_scir_register_cpu_notifier(void)
+{
+ hotcpu_notifier(uv_scir_cpu_notify, 0);
+}
+
+#else /* !CONFIG_HOTPLUG_CPU */
+
+static __init void uv_scir_register_cpu_notifier(void)
+{
+}
+
+static __init int uv_init_heartbeat(void)
+{
+ int cpu;
+
+ if (is_uv_system())
+ for_each_online_cpu(cpu)
+ uv_heartbeat_enable(cpu);
+ return 0;
+}
+
+late_initcall(uv_init_heartbeat);
+
+#endif /* !CONFIG_HOTPLUG_CPU */
+
+/*
* Called on each cpu to initialize the per_cpu UV data area.
* ZZZ hotplug not supported yet
*/
@@ -428,7 +529,7 @@ void __init uv_system_init(void)
uv_bios_init();
uv_bios_get_sn_info(0, &uv_type, &sn_partition_id,
- &uv_coherency_id, &uv_region_size);
+ &sn_coherency_id, &sn_region_size);
uv_rtc_init();
for_each_present_cpu(cpu) {
@@ -450,7 +551,8 @@ void __init uv_system_init(void)
uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1;
uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
- uv_cpu_hub_info(cpu)->coherency_domain_number = uv_coherency_id;
+ uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id;
+ uv_cpu_hub_info(cpu)->scir.offset = SCIR_LOCAL_MMR_BASE + lcpu;
uv_node_to_blade[nid] = blade;
uv_cpu_to_blade[cpu] = blade;
max_pnode = max(pnode, max_pnode);
@@ -467,4 +569,6 @@ void __init uv_system_init(void)
map_mmioh_high(max_pnode);
uv_cpu_init();
+ uv_scir_register_cpu_notifier();
+ proc_mkdir("sgi_uv", NULL);
}
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 067d8de913f6..15fcaacc1f84 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -33,7 +33,7 @@
* HPET address is set in acpi/boot.c, when an ACPI entry exists
*/
unsigned long hpet_address;
-unsigned long hpet_num_timers;
+static unsigned long hpet_num_timers;
static void __iomem *hpet_virt_address;
struct hpet_dev {
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 1f20608d4ca8..b0f61f0dcd0a 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -58,7 +58,7 @@ void __cpuinit mxcsr_feature_mask_init(void)
stts();
}
-void __init init_thread_xstate(void)
+void __cpuinit init_thread_xstate(void)
{
if (!HAVE_HWFP) {
xstate_size = sizeof(struct i387_soft_struct);
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index c9513e1ff28d..9043251210fb 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -3608,27 +3608,7 @@ int __init io_apic_get_redir_entries (int ioapic)
int __init probe_nr_irqs(void)
{
- int idx;
- int nr = 0;
-#ifndef CONFIG_XEN
- int nr_min = 32;
-#else
- int nr_min = NR_IRQS;
-#endif
-
- for (idx = 0; idx < nr_ioapics; idx++)
- nr += io_apic_get_redir_entries(idx) + 1;
-
- /* double it for hotplug and msi and nmi */
- nr <<= 1;
-
- /* something wrong ? */
- if (nr < nr_min)
- nr = nr_min;
- if (WARN_ON(nr > NR_IRQS))
- nr = NR_IRQS;
-
- return nr;
+ return NR_IRQS;
}
/* --------------------------------------------------------------------------
@@ -3775,7 +3755,9 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
void __init setup_ioapic_dest(void)
{
int pin, ioapic, irq, irq_entry;
+ struct irq_desc *desc;
struct irq_cfg *cfg;
+ cpumask_t mask;
if (skip_ioapic_setup == 1)
return;
@@ -3792,16 +3774,30 @@ void __init setup_ioapic_dest(void)
* cpu is online.
*/
cfg = irq_cfg(irq);
- if (!cfg->vector)
+ if (!cfg->vector) {
setup_IO_APIC_irq(ioapic, pin, irq,
irq_trigger(irq_entry),
irq_polarity(irq_entry));
+ continue;
+
+ }
+
+ /*
+ * Honour affinities which have been set in early boot
+ */
+ desc = irq_to_desc(irq);
+ if (desc->status &
+ (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
+ mask = desc->affinity;
+ else
+ mask = TARGET_CPUS;
+
#ifdef CONFIG_INTR_REMAP
- else if (intr_remapping_enabled)
- set_ir_ioapic_affinity_irq(irq, TARGET_CPUS);
-#endif
+ if (intr_remapping_enabled)
+ set_ir_ioapic_affinity_irq(irq, mask);
else
- set_ioapic_affinity_irq(irq, TARGET_CPUS);
+#endif
+ set_ioapic_affinity_irq(irq, mask);
}
}
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 60eb84eb77a0..1d3d0e71b044 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -18,7 +18,6 @@
#include <asm/idle.h>
#include <asm/smp.h>
-#ifdef CONFIG_DEBUG_STACKOVERFLOW
/*
* Probabilistic stack overflow check:
*
@@ -28,19 +27,18 @@
*/
static inline void stack_overflow_check(struct pt_regs *regs)
{
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
u64 curbase = (u64)task_stack_page(current);
- static unsigned long warned = -60*HZ;
-
- if (regs->sp >= curbase && regs->sp <= curbase + THREAD_SIZE &&
- regs->sp < curbase + sizeof(struct thread_info) + 128 &&
- time_after(jiffies, warned + 60*HZ)) {
- printk("do_IRQ: %s near stack overflow (cur:%Lx,sp:%lx)\n",
- current->comm, curbase, regs->sp);
- show_stack(NULL,NULL);
- warned = jiffies;
- }
-}
+
+ WARN_ONCE(regs->sp >= curbase &&
+ regs->sp <= curbase + THREAD_SIZE &&
+ regs->sp < curbase + sizeof(struct thread_info) +
+ sizeof(struct pt_regs) + 128,
+
+ "do_IRQ: %s near stack overflow (cur:%Lx,sp:%lx)\n",
+ current->comm, curbase, regs->sp);
#endif
+}
/*
* do_IRQ handles all normal device IRQ's (the special
@@ -60,9 +58,7 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
irq_enter();
irq = __get_cpu_var(vector_irq)[vector];
-#ifdef CONFIG_DEBUG_STACKOVERFLOW
stack_overflow_check(regs);
-#endif
desc = irq_to_desc(irq);
if (likely(desc))
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 845aa9803e80..607db63044a5 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -129,7 +129,7 @@ void __init native_init_IRQ(void)
for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
/* SYSCALL_VECTOR was reserved in trap_init. */
if (i != SYSCALL_VECTOR)
- set_intr_gate(i, interrupt[i]);
+ set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
}
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index ff0235391285..8670b3ce626e 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -24,41 +24,6 @@
#include <asm/i8259.h>
/*
- * Common place to define all x86 IRQ vectors
- *
- * This builds up the IRQ handler stubs using some ugly macros in irq.h
- *
- * These macros create the low-level assembly IRQ routines that save
- * register context and call do_IRQ(). do_IRQ() then does all the
- * operations that are needed to keep the AT (or SMP IOAPIC)
- * interrupt-controller happy.
- */
-
-#define IRQ_NAME2(nr) nr##_interrupt(void)
-#define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
-
-/*
- * SMP has a few special interrupts for IPI messages
- */
-
-#define BUILD_IRQ(nr) \
- asmlinkage void IRQ_NAME(nr); \
- asm("\n.text\n.p2align\n" \
- "IRQ" #nr "_interrupt:\n\t" \
- "push $~(" #nr ") ; " \
- "jmp common_interrupt\n" \
- ".previous");
-
-#define BI(x,y) \
- BUILD_IRQ(x##y)
-
-#define BUILD_16_IRQS(x) \
- BI(x,0) BI(x,1) BI(x,2) BI(x,3) \
- BI(x,4) BI(x,5) BI(x,6) BI(x,7) \
- BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
- BI(x,c) BI(x,d) BI(x,e) BI(x,f)
-
-/*
* ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
* (these are usually mapped to vectors 0x30-0x3f)
*/
@@ -73,37 +38,6 @@
*
* (these are usually mapped into the 0x30-0xff vector range)
*/
- BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3)
-BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7)
-BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb)
-BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BUILD_16_IRQS(0xe) BUILD_16_IRQS(0xf)
-
-#undef BUILD_16_IRQS
-#undef BI
-
-
-#define IRQ(x,y) \
- IRQ##x##y##_interrupt
-
-#define IRQLIST_16(x) \
- IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \
- IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \
- IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
- IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
-
-/* for the irq vectors */
-static void (*__initdata interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = {
- IRQLIST_16(0x2), IRQLIST_16(0x3),
- IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7),
- IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb),
- IRQLIST_16(0xc), IRQLIST_16(0xd), IRQLIST_16(0xe), IRQLIST_16(0xf)
-};
-
-#undef IRQ
-#undef IRQLIST_16
-
-
-
/*
* IRQ2 is cascade interrupt to second interrupt controller
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 1c9cc431ea4f..e169ae9b6a62 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -128,7 +128,7 @@ static int kvm_register_clock(char *txt)
}
#ifdef CONFIG_X86_LOCAL_APIC
-static void __devinit kvm_setup_secondary_clock(void)
+static void __cpuinit kvm_setup_secondary_clock(void)
{
/*
* Now that the first cpu already had this clocksource initialized,
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 7a385746509a..37f420018a41 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -13,6 +13,7 @@
#include <linux/numa.h>
#include <linux/ftrace.h>
#include <linux/suspend.h>
+#include <linux/gfp.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -25,15 +26,6 @@
#include <asm/system.h>
#include <asm/cacheflush.h>
-#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
-static u32 kexec_pgd[1024] PAGE_ALIGNED;
-#ifdef CONFIG_X86_PAE
-static u32 kexec_pmd0[1024] PAGE_ALIGNED;
-static u32 kexec_pmd1[1024] PAGE_ALIGNED;
-#endif
-static u32 kexec_pte0[1024] PAGE_ALIGNED;
-static u32 kexec_pte1[1024] PAGE_ALIGNED;
-
static void set_idt(void *newidt, __u16 limit)
{
struct desc_ptr curidt;
@@ -76,6 +68,76 @@ static void load_segments(void)
#undef __STR
}
+static void machine_kexec_free_page_tables(struct kimage *image)
+{
+ free_page((unsigned long)image->arch.pgd);
+#ifdef CONFIG_X86_PAE
+ free_page((unsigned long)image->arch.pmd0);
+ free_page((unsigned long)image->arch.pmd1);
+#endif
+ free_page((unsigned long)image->arch.pte0);
+ free_page((unsigned long)image->arch.pte1);
+}
+
+static int machine_kexec_alloc_page_tables(struct kimage *image)
+{
+ image->arch.pgd = (pgd_t *)get_zeroed_page(GFP_KERNEL);
+#ifdef CONFIG_X86_PAE
+ image->arch.pmd0 = (pmd_t *)get_zeroed_page(GFP_KERNEL);
+ image->arch.pmd1 = (pmd_t *)get_zeroed_page(GFP_KERNEL);
+#endif
+ image->arch.pte0 = (pte_t *)get_zeroed_page(GFP_KERNEL);
+ image->arch.pte1 = (pte_t *)get_zeroed_page(GFP_KERNEL);
+ if (!image->arch.pgd ||
+#ifdef CONFIG_X86_PAE
+ !image->arch.pmd0 || !image->arch.pmd1 ||
+#endif
+ !image->arch.pte0 || !image->arch.pte1) {
+ machine_kexec_free_page_tables(image);
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+static void machine_kexec_page_table_set_one(
+ pgd_t *pgd, pmd_t *pmd, pte_t *pte,
+ unsigned long vaddr, unsigned long paddr)
+{
+ pud_t *pud;
+
+ pgd += pgd_index(vaddr);
+#ifdef CONFIG_X86_PAE
+ if (!(pgd_val(*pgd) & _PAGE_PRESENT))
+ set_pgd(pgd, __pgd(__pa(pmd) | _PAGE_PRESENT));
+#endif
+ pud = pud_offset(pgd, vaddr);
+ pmd = pmd_offset(pud, vaddr);
+ if (!(pmd_val(*pmd) & _PAGE_PRESENT))
+ set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE));
+ pte = pte_offset_kernel(pmd, vaddr);
+ set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC));
+}
+
+static void machine_kexec_prepare_page_tables(struct kimage *image)
+{
+ void *control_page;
+ pmd_t *pmd = 0;
+
+ control_page = page_address(image->control_code_page);
+#ifdef CONFIG_X86_PAE
+ pmd = image->arch.pmd0;
+#endif
+ machine_kexec_page_table_set_one(
+ image->arch.pgd, pmd, image->arch.pte0,
+ (unsigned long)control_page, __pa(control_page));
+#ifdef CONFIG_X86_PAE
+ pmd = image->arch.pmd1;
+#endif
+ machine_kexec_page_table_set_one(
+ image->arch.pgd, pmd, image->arch.pte1,
+ __pa(control_page), __pa(control_page));
+}
+
/*
* A architecture hook called to validate the
* proposed image and prepare the control pages
@@ -87,12 +149,20 @@ static void load_segments(void)
* reboot code buffer to allow us to avoid allocations
* later.
*
- * Make control page executable.
+ * - Make control page executable.
+ * - Allocate page tables
+ * - Setup page tables
*/
int machine_kexec_prepare(struct kimage *image)
{
+ int error;
+
if (nx_enabled)
set_pages_x(image->control_code_page, 1);
+ error = machine_kexec_alloc_page_tables(image);
+ if (error)
+ return error;
+ machine_kexec_prepare_page_tables(image);
return 0;
}
@@ -104,6 +174,7 @@ void machine_kexec_cleanup(struct kimage *image)
{
if (nx_enabled)
set_pages_nx(image->control_code_page, 1);
+ machine_kexec_free_page_tables(image);
}
/*
@@ -150,18 +221,7 @@ void machine_kexec(struct kimage *image)
relocate_kernel_ptr = control_page;
page_list[PA_CONTROL_PAGE] = __pa(control_page);
page_list[VA_CONTROL_PAGE] = (unsigned long)control_page;
- page_list[PA_PGD] = __pa(kexec_pgd);
- page_list[VA_PGD] = (unsigned long)kexec_pgd;
-#ifdef CONFIG_X86_PAE
- page_list[PA_PMD_0] = __pa(kexec_pmd0);
- page_list[VA_PMD_0] = (unsigned long)kexec_pmd0;
- page_list[PA_PMD_1] = __pa(kexec_pmd1);
- page_list[VA_PMD_1] = (unsigned long)kexec_pmd1;
-#endif
- page_list[PA_PTE_0] = __pa(kexec_pte0);
- page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
- page_list[PA_PTE_1] = __pa(kexec_pte1);
- page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
+ page_list[PA_PGD] = __pa(image->arch.pgd);
if (image->type == KEXEC_TYPE_DEFAULT)
page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page)
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 2c97f07f1c2c..8bd1bf9622a7 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -131,6 +131,11 @@ static void report_broken_nmi(int cpu, int *prev_nmi_count)
atomic_dec(&nmi_active);
}
+static void __acpi_nmi_disable(void *__unused)
+{
+ apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
+}
+
int __init check_nmi_watchdog(void)
{
unsigned int *prev_nmi_count;
@@ -179,8 +184,12 @@ int __init check_nmi_watchdog(void)
kfree(prev_nmi_count);
return 0;
error:
- if (nmi_watchdog == NMI_IO_APIC && !timer_through_8259)
- disable_8259A_irq(0);
+ if (nmi_watchdog == NMI_IO_APIC) {
+ if (!timer_through_8259)
+ disable_8259A_irq(0);
+ on_each_cpu(__acpi_nmi_disable, NULL, 1);
+ }
+
#ifdef CONFIG_X86_32
timer_ack = 0;
#endif
@@ -199,12 +208,17 @@ static int __init setup_nmi_watchdog(char *str)
++str;
}
- get_option(&str, &nmi);
-
- if (nmi >= NMI_INVALID)
- return 0;
+ if (!strncmp(str, "lapic", 5))
+ nmi_watchdog = NMI_LOCAL_APIC;
+ else if (!strncmp(str, "ioapic", 6))
+ nmi_watchdog = NMI_IO_APIC;
+ else {
+ get_option(&str, &nmi);
+ if (nmi >= NMI_INVALID)
+ return 0;
+ nmi_watchdog = nmi;
+ }
- nmi_watchdog = nmi;
return 1;
}
__setup("nmi_watchdog=", setup_nmi_watchdog);
@@ -285,11 +299,6 @@ void acpi_nmi_enable(void)
on_each_cpu(__acpi_nmi_enable, NULL, 1);
}
-static void __acpi_nmi_disable(void *__unused)
-{
- apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
-}
-
/*
* Disable timer based NMIs on all CPUs:
*/
@@ -340,6 +349,8 @@ void stop_apic_nmi_watchdog(void *unused)
return;
if (nmi_watchdog == NMI_LOCAL_APIC)
lapic_watchdog_stop();
+ else
+ __acpi_nmi_disable(NULL);
__get_cpu_var(wd_enabled) = 0;
atomic_dec(&nmi_active);
}
@@ -465,6 +476,24 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
#ifdef CONFIG_SYSCTL
+static void enable_ioapic_nmi_watchdog_single(void *unused)
+{
+ __get_cpu_var(wd_enabled) = 1;
+ atomic_inc(&nmi_active);
+ __acpi_nmi_enable(NULL);
+}
+
+static void enable_ioapic_nmi_watchdog(void)
+{
+ on_each_cpu(enable_ioapic_nmi_watchdog_single, NULL, 1);
+ touch_nmi_watchdog();
+}
+
+static void disable_ioapic_nmi_watchdog(void)
+{
+ on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
+}
+
static int __init setup_unknown_nmi_panic(char *str)
{
unknown_nmi_panic = 1;
@@ -507,6 +536,11 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
enable_lapic_nmi_watchdog();
else
disable_lapic_nmi_watchdog();
+ } else if (nmi_watchdog == NMI_IO_APIC) {
+ if (nmi_watchdog_enabled)
+ enable_ioapic_nmi_watchdog();
+ else
+ disable_ioapic_nmi_watchdog();
} else {
printk(KERN_WARNING
"NMI watchdog doesn't know what hardware to touch\n");
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index 4caff39078e0..0deea37a53cf 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -31,7 +31,7 @@
#include <asm/numaq.h>
#include <asm/topology.h>
#include <asm/processor.h>
-#include <asm/mpspec.h>
+#include <asm/genapic.h>
#include <asm/e820.h>
#include <asm/setup.h>
@@ -235,6 +235,13 @@ static int __init numaq_setup_ioapic_ids(void)
return 1;
}
+static int __init numaq_update_genapic(void)
+{
+ genapic->wakeup_cpu = wakeup_secondary_cpu_via_nmi;
+
+ return 0;
+}
+
static struct x86_quirks numaq_x86_quirks __initdata = {
.arch_pre_time_init = numaq_pre_time_init,
.arch_time_init = NULL,
@@ -250,6 +257,7 @@ static struct x86_quirks numaq_x86_quirks __initdata = {
.mpc_oem_pci_bus = mpc_oem_pci_bus,
.smp_read_mpc_oem = smp_read_mpc_oem,
.setup_ioapic_ids = numaq_setup_ioapic_ids,
+ .update_genapic = numaq_update_genapic,
};
void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index e1e731d78f38..d28bbdc35e4e 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -1567,7 +1567,7 @@ static int __init calgary_parse_options(char *p)
++p;
if (*p == '\0')
break;
- bridge = simple_strtol(p, &endp, 0);
+ bridge = simple_strtoul(p, &endp, 0);
if (p == endp)
break;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index c622772744d8..a4da7c4b3129 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -8,6 +8,7 @@
#include <linux/pm.h>
#include <linux/clockchips.h>
#include <asm/system.h>
+#include <asm/apic.h>
unsigned long idle_halt;
EXPORT_SYMBOL(idle_halt);
@@ -122,6 +123,21 @@ void default_idle(void)
EXPORT_SYMBOL(default_idle);
#endif
+void stop_this_cpu(void *dummy)
+{
+ local_irq_disable();
+ /*
+ * Remove this CPU:
+ */
+ cpu_clear(smp_processor_id(), cpu_online_map);
+ disable_local_APIC();
+
+ for (;;) {
+ if (hlt_works(smp_processor_id()))
+ halt();
+ }
+}
+
static void do_nothing(void *unused)
{
}
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 0a6d8c12e10d..06180dff5b2e 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -929,17 +929,16 @@ void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *c)
switch (c->x86) {
case 0x6:
switch (c->x86_model) {
+ case 0 ... 0xC:
+ /* sorry, don't know about them */
+ break;
case 0xD:
case 0xE: /* Pentium M */
bts_configure(&bts_cfg_pentium_m);
break;
- case 0xF: /* Core2 */
- case 0x1C: /* Atom */
+ default: /* Core2, Atom, ... */
bts_configure(&bts_cfg_core2);
break;
- default:
- /* sorry, don't know about them */
- break;
}
break;
case 0xF:
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index cc5a2545dd41..61f718df6eec 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -21,6 +21,9 @@
# include <asm/iommu.h>
#endif
+#include <mach_ipi.h>
+
+
/*
* Power off function, if any
*/
@@ -36,7 +39,10 @@ int reboot_force;
static int reboot_cpu = -1;
#endif
-/* reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old]
+/* This is set by the PCI code if either type 1 or type 2 PCI is detected */
+bool port_cf9_safe = false;
+
+/* reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] | p[ci]
warm Don't set the cold reboot flag
cold Set the cold reboot flag
bios Reboot by jumping through the BIOS (only for X86_32)
@@ -45,6 +51,7 @@ static int reboot_cpu = -1;
kbd Use the keyboard controller. cold reset (default)
acpi Use the RESET_REG in the FADT
efi Use efi reset_system runtime service
+ pci Use the so-called "PCI reset register", CF9
force Avoid anything that could hang.
*/
static int __init reboot_setup(char *str)
@@ -79,6 +86,7 @@ static int __init reboot_setup(char *str)
case 'k':
case 't':
case 'e':
+ case 'p':
reboot_type = *str;
break;
@@ -404,12 +412,27 @@ static void native_machine_emergency_restart(void)
reboot_type = BOOT_KBD;
break;
-
case BOOT_EFI:
if (efi_enabled)
- efi.reset_system(reboot_mode ? EFI_RESET_WARM : EFI_RESET_COLD,
+ efi.reset_system(reboot_mode ?
+ EFI_RESET_WARM :
+ EFI_RESET_COLD,
EFI_SUCCESS, 0, NULL);
+ reboot_type = BOOT_KBD;
+ break;
+ case BOOT_CF9:
+ port_cf9_safe = true;
+ /* fall through */
+
+ case BOOT_CF9_COND:
+ if (port_cf9_safe) {
+ u8 cf9 = inb(0xcf9) & ~6;
+ outb(cf9|2, 0xcf9); /* Request hard reset */
+ udelay(50);
+ outb(cf9|6, 0xcf9); /* Actually do the reset */
+ udelay(50);
+ }
reboot_type = BOOT_KBD;
break;
}
@@ -470,6 +493,11 @@ static void native_machine_restart(char *__unused)
static void native_machine_halt(void)
{
+ /* stop other cpus and apics */
+ machine_shutdown();
+
+ /* stop this cpu */
+ stop_this_cpu(NULL);
}
static void native_machine_power_off(void)
@@ -523,3 +551,95 @@ void machine_crash_shutdown(struct pt_regs *regs)
machine_ops.crash_shutdown(regs);
}
#endif
+
+
+#if defined(CONFIG_SMP)
+
+/* This keeps a track of which one is crashing cpu. */
+static int crashing_cpu;
+static nmi_shootdown_cb shootdown_callback;
+
+static atomic_t waiting_for_crash_ipi;
+
+static int crash_nmi_callback(struct notifier_block *self,
+ unsigned long val, void *data)
+{
+ int cpu;
+
+ if (val != DIE_NMI_IPI)
+ return NOTIFY_OK;
+
+ cpu = raw_smp_processor_id();
+
+ /* Don't do anything if this handler is invoked on crashing cpu.
+ * Otherwise, system will completely hang. Crashing cpu can get
+ * an NMI if system was initially booted with nmi_watchdog parameter.
+ */
+ if (cpu == crashing_cpu)
+ return NOTIFY_STOP;
+ local_irq_disable();
+
+ shootdown_callback(cpu, (struct die_args *)data);
+
+ atomic_dec(&waiting_for_crash_ipi);
+ /* Assume hlt works */
+ halt();
+ for (;;)
+ cpu_relax();
+
+ return 1;
+}
+
+static void smp_send_nmi_allbutself(void)
+{
+ cpumask_t mask = cpu_online_map;
+ cpu_clear(safe_smp_processor_id(), mask);
+ if (!cpus_empty(mask))
+ send_IPI_mask(mask, NMI_VECTOR);
+}
+
+static struct notifier_block crash_nmi_nb = {
+ .notifier_call = crash_nmi_callback,
+};
+
+/* Halt all other CPUs, calling the specified function on each of them
+ *
+ * This function can be used to halt all other CPUs on crash
+ * or emergency reboot time. The function passed as parameter
+ * will be called inside a NMI handler on all CPUs.
+ */
+void nmi_shootdown_cpus(nmi_shootdown_cb callback)
+{
+ unsigned long msecs;
+ local_irq_disable();
+
+ /* Make a note of crashing cpu. Will be used in NMI callback.*/
+ crashing_cpu = safe_smp_processor_id();
+
+ shootdown_callback = callback;
+
+ atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
+ /* Would it be better to replace the trap vector here? */
+ if (register_die_notifier(&crash_nmi_nb))
+ return; /* return what? */
+ /* Ensure the new callback function is set before sending
+ * out the NMI
+ */
+ wmb();
+
+ smp_send_nmi_allbutself();
+
+ msecs = 1000; /* Wait at most a second for the other cpus to stop */
+ while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
+ mdelay(1);
+ msecs--;
+ }
+
+ /* Leave the nmi callback set */
+}
+#else /* !CONFIG_SMP */
+void nmi_shootdown_cpus(nmi_shootdown_cb callback)
+{
+ /* No other CPUs to shoot down */
+}
+#endif
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S
index 6f50664b2ba5..a160f3119725 100644
--- a/arch/x86/kernel/relocate_kernel_32.S
+++ b/arch/x86/kernel/relocate_kernel_32.S
@@ -10,15 +10,12 @@
#include <asm/page.h>
#include <asm/kexec.h>
#include <asm/processor-flags.h>
-#include <asm/pgtable.h>
/*
* Must be relocatable PIC code callable as a C function
*/
#define PTR(x) (x << 2)
-#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
-#define PAE_PGD_ATTR (_PAGE_PRESENT)
/* control_page + KEXEC_CONTROL_CODE_MAX_SIZE
* ~ control_page + PAGE_SIZE are used as data storage and stack for
@@ -39,7 +36,6 @@
#define CP_PA_BACKUP_PAGES_MAP DATA(0x1c)
.text
- .align PAGE_SIZE
.globl relocate_kernel
relocate_kernel:
/* Save the CPU context, used for jumping back */
@@ -60,117 +56,6 @@ relocate_kernel:
movl %cr4, %eax
movl %eax, CR4(%edi)
-#ifdef CONFIG_X86_PAE
- /* map the control page at its virtual address */
-
- movl PTR(VA_PGD)(%ebp), %edi
- movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
- andl $0xc0000000, %eax
- shrl $27, %eax
- addl %edi, %eax
-
- movl PTR(PA_PMD_0)(%ebp), %edx
- orl $PAE_PGD_ATTR, %edx
- movl %edx, (%eax)
-
- movl PTR(VA_PMD_0)(%ebp), %edi
- movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
- andl $0x3fe00000, %eax
- shrl $18, %eax
- addl %edi, %eax
-
- movl PTR(PA_PTE_0)(%ebp), %edx
- orl $PAGE_ATTR, %edx
- movl %edx, (%eax)
-
- movl PTR(VA_PTE_0)(%ebp), %edi
- movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
- andl $0x001ff000, %eax
- shrl $9, %eax
- addl %edi, %eax
-
- movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
- orl $PAGE_ATTR, %edx
- movl %edx, (%eax)
-
- /* identity map the control page at its physical address */
-
- movl PTR(VA_PGD)(%ebp), %edi
- movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
- andl $0xc0000000, %eax
- shrl $27, %eax
- addl %edi, %eax
-
- movl PTR(PA_PMD_1)(%ebp), %edx
- orl $PAE_PGD_ATTR, %edx
- movl %edx, (%eax)
-
- movl PTR(VA_PMD_1)(%ebp), %edi
- movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
- andl $0x3fe00000, %eax
- shrl $18, %eax
- addl %edi, %eax
-
- movl PTR(PA_PTE_1)(%ebp), %edx
- orl $PAGE_ATTR, %edx
- movl %edx, (%eax)
-
- movl PTR(VA_PTE_1)(%ebp), %edi
- movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
- andl $0x001ff000, %eax
- shrl $9, %eax
- addl %edi, %eax
-
- movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
- orl $PAGE_ATTR, %edx
- movl %edx, (%eax)
-#else
- /* map the control page at its virtual address */
-
- movl PTR(VA_PGD)(%ebp), %edi
- movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
- andl $0xffc00000, %eax
- shrl $20, %eax
- addl %edi, %eax
-
- movl PTR(PA_PTE_0)(%ebp), %edx
- orl $PAGE_ATTR, %edx
- movl %edx, (%eax)
-
- movl PTR(VA_PTE_0)(%ebp), %edi
- movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
- andl $0x003ff000, %eax
- shrl $10, %eax
- addl %edi, %eax
-
- movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
- orl $PAGE_ATTR, %edx
- movl %edx, (%eax)
-
- /* identity map the control page at its physical address */
-
- movl PTR(VA_PGD)(%ebp), %edi
- movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
- andl $0xffc00000, %eax
- shrl $20, %eax
- addl %edi, %eax
-
- movl PTR(PA_PTE_1)(%ebp), %edx
- orl $PAGE_ATTR, %edx
- movl %edx, (%eax)
-
- movl PTR(VA_PTE_1)(%ebp), %edi
- movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
- andl $0x003ff000, %eax
- shrl $10, %eax
- addl %edi, %eax
-
- movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
- orl $PAGE_ATTR, %edx
- movl %edx, (%eax)
-#endif
-
-relocate_new_kernel:
/* read the arguments and say goodbye to the stack */
movl 20+4(%esp), %ebx /* page_list */
movl 20+8(%esp), %ebp /* list of pages */
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 9d5674f7b6cc..9f4b90d7d60d 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -98,6 +98,7 @@
#include <mach_apic.h>
#include <asm/paravirt.h>
+#include <asm/hypervisor.h>
#include <asm/percpu.h>
#include <asm/topology.h>
@@ -583,161 +584,24 @@ static int __init setup_elfcorehdr(char *arg)
early_param("elfcorehdr", setup_elfcorehdr);
#endif
-static struct x86_quirks default_x86_quirks __initdata;
-
-struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
-
-/*
- * Some BIOSes seem to corrupt the low 64k of memory during events
- * like suspend/resume and unplugging an HDMI cable. Reserve all
- * remaining free memory in that area and fill it with a distinct
- * pattern.
- */
-#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
-#define MAX_SCAN_AREAS 8
-
-static int __read_mostly memory_corruption_check = -1;
-
-static unsigned __read_mostly corruption_check_size = 64*1024;
-static unsigned __read_mostly corruption_check_period = 60; /* seconds */
-
-static struct e820entry scan_areas[MAX_SCAN_AREAS];
-static int num_scan_areas;
-
-
-static int set_corruption_check(char *arg)
-{
- char *end;
-
- memory_corruption_check = simple_strtol(arg, &end, 10);
-
- return (*end == 0) ? 0 : -EINVAL;
-}
-early_param("memory_corruption_check", set_corruption_check);
-
-static int set_corruption_check_period(char *arg)
-{
- char *end;
-
- corruption_check_period = simple_strtoul(arg, &end, 10);
-
- return (*end == 0) ? 0 : -EINVAL;
-}
-early_param("memory_corruption_check_period", set_corruption_check_period);
-
-static int set_corruption_check_size(char *arg)
+static int __init default_update_genapic(void)
{
- char *end;
- unsigned size;
-
- size = memparse(arg, &end);
-
- if (*end == '\0')
- corruption_check_size = size;
-
- return (size == corruption_check_size) ? 0 : -EINVAL;
-}
-early_param("memory_corruption_check_size", set_corruption_check_size);
-
-
-static void __init setup_bios_corruption_check(void)
-{
- u64 addr = PAGE_SIZE; /* assume first page is reserved anyway */
-
- if (memory_corruption_check == -1) {
- memory_corruption_check =
-#ifdef CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK
- 1
-#else
- 0
+#ifdef CONFIG_X86_SMP
+# if defined(CONFIG_X86_GENERICARCH) || defined(CONFIG_X86_64)
+ genapic->wakeup_cpu = wakeup_secondary_cpu_via_init;
+# endif
#endif
- ;
- }
-
- if (corruption_check_size == 0)
- memory_corruption_check = 0;
-
- if (!memory_corruption_check)
- return;
-
- corruption_check_size = round_up(corruption_check_size, PAGE_SIZE);
- while(addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) {
- u64 size;
- addr = find_e820_area_size(addr, &size, PAGE_SIZE);
-
- if (addr == 0)
- break;
-
- if ((addr + size) > corruption_check_size)
- size = corruption_check_size - addr;
-
- if (size == 0)
- break;
-
- e820_update_range(addr, size, E820_RAM, E820_RESERVED);
- scan_areas[num_scan_areas].addr = addr;
- scan_areas[num_scan_areas].size = size;
- num_scan_areas++;
-
- /* Assume we've already mapped this early memory */
- memset(__va(addr), 0, size);
-
- addr += size;
- }
-
- printk(KERN_INFO "Scanning %d areas for low memory corruption\n",
- num_scan_areas);
- update_e820();
-}
-
-static struct timer_list periodic_check_timer;
-
-void check_for_bios_corruption(void)
-{
- int i;
- int corruption = 0;
-
- if (!memory_corruption_check)
- return;
-
- for(i = 0; i < num_scan_areas; i++) {
- unsigned long *addr = __va(scan_areas[i].addr);
- unsigned long size = scan_areas[i].size;
-
- for(; size; addr++, size -= sizeof(unsigned long)) {
- if (!*addr)
- continue;
- printk(KERN_ERR "Corrupted low memory at %p (%lx phys) = %08lx\n",
- addr, __pa(addr), *addr);
- corruption = 1;
- *addr = 0;
- }
- }
-
- WARN(corruption, KERN_ERR "Memory corruption detected in low memory\n");
-}
-
-static void periodic_check_for_corruption(unsigned long data)
-{
- check_for_bios_corruption();
- mod_timer(&periodic_check_timer, round_jiffies(jiffies + corruption_check_period*HZ));
+ return 0;
}
-void start_periodic_check_for_corruption(void)
-{
- if (!memory_corruption_check || corruption_check_period == 0)
- return;
-
- printk(KERN_INFO "Scanning for low memory corruption every %d seconds\n",
- corruption_check_period);
+static struct x86_quirks default_x86_quirks __initdata = {
+ .update_genapic = default_update_genapic,
+};
- init_timer(&periodic_check_timer);
- periodic_check_timer.function = &periodic_check_for_corruption;
- periodic_check_for_corruption(0);
-}
-#endif
+struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
+#ifdef CONFIG_X86_RESERVE_LOW_64K
static int __init dmi_low_memory_corruption(const struct dmi_system_id *d)
{
printk(KERN_NOTICE
@@ -749,6 +613,7 @@ static int __init dmi_low_memory_corruption(const struct dmi_system_id *d)
return 0;
}
+#endif
/* List of systems that have known low memory corruption BIOS problems */
static struct dmi_system_id __initdata bad_bios_dmi_table[] = {
@@ -909,6 +774,12 @@ void __init setup_arch(char **cmdline_p)
dmi_check_system(bad_bios_dmi_table);
+ /*
+ * VMware detection requires dmi to be available, so this
+ * needs to be done after dmi_scan_machine, for the BP.
+ */
+ init_hypervisor(&boot_cpu_data);
+
#ifdef CONFIG_X86_32
probe_roms();
#endif
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index d6dd057d0f22..f7dd6c44c042 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -4,29 +4,32 @@
* 1997-11-28 Modified for POSIX.1b signals by Richard Henderson
* 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes
*/
-#include <linux/list.h>
-#include <linux/personality.h>
-#include <linux/binfmts.h>
-#include <linux/suspend.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
#include <linux/kernel.h>
-#include <linux/ptrace.h>
#include <linux/signal.h>
-#include <linux/stddef.h>
-#include <linux/unistd.h>
#include <linux/errno.h>
-#include <linux/sched.h>
#include <linux/wait.h>
+#include <linux/ptrace.h>
#include <linux/tracehook.h>
-#include <linux/elf.h>
-#include <linux/smp.h>
-#include <linux/mm.h>
+#include <linux/unistd.h>
+#include <linux/stddef.h>
+#include <linux/personality.h>
+#include <linux/uaccess.h>
#include <asm/processor.h>
#include <asm/ucontext.h>
-#include <asm/uaccess.h>
#include <asm/i387.h>
#include <asm/vdso.h>
+
+#ifdef CONFIG_X86_64
+#include <asm/proto.h>
+#include <asm/ia32_unistd.h>
+#include <asm/mce.h>
+#endif /* CONFIG_X86_64 */
+
#include <asm/syscall.h>
#include <asm/syscalls.h>
@@ -45,6 +48,28 @@
# define FIX_EFLAGS __FIX_EFLAGS
#endif
+static const struct {
+ u16 poplmovl;
+ u32 val;
+ u16 int80;
+} __attribute__((packed)) retcode = {
+ 0xb858, /* popl %eax; movl $..., %eax */
+ __NR_sigreturn,
+ 0x80cd, /* int $0x80 */
+};
+
+static const struct {
+ u8 movl;
+ u32 val;
+ u16 int80;
+ u8 pad;
+} __attribute__((packed)) rt_retcode = {
+ 0xb8, /* movl $..., %eax */
+ __NR_rt_sigreturn,
+ 0x80cd, /* int $0x80 */
+ 0
+};
+
/*
* Atomically swap in the new signal mask, and wait for a signal.
*/
@@ -100,6 +125,7 @@ sys_sigaction(int sig, const struct old_sigaction __user *act,
return ret;
}
+#ifdef CONFIG_X86_32
asmlinkage int sys_sigaltstack(unsigned long bx)
{
/*
@@ -112,6 +138,14 @@ asmlinkage int sys_sigaltstack(unsigned long bx)
return do_sigaltstack(uss, uoss, regs->sp);
}
+#else /* !CONFIG_X86_32 */
+asmlinkage long
+sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
+ struct pt_regs *regs)
+{
+ return do_sigaltstack(uss, uoss, regs->sp);
+}
+#endif /* CONFIG_X86_32 */
#define COPY(x) { \
err |= __get_user(regs->x, &sc->x); \
@@ -123,7 +157,7 @@ asmlinkage int sys_sigaltstack(unsigned long bx)
regs->seg = tmp; \
}
-#define COPY_SEG_STRICT(seg) { \
+#define COPY_SEG_CPL3(seg) { \
unsigned short tmp; \
err |= __get_user(tmp, &sc->seg); \
regs->seg = tmp | 3; \
@@ -149,14 +183,36 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
/* Always make any pending restarted system calls return -EINTR */
current_thread_info()->restart_block.fn = do_no_restart_syscall;
+#ifdef CONFIG_X86_32
GET_SEG(gs);
COPY_SEG(fs);
COPY_SEG(es);
COPY_SEG(ds);
+#endif /* CONFIG_X86_32 */
+
COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
COPY(dx); COPY(cx); COPY(ip);
- COPY_SEG_STRICT(cs);
- COPY_SEG_STRICT(ss);
+
+#ifdef CONFIG_X86_64
+ COPY(r8);
+ COPY(r9);
+ COPY(r10);
+ COPY(r11);
+ COPY(r12);
+ COPY(r13);
+ COPY(r14);
+ COPY(r15);
+#endif /* CONFIG_X86_64 */
+
+#ifdef CONFIG_X86_32
+ COPY_SEG_CPL3(cs);
+ COPY_SEG_CPL3(ss);
+#else /* !CONFIG_X86_32 */
+ /* Kernel saves and restores only the CS segment register on signals,
+ * which is the bare minimum needed to allow mixed 32/64-bit code.
+ * App's signal handler can save/restore other segments if needed. */
+ COPY_SEG_CPL3(cs);
+#endif /* CONFIG_X86_32 */
err |= __get_user(tmpflags, &sc->flags);
regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
@@ -243,12 +299,19 @@ badframe:
return 0;
}
+#ifdef CONFIG_X86_32
asmlinkage int sys_rt_sigreturn(unsigned long __unused)
{
struct pt_regs *regs = (struct pt_regs *)&__unused;
return do_rt_sigreturn(regs);
}
+#else /* !CONFIG_X86_32 */
+asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
+{
+ return do_rt_sigreturn(regs);
+}
+#endif /* CONFIG_X86_32 */
/*
* Set up a signal frame.
@@ -257,14 +320,20 @@ static int
setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
struct pt_regs *regs, unsigned long mask)
{
- int tmp, err = 0;
+ int err = 0;
- err |= __put_user(regs->fs, (unsigned int __user *)&sc->fs);
- savesegment(gs, tmp);
- err |= __put_user(tmp, (unsigned int __user *)&sc->gs);
+#ifdef CONFIG_X86_32
+ {
+ unsigned int tmp;
+ savesegment(gs, tmp);
+ err |= __put_user(tmp, (unsigned int __user *)&sc->gs);
+ }
+ err |= __put_user(regs->fs, (unsigned int __user *)&sc->fs);
err |= __put_user(regs->es, (unsigned int __user *)&sc->es);
err |= __put_user(regs->ds, (unsigned int __user *)&sc->ds);
+#endif /* CONFIG_X86_32 */
+
err |= __put_user(regs->di, &sc->di);
err |= __put_user(regs->si, &sc->si);
err |= __put_user(regs->bp, &sc->bp);
@@ -273,19 +342,33 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
err |= __put_user(regs->dx, &sc->dx);
err |= __put_user(regs->cx, &sc->cx);
err |= __put_user(regs->ax, &sc->ax);
+#ifdef CONFIG_X86_64
+ err |= __put_user(regs->r8, &sc->r8);
+ err |= __put_user(regs->r9, &sc->r9);
+ err |= __put_user(regs->r10, &sc->r10);
+ err |= __put_user(regs->r11, &sc->r11);
+ err |= __put_user(regs->r12, &sc->r12);
+ err |= __put_user(regs->r13, &sc->r13);
+ err |= __put_user(regs->r14, &sc->r14);
+ err |= __put_user(regs->r15, &sc->r15);
+#endif /* CONFIG_X86_64 */
+
err |= __put_user(current->thread.trap_no, &sc->trapno);
err |= __put_user(current->thread.error_code, &sc->err);
err |= __put_user(regs->ip, &sc->ip);
+#ifdef CONFIG_X86_32
err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs);
err |= __put_user(regs->flags, &sc->flags);
err |= __put_user(regs->sp, &sc->sp_at_signal);
err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss);
+#else /* !CONFIG_X86_32 */
+ err |= __put_user(regs->flags, &sc->flags);
+ err |= __put_user(regs->cs, &sc->cs);
+ err |= __put_user(0, &sc->gs);
+ err |= __put_user(0, &sc->fs);
+#endif /* CONFIG_X86_32 */
- tmp = save_i387_xstate(fpstate);
- if (tmp < 0)
- err = 1;
- else
- err |= __put_user(tmp ? fpstate : NULL, &sc->fpstate);
+ err |= __put_user(fpstate, &sc->fpstate);
/* non-iBCS2 extensions.. */
err |= __put_user(mask, &sc->oldmask);
@@ -328,6 +411,8 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
if (used_math()) {
sp = sp - sig_xstate_size;
*fpstate = (struct _fpstate *) sp;
+ if (save_i387_xstate(*fpstate) < 0)
+ return (void __user *)-1L;
}
sp -= frame_size;
@@ -383,9 +468,7 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
* reasons and because gdb uses it as a signature to notice
* signal handler stack frames.
*/
- err |= __put_user(0xb858, (short __user *)(frame->retcode+0));
- err |= __put_user(__NR_sigreturn, (int __user *)(frame->retcode+2));
- err |= __put_user(0x80cd, (short __user *)(frame->retcode+6));
+ err |= __put_user(*((u64 *)&retcode), (u64 *)frame->retcode);
if (err)
return -EFAULT;
@@ -454,9 +537,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
* reasons and because gdb uses it as a signature to notice
* signal handler stack frames.
*/
- err |= __put_user(0xb8, (char __user *)(frame->retcode+0));
- err |= __put_user(__NR_rt_sigreturn, (int __user *)(frame->retcode+1));
- err |= __put_user(0x80cd, (short __user *)(frame->retcode+5));
+ err |= __put_user(*((u64 *)&rt_retcode), (u64 *)frame->retcode);
if (err)
return -EFAULT;
@@ -481,17 +562,31 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
*/
static int signr_convert(int sig)
{
+#ifdef CONFIG_X86_32
struct thread_info *info = current_thread_info();
if (info->exec_domain && info->exec_domain->signal_invmap && sig < 32)
return info->exec_domain->signal_invmap[sig];
+#endif /* CONFIG_X86_32 */
return sig;
}
+#ifdef CONFIG_X86_32
+
#define is_ia32 1
#define ia32_setup_frame __setup_frame
#define ia32_setup_rt_frame __setup_rt_frame
+#else /* !CONFIG_X86_32 */
+
+#ifdef CONFIG_IA32_EMULATION
+#define is_ia32 test_thread_flag(TIF_IA32)
+#else /* !CONFIG_IA32_EMULATION */
+#define is_ia32 0
+#endif /* CONFIG_IA32_EMULATION */
+
+#endif /* CONFIG_X86_32 */
+
static int
setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
sigset_t *set, struct pt_regs *regs)
@@ -592,7 +687,13 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
return 0;
}
+#ifdef CONFIG_X86_32
#define NR_restart_syscall __NR_restart_syscall
+#else /* !CONFIG_X86_32 */
+#define NR_restart_syscall \
+ test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall : __NR_restart_syscall
+#endif /* CONFIG_X86_32 */
+
/*
* Note that 'init' is a special process: it doesn't get signals it doesn't
* want to handle. Thus you cannot kill init even with a SIGKILL even by
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index a5c9627f4db9..32718f5e4f61 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -19,17 +19,22 @@
#include <linux/unistd.h>
#include <linux/stddef.h>
#include <linux/personality.h>
-#include <linux/compiler.h>
#include <linux/uaccess.h>
#include <asm/processor.h>
#include <asm/ucontext.h>
#include <asm/i387.h>
+#include <asm/vdso.h>
+
+#ifdef CONFIG_X86_64
#include <asm/proto.h>
#include <asm/ia32_unistd.h>
#include <asm/mce.h>
+#endif /* CONFIG_X86_64 */
+
#include <asm/syscall.h>
#include <asm/syscalls.h>
+
#include "sigframe.h"
#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
@@ -45,18 +50,33 @@
# define FIX_EFLAGS __FIX_EFLAGS
#endif
+#ifdef CONFIG_X86_32
+asmlinkage int sys_sigaltstack(unsigned long bx)
+{
+ /*
+ * This is needed to make gcc realize it doesn't own the
+ * "struct pt_regs"
+ */
+ struct pt_regs *regs = (struct pt_regs *)&bx;
+ const stack_t __user *uss = (const stack_t __user *)bx;
+ stack_t __user *uoss = (stack_t __user *)regs->cx;
+
+ return do_sigaltstack(uss, uoss, regs->sp);
+}
+#else /* !CONFIG_X86_32 */
asmlinkage long
sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
struct pt_regs *regs)
{
return do_sigaltstack(uss, uoss, regs->sp);
}
+#endif /* CONFIG_X86_32 */
#define COPY(x) { \
err |= __get_user(regs->x, &sc->x); \
}
-#define COPY_SEG_STRICT(seg) { \
+#define COPY_SEG_CPL3(seg) { \
unsigned short tmp; \
err |= __get_user(tmp, &sc->seg); \
regs->seg = tmp | 3; \
@@ -76,8 +96,17 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
/* Always make any pending restarted system calls return -EINTR */
current_thread_info()->restart_block.fn = do_no_restart_syscall;
+#ifdef CONFIG_X86_32
+ GET_SEG(gs);
+ COPY_SEG(fs);
+ COPY_SEG(es);
+ COPY_SEG(ds);
+#endif /* CONFIG_X86_32 */
+
COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
COPY(dx); COPY(cx); COPY(ip);
+
+#ifdef CONFIG_X86_64
COPY(r8);
COPY(r9);
COPY(r10);
@@ -86,11 +115,17 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
COPY(r13);
COPY(r14);
COPY(r15);
+#endif /* CONFIG_X86_64 */
+#ifdef CONFIG_X86_32
+ COPY_SEG_CPL3(cs);
+ COPY_SEG_CPL3(ss);
+#else /* !CONFIG_X86_32 */
/* Kernel saves and restores only the CS segment register on signals,
* which is the bare minimum needed to allow mixed 32/64-bit code.
* App's signal handler can save/restore other segments if needed. */
- COPY_SEG_STRICT(cs);
+ COPY_SEG_CPL3(cs);
+#endif /* CONFIG_X86_32 */
err |= __get_user(tmpflags, &sc->flags);
regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
@@ -134,24 +169,40 @@ badframe:
return 0;
}
+#ifdef CONFIG_X86_32
+asmlinkage int sys_rt_sigreturn(unsigned long __unused)
+{
+ struct pt_regs *regs = (struct pt_regs *)&__unused;
+
+ return do_rt_sigreturn(regs);
+}
+#else /* !CONFIG_X86_32 */
asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
{
return do_rt_sigreturn(regs);
}
+#endif /* CONFIG_X86_32 */
/*
* Set up a signal frame.
*/
-
-static inline int
-setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
- unsigned long mask, struct task_struct *me)
+static int
+setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
+ struct pt_regs *regs, unsigned long mask)
{
int err = 0;
- err |= __put_user(regs->cs, &sc->cs);
- err |= __put_user(0, &sc->gs);
- err |= __put_user(0, &sc->fs);
+#ifdef CONFIG_X86_32
+ {
+ unsigned int tmp;
+
+ savesegment(gs, tmp);
+ err |= __put_user(tmp, (unsigned int __user *)&sc->gs);
+ }
+ err |= __put_user(regs->fs, (unsigned int __user *)&sc->fs);
+ err |= __put_user(regs->es, (unsigned int __user *)&sc->es);
+ err |= __put_user(regs->ds, (unsigned int __user *)&sc->ds);
+#endif /* CONFIG_X86_32 */
err |= __put_user(regs->di, &sc->di);
err |= __put_user(regs->si, &sc->si);
@@ -161,6 +212,7 @@ setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
err |= __put_user(regs->dx, &sc->dx);
err |= __put_user(regs->cx, &sc->cx);
err |= __put_user(regs->ax, &sc->ax);
+#ifdef CONFIG_X86_64
err |= __put_user(regs->r8, &sc->r8);
err |= __put_user(regs->r9, &sc->r9);
err |= __put_user(regs->r10, &sc->r10);
@@ -169,12 +221,28 @@ setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
err |= __put_user(regs->r13, &sc->r13);
err |= __put_user(regs->r14, &sc->r14);
err |= __put_user(regs->r15, &sc->r15);
- err |= __put_user(me->thread.trap_no, &sc->trapno);
- err |= __put_user(me->thread.error_code, &sc->err);
+#endif /* CONFIG_X86_64 */
+
+ err |= __put_user(current->thread.trap_no, &sc->trapno);
+ err |= __put_user(current->thread.error_code, &sc->err);
err |= __put_user(regs->ip, &sc->ip);
+#ifdef CONFIG_X86_32
+ err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs);
+ err |= __put_user(regs->flags, &sc->flags);
+ err |= __put_user(regs->sp, &sc->sp_at_signal);
+ err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss);
+#else /* !CONFIG_X86_32 */
err |= __put_user(regs->flags, &sc->flags);
+ err |= __put_user(regs->cs, &sc->cs);
+ err |= __put_user(0, &sc->gs);
+ err |= __put_user(0, &sc->fs);
+#endif /* CONFIG_X86_32 */
+
+ err |= __put_user(fpstate, &sc->fpstate);
+
+ /* non-iBCS2 extensions.. */
err |= __put_user(mask, &sc->oldmask);
- err |= __put_user(me->thread.cr2, &sc->cr2);
+ err |= __put_user(current->thread.cr2, &sc->cr2);
return err;
}
@@ -184,12 +252,10 @@ setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
*/
static void __user *
-get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size)
+get_stack(struct k_sigaction *ka, unsigned long sp, unsigned long size)
{
- unsigned long sp;
-
/* Default to using normal stack - redzone*/
- sp = regs->sp - 128;
+ sp -= 128;
/* This is the X/Open sanctioned signal stack switching. */
if (ka->sa.sa_flags & SA_ONSTACK) {
@@ -209,14 +275,14 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
struct task_struct *me = current;
if (used_math()) {
- fp = get_stack(ka, regs, sig_xstate_size);
+ fp = get_stack(ka, regs->sp, sig_xstate_size);
frame = (void __user *)round_down(
(unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8;
if (save_i387_xstate(fp) < 0)
return -EFAULT;
} else
- frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8;
+ frame = get_stack(ka, regs->sp, sizeof(struct rt_sigframe)) - 8;
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
return -EFAULT;
@@ -236,13 +302,8 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
err |= __put_user(sas_ss_flags(regs->sp),
&frame->uc.uc_stack.ss_flags);
err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
- err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0], me);
- err |= __put_user(fp, &frame->uc.uc_mcontext.fpstate);
- if (sizeof(*set) == 16) {
- __put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]);
- __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]);
- } else
- err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+ err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]);
+ err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
/* Set up to return from userspace. If provided, use a stub
already in userspace. */
@@ -282,14 +343,30 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
*/
static int signr_convert(int sig)
{
+#ifdef CONFIG_X86_32
+ struct thread_info *info = current_thread_info();
+
+ if (info->exec_domain && info->exec_domain->signal_invmap && sig < 32)
+ return info->exec_domain->signal_invmap[sig];
+#endif /* CONFIG_X86_32 */
return sig;
}
+#ifdef CONFIG_X86_32
+
+#define is_ia32 1
+#define ia32_setup_frame __setup_frame
+#define ia32_setup_rt_frame __setup_rt_frame
+
+#else /* !CONFIG_X86_32 */
+
#ifdef CONFIG_IA32_EMULATION
#define is_ia32 test_thread_flag(TIF_IA32)
-#else
+#else /* !CONFIG_IA32_EMULATION */
#define is_ia32 0
-#endif
+#endif /* CONFIG_IA32_EMULATION */
+
+#endif /* CONFIG_X86_32 */
static int
setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
@@ -391,8 +468,13 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
return 0;
}
+#ifdef CONFIG_X86_32
+#define NR_restart_syscall __NR_restart_syscall
+#else /* !CONFIG_X86_32 */
#define NR_restart_syscall \
test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall : __NR_restart_syscall
+#endif /* CONFIG_X86_32 */
+
/*
* Note that 'init' is a special process: it doesn't get signals it doesn't
* want to handle. Thus you cannot kill init even with a SIGKILL even by
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 18f9b19f5f8f..3f92b134ab90 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -140,19 +140,6 @@ void native_send_call_func_ipi(cpumask_t mask)
send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
}
-static void stop_this_cpu(void *dummy)
-{
- local_irq_disable();
- /*
- * Remove this CPU:
- */
- cpu_clear(smp_processor_id(), cpu_online_map);
- disable_local_APIC();
- if (hlt_works(smp_processor_id()))
- for (;;) halt();
- for (;;);
-}
-
/*
* this function calls the 'stop' function on all other CPUs in the system.
*/
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 7b1093397319..0e9f446269f4 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -62,6 +62,7 @@
#include <asm/mtrr.h>
#include <asm/vmi.h>
#include <asm/genapic.h>
+#include <asm/setup.h>
#include <linux/mc146818rtc.h>
#include <mach_apic.h>
@@ -536,7 +537,7 @@ static void impress_friends(void)
pr_debug("Before bogocount - setting activated=1.\n");
}
-static inline void __inquire_remote_apic(int apicid)
+void __inquire_remote_apic(int apicid)
{
unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
char *names[] = { "ID", "VERSION", "SPIV" };
@@ -575,14 +576,13 @@ static inline void __inquire_remote_apic(int apicid)
}
}
-#ifdef WAKE_SECONDARY_VIA_NMI
/*
* Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
* INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
* won't ... remember to clear down the APIC, etc later.
*/
-static int __devinit
-wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
+int __devinit
+wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
{
unsigned long send_status, accept_status = 0;
int maxlvt;
@@ -599,7 +599,7 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
* Give the other CPU some time to accept the IPI.
*/
udelay(200);
- if (APIC_INTEGRATED(apic_version[phys_apicid])) {
+ if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
maxlvt = lapic_get_maxlvt();
if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
apic_write(APIC_ESR, 0);
@@ -614,11 +614,9 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
return (send_status | accept_status);
}
-#endif /* WAKE_SECONDARY_VIA_NMI */
-#ifdef WAKE_SECONDARY_VIA_INIT
-static int __devinit
-wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
+int __devinit
+wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
{
unsigned long send_status, accept_status = 0;
int maxlvt, num_starts, j;
@@ -737,7 +735,6 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
return (send_status | accept_status);
}
-#endif /* WAKE_SECONDARY_VIA_INIT */
struct create_idle {
struct work_struct work;
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index a03e7f6d90c3..10786af95545 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -6,6 +6,7 @@
#include <linux/sched.h>
#include <linux/stacktrace.h>
#include <linux/module.h>
+#include <linux/uaccess.h>
#include <asm/stacktrace.h>
static void save_stack_warning(void *data, char *msg)
@@ -83,3 +84,66 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
trace->entries[trace->nr_entries++] = ULONG_MAX;
}
EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
+
+/* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */
+
+struct stack_frame {
+ const void __user *next_fp;
+ unsigned long ret_addr;
+};
+
+static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
+{
+ int ret;
+
+ if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
+ return 0;
+
+ ret = 1;
+ pagefault_disable();
+ if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
+ ret = 0;
+ pagefault_enable();
+
+ return ret;
+}
+
+static inline void __save_stack_trace_user(struct stack_trace *trace)
+{
+ const struct pt_regs *regs = task_pt_regs(current);
+ const void __user *fp = (const void __user *)regs->bp;
+
+ if (trace->nr_entries < trace->max_entries)
+ trace->entries[trace->nr_entries++] = regs->ip;
+
+ while (trace->nr_entries < trace->max_entries) {
+ struct stack_frame frame;
+
+ frame.next_fp = NULL;
+ frame.ret_addr = 0;
+ if (!copy_stack_frame(fp, &frame))
+ break;
+ if ((unsigned long)fp < regs->sp)
+ break;
+ if (frame.ret_addr) {
+ trace->entries[trace->nr_entries++] =
+ frame.ret_addr;
+ }
+ if (fp == frame.next_fp)
+ break;
+ fp = frame.next_fp;
+ }
+}
+
+void save_stack_trace_user(struct stack_trace *trace)
+{
+ /*
+ * Trace user stack if we are not a kernel thread
+ */
+ if (current->mm) {
+ __save_stack_trace_user(trace);
+ }
+ if (trace->nr_entries < trace->max_entries)
+ trace->entries[trace->nr_entries++] = ULONG_MAX;
+}
+
diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c
index cb19d650c216..418a095c5796 100644
--- a/arch/x86/kernel/time_64.c
+++ b/arch/x86/kernel/time_64.c
@@ -80,6 +80,8 @@ unsigned long __init calibrate_cpu(void)
break;
no_ctr_free = (i == 4);
if (no_ctr_free) {
+ WARN(1, KERN_WARNING "Warning: AMD perfctrs busy ... "
+ "cpu_khz value may be incorrect.\n");
i = 3;
rdmsrl(MSR_K7_EVNTSEL3, evntsel3);
wrmsrl(MSR_K7_EVNTSEL3, 0);
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 04431f34fd16..6a00e5faaa74 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -566,14 +566,10 @@ static int __init uv_ptc_init(void)
if (!is_uv_system())
return 0;
- if (!proc_mkdir("sgi_uv", NULL))
- return -EINVAL;
-
proc_uv_ptc = create_proc_entry(UV_PTC_BASENAME, 0444, NULL);
if (!proc_uv_ptc) {
printk(KERN_ERR "unable to create %s proc entry\n",
UV_PTC_BASENAME);
- remove_proc_entry("sgi_uv", NULL);
return -EINVAL;
}
proc_uv_ptc->proc_fops = &proc_uv_ptc_operations;
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 424093b157d3..599e58168631 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -15,6 +15,7 @@
#include <asm/vgtod.h>
#include <asm/time.h>
#include <asm/delay.h>
+#include <asm/hypervisor.h>
unsigned int cpu_khz; /* TSC clocks / usec, not used here */
EXPORT_SYMBOL(cpu_khz);
@@ -31,6 +32,7 @@ static int tsc_unstable;
erroneous rdtsc usage on !cpu_has_tsc processors */
static int tsc_disabled = -1;
+static int tsc_clocksource_reliable;
/*
* Scheduler clock - returns current time in nanosec units.
*/
@@ -98,6 +100,15 @@ int __init notsc_setup(char *str)
__setup("notsc", notsc_setup);
+static int __init tsc_setup(char *str)
+{
+ if (!strcmp(str, "reliable"))
+ tsc_clocksource_reliable = 1;
+ return 1;
+}
+
+__setup("tsc=", tsc_setup);
+
#define MAX_RETRIES 5
#define SMI_TRESHOLD 50000
@@ -352,9 +363,15 @@ unsigned long native_calibrate_tsc(void)
{
u64 tsc1, tsc2, delta, ref1, ref2;
unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX;
- unsigned long flags, latch, ms, fast_calibrate;
+ unsigned long flags, latch, ms, fast_calibrate, tsc_khz;
int hpet = is_hpet_enabled(), i, loopmin;
+ tsc_khz = get_hypervisor_tsc_freq();
+ if (tsc_khz) {
+ printk(KERN_INFO "TSC: Frequency read from the hypervisor\n");
+ return tsc_khz;
+ }
+
local_irq_save(flags);
fast_calibrate = quick_pit_calibrate();
local_irq_restore(flags);
@@ -731,24 +748,21 @@ static struct dmi_system_id __initdata bad_tsc_dmi_table[] = {
{}
};
-/*
- * Geode_LX - the OLPC CPU has a possibly a very reliable TSC
- */
+static void __init check_system_tsc_reliable(void)
+{
#ifdef CONFIG_MGEODE_LX
-/* RTSC counts during suspend */
+ /* RTSC counts during suspend */
#define RTSC_SUSP 0x100
-
-static void __init check_geode_tsc_reliable(void)
-{
unsigned long res_low, res_high;
rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high);
+ /* Geode_LX - the OLPC CPU has a possibly a very reliable TSC */
if (res_low & RTSC_SUSP)
- clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
-}
-#else
-static inline void check_geode_tsc_reliable(void) { }
+ tsc_clocksource_reliable = 1;
#endif
+ if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE))
+ tsc_clocksource_reliable = 1;
+}
/*
* Make an educated guess if the TSC is trustworthy and synchronized
@@ -783,6 +797,8 @@ static void __init init_tsc_clocksource(void)
{
clocksource_tsc.mult = clocksource_khz2mult(tsc_khz,
clocksource_tsc.shift);
+ if (tsc_clocksource_reliable)
+ clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
/* lower the rating if we already know its unstable: */
if (check_tsc_unstable()) {
clocksource_tsc.rating = 0;
@@ -843,7 +859,7 @@ void __init tsc_init(void)
if (unsynchronized_tsc())
mark_tsc_unstable("TSCs unsynchronized");
- check_geode_tsc_reliable();
+ check_system_tsc_reliable();
init_tsc_clocksource();
}
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 1c0dfbca87c1..bf36328f6ef9 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -112,6 +112,12 @@ void __cpuinit check_tsc_sync_source(int cpu)
if (unsynchronized_tsc())
return;
+ if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) {
+ printk(KERN_INFO
+ "Skipping synchronization checks as TSC is reliable.\n");
+ return;
+ }
+
printk(KERN_INFO "checking TSC synchronization [CPU#%d -> CPU#%d]:",
smp_processor_id(), cpu);
@@ -165,7 +171,7 @@ void __cpuinit check_tsc_sync_target(void)
{
int cpus = 2;
- if (unsynchronized_tsc())
+ if (unsynchronized_tsc() || boot_cpu_has(X86_FEATURE_TSC_RELIABLE))
return;
/*
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 0b8b6690a86d..44153afc9067 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -17,6 +17,9 @@
* want per guest time just set the kernel.vsyscall64 sysctl to 0.
*/
+/* Disable profiling for userspace code: */
+#define DISABLE_BRANCH_PROFILING
+
#include <linux/time.h>
#include <linux/init.h>
#include <linux/kernel.h>
@@ -128,7 +131,16 @@ static __always_inline void do_vgettimeofday(struct timeval * tv)
gettimeofday(tv,NULL);
return;
}
+
+ /*
+ * Surround the RDTSC by barriers, to make sure it's not
+ * speculated to outside the seqlock critical section and
+ * does not cause time warps:
+ */
+ rdtsc_barrier();
now = vread();
+ rdtsc_barrier();
+
base = __vsyscall_gtod_data.clock.cycle_last;
mask = __vsyscall_gtod_data.clock.mask;
mult = __vsyscall_gtod_data.clock.mult;
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index b13acb75e822..15c3e6999182 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -310,7 +310,7 @@ static void __init setup_xstate_init(void)
/*
* Enable and initialize the xsave feature.
*/
-void __init xsave_cntxt_init(void)
+void __ref xsave_cntxt_init(void)
{
unsigned int eax, ebx, ecx, edx;
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index a5d8e1ace1cf..50a779264bb1 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -590,7 +590,8 @@ static void __init lguest_init_IRQ(void)
* a straightforward 1 to 1 mapping, so force that here. */
__get_cpu_var(vector_irq)[vector] = i;
if (vector != SYSCALL_VECTOR) {
- set_intr_gate(vector, interrupt[vector]);
+ set_intr_gate(vector,
+ interrupt[vector-FIRST_EXTERNAL_VECTOR]);
set_irq_chip_and_handler_name(i, &lguest_irq_controller,
handle_level_irq,
"level");
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 9e68075544f6..4a20b2f9a381 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -39,7 +39,7 @@ static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned lon
#define __do_strncpy_from_user(dst, src, count, res) \
do { \
int __d0, __d1, __d2; \
- might_sleep(); \
+ might_fault(); \
__asm__ __volatile__( \
" testl %1,%1\n" \
" jz 2f\n" \
@@ -126,7 +126,7 @@ EXPORT_SYMBOL(strncpy_from_user);
#define __do_clear_user(addr,size) \
do { \
int __d0; \
- might_sleep(); \
+ might_fault(); \
__asm__ __volatile__( \
"0: rep; stosl\n" \
" movl %2,%0\n" \
@@ -155,7 +155,7 @@ do { \
unsigned long
clear_user(void __user *to, unsigned long n)
{
- might_sleep();
+ might_fault();
if (access_ok(VERIFY_WRITE, to, n))
__do_clear_user(to, n);
return n;
@@ -197,7 +197,7 @@ long strnlen_user(const char __user *s, long n)
unsigned long mask = -__addr_ok(s);
unsigned long res, tmp;
- might_sleep();
+ might_fault();
__asm__ __volatile__(
" testl %0, %0\n"
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index f4df6e7c718b..64d6c84e6353 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -15,7 +15,7 @@
#define __do_strncpy_from_user(dst,src,count,res) \
do { \
long __d0, __d1, __d2; \
- might_sleep(); \
+ might_fault(); \
__asm__ __volatile__( \
" testq %1,%1\n" \
" jz 2f\n" \
@@ -64,7 +64,7 @@ EXPORT_SYMBOL(strncpy_from_user);
unsigned long __clear_user(void __user *addr, unsigned long size)
{
long __d0;
- might_sleep();
+ might_fault();
/* no memory constraint because it doesn't change any memory gcc knows
about */
asm volatile(
diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c
index 3c3b471ea496..3624a364b7f3 100644
--- a/arch/x86/mach-generic/bigsmp.c
+++ b/arch/x86/mach-generic/bigsmp.c
@@ -17,6 +17,7 @@
#include <asm/bigsmp/apic.h>
#include <asm/bigsmp/ipi.h>
#include <asm/mach-default/mach_mpparse.h>
+#include <asm/mach-default/mach_wakecpu.h>
static int dmi_bigsmp; /* can be set by dmi scanners */
diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c
index 9e835a11a13a..e63a4a76d8cd 100644
--- a/arch/x86/mach-generic/default.c
+++ b/arch/x86/mach-generic/default.c
@@ -16,6 +16,7 @@
#include <asm/mach-default/mach_apic.h>
#include <asm/mach-default/mach_ipi.h>
#include <asm/mach-default/mach_mpparse.h>
+#include <asm/mach-default/mach_wakecpu.h>
/* should be called last. */
static int probe_default(void)
diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c
index 28459cab3ddb..7b4e6d0d1690 100644
--- a/arch/x86/mach-generic/es7000.c
+++ b/arch/x86/mach-generic/es7000.c
@@ -16,7 +16,19 @@
#include <asm/es7000/apic.h>
#include <asm/es7000/ipi.h>
#include <asm/es7000/mpparse.h>
-#include <asm/es7000/wakecpu.h>
+#include <asm/mach-default/mach_wakecpu.h>
+
+void __init es7000_update_genapic_to_cluster(void)
+{
+ genapic->target_cpus = target_cpus_cluster;
+ genapic->int_delivery_mode = INT_DELIVERY_MODE_CLUSTER;
+ genapic->int_dest_mode = INT_DEST_MODE_CLUSTER;
+ genapic->no_balance_irq = NO_BALANCE_IRQ_CLUSTER;
+
+ genapic->init_apic_ldr = init_apic_ldr_cluster;
+
+ genapic->cpu_mask_to_apicid = cpu_mask_to_apicid_cluster;
+}
static int probe_es7000(void)
{
diff --git a/arch/x86/mach-generic/probe.c b/arch/x86/mach-generic/probe.c
index 5a7e4619e1c4..c346d9d0226f 100644
--- a/arch/x86/mach-generic/probe.c
+++ b/arch/x86/mach-generic/probe.c
@@ -15,6 +15,7 @@
#include <asm/mpspec.h>
#include <asm/apicdef.h>
#include <asm/genapic.h>
+#include <asm/setup.h>
extern struct genapic apic_numaq;
extern struct genapic apic_summit;
@@ -57,6 +58,9 @@ static int __init parse_apic(char *arg)
}
}
+ if (x86_quirks->update_genapic)
+ x86_quirks->update_genapic();
+
/* Parsed again by __setup for debug/verbose */
return 0;
}
@@ -72,12 +76,15 @@ void __init generic_bigsmp_probe(void)
* - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support
*/
- if (!cmdline_apic && genapic == &apic_default)
+ if (!cmdline_apic && genapic == &apic_default) {
if (apic_bigsmp.probe()) {
genapic = &apic_bigsmp;
+ if (x86_quirks->update_genapic)
+ x86_quirks->update_genapic();
printk(KERN_INFO "Overriding APIC driver with %s\n",
genapic->name);
}
+ }
#endif
}
@@ -94,6 +101,9 @@ void __init generic_apic_probe(void)
/* Not visible without early console */
if (!apic_probe[i])
panic("Didn't find an APIC driver");
+
+ if (x86_quirks->update_genapic)
+ x86_quirks->update_genapic();
}
printk(KERN_INFO "Using APIC driver %s\n", genapic->name);
}
@@ -108,6 +118,8 @@ int __init mps_oem_check(struct mp_config_table *mpc, char *oem,
if (apic_probe[i]->mps_oem_check(mpc, oem, productid)) {
if (!cmdline_apic) {
genapic = apic_probe[i];
+ if (x86_quirks->update_genapic)
+ x86_quirks->update_genapic();
printk(KERN_INFO "Switched to APIC driver `%s'.\n",
genapic->name);
}
@@ -124,6 +136,8 @@ int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) {
if (!cmdline_apic) {
genapic = apic_probe[i];
+ if (x86_quirks->update_genapic)
+ x86_quirks->update_genapic();
printk(KERN_INFO "Switched to APIC driver `%s'.\n",
genapic->name);
}
diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c
index 6272b5e69da6..2c6d234e0009 100644
--- a/arch/x86/mach-generic/summit.c
+++ b/arch/x86/mach-generic/summit.c
@@ -16,6 +16,7 @@
#include <asm/summit/apic.h>
#include <asm/summit/ipi.h>
#include <asm/summit/mpparse.h>
+#include <asm/mach-default/mach_wakecpu.h>
static int probe_summit(void)
{
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index fea4565ff576..d8cc96a2738f 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -8,9 +8,8 @@ obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o
obj-$(CONFIG_HIGHMEM) += highmem_32.o
-obj-$(CONFIG_MMIOTRACE_HOOKS) += kmmio.o
obj-$(CONFIG_MMIOTRACE) += mmiotrace.o
-mmiotrace-y := pf_in.o mmio-mod.o
+mmiotrace-y := kmmio.o pf_in.o mmio-mod.o
obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o
obj-$(CONFIG_NUMA) += numa_$(BITS).o
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 56c17830e347..353a70d2fe71 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -54,7 +54,7 @@
static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
{
-#ifdef CONFIG_MMIOTRACE_HOOKS
+#ifdef CONFIG_MMIOTRACE
if (unlikely(is_kmmio_active()))
if (kmmio_handler(regs, addr) == 1)
return -1;
@@ -414,6 +414,7 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
unsigned long error_code)
{
unsigned long flags = oops_begin();
+ int sig = SIGKILL;
struct task_struct *tsk;
printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
@@ -424,8 +425,8 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
tsk->thread.trap_no = 14;
tsk->thread.error_code = error_code;
if (__die("Bad pagetable", regs, error_code))
- regs = NULL;
- oops_end(flags, regs, SIGKILL);
+ sig = 0;
+ oops_end(flags, regs, sig);
}
#endif
@@ -593,6 +594,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
#ifdef CONFIG_X86_64
unsigned long flags;
+ int sig;
#endif
tsk = current;
@@ -856,11 +858,12 @@ no_context:
bust_spinlocks(0);
do_exit(SIGKILL);
#else
+ sig = SIGKILL;
if (__die("Oops", regs, error_code))
- regs = NULL;
+ sig = 0;
/* Executive summary in case the body of the oops scrolled away */
printk(KERN_EMERG "CR2: %016lx\n", address);
- oops_end(flags, regs, SIGKILL);
+ oops_end(flags, regs, sig);
#endif
/*
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index c483f4242079..9b1cd4360a50 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -102,6 +102,8 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
pud = pud_offset(pgd, 0);
BUG_ON(pmd_table != pmd_offset(pud, 0));
+
+ return pmd_table;
}
#endif
pud = pud_offset(pgd, 0);
@@ -969,8 +971,6 @@ void __init mem_init(void)
int codesize, reservedpages, datasize, initsize;
int tmp;
- start_periodic_check_for_corruption();
-
#ifdef CONFIG_FLATMEM
BUG_ON(!mem_map);
#endif
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 9db01db6e3cd..9f7a0d24d42a 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -902,8 +902,6 @@ void __init mem_init(void)
long codesize, reservedpages, datasize, initsize;
unsigned long absent_pages;
- start_periodic_check_for_corruption();
-
pci_iommu_alloc();
/* clear_bss() already clear the empty_zero_page */
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 3f1b81a83e2e..716d26f0e5d4 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -69,7 +69,7 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)
int i;
if (!reset_value) {
- reset_value = kmalloc(sizeof(unsigned) * num_counters,
+ reset_value = kmalloc(sizeof(reset_value[0]) * num_counters,
GFP_ATOMIC);
if (!reset_value)
return;
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index b67732bbb85a..bb1a01f089e2 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -23,6 +23,12 @@ unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 |
unsigned int pci_early_dump_regs;
static int pci_bf_sort;
int pci_routeirq;
+int noioapicquirk;
+#ifdef CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS
+int noioapicreroute = 0;
+#else
+int noioapicreroute = 1;
+#endif
int pcibios_last_bus = -1;
unsigned long pirq_table_addr;
struct pci_bus *pci_root_bus;
@@ -519,6 +525,17 @@ char * __devinit pcibios_setup(char *str)
} else if (!strcmp(str, "skip_isa_align")) {
pci_probe |= PCI_CAN_SKIP_ISA_ALIGN;
return NULL;
+ } else if (!strcmp(str, "noioapicquirk")) {
+ noioapicquirk = 1;
+ return NULL;
+ } else if (!strcmp(str, "ioapicreroute")) {
+ if (noioapicreroute != -1)
+ noioapicreroute = 0;
+ return NULL;
+ } else if (!strcmp(str, "noioapicreroute")) {
+ if (noioapicreroute != -1)
+ noioapicreroute = 1;
+ return NULL;
}
return str;
}
diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c
index 9915293500fb..9a5af6c8fbe9 100644
--- a/arch/x86/pci/direct.c
+++ b/arch/x86/pci/direct.c
@@ -173,7 +173,7 @@ static int pci_conf2_write(unsigned int seg, unsigned int bus,
#undef PCI_CONF2_ADDRESS
-static struct pci_raw_ops pci_direct_conf2 = {
+struct pci_raw_ops pci_direct_conf2 = {
.read = pci_conf2_read,
.write = pci_conf2_write,
};
@@ -289,6 +289,7 @@ int __init pci_direct_probe(void)
if (pci_check_type1()) {
raw_pci_ops = &pci_direct_conf1;
+ port_cf9_safe = true;
return 1;
}
release_resource(region);
@@ -305,6 +306,7 @@ int __init pci_direct_probe(void)
if (pci_check_type2()) {
raw_pci_ops = &pci_direct_conf2;
+ port_cf9_safe = true;
return 2;
}
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 3c27a809393b..2051dc96b8e9 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -496,21 +496,24 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SIEMENS, 0x0015,
pci_siemens_interrupt_controller);
/*
- * Regular PCI devices have 256 bytes, but AMD Family 10h Opteron ext config
- * have 4096 bytes. Even if the device is capable, that doesn't mean we can
- * access it. Maybe we don't have a way to generate extended config space
- * accesses. So check it
+ * Regular PCI devices have 256 bytes, but AMD Family 10h/11h CPUs have
+ * 4096 bytes configuration space for each function of their processor
+ * configuration space.
*/
-static void fam10h_pci_cfg_space_size(struct pci_dev *dev)
+static void amd_cpu_pci_cfg_space_size(struct pci_dev *dev)
{
dev->cfg_size = pci_cfg_space_size_ext(dev);
}
-
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1200, fam10h_pci_cfg_space_size);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1201, fam10h_pci_cfg_space_size);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1202, fam10h_pci_cfg_space_size);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1203, fam10h_pci_cfg_space_size);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1204, fam10h_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1200, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1201, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1202, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1203, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1204, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1300, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1301, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1302, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1303, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1304, amd_cpu_pci_cfg_space_size);
/*
* SB600: Disable BAR1 on device 14.0 to avoid HPET resources from
diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h
index 15b9cf6be729..1959018aac02 100644
--- a/arch/x86/pci/pci.h
+++ b/arch/x86/pci/pci.h
@@ -96,6 +96,7 @@ extern struct pci_raw_ops *raw_pci_ops;
extern struct pci_raw_ops *raw_pci_ext_ops;
extern struct pci_raw_ops pci_direct_conf1;
+extern bool port_cf9_safe;
/* arch_initcall level */
extern int pci_direct_probe(void);
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 1ef0f90813d6..d9d35824c56f 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -9,6 +9,9 @@
* Also alternative() doesn't work.
*/
+/* Disable profiling for userspace code: */
+#define DISABLE_BRANCH_PROFILING
+
#include <linux/kernel.h>
#include <linux/posix-timers.h>
#include <linux/time.h>
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index 513f330c5832..1241f118ab56 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -310,7 +310,7 @@ int __init sysenter_setup(void)
}
/* Setup a VMA at program startup for the vsyscall page */
-int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
struct mm_struct *mm = current->mm;
unsigned long addr;
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index 257ba4a10abf..9c98cc6ba978 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -98,7 +98,7 @@ static unsigned long vdso_addr(unsigned long start, unsigned len)
/* Setup a VMA at program startup for the vsyscall page.
Not called for compat tasks */
-int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
struct mm_struct *mm = current->mm;
unsigned long addr;
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 688936044dc9..636ef4caa52d 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -661,12 +661,11 @@ void xen_set_pgd(pgd_t *ptr, pgd_t val)
* For 64-bit, we must skip the Xen hole in the middle of the address
* space, just after the big x86-64 virtual hole.
*/
-static int xen_pgd_walk(struct mm_struct *mm,
- int (*func)(struct mm_struct *mm, struct page *,
- enum pt_level),
- unsigned long limit)
+static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
+ int (*func)(struct mm_struct *mm, struct page *,
+ enum pt_level),
+ unsigned long limit)
{
- pgd_t *pgd = mm->pgd;
int flush = 0;
unsigned hole_low, hole_high;
unsigned pgdidx_limit, pudidx_limit, pmdidx_limit;
@@ -753,6 +752,14 @@ out:
return flush;
}
+static int xen_pgd_walk(struct mm_struct *mm,
+ int (*func)(struct mm_struct *mm, struct page *,
+ enum pt_level),
+ unsigned long limit)
+{
+ return __xen_pgd_walk(mm, mm->pgd, func, limit);
+}
+
/* If we're using split pte locks, then take the page's lock and
return a pointer to it. Otherwise return NULL. */
static spinlock_t *xen_pte_lock(struct page *page, struct mm_struct *mm)
@@ -854,7 +861,7 @@ static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
xen_mc_batch();
- if (xen_pgd_walk(mm, xen_pin_page, USER_LIMIT)) {
+ if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) {
/* re-enable interrupts for flushing */
xen_mc_issue(0);
@@ -998,7 +1005,7 @@ static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd)
PT_PMD);
#endif
- xen_pgd_walk(mm, xen_unpin_page, USER_LIMIT);
+ __xen_pgd_walk(mm, pgd, xen_unpin_page, USER_LIMIT);
xen_mc_issue(0);
}
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index d77da613b1d2..acd9b6705e02 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -362,7 +362,7 @@ static void xen_cpu_die(unsigned int cpu)
alternatives_smp_switch(0);
}
-static void xen_play_dead(void)
+static void __cpuinit xen_play_dead(void) /* used only with CPU_HOTPLUG */
{
play_dead_common();
HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index d7422dc2a55c..9e1afae8461f 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -49,7 +49,7 @@ bool xen_vcpu_stolen(int vcpu);
void xen_mark_init_mm_pinned(void);
-void __init xen_setup_vcpu_info_placement(void);
+void xen_setup_vcpu_info_placement(void);
#ifdef CONFIG_SMP
void xen_smp_init(void);