summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig101
-rw-r--r--arch/x86/Kconfig.debug10
-rw-r--r--arch/x86/boot/Makefile5
-rw-r--r--arch/x86/boot/boot.h10
-rw-r--r--arch/x86/boot/compressed/Makefile2
-rw-r--r--arch/x86/boot/compressed/aslr.c267
-rw-r--r--arch/x86/boot/compressed/cmdline.c2
-rw-r--r--arch/x86/boot/compressed/cpuflags.c12
-rw-r--r--arch/x86/boot/compressed/eboot.c789
-rw-r--r--arch/x86/boot/compressed/eboot.h9
-rw-r--r--arch/x86/boot/compressed/head_32.S10
-rw-r--r--arch/x86/boot/compressed/head_64.S16
-rw-r--r--arch/x86/boot/compressed/misc.c18
-rw-r--r--arch/x86/boot/compressed/misc.h37
-rw-r--r--arch/x86/boot/compressed/mkpiggy.c16
-rw-r--r--arch/x86/boot/cpucheck.c100
-rw-r--r--arch/x86/boot/cpuflags.c104
-rw-r--r--arch/x86/boot/cpuflags.h19
-rw-r--r--arch/x86/boot/tools/build.c40
-rw-r--r--arch/x86/crypto/Makefile3
-rw-r--r--arch/x86/crypto/ablk_helper.c149
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c2
-rw-r--r--arch/x86/crypto/camellia_aesni_avx2_glue.c2
-rw-r--r--arch/x86/crypto/camellia_aesni_avx_glue.c2
-rw-r--r--arch/x86/crypto/cast5_avx_glue.c2
-rw-r--r--arch/x86/crypto/cast6_avx_glue.c2
-rw-r--r--arch/x86/crypto/serpent_avx2_glue.c2
-rw-r--r--arch/x86/crypto/serpent_avx_glue.c2
-rw-r--r--arch/x86/crypto/serpent_sse2_glue.c2
-rw-r--r--arch/x86/crypto/sha256_ssse3_glue.c4
-rw-r--r--arch/x86/crypto/twofish_avx_glue.c2
-rw-r--r--arch/x86/include/asm/acpi.h1
-rw-r--r--arch/x86/include/asm/archrandom.h21
-rw-r--r--arch/x86/include/asm/atomic.h29
-rw-r--r--arch/x86/include/asm/atomic64_64.h28
-rw-r--r--arch/x86/include/asm/bitops.h24
-rw-r--r--arch/x86/include/asm/calling.h50
-rw-r--r--arch/x86/include/asm/cpufeature.h6
-rw-r--r--arch/x86/include/asm/crypto/ablk_helper.h31
-rw-r--r--arch/x86/include/asm/efi.h2
-rw-r--r--arch/x86/include/asm/intel-mid.h113
-rw-r--r--arch/x86/include/asm/intel_mid_vrtc.h (renamed from arch/x86/include/asm/mrst-vrtc.h)4
-rw-r--r--arch/x86/include/asm/jump_label.h2
-rw-r--r--arch/x86/include/asm/kdebug.h2
-rw-r--r--arch/x86/include/asm/kvm_host.h16
-rw-r--r--arch/x86/include/asm/local.h28
-rw-r--r--arch/x86/include/asm/mce.h1
-rw-r--r--arch/x86/include/asm/misc.h6
-rw-r--r--arch/x86/include/asm/mpspec.h2
-rw-r--r--arch/x86/include/asm/mrst.h81
-rw-r--r--arch/x86/include/asm/msr.h22
-rw-r--r--arch/x86/include/asm/mutex_64.h4
-rw-r--r--arch/x86/include/asm/page_64_types.h15
-rw-r--r--arch/x86/include/asm/percpu.h11
-rw-r--r--arch/x86/include/asm/pgtable_64_types.h2
-rw-r--r--arch/x86/include/asm/preempt.h100
-rw-r--r--arch/x86/include/asm/prom.h5
-rw-r--r--arch/x86/include/asm/rmwcc.h41
-rw-r--r--arch/x86/include/asm/setup.h4
-rw-r--r--arch/x86/include/asm/simd.h11
-rw-r--r--arch/x86/include/asm/thread_info.h5
-rw-r--r--arch/x86/include/asm/uaccess.h98
-rw-r--r--arch/x86/include/asm/uaccess_32.h29
-rw-r--r--arch/x86/include/asm/uaccess_64.h52
-rw-r--r--arch/x86/include/asm/uv/uv.h10
-rw-r--r--arch/x86/include/asm/uv/uv_hub.h57
-rw-r--r--arch/x86/include/asm/uv/uv_mmrs.h31
-rw-r--r--arch/x86/include/asm/xen/page-coherent.h38
-rw-r--r--arch/x86/include/asm/xen/page.h31
-rw-r--r--arch/x86/include/uapi/asm/bootparam.h2
-rw-r--r--arch/x86/include/uapi/asm/msr-index.h3
-rw-r--r--arch/x86/kernel/Makefile2
-rw-r--r--arch/x86/kernel/acpi/boot.c90
-rw-r--r--arch/x86/kernel/alternative.c11
-rw-r--r--arch/x86/kernel/apb_timer.c10
-rw-r--r--arch/x86/kernel/apic/apic.c8
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c72
-rw-r--r--arch/x86/kernel/asm-offsets.c1
-rw-r--r--arch/x86/kernel/cpu/amd.c8
-rw-r--r--arch/x86/kernel/cpu/centaur.c8
-rw-r--r--arch/x86/kernel/cpu/common.c17
-rw-r--r--arch/x86/kernel/cpu/cpu.h20
-rw-r--r--arch/x86/kernel/cpu/intel.c12
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c2
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-apei.c3
-rw-r--r--arch/x86/kernel/cpu/perf_event.c27
-rw-r--r--arch/x86/kernel/cpu/perf_event.h6
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c83
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c204
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_lbr.c29
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.c10
-rw-r--r--arch/x86/kernel/cpu/rdrand.c14
-rw-r--r--arch/x86/kernel/cpu/scattered.c2
-rw-r--r--arch/x86/kernel/cpu/umc.c4
-rw-r--r--arch/x86/kernel/devicetree.c51
-rw-r--r--arch/x86/kernel/dumpstack.c11
-rw-r--r--arch/x86/kernel/early_printk.c9
-rw-r--r--arch/x86/kernel/entry_32.S7
-rw-r--r--arch/x86/kernel/entry_64.S23
-rw-r--r--arch/x86/kernel/head32.c4
-rw-r--r--arch/x86/kernel/i386_ksyms_32.c7
-rw-r--r--arch/x86/kernel/i8259.c3
-rw-r--r--arch/x86/kernel/irq_32.c34
-rw-r--r--arch/x86/kernel/irq_64.c21
-rw-r--r--arch/x86/kernel/jump_label.c25
-rw-r--r--arch/x86/kernel/kvm.c19
-rw-r--r--arch/x86/kernel/microcode_amd.c1
-rw-r--r--arch/x86/kernel/msr.c2
-rw-r--r--arch/x86/kernel/nmi.c4
-rw-r--r--arch/x86/kernel/preempt.S25
-rw-r--r--arch/x86/kernel/process.c6
-rw-r--r--arch/x86/kernel/process_32.c8
-rw-r--r--arch/x86/kernel/process_64.c10
-rw-r--r--arch/x86/kernel/reboot.c271
-rw-r--r--arch/x86/kernel/rtc.c12
-rw-r--r--arch/x86/kernel/setup.c27
-rw-r--r--arch/x86/kernel/smpboot.c57
-rw-r--r--arch/x86/kernel/sysfb_simplefb.c4
-rw-r--r--arch/x86/kernel/topology.c11
-rw-r--r--arch/x86/kernel/traps.c4
-rw-r--r--arch/x86/kernel/vmlinux.lds.S9
-rw-r--r--arch/x86/kernel/x8664_ksyms_64.c7
-rw-r--r--arch/x86/kvm/cpuid.c36
-rw-r--r--arch/x86/kvm/emulate.c14
-rw-r--r--arch/x86/kvm/mmu.c115
-rw-r--r--arch/x86/kvm/mmu.h4
-rw-r--r--arch/x86/kvm/paging_tmpl.h20
-rw-r--r--arch/x86/kvm/svm.c8
-rw-r--r--arch/x86/kvm/vmx.c173
-rw-r--r--arch/x86/kvm/x86.c45
-rw-r--r--arch/x86/kvm/x86.h1
-rw-r--r--arch/x86/lib/Makefile2
-rw-r--r--arch/x86/lib/misc.c21
-rw-r--r--arch/x86/lib/msr-smp.c62
-rw-r--r--arch/x86/lib/usercopy.c43
-rw-r--r--arch/x86/lib/usercopy_32.c8
-rw-r--r--arch/x86/mm/fault.c43
-rw-r--r--arch/x86/mm/init.c23
-rw-r--r--arch/x86/mm/init_32.c3
-rw-r--r--arch/x86/net/bpf_jit_comp.c18
-rw-r--r--arch/x86/pci/Makefile2
-rw-r--r--arch/x86/pci/fixup.c18
-rw-r--r--arch/x86/pci/intel_mid_pci.c (renamed from arch/x86/pci/mrst.c)20
-rw-r--r--arch/x86/pci/mmconfig-shared.c7
-rw-r--r--arch/x86/platform/Makefile2
-rw-r--r--arch/x86/platform/efi/Makefile1
-rw-r--r--arch/x86/platform/efi/early_printk.c191
-rw-r--r--arch/x86/platform/efi/efi.c137
-rw-r--r--arch/x86/platform/geode/alix.c2
-rw-r--r--arch/x86/platform/geode/geos.c2
-rw-r--r--arch/x86/platform/geode/net5501.c2
-rw-r--r--arch/x86/platform/intel-mid/Makefile7
-rw-r--r--arch/x86/platform/intel-mid/device_libs/Makefile22
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_bma023.c20
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_emc1403.c41
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c83
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_ipc.c68
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_ipc.h17
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_lis331.c39
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_max3111.c35
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_max7315.c79
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_mpu3050.c36
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_msic.c87
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_msic.h19
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_msic_audio.c47
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_msic_battery.c37
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_msic_gpio.c48
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_msic_ocd.c49
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_msic_power_btn.c36
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_msic_thermal.c37
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_pmic_gpio.c54
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_tc35876x.c36
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_tca6416.c57
-rw-r--r--arch/x86/platform/intel-mid/early_printk_intel_mid.c (renamed from arch/x86/platform/mrst/early_printk_mrst.c)11
-rw-r--r--arch/x86/platform/intel-mid/intel-mid.c213
-rw-r--r--arch/x86/platform/intel-mid/intel_mid_vrtc.c (renamed from arch/x86/platform/mrst/vrtc.c)19
-rw-r--r--arch/x86/platform/intel-mid/sfi.c488
-rw-r--r--arch/x86/platform/mrst/Makefile3
-rw-r--r--arch/x86/platform/mrst/mrst.c1052
-rw-r--r--arch/x86/platform/olpc/olpc-xo15-sci.c9
-rw-r--r--arch/x86/platform/uv/Makefile2
-rw-r--r--arch/x86/platform/uv/uv_nmi.c711
-rw-r--r--arch/x86/tools/relocs.c20
-rw-r--r--arch/x86/xen/mmu.c15
-rw-r--r--arch/x86/xen/p2m.c16
-rw-r--r--arch/x86/xen/smp.c9
-rw-r--r--arch/x86/xen/spinlock.c26
187 files changed, 5285 insertions, 3360 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e241a1930c98..d2cff2c55584 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -22,6 +22,7 @@ config X86_64
config X86
def_bool y
select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
+ select ARCH_MIGHT_HAVE_PC_PARPORT
select HAVE_AOUT if X86_32
select HAVE_UNSTABLE_SCHED_CLOCK
select ARCH_SUPPORTS_NUMA_BALANCING
@@ -123,6 +124,7 @@ config X86
select COMPAT_OLD_SIGACTION if IA32_EMULATION
select RTC_LIB
select HAVE_DEBUG_STACKOVERFLOW
+ select HAVE_IRQ_EXIT_ON_IRQ_STACK if X86_64
config INSTRUCTION_DECODER
def_bool y
@@ -254,10 +256,6 @@ config ARCH_HWEIGHT_CFLAGS
default "-fcall-saved-ecx -fcall-saved-edx" if X86_32
default "-fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx -fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 -fcall-saved-r11" if X86_64
-config ARCH_CPU_PROBE_RELEASE
- def_bool y
- depends on HOTPLUG_CPU
-
config ARCH_SUPPORTS_UPROBES
def_bool y
@@ -481,11 +479,12 @@ config X86_INTEL_LPSS
bool "Intel Low Power Subsystem Support"
depends on ACPI
select COMMON_CLK
+ select PINCTRL
---help---
Select to build support for Intel Low Power Subsystem such as
found on Intel Lynxpoint PCH. Selecting this option enables
- things like clock tree (common clock framework) which are needed
- by the LPSS peripheral drivers.
+ things like clock tree (common clock framework) and pincontrol
+ which are needed by the LPSS peripheral drivers.
config X86_RDC321X
bool "RDC R-321x SoC"
@@ -637,10 +636,10 @@ config PARAVIRT_SPINLOCKS
spinlock implementation with something virtualization-friendly
(for example, block the virtual CPU rather than spinning).
- Unfortunately the downside is an up to 5% performance hit on
- native kernels, with various workloads.
+ It has a minimal impact on native kernels and gives a nice performance
+ benefit on paravirtualized KVM / Xen kernels.
- If you are unsure how to answer this question, answer N.
+ If you are unsure how to answer this question, answer Y.
source "arch/x86/xen/Kconfig"
@@ -755,20 +754,25 @@ config DMI
BIOS code.
config GART_IOMMU
- bool "GART IOMMU support" if EXPERT
- default y
+ bool "Old AMD GART IOMMU support"
select SWIOTLB
depends on X86_64 && PCI && AMD_NB
---help---
- Support for full DMA access of devices with 32bit memory access only
- on systems with more than 3GB. This is usually needed for USB,
- sound, many IDE/SATA chipsets and some other devices.
- Provides a driver for the AMD Athlon64/Opteron/Turion/Sempron GART
- based hardware IOMMU and a software bounce buffer based IOMMU used
- on Intel systems and as fallback.
- The code is only active when needed (enough memory and limited
- device) unless CONFIG_IOMMU_DEBUG or iommu=force is specified
- too.
+ Provides a driver for older AMD Athlon64/Opteron/Turion/Sempron
+ GART based hardware IOMMUs.
+
+ The GART supports full DMA access for devices with 32-bit access
+ limitations, on systems with more than 3 GB. This is usually needed
+ for USB, sound, many IDE/SATA chipsets and some other devices.
+
+ Newer systems typically have a modern AMD IOMMU, supported via
+ the CONFIG_AMD_IOMMU=y config option.
+
+ In normal configurations this driver is only active when needed:
+ there's more than 3 GB of memory and the system contains a
+ 32-bit limited device.
+
+ If unsure, say Y.
config CALGARY_IOMMU
bool "IBM Calgary IOMMU support"
@@ -859,7 +863,7 @@ source "kernel/Kconfig.preempt"
config X86_UP_APIC
bool "Local APIC support on uniprocessors"
- depends on X86_32 && !SMP && !X86_32_NON_STANDARD
+ depends on X86_32 && !SMP && !X86_32_NON_STANDARD && !PCI_MSI
---help---
A local APIC (Advanced Programmable Interrupt Controller) is an
integrated interrupt controller in the CPU. If you have a single-CPU
@@ -884,11 +888,11 @@ config X86_UP_IOAPIC
config X86_LOCAL_APIC
def_bool y
- depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC
+ depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC || PCI_MSI
config X86_IO_APIC
def_bool y
- depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_IOAPIC
+ depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_IOAPIC || PCI_MSI
config X86_VISWS_APIC
def_bool y
@@ -1032,6 +1036,7 @@ config X86_REBOOTFIXUPS
config MICROCODE
tristate "CPU microcode loading support"
+ depends on CPU_SUP_AMD || CPU_SUP_INTEL
select FW_LOADER
---help---
@@ -1592,7 +1597,7 @@ config EFI_STUB
This kernel feature allows a bzImage to be loaded directly
by EFI firmware without the use of a bootloader.
- See Documentation/x86/efi-stub.txt for more information.
+ See Documentation/efi-stub.txt for more information.
config SECCOMP
def_bool y
@@ -1721,16 +1726,56 @@ config RELOCATABLE
Note: If CONFIG_RELOCATABLE=y, then the kernel runs from the address
it has been loaded at and the compile time physical address
- (CONFIG_PHYSICAL_START) is ignored.
+ (CONFIG_PHYSICAL_START) is used as the minimum location.
-# Relocation on x86-32 needs some additional build support
+config RANDOMIZE_BASE
+ bool "Randomize the address of the kernel image"
+ depends on RELOCATABLE
+ depends on !HIBERNATION
+ default n
+ ---help---
+ Randomizes the physical and virtual address at which the
+ kernel image is decompressed, as a security feature that
+ deters exploit attempts relying on knowledge of the location
+ of kernel internals.
+
+ Entropy is generated using the RDRAND instruction if it
+ is supported. If not, then RDTSC is used, if supported. If
+ neither RDRAND nor RDTSC are supported, then no randomness
+ is introduced.
+
+ The kernel will be offset by up to RANDOMIZE_BASE_MAX_OFFSET,
+ and aligned according to PHYSICAL_ALIGN.
+
+config RANDOMIZE_BASE_MAX_OFFSET
+ hex "Maximum ASLR offset allowed"
+ depends on RANDOMIZE_BASE
+ range 0x0 0x20000000 if X86_32
+ default "0x20000000" if X86_32
+ range 0x0 0x40000000 if X86_64
+ default "0x40000000" if X86_64
+ ---help---
+ Determines the maximal offset in bytes that will be applied to the
+ kernel when Address Space Layout Randomization (ASLR) is active.
+ Must be less than or equal to the actual physical memory on the
+ system. This must be a multiple of CONFIG_PHYSICAL_ALIGN.
+
+ On 32-bit this is limited to 512MiB.
+
+ On 64-bit this is limited by how the kernel fixmap page table is
+ positioned, so this cannot be larger that 1GiB currently. Normally
+ there is a 512MiB to 1.5GiB split between kernel and modules. When
+ this is raised above the 512MiB default, the modules area will
+ shrink to compensate, up to the current maximum 1GiB to 1GiB split.
+
+# Relocation on x86 needs some additional build support
config X86_NEED_RELOCS
def_bool y
- depends on X86_32 && RELOCATABLE
+ depends on RANDOMIZE_BASE || (X86_32 && RELOCATABLE)
config PHYSICAL_ALIGN
hex "Alignment value to which kernel should be aligned"
- default "0x1000000"
+ default "0x200000"
range 0x2000 0x1000000 if X86_32
range 0x200000 0x1000000 if X86_64
---help---
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 78d91afb8e50..0f3621ed1db6 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -59,6 +59,16 @@ config EARLY_PRINTK_DBGP
with klogd/syslogd or the X server. You should normally N here,
unless you want to debug such a crash. You need usb debug device.
+config EARLY_PRINTK_EFI
+ bool "Early printk via the EFI framebuffer"
+ depends on EFI && EARLY_PRINTK
+ select FONT_SUPPORT
+ ---help---
+ Write kernel log output directly into the EFI framebuffer.
+
+ This is useful for kernel debugging when your machine crashes very
+ early before the console code is initialized.
+
config X86_PTDUMP
bool "Export kernel pagetable layout to userspace via debugfs"
depends on DEBUG_KERNEL
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 379814bc41e3..28f7db7993f5 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -20,7 +20,7 @@ targets := vmlinux.bin setup.bin setup.elf bzImage
targets += fdimage fdimage144 fdimage288 image.iso mtools.conf
subdir- := compressed
-setup-y += a20.o bioscall.o cmdline.o copy.o cpu.o cpucheck.o
+setup-y += a20.o bioscall.o cmdline.o copy.o cpu.o cpuflags.o cpucheck.o
setup-y += early_serial_console.o edd.o header.o main.o mca.o memory.o
setup-y += pm.o pmjump.o printf.o regs.o string.o tty.o video.o
setup-y += video-mode.o version.o
@@ -71,7 +71,8 @@ GCOV_PROFILE := n
$(obj)/bzImage: asflags-y := $(SVGA_MODE)
quiet_cmd_image = BUILD $@
-cmd_image = $(obj)/tools/build $(obj)/setup.bin $(obj)/vmlinux.bin $(obj)/zoffset.h > $@
+cmd_image = $(obj)/tools/build $(obj)/setup.bin $(obj)/vmlinux.bin \
+ $(obj)/zoffset.h $@
$(obj)/bzImage: $(obj)/setup.bin $(obj)/vmlinux.bin $(obj)/tools/build FORCE
$(call if_changed,image)
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index ef72baeff484..50f8c5e0f37e 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -26,9 +26,8 @@
#include <asm/boot.h>
#include <asm/setup.h>
#include "bitops.h"
-#include <asm/cpufeature.h>
-#include <asm/processor-flags.h>
#include "ctype.h"
+#include "cpuflags.h"
/* Useful macros */
#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
@@ -307,14 +306,7 @@ static inline int cmdline_find_option_bool(const char *option)
return __cmdline_find_option_bool(cmd_line_ptr, option);
}
-
/* cpu.c, cpucheck.c */
-struct cpu_features {
- int level; /* Family, or 64 for x86-64 */
- int model;
- u32 flags[NCAPINTS];
-};
-extern struct cpu_features cpu;
int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr);
int validate_cpu(void);
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index dcd90df10ab4..ae8b5dbbd8c5 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -27,7 +27,7 @@ HOST_EXTRACFLAGS += -I$(srctree)/tools/include
VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
$(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o \
- $(obj)/piggy.o
+ $(obj)/piggy.o $(obj)/cpuflags.o $(obj)/aslr.o
$(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c
new file mode 100644
index 000000000000..05957986d123
--- /dev/null
+++ b/arch/x86/boot/compressed/aslr.c
@@ -0,0 +1,267 @@
+#include "misc.h"
+
+#ifdef CONFIG_RANDOMIZE_BASE
+#include <asm/msr.h>
+#include <asm/archrandom.h>
+#include <asm/e820.h>
+
+#define I8254_PORT_CONTROL 0x43
+#define I8254_PORT_COUNTER0 0x40
+#define I8254_CMD_READBACK 0xC0
+#define I8254_SELECT_COUNTER0 0x02
+#define I8254_STATUS_NOTREADY 0x40
+static inline u16 i8254(void)
+{
+ u16 status, timer;
+
+ do {
+ outb(I8254_PORT_CONTROL,
+ I8254_CMD_READBACK | I8254_SELECT_COUNTER0);
+ status = inb(I8254_PORT_COUNTER0);
+ timer = inb(I8254_PORT_COUNTER0);
+ timer |= inb(I8254_PORT_COUNTER0) << 8;
+ } while (status & I8254_STATUS_NOTREADY);
+
+ return timer;
+}
+
+static unsigned long get_random_long(void)
+{
+ unsigned long random;
+
+ if (has_cpuflag(X86_FEATURE_RDRAND)) {
+ debug_putstr("KASLR using RDRAND...\n");
+ if (rdrand_long(&random))
+ return random;
+ }
+
+ if (has_cpuflag(X86_FEATURE_TSC)) {
+ uint32_t raw;
+
+ debug_putstr("KASLR using RDTSC...\n");
+ rdtscl(raw);
+
+ /* Only use the low bits of rdtsc. */
+ random = raw & 0xffff;
+ } else {
+ debug_putstr("KASLR using i8254...\n");
+ random = i8254();
+ }
+
+ /* Extend timer bits poorly... */
+ random |= (random << 16);
+#ifdef CONFIG_X86_64
+ random |= (random << 32);
+#endif
+ return random;
+}
+
+struct mem_vector {
+ unsigned long start;
+ unsigned long size;
+};
+
+#define MEM_AVOID_MAX 5
+struct mem_vector mem_avoid[MEM_AVOID_MAX];
+
+static bool mem_contains(struct mem_vector *region, struct mem_vector *item)
+{
+ /* Item at least partially before region. */
+ if (item->start < region->start)
+ return false;
+ /* Item at least partially after region. */
+ if (item->start + item->size > region->start + region->size)
+ return false;
+ return true;
+}
+
+static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two)
+{
+ /* Item one is entirely before item two. */
+ if (one->start + one->size <= two->start)
+ return false;
+ /* Item one is entirely after item two. */
+ if (one->start >= two->start + two->size)
+ return false;
+ return true;
+}
+
+static void mem_avoid_init(unsigned long input, unsigned long input_size,
+ unsigned long output, unsigned long output_size)
+{
+ u64 initrd_start, initrd_size;
+ u64 cmd_line, cmd_line_size;
+ unsigned long unsafe, unsafe_len;
+ char *ptr;
+
+ /*
+ * Avoid the region that is unsafe to overlap during
+ * decompression (see calculations at top of misc.c).
+ */
+ unsafe_len = (output_size >> 12) + 32768 + 18;
+ unsafe = (unsigned long)input + input_size - unsafe_len;
+ mem_avoid[0].start = unsafe;
+ mem_avoid[0].size = unsafe_len;
+
+ /* Avoid initrd. */
+ initrd_start = (u64)real_mode->ext_ramdisk_image << 32;
+ initrd_start |= real_mode->hdr.ramdisk_image;
+ initrd_size = (u64)real_mode->ext_ramdisk_size << 32;
+ initrd_size |= real_mode->hdr.ramdisk_size;
+ mem_avoid[1].start = initrd_start;
+ mem_avoid[1].size = initrd_size;
+
+ /* Avoid kernel command line. */
+ cmd_line = (u64)real_mode->ext_cmd_line_ptr << 32;
+ cmd_line |= real_mode->hdr.cmd_line_ptr;
+ /* Calculate size of cmd_line. */
+ ptr = (char *)(unsigned long)cmd_line;
+ for (cmd_line_size = 0; ptr[cmd_line_size++]; )
+ ;
+ mem_avoid[2].start = cmd_line;
+ mem_avoid[2].size = cmd_line_size;
+
+ /* Avoid heap memory. */
+ mem_avoid[3].start = (unsigned long)free_mem_ptr;
+ mem_avoid[3].size = BOOT_HEAP_SIZE;
+
+ /* Avoid stack memory. */
+ mem_avoid[4].start = (unsigned long)free_mem_end_ptr;
+ mem_avoid[4].size = BOOT_STACK_SIZE;
+}
+
+/* Does this memory vector overlap a known avoided area? */
+bool mem_avoid_overlap(struct mem_vector *img)
+{
+ int i;
+
+ for (i = 0; i < MEM_AVOID_MAX; i++) {
+ if (mem_overlaps(img, &mem_avoid[i]))
+ return true;
+ }
+
+ return false;
+}
+
+unsigned long slots[CONFIG_RANDOMIZE_BASE_MAX_OFFSET / CONFIG_PHYSICAL_ALIGN];
+unsigned long slot_max = 0;
+
+static void slots_append(unsigned long addr)
+{
+ /* Overflowing the slots list should be impossible. */
+ if (slot_max >= CONFIG_RANDOMIZE_BASE_MAX_OFFSET /
+ CONFIG_PHYSICAL_ALIGN)
+ return;
+
+ slots[slot_max++] = addr;
+}
+
+static unsigned long slots_fetch_random(void)
+{
+ /* Handle case of no slots stored. */
+ if (slot_max == 0)
+ return 0;
+
+ return slots[get_random_long() % slot_max];
+}
+
+static void process_e820_entry(struct e820entry *entry,
+ unsigned long minimum,
+ unsigned long image_size)
+{
+ struct mem_vector region, img;
+
+ /* Skip non-RAM entries. */
+ if (entry->type != E820_RAM)
+ return;
+
+ /* Ignore entries entirely above our maximum. */
+ if (entry->addr >= CONFIG_RANDOMIZE_BASE_MAX_OFFSET)
+ return;
+
+ /* Ignore entries entirely below our minimum. */
+ if (entry->addr + entry->size < minimum)
+ return;
+
+ region.start = entry->addr;
+ region.size = entry->size;
+
+ /* Potentially raise address to minimum location. */
+ if (region.start < minimum)
+ region.start = minimum;
+
+ /* Potentially raise address to meet alignment requirements. */
+ region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN);
+
+ /* Did we raise the address above the bounds of this e820 region? */
+ if (region.start > entry->addr + entry->size)
+ return;
+
+ /* Reduce size by any delta from the original address. */
+ region.size -= region.start - entry->addr;
+
+ /* Reduce maximum size to fit end of image within maximum limit. */
+ if (region.start + region.size > CONFIG_RANDOMIZE_BASE_MAX_OFFSET)
+ region.size = CONFIG_RANDOMIZE_BASE_MAX_OFFSET - region.start;
+
+ /* Walk each aligned slot and check for avoided areas. */
+ for (img.start = region.start, img.size = image_size ;
+ mem_contains(&region, &img) ;
+ img.start += CONFIG_PHYSICAL_ALIGN) {
+ if (mem_avoid_overlap(&img))
+ continue;
+ slots_append(img.start);
+ }
+}
+
+static unsigned long find_random_addr(unsigned long minimum,
+ unsigned long size)
+{
+ int i;
+ unsigned long addr;
+
+ /* Make sure minimum is aligned. */
+ minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN);
+
+ /* Verify potential e820 positions, appending to slots list. */
+ for (i = 0; i < real_mode->e820_entries; i++) {
+ process_e820_entry(&real_mode->e820_map[i], minimum, size);
+ }
+
+ return slots_fetch_random();
+}
+
+unsigned char *choose_kernel_location(unsigned char *input,
+ unsigned long input_size,
+ unsigned char *output,
+ unsigned long output_size)
+{
+ unsigned long choice = (unsigned long)output;
+ unsigned long random;
+
+ if (cmdline_find_option_bool("nokaslr")) {
+ debug_putstr("KASLR disabled...\n");
+ goto out;
+ }
+
+ /* Record the various known unsafe memory ranges. */
+ mem_avoid_init((unsigned long)input, input_size,
+ (unsigned long)output, output_size);
+
+ /* Walk e820 and find a random address. */
+ random = find_random_addr(choice, output_size);
+ if (!random) {
+ debug_putstr("KASLR could not find suitable E820 region...\n");
+ goto out;
+ }
+
+ /* Always enforce the minimum. */
+ if (random < choice)
+ goto out;
+
+ choice = random;
+out:
+ return (unsigned char *)choice;
+}
+
+#endif /* CONFIG_RANDOMIZE_BASE */
diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c
index bffd73b45b1f..b68e3033e6b9 100644
--- a/arch/x86/boot/compressed/cmdline.c
+++ b/arch/x86/boot/compressed/cmdline.c
@@ -1,6 +1,6 @@
#include "misc.h"
-#ifdef CONFIG_EARLY_PRINTK
+#if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE
static unsigned long fs;
static inline void set_fs(unsigned long seg)
diff --git a/arch/x86/boot/compressed/cpuflags.c b/arch/x86/boot/compressed/cpuflags.c
new file mode 100644
index 000000000000..aa313466118b
--- /dev/null
+++ b/arch/x86/boot/compressed/cpuflags.c
@@ -0,0 +1,12 @@
+#ifdef CONFIG_RANDOMIZE_BASE
+
+#include "../cpuflags.c"
+
+bool has_cpuflag(int flag)
+{
+ get_cpuflags();
+
+ return test_bit(flag, cpu.flags);
+}
+
+#endif
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index b7388a425f09..a7677babf946 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -19,214 +19,10 @@
static efi_system_table_t *sys_table;
-static void efi_char16_printk(efi_char16_t *str)
-{
- struct efi_simple_text_output_protocol *out;
-
- out = (struct efi_simple_text_output_protocol *)sys_table->con_out;
- efi_call_phys2(out->output_string, out, str);
-}
-
-static void efi_printk(char *str)
-{
- char *s8;
-
- for (s8 = str; *s8; s8++) {
- efi_char16_t ch[2] = { 0 };
-
- ch[0] = *s8;
- if (*s8 == '\n') {
- efi_char16_t nl[2] = { '\r', 0 };
- efi_char16_printk(nl);
- }
-
- efi_char16_printk(ch);
- }
-}
-
-static efi_status_t __get_map(efi_memory_desc_t **map, unsigned long *map_size,
- unsigned long *desc_size)
-{
- efi_memory_desc_t *m = NULL;
- efi_status_t status;
- unsigned long key;
- u32 desc_version;
-
- *map_size = sizeof(*m) * 32;
-again:
- /*
- * Add an additional efi_memory_desc_t because we're doing an
- * allocation which may be in a new descriptor region.
- */
- *map_size += sizeof(*m);
- status = efi_call_phys3(sys_table->boottime->allocate_pool,
- EFI_LOADER_DATA, *map_size, (void **)&m);
- if (status != EFI_SUCCESS)
- goto fail;
-
- status = efi_call_phys5(sys_table->boottime->get_memory_map, map_size,
- m, &key, desc_size, &desc_version);
- if (status == EFI_BUFFER_TOO_SMALL) {
- efi_call_phys1(sys_table->boottime->free_pool, m);
- goto again;
- }
-
- if (status != EFI_SUCCESS)
- efi_call_phys1(sys_table->boottime->free_pool, m);
-fail:
- *map = m;
- return status;
-}
-
-/*
- * Allocate at the highest possible address that is not above 'max'.
- */
-static efi_status_t high_alloc(unsigned long size, unsigned long align,
- unsigned long *addr, unsigned long max)
-{
- unsigned long map_size, desc_size;
- efi_memory_desc_t *map;
- efi_status_t status;
- unsigned long nr_pages;
- u64 max_addr = 0;
- int i;
-
- status = __get_map(&map, &map_size, &desc_size);
- if (status != EFI_SUCCESS)
- goto fail;
-
- nr_pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE;
-again:
- for (i = 0; i < map_size / desc_size; i++) {
- efi_memory_desc_t *desc;
- unsigned long m = (unsigned long)map;
- u64 start, end;
-
- desc = (efi_memory_desc_t *)(m + (i * desc_size));
- if (desc->type != EFI_CONVENTIONAL_MEMORY)
- continue;
-
- if (desc->num_pages < nr_pages)
- continue;
+#include "../../../../drivers/firmware/efi/efi-stub-helper.c"
- start = desc->phys_addr;
- end = start + desc->num_pages * (1UL << EFI_PAGE_SHIFT);
- if ((start + size) > end || (start + size) > max)
- continue;
-
- if (end - size > max)
- end = max;
-
- if (round_down(end - size, align) < start)
- continue;
-
- start = round_down(end - size, align);
-
- /*
- * Don't allocate at 0x0. It will confuse code that
- * checks pointers against NULL.
- */
- if (start == 0x0)
- continue;
-
- if (start > max_addr)
- max_addr = start;
- }
-
- if (!max_addr)
- status = EFI_NOT_FOUND;
- else {
- status = efi_call_phys4(sys_table->boottime->allocate_pages,
- EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA,
- nr_pages, &max_addr);
- if (status != EFI_SUCCESS) {
- max = max_addr;
- max_addr = 0;
- goto again;
- }
-
- *addr = max_addr;
- }
-
-free_pool:
- efi_call_phys1(sys_table->boottime->free_pool, map);
-
-fail:
- return status;
-}
-
-/*
- * Allocate at the lowest possible address.
- */
-static efi_status_t low_alloc(unsigned long size, unsigned long align,
- unsigned long *addr)
-{
- unsigned long map_size, desc_size;
- efi_memory_desc_t *map;
- efi_status_t status;
- unsigned long nr_pages;
- int i;
-
- status = __get_map(&map, &map_size, &desc_size);
- if (status != EFI_SUCCESS)
- goto fail;
-
- nr_pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE;
- for (i = 0; i < map_size / desc_size; i++) {
- efi_memory_desc_t *desc;
- unsigned long m = (unsigned long)map;
- u64 start, end;
-
- desc = (efi_memory_desc_t *)(m + (i * desc_size));
-
- if (desc->type != EFI_CONVENTIONAL_MEMORY)
- continue;
-
- if (desc->num_pages < nr_pages)
- continue;
-
- start = desc->phys_addr;
- end = start + desc->num_pages * (1UL << EFI_PAGE_SHIFT);
-
- /*
- * Don't allocate at 0x0. It will confuse code that
- * checks pointers against NULL. Skip the first 8
- * bytes so we start at a nice even number.
- */
- if (start == 0x0)
- start += 8;
-
- start = round_up(start, align);
- if ((start + size) > end)
- continue;
-
- status = efi_call_phys4(sys_table->boottime->allocate_pages,
- EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA,
- nr_pages, &start);
- if (status == EFI_SUCCESS) {
- *addr = start;
- break;
- }
- }
-
- if (i == map_size / desc_size)
- status = EFI_NOT_FOUND;
-
-free_pool:
- efi_call_phys1(sys_table->boottime->free_pool, map);
-fail:
- return status;
-}
-
-static void low_free(unsigned long size, unsigned long addr)
-{
- unsigned long nr_pages;
-
- nr_pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE;
- efi_call_phys2(sys_table->boottime->free_pages, addr, nr_pages);
-}
static void find_bits(unsigned long mask, u8 *pos, u8 *size)
{
@@ -624,242 +420,6 @@ void setup_graphics(struct boot_params *boot_params)
}
}
-struct initrd {
- efi_file_handle_t *handle;
- u64 size;
-};
-
-/*
- * Check the cmdline for a LILO-style initrd= arguments.
- *
- * We only support loading an initrd from the same filesystem as the
- * kernel image.
- */
-static efi_status_t handle_ramdisks(efi_loaded_image_t *image,
- struct setup_header *hdr)
-{
- struct initrd *initrds;
- unsigned long initrd_addr;
- efi_guid_t fs_proto = EFI_FILE_SYSTEM_GUID;
- u64 initrd_total;
- efi_file_io_interface_t *io;
- efi_file_handle_t *fh;
- efi_status_t status;
- int nr_initrds;
- char *str;
- int i, j, k;
-
- initrd_addr = 0;
- initrd_total = 0;
-
- str = (char *)(unsigned long)hdr->cmd_line_ptr;
-
- j = 0; /* See close_handles */
-
- if (!str || !*str)
- return EFI_SUCCESS;
-
- for (nr_initrds = 0; *str; nr_initrds++) {
- str = strstr(str, "initrd=");
- if (!str)
- break;
-
- str += 7;
-
- /* Skip any leading slashes */
- while (*str == '/' || *str == '\\')
- str++;
-
- while (*str && *str != ' ' && *str != '\n')
- str++;
- }
-
- if (!nr_initrds)
- return EFI_SUCCESS;
-
- status = efi_call_phys3(sys_table->boottime->allocate_pool,
- EFI_LOADER_DATA,
- nr_initrds * sizeof(*initrds),
- &initrds);
- if (status != EFI_SUCCESS) {
- efi_printk("Failed to alloc mem for initrds\n");
- goto fail;
- }
-
- str = (char *)(unsigned long)hdr->cmd_line_ptr;
- for (i = 0; i < nr_initrds; i++) {
- struct initrd *initrd;
- efi_file_handle_t *h;
- efi_file_info_t *info;
- efi_char16_t filename_16[256];
- unsigned long info_sz;
- efi_guid_t info_guid = EFI_FILE_INFO_ID;
- efi_char16_t *p;
- u64 file_sz;
-
- str = strstr(str, "initrd=");
- if (!str)
- break;
-
- str += 7;
-
- initrd = &initrds[i];
- p = filename_16;
-
- /* Skip any leading slashes */
- while (*str == '/' || *str == '\\')
- str++;
-
- while (*str && *str != ' ' && *str != '\n') {
- if ((u8 *)p >= (u8 *)filename_16 + sizeof(filename_16))
- break;
-
- if (*str == '/') {
- *p++ = '\\';
- *str++;
- } else {
- *p++ = *str++;
- }
- }
-
- *p = '\0';
-
- /* Only open the volume once. */
- if (!i) {
- efi_boot_services_t *boottime;
-
- boottime = sys_table->boottime;
-
- status = efi_call_phys3(boottime->handle_protocol,
- image->device_handle, &fs_proto, &io);
- if (status != EFI_SUCCESS) {
- efi_printk("Failed to handle fs_proto\n");
- goto free_initrds;
- }
-
- status = efi_call_phys2(io->open_volume, io, &fh);
- if (status != EFI_SUCCESS) {
- efi_printk("Failed to open volume\n");
- goto free_initrds;
- }
- }
-
- status = efi_call_phys5(fh->open, fh, &h, filename_16,
- EFI_FILE_MODE_READ, (u64)0);
- if (status != EFI_SUCCESS) {
- efi_printk("Failed to open initrd file: ");
- efi_char16_printk(filename_16);
- efi_printk("\n");
- goto close_handles;
- }
-
- initrd->handle = h;
-
- info_sz = 0;
- status = efi_call_phys4(h->get_info, h, &info_guid,
- &info_sz, NULL);
- if (status != EFI_BUFFER_TOO_SMALL) {
- efi_printk("Failed to get initrd info size\n");
- goto close_handles;
- }
-
-grow:
- status = efi_call_phys3(sys_table->boottime->allocate_pool,
- EFI_LOADER_DATA, info_sz, &info);
- if (status != EFI_SUCCESS) {
- efi_printk("Failed to alloc mem for initrd info\n");
- goto close_handles;
- }
-
- status = efi_call_phys4(h->get_info, h, &info_guid,
- &info_sz, info);
- if (status == EFI_BUFFER_TOO_SMALL) {
- efi_call_phys1(sys_table->boottime->free_pool, info);
- goto grow;
- }
-
- file_sz = info->file_size;
- efi_call_phys1(sys_table->boottime->free_pool, info);
-
- if (status != EFI_SUCCESS) {
- efi_printk("Failed to get initrd info\n");
- goto close_handles;
- }
-
- initrd->size = file_sz;
- initrd_total += file_sz;
- }
-
- if (initrd_total) {
- unsigned long addr;
-
- /*
- * Multiple initrd's need to be at consecutive
- * addresses in memory, so allocate enough memory for
- * all the initrd's.
- */
- status = high_alloc(initrd_total, 0x1000,
- &initrd_addr, hdr->initrd_addr_max);
- if (status != EFI_SUCCESS) {
- efi_printk("Failed to alloc highmem for initrds\n");
- goto close_handles;
- }
-
- /* We've run out of free low memory. */
- if (initrd_addr > hdr->initrd_addr_max) {
- efi_printk("We've run out of free low memory\n");
- status = EFI_INVALID_PARAMETER;
- goto free_initrd_total;
- }
-
- addr = initrd_addr;
- for (j = 0; j < nr_initrds; j++) {
- u64 size;
-
- size = initrds[j].size;
- while (size) {
- u64 chunksize;
- if (size > EFI_READ_CHUNK_SIZE)
- chunksize = EFI_READ_CHUNK_SIZE;
- else
- chunksize = size;
- status = efi_call_phys3(fh->read,
- initrds[j].handle,
- &chunksize, addr);
- if (status != EFI_SUCCESS) {
- efi_printk("Failed to read initrd\n");
- goto free_initrd_total;
- }
- addr += chunksize;
- size -= chunksize;
- }
-
- efi_call_phys1(fh->close, initrds[j].handle);
- }
-
- }
-
- efi_call_phys1(sys_table->boottime->free_pool, initrds);
-
- hdr->ramdisk_image = initrd_addr;
- hdr->ramdisk_size = initrd_total;
-
- return status;
-
-free_initrd_total:
- low_free(initrd_total, initrd_addr);
-
-close_handles:
- for (k = j; k < i; k++)
- efi_call_phys1(fh->close, initrds[k].handle);
-free_initrds:
- efi_call_phys1(sys_table->boottime->free_pool, initrds);
-fail:
- hdr->ramdisk_image = 0;
- hdr->ramdisk_size = 0;
-
- return status;
-}
/*
* Because the x86 boot code expects to be passed a boot_params we
@@ -875,14 +435,15 @@ struct boot_params *make_boot_params(void *handle, efi_system_table_t *_table)
struct efi_info *efi;
efi_loaded_image_t *image;
void *options;
- u32 load_options_size;
efi_guid_t proto = LOADED_IMAGE_PROTOCOL_GUID;
int options_size = 0;
efi_status_t status;
- unsigned long cmdline;
+ char *cmdline_ptr;
u16 *s2;
u8 *s1;
int i;
+ unsigned long ramdisk_addr;
+ unsigned long ramdisk_size;
sys_table = _table;
@@ -893,13 +454,14 @@ struct boot_params *make_boot_params(void *handle, efi_system_table_t *_table)
status = efi_call_phys3(sys_table->boottime->handle_protocol,
handle, &proto, (void *)&image);
if (status != EFI_SUCCESS) {
- efi_printk("Failed to get handle for LOADED_IMAGE_PROTOCOL\n");
+ efi_printk(sys_table, "Failed to get handle for LOADED_IMAGE_PROTOCOL\n");
return NULL;
}
- status = low_alloc(0x4000, 1, (unsigned long *)&boot_params);
+ status = efi_low_alloc(sys_table, 0x4000, 1,
+ (unsigned long *)&boot_params);
if (status != EFI_SUCCESS) {
- efi_printk("Failed to alloc lowmem for boot params\n");
+ efi_printk(sys_table, "Failed to alloc lowmem for boot params\n");
return NULL;
}
@@ -926,40 +488,11 @@ struct boot_params *make_boot_params(void *handle, efi_system_table_t *_table)
hdr->type_of_loader = 0x21;
/* Convert unicode cmdline to ascii */
- options = image->load_options;
- load_options_size = image->load_options_size / 2; /* ASCII */
- cmdline = 0;
- s2 = (u16 *)options;
-
- if (s2) {
- while (*s2 && *s2 != '\n' && options_size < load_options_size) {
- s2++;
- options_size++;
- }
-
- if (options_size) {
- if (options_size > hdr->cmdline_size)
- options_size = hdr->cmdline_size;
-
- options_size++; /* NUL termination */
-
- status = low_alloc(options_size, 1, &cmdline);
- if (status != EFI_SUCCESS) {
- efi_printk("Failed to alloc mem for cmdline\n");
- goto fail;
- }
-
- s1 = (u8 *)(unsigned long)cmdline;
- s2 = (u16 *)options;
-
- for (i = 0; i < options_size - 1; i++)
- *s1++ = *s2++;
-
- *s1 = '\0';
- }
- }
-
- hdr->cmd_line_ptr = cmdline;
+ cmdline_ptr = efi_convert_cmdline_to_ascii(sys_table, image,
+ &options_size);
+ if (!cmdline_ptr)
+ goto fail;
+ hdr->cmd_line_ptr = (unsigned long)cmdline_ptr;
hdr->ramdisk_image = 0;
hdr->ramdisk_size = 0;
@@ -969,96 +502,64 @@ struct boot_params *make_boot_params(void *handle, efi_system_table_t *_table)
memset(sdt, 0, sizeof(*sdt));
- status = handle_ramdisks(image, hdr);
+ status = handle_cmdline_files(sys_table, image,
+ (char *)(unsigned long)hdr->cmd_line_ptr,
+ "initrd=", hdr->initrd_addr_max,
+ &ramdisk_addr, &ramdisk_size);
if (status != EFI_SUCCESS)
goto fail2;
+ hdr->ramdisk_image = ramdisk_addr;
+ hdr->ramdisk_size = ramdisk_size;
return boot_params;
fail2:
- if (options_size)
- low_free(options_size, hdr->cmd_line_ptr);
+ efi_free(sys_table, options_size, hdr->cmd_line_ptr);
fail:
- low_free(0x4000, (unsigned long)boot_params);
+ efi_free(sys_table, 0x4000, (unsigned long)boot_params);
return NULL;
}
-static efi_status_t exit_boot(struct boot_params *boot_params,
- void *handle)
+static void add_e820ext(struct boot_params *params,
+ struct setup_data *e820ext, u32 nr_entries)
{
- struct efi_info *efi = &boot_params->efi_info;
- struct e820entry *e820_map = &boot_params->e820_map[0];
- struct e820entry *prev = NULL;
- unsigned long size, key, desc_size, _size;
- efi_memory_desc_t *mem_map;
+ struct setup_data *data;
efi_status_t status;
- __u32 desc_version;
- bool called_exit = false;
- u8 nr_entries;
- int i;
-
- size = sizeof(*mem_map) * 32;
-
-again:
- size += sizeof(*mem_map) * 2;
- _size = size;
- status = low_alloc(size, 1, (unsigned long *)&mem_map);
- if (status != EFI_SUCCESS)
- return status;
-
-get_map:
- status = efi_call_phys5(sys_table->boottime->get_memory_map, &size,
- mem_map, &key, &desc_size, &desc_version);
- if (status == EFI_BUFFER_TOO_SMALL) {
- low_free(_size, (unsigned long)mem_map);
- goto again;
- }
+ unsigned long size;
- if (status != EFI_SUCCESS)
- goto free_mem_map;
+ e820ext->type = SETUP_E820_EXT;
+ e820ext->len = nr_entries * sizeof(struct e820entry);
+ e820ext->next = 0;
- memcpy(&efi->efi_loader_signature, EFI_LOADER_SIGNATURE, sizeof(__u32));
- efi->efi_systab = (unsigned long)sys_table;
- efi->efi_memdesc_size = desc_size;
- efi->efi_memdesc_version = desc_version;
- efi->efi_memmap = (unsigned long)mem_map;
- efi->efi_memmap_size = size;
-
-#ifdef CONFIG_X86_64
- efi->efi_systab_hi = (unsigned long)sys_table >> 32;
- efi->efi_memmap_hi = (unsigned long)mem_map >> 32;
-#endif
+ data = (struct setup_data *)(unsigned long)params->hdr.setup_data;
- /* Might as well exit boot services now */
- status = efi_call_phys2(sys_table->boottime->exit_boot_services,
- handle, key);
- if (status != EFI_SUCCESS) {
- /*
- * ExitBootServices() will fail if any of the event
- * handlers change the memory map. In which case, we
- * must be prepared to retry, but only once so that
- * we're guaranteed to exit on repeated failures instead
- * of spinning forever.
- */
- if (called_exit)
- goto free_mem_map;
+ while (data && data->next)
+ data = (struct setup_data *)(unsigned long)data->next;
- called_exit = true;
- goto get_map;
- }
+ if (data)
+ data->next = (unsigned long)e820ext;
+ else
+ params->hdr.setup_data = (unsigned long)e820ext;
+}
- /* Historic? */
- boot_params->alt_mem_k = 32 * 1024;
+static efi_status_t setup_e820(struct boot_params *params,
+ struct setup_data *e820ext, u32 e820ext_size)
+{
+ struct e820entry *e820_map = &params->e820_map[0];
+ struct efi_info *efi = &params->efi_info;
+ struct e820entry *prev = NULL;
+ u32 nr_entries;
+ u32 nr_desc;
+ int i;
- /*
- * Convert the EFI memory map to E820.
- */
nr_entries = 0;
- for (i = 0; i < size / desc_size; i++) {
+ nr_desc = efi->efi_memmap_size / efi->efi_memdesc_size;
+
+ for (i = 0; i < nr_desc; i++) {
efi_memory_desc_t *d;
unsigned int e820_type = 0;
- unsigned long m = (unsigned long)mem_map;
+ unsigned long m = efi->efi_memmap;
- d = (efi_memory_desc_t *)(m + (i * desc_size));
+ d = (efi_memory_desc_t *)(m + (i * efi->efi_memdesc_size));
switch (d->type) {
case EFI_RESERVED_TYPE:
case EFI_RUNTIME_SERVICES_CODE:
@@ -1095,61 +596,151 @@ get_map:
/* Merge adjacent mappings */
if (prev && prev->type == e820_type &&
- (prev->addr + prev->size) == d->phys_addr)
+ (prev->addr + prev->size) == d->phys_addr) {
prev->size += d->num_pages << 12;
- else {
- e820_map->addr = d->phys_addr;
- e820_map->size = d->num_pages << 12;
- e820_map->type = e820_type;
- prev = e820_map++;
- nr_entries++;
+ continue;
+ }
+
+ if (nr_entries == ARRAY_SIZE(params->e820_map)) {
+ u32 need = (nr_desc - i) * sizeof(struct e820entry) +
+ sizeof(struct setup_data);
+
+ if (!e820ext || e820ext_size < need)
+ return EFI_BUFFER_TOO_SMALL;
+
+ /* boot_params map full, switch to e820 extended */
+ e820_map = (struct e820entry *)e820ext->data;
}
+
+ e820_map->addr = d->phys_addr;
+ e820_map->size = d->num_pages << PAGE_SHIFT;
+ e820_map->type = e820_type;
+ prev = e820_map++;
+ nr_entries++;
}
- boot_params->e820_entries = nr_entries;
+ if (nr_entries > ARRAY_SIZE(params->e820_map)) {
+ u32 nr_e820ext = nr_entries - ARRAY_SIZE(params->e820_map);
+
+ add_e820ext(params, e820ext, nr_e820ext);
+ nr_entries -= nr_e820ext;
+ }
+
+ params->e820_entries = (u8)nr_entries;
return EFI_SUCCESS;
+}
+
+static efi_status_t alloc_e820ext(u32 nr_desc, struct setup_data **e820ext,
+ u32 *e820ext_size)
+{
+ efi_status_t status;
+ unsigned long size;
+
+ size = sizeof(struct setup_data) +
+ sizeof(struct e820entry) * nr_desc;
+
+ if (*e820ext) {
+ efi_call_phys1(sys_table->boottime->free_pool, *e820ext);
+ *e820ext = NULL;
+ *e820ext_size = 0;
+ }
+
+ status = efi_call_phys3(sys_table->boottime->allocate_pool,
+ EFI_LOADER_DATA, size, e820ext);
+
+ if (status == EFI_SUCCESS)
+ *e820ext_size = size;
-free_mem_map:
- low_free(_size, (unsigned long)mem_map);
return status;
}
-static efi_status_t relocate_kernel(struct setup_header *hdr)
+static efi_status_t exit_boot(struct boot_params *boot_params,
+ void *handle)
{
- unsigned long start, nr_pages;
+ struct efi_info *efi = &boot_params->efi_info;
+ unsigned long map_sz, key, desc_size;
+ efi_memory_desc_t *mem_map;
+ struct setup_data *e820ext;
+ __u32 e820ext_size;
+ __u32 nr_desc, prev_nr_desc;
efi_status_t status;
+ __u32 desc_version;
+ bool called_exit = false;
+ u8 nr_entries;
+ int i;
- /*
- * The EFI firmware loader could have placed the kernel image
- * anywhere in memory, but the kernel has various restrictions
- * on the max physical address it can run at. Attempt to move
- * the kernel to boot_params.pref_address, or as low as
- * possible.
- */
- start = hdr->pref_address;
- nr_pages = round_up(hdr->init_size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE;
+ nr_desc = 0;
+ e820ext = NULL;
+ e820ext_size = 0;
- status = efi_call_phys4(sys_table->boottime->allocate_pages,
- EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA,
- nr_pages, &start);
- if (status != EFI_SUCCESS) {
- status = low_alloc(hdr->init_size, hdr->kernel_alignment,
- &start);
+get_map:
+ status = efi_get_memory_map(sys_table, &mem_map, &map_sz, &desc_size,
+ &desc_version, &key);
+
+ if (status != EFI_SUCCESS)
+ return status;
+
+ prev_nr_desc = nr_desc;
+ nr_desc = map_sz / desc_size;
+ if (nr_desc > prev_nr_desc &&
+ nr_desc > ARRAY_SIZE(boot_params->e820_map)) {
+ u32 nr_e820ext = nr_desc - ARRAY_SIZE(boot_params->e820_map);
+
+ status = alloc_e820ext(nr_e820ext, &e820ext, &e820ext_size);
if (status != EFI_SUCCESS)
- efi_printk("Failed to alloc mem for kernel\n");
+ goto free_mem_map;
+
+ efi_call_phys1(sys_table->boottime->free_pool, mem_map);
+ goto get_map; /* Allocated memory, get map again */
}
- if (status == EFI_SUCCESS)
- memcpy((void *)start, (void *)(unsigned long)hdr->code32_start,
- hdr->init_size);
+ memcpy(&efi->efi_loader_signature, EFI_LOADER_SIGNATURE, sizeof(__u32));
+ efi->efi_systab = (unsigned long)sys_table;
+ efi->efi_memdesc_size = desc_size;
+ efi->efi_memdesc_version = desc_version;
+ efi->efi_memmap = (unsigned long)mem_map;
+ efi->efi_memmap_size = map_sz;
+
+#ifdef CONFIG_X86_64
+ efi->efi_systab_hi = (unsigned long)sys_table >> 32;
+ efi->efi_memmap_hi = (unsigned long)mem_map >> 32;
+#endif
- hdr->pref_address = hdr->code32_start;
- hdr->code32_start = (__u32)start;
+ /* Might as well exit boot services now */
+ status = efi_call_phys2(sys_table->boottime->exit_boot_services,
+ handle, key);
+ if (status != EFI_SUCCESS) {
+ /*
+ * ExitBootServices() will fail if any of the event
+ * handlers change the memory map. In which case, we
+ * must be prepared to retry, but only once so that
+ * we're guaranteed to exit on repeated failures instead
+ * of spinning forever.
+ */
+ if (called_exit)
+ goto free_mem_map;
+ called_exit = true;
+ efi_call_phys1(sys_table->boottime->free_pool, mem_map);
+ goto get_map;
+ }
+
+ /* Historic? */
+ boot_params->alt_mem_k = 32 * 1024;
+
+ status = setup_e820(boot_params, e820ext, e820ext_size);
+ if (status != EFI_SUCCESS)
+ return status;
+
+ return EFI_SUCCESS;
+
+free_mem_map:
+ efi_call_phys1(sys_table->boottime->free_pool, mem_map);
return status;
}
+
/*
* On success we return a pointer to a boot_params structure, and NULL
* on failure.
@@ -1157,7 +748,7 @@ static efi_status_t relocate_kernel(struct setup_header *hdr)
struct boot_params *efi_main(void *handle, efi_system_table_t *_table,
struct boot_params *boot_params)
{
- struct desc_ptr *gdt, *idt;
+ struct desc_ptr *gdt;
efi_loaded_image_t *image;
struct setup_header *hdr = &boot_params->hdr;
efi_status_t status;
@@ -1177,37 +768,33 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table,
EFI_LOADER_DATA, sizeof(*gdt),
(void **)&gdt);
if (status != EFI_SUCCESS) {
- efi_printk("Failed to alloc mem for gdt structure\n");
+ efi_printk(sys_table, "Failed to alloc mem for gdt structure\n");
goto fail;
}
gdt->size = 0x800;
- status = low_alloc(gdt->size, 8, (unsigned long *)&gdt->address);
- if (status != EFI_SUCCESS) {
- efi_printk("Failed to alloc mem for gdt\n");
- goto fail;
- }
-
- status = efi_call_phys3(sys_table->boottime->allocate_pool,
- EFI_LOADER_DATA, sizeof(*idt),
- (void **)&idt);
+ status = efi_low_alloc(sys_table, gdt->size, 8,
+ (unsigned long *)&gdt->address);
if (status != EFI_SUCCESS) {
- efi_printk("Failed to alloc mem for idt structure\n");
+ efi_printk(sys_table, "Failed to alloc mem for gdt\n");
goto fail;
}
- idt->size = 0;
- idt->address = 0;
-
/*
* If the kernel isn't already loaded at the preferred load
* address, relocate it.
*/
if (hdr->pref_address != hdr->code32_start) {
- status = relocate_kernel(hdr);
-
+ unsigned long bzimage_addr = hdr->code32_start;
+ status = efi_relocate_kernel(sys_table, &bzimage_addr,
+ hdr->init_size, hdr->init_size,
+ hdr->pref_address,
+ hdr->kernel_alignment);
if (status != EFI_SUCCESS)
goto fail;
+
+ hdr->pref_address = hdr->code32_start;
+ hdr->code32_start = bzimage_addr;
}
status = exit_boot(boot_params, handle);
@@ -1267,10 +854,8 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table,
desc->base2 = 0x00;
#endif /* CONFIG_X86_64 */
- asm volatile ("lidt %0" : : "m" (*idt));
- asm volatile ("lgdt %0" : : "m" (*gdt));
-
asm volatile("cli");
+ asm volatile ("lgdt %0" : : "m" (*gdt));
return boot_params;
fail:
diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h
index e5b0a8f91c5f..81b6b652b46a 100644
--- a/arch/x86/boot/compressed/eboot.h
+++ b/arch/x86/boot/compressed/eboot.h
@@ -11,9 +11,6 @@
#define DESC_TYPE_CODE_DATA (1 << 0)
-#define EFI_PAGE_SIZE (1UL << EFI_PAGE_SHIFT)
-#define EFI_READ_CHUNK_SIZE (1024 * 1024)
-
#define EFI_CONSOLE_OUT_DEVICE_GUID \
EFI_GUID(0xd3b36f2c, 0xd551, 0x11d4, 0x9a, 0x46, 0x0, 0x90, 0x27, \
0x3f, 0xc1, 0x4d)
@@ -62,10 +59,4 @@ struct efi_uga_draw_protocol {
void *blt;
};
-struct efi_simple_text_output_protocol {
- void *reset;
- void *output_string;
- void *test_string;
-};
-
#endif /* BOOT_COMPRESSED_EBOOT_H */
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 5d6f6891b188..9116aac232c7 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -117,9 +117,11 @@ preferred_addr:
addl %eax, %ebx
notl %eax
andl %eax, %ebx
-#else
- movl $LOAD_PHYSICAL_ADDR, %ebx
+ cmpl $LOAD_PHYSICAL_ADDR, %ebx
+ jge 1f
#endif
+ movl $LOAD_PHYSICAL_ADDR, %ebx
+1:
/* Target address to relocate to for decompression */
addl $z_extract_offset, %ebx
@@ -191,14 +193,14 @@ relocated:
leal boot_heap(%ebx), %eax
pushl %eax /* heap area */
pushl %esi /* real mode pointer */
- call decompress_kernel
+ call decompress_kernel /* returns kernel location in %eax */
addl $24, %esp
/*
* Jump to the decompressed kernel.
*/
xorl %ebx, %ebx
- jmp *%ebp
+ jmp *%eax
/*
* Stack and heap for uncompression
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index c337422b575d..c5c1ae0997e7 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -94,9 +94,11 @@ ENTRY(startup_32)
addl %eax, %ebx
notl %eax
andl %eax, %ebx
-#else
- movl $LOAD_PHYSICAL_ADDR, %ebx
+ cmpl $LOAD_PHYSICAL_ADDR, %ebx
+ jge 1f
#endif
+ movl $LOAD_PHYSICAL_ADDR, %ebx
+1:
/* Target address to relocate to for decompression */
addl $z_extract_offset, %ebx
@@ -269,9 +271,11 @@ preferred_addr:
addq %rax, %rbp
notq %rax
andq %rax, %rbp
-#else
- movq $LOAD_PHYSICAL_ADDR, %rbp
+ cmpq $LOAD_PHYSICAL_ADDR, %rbp
+ jge 1f
#endif
+ movq $LOAD_PHYSICAL_ADDR, %rbp
+1:
/* Target address to relocate to for decompression */
leaq z_extract_offset(%rbp), %rbx
@@ -339,13 +343,13 @@ relocated:
movl $z_input_len, %ecx /* input_len */
movq %rbp, %r8 /* output target address */
movq $z_output_len, %r9 /* decompressed length */
- call decompress_kernel
+ call decompress_kernel /* returns kernel location in %rax */
popq %rsi
/*
* Jump to the decompressed kernel.
*/
- jmp *%rbp
+ jmp *%rax
.code32
no_longmode:
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 434f077d2c4d..196eaf373a06 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -112,14 +112,8 @@ struct boot_params *real_mode; /* Pointer to real-mode data */
void *memset(void *s, int c, size_t n);
void *memcpy(void *dest, const void *src, size_t n);
-#ifdef CONFIG_X86_64
-#define memptr long
-#else
-#define memptr unsigned
-#endif
-
-static memptr free_mem_ptr;
-static memptr free_mem_end_ptr;
+memptr free_mem_ptr;
+memptr free_mem_end_ptr;
static char *vidmem;
static int vidport;
@@ -395,7 +389,7 @@ static void parse_elf(void *output)
free(phdrs);
}
-asmlinkage void decompress_kernel(void *rmode, memptr heap,
+asmlinkage void *decompress_kernel(void *rmode, memptr heap,
unsigned char *input_data,
unsigned long input_len,
unsigned char *output,
@@ -422,6 +416,10 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
free_mem_ptr = heap; /* Heap */
free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
+ output = choose_kernel_location(input_data, input_len,
+ output, output_len);
+
+ /* Validate memory location choices. */
if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1))
error("Destination address inappropriately aligned");
#ifdef CONFIG_X86_64
@@ -441,5 +439,5 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
parse_elf(output);
handle_relocations(output, output_len);
debug_putstr("done.\nBooting the kernel.\n");
- return;
+ return output;
}
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 674019d8e235..24e3e569a13c 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -23,7 +23,15 @@
#define BOOT_BOOT_H
#include "../ctype.h"
+#ifdef CONFIG_X86_64
+#define memptr long
+#else
+#define memptr unsigned
+#endif
+
/* misc.c */
+extern memptr free_mem_ptr;
+extern memptr free_mem_end_ptr;
extern struct boot_params *real_mode; /* Pointer to real-mode data */
void __putstr(const char *s);
#define error_putstr(__x) __putstr(__x)
@@ -39,23 +47,40 @@ static inline void debug_putstr(const char *s)
#endif
-#ifdef CONFIG_EARLY_PRINTK
-
+#if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE
/* cmdline.c */
int cmdline_find_option(const char *option, char *buffer, int bufsize);
int cmdline_find_option_bool(const char *option);
+#endif
-/* early_serial_console.c */
-extern int early_serial_base;
-void console_init(void);
+#if CONFIG_RANDOMIZE_BASE
+/* aslr.c */
+unsigned char *choose_kernel_location(unsigned char *input,
+ unsigned long input_size,
+ unsigned char *output,
+ unsigned long output_size);
+/* cpuflags.c */
+bool has_cpuflag(int flag);
#else
+static inline
+unsigned char *choose_kernel_location(unsigned char *input,
+ unsigned long input_size,
+ unsigned char *output,
+ unsigned long output_size)
+{
+ return output;
+}
+#endif
+#ifdef CONFIG_EARLY_PRINTK
/* early_serial_console.c */
+extern int early_serial_base;
+void console_init(void);
+#else
static const int early_serial_base;
static inline void console_init(void)
{ }
-
#endif
#endif
diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c
index 958a641483dd..b669ab65bf6c 100644
--- a/arch/x86/boot/compressed/mkpiggy.c
+++ b/arch/x86/boot/compressed/mkpiggy.c
@@ -36,11 +36,12 @@ int main(int argc, char *argv[])
uint32_t olen;
long ilen;
unsigned long offs;
- FILE *f;
+ FILE *f = NULL;
+ int retval = 1;
if (argc < 2) {
fprintf(stderr, "Usage: %s compressed_file\n", argv[0]);
- return 1;
+ goto bail;
}
/* Get the information for the compressed kernel image first */
@@ -48,7 +49,7 @@ int main(int argc, char *argv[])
f = fopen(argv[1], "r");
if (!f) {
perror(argv[1]);
- return 1;
+ goto bail;
}
@@ -58,12 +59,11 @@ int main(int argc, char *argv[])
if (fread(&olen, sizeof(olen), 1, f) != 1) {
perror(argv[1]);
- return 1;
+ goto bail;
}
ilen = ftell(f);
olen = get_unaligned_le32(&olen);
- fclose(f);
/*
* Now we have the input (compressed) and output (uncompressed)
@@ -91,5 +91,9 @@ int main(int argc, char *argv[])
printf(".incbin \"%s\"\n", argv[1]);
printf("input_data_end:\n");
- return 0;
+ retval = 0;
+bail:
+ if (f)
+ fclose(f);
+ return retval;
}
diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c
index 4d3ff037201f..100a9a10076a 100644
--- a/arch/x86/boot/cpucheck.c
+++ b/arch/x86/boot/cpucheck.c
@@ -28,8 +28,6 @@
#include <asm/required-features.h>
#include <asm/msr-index.h>
-struct cpu_features cpu;
-static u32 cpu_vendor[3];
static u32 err_flags[NCAPINTS];
static const int req_level = CONFIG_X86_MINIMUM_CPU_FAMILY;
@@ -69,92 +67,8 @@ static int is_transmeta(void)
cpu_vendor[2] == A32('M', 'x', '8', '6');
}
-static int has_fpu(void)
-{
- u16 fcw = -1, fsw = -1;
- u32 cr0;
-
- asm("movl %%cr0,%0" : "=r" (cr0));
- if (cr0 & (X86_CR0_EM|X86_CR0_TS)) {
- cr0 &= ~(X86_CR0_EM|X86_CR0_TS);
- asm volatile("movl %0,%%cr0" : : "r" (cr0));
- }
-
- asm volatile("fninit ; fnstsw %0 ; fnstcw %1"
- : "+m" (fsw), "+m" (fcw));
-
- return fsw == 0 && (fcw & 0x103f) == 0x003f;
-}
-
-static int has_eflag(u32 mask)
-{
- u32 f0, f1;
-
- asm("pushfl ; "
- "pushfl ; "
- "popl %0 ; "
- "movl %0,%1 ; "
- "xorl %2,%1 ; "
- "pushl %1 ; "
- "popfl ; "
- "pushfl ; "
- "popl %1 ; "
- "popfl"
- : "=&r" (f0), "=&r" (f1)
- : "ri" (mask));
-
- return !!((f0^f1) & mask);
-}
-
-static void get_flags(void)
-{
- u32 max_intel_level, max_amd_level;
- u32 tfms;
-
- if (has_fpu())
- set_bit(X86_FEATURE_FPU, cpu.flags);
-
- if (has_eflag(X86_EFLAGS_ID)) {
- asm("cpuid"
- : "=a" (max_intel_level),
- "=b" (cpu_vendor[0]),
- "=d" (cpu_vendor[1]),
- "=c" (cpu_vendor[2])
- : "a" (0));
-
- if (max_intel_level >= 0x00000001 &&
- max_intel_level <= 0x0000ffff) {
- asm("cpuid"
- : "=a" (tfms),
- "=c" (cpu.flags[4]),
- "=d" (cpu.flags[0])
- : "a" (0x00000001)
- : "ebx");
- cpu.level = (tfms >> 8) & 15;
- cpu.model = (tfms >> 4) & 15;
- if (cpu.level >= 6)
- cpu.model += ((tfms >> 16) & 0xf) << 4;
- }
-
- asm("cpuid"
- : "=a" (max_amd_level)
- : "a" (0x80000000)
- : "ebx", "ecx", "edx");
-
- if (max_amd_level >= 0x80000001 &&
- max_amd_level <= 0x8000ffff) {
- u32 eax = 0x80000001;
- asm("cpuid"
- : "+a" (eax),
- "=c" (cpu.flags[6]),
- "=d" (cpu.flags[1])
- : : "ebx");
- }
- }
-}
-
/* Returns a bitmask of which words we have error bits in */
-static int check_flags(void)
+static int check_cpuflags(void)
{
u32 err;
int i;
@@ -187,8 +101,8 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr)
if (has_eflag(X86_EFLAGS_AC))
cpu.level = 4;
- get_flags();
- err = check_flags();
+ get_cpuflags();
+ err = check_cpuflags();
if (test_bit(X86_FEATURE_LM, cpu.flags))
cpu.level = 64;
@@ -207,8 +121,8 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr)
eax &= ~(1 << 15);
asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx));
- get_flags(); /* Make sure it really did something */
- err = check_flags();
+ get_cpuflags(); /* Make sure it really did something */
+ err = check_cpuflags();
} else if (err == 0x01 &&
!(err_flags[0] & ~(1 << X86_FEATURE_CX8)) &&
is_centaur() && cpu.model >= 6) {
@@ -223,7 +137,7 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr)
asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx));
set_bit(X86_FEATURE_CX8, cpu.flags);
- err = check_flags();
+ err = check_cpuflags();
} else if (err == 0x01 && is_transmeta()) {
/* Transmeta might have masked feature bits in word 0 */
@@ -238,7 +152,7 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr)
: : "ecx", "ebx");
asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx));
- err = check_flags();
+ err = check_cpuflags();
}
if (err_flags_ptr)
diff --git a/arch/x86/boot/cpuflags.c b/arch/x86/boot/cpuflags.c
new file mode 100644
index 000000000000..a9fcb7cfb241
--- /dev/null
+++ b/arch/x86/boot/cpuflags.c
@@ -0,0 +1,104 @@
+#include <linux/types.h>
+#include "bitops.h"
+
+#include <asm/processor-flags.h>
+#include <asm/required-features.h>
+#include <asm/msr-index.h>
+#include "cpuflags.h"
+
+struct cpu_features cpu;
+u32 cpu_vendor[3];
+
+static bool loaded_flags;
+
+static int has_fpu(void)
+{
+ u16 fcw = -1, fsw = -1;
+ unsigned long cr0;
+
+ asm volatile("mov %%cr0,%0" : "=r" (cr0));
+ if (cr0 & (X86_CR0_EM|X86_CR0_TS)) {
+ cr0 &= ~(X86_CR0_EM|X86_CR0_TS);
+ asm volatile("mov %0,%%cr0" : : "r" (cr0));
+ }
+
+ asm volatile("fninit ; fnstsw %0 ; fnstcw %1"
+ : "+m" (fsw), "+m" (fcw));
+
+ return fsw == 0 && (fcw & 0x103f) == 0x003f;
+}
+
+int has_eflag(unsigned long mask)
+{
+ unsigned long f0, f1;
+
+ asm volatile("pushf \n\t"
+ "pushf \n\t"
+ "pop %0 \n\t"
+ "mov %0,%1 \n\t"
+ "xor %2,%1 \n\t"
+ "push %1 \n\t"
+ "popf \n\t"
+ "pushf \n\t"
+ "pop %1 \n\t"
+ "popf"
+ : "=&r" (f0), "=&r" (f1)
+ : "ri" (mask));
+
+ return !!((f0^f1) & mask);
+}
+
+/* Handle x86_32 PIC using ebx. */
+#if defined(__i386__) && defined(__PIC__)
+# define EBX_REG "=r"
+#else
+# define EBX_REG "=b"
+#endif
+
+static inline void cpuid(u32 id, u32 *a, u32 *b, u32 *c, u32 *d)
+{
+ asm volatile(".ifnc %%ebx,%3 ; movl %%ebx,%3 ; .endif \n\t"
+ "cpuid \n\t"
+ ".ifnc %%ebx,%3 ; xchgl %%ebx,%3 ; .endif \n\t"
+ : "=a" (*a), "=c" (*c), "=d" (*d), EBX_REG (*b)
+ : "a" (id)
+ );
+}
+
+void get_cpuflags(void)
+{
+ u32 max_intel_level, max_amd_level;
+ u32 tfms;
+ u32 ignored;
+
+ if (loaded_flags)
+ return;
+ loaded_flags = true;
+
+ if (has_fpu())
+ set_bit(X86_FEATURE_FPU, cpu.flags);
+
+ if (has_eflag(X86_EFLAGS_ID)) {
+ cpuid(0x0, &max_intel_level, &cpu_vendor[0], &cpu_vendor[2],
+ &cpu_vendor[1]);
+
+ if (max_intel_level >= 0x00000001 &&
+ max_intel_level <= 0x0000ffff) {
+ cpuid(0x1, &tfms, &ignored, &cpu.flags[4],
+ &cpu.flags[0]);
+ cpu.level = (tfms >> 8) & 15;
+ cpu.model = (tfms >> 4) & 15;
+ if (cpu.level >= 6)
+ cpu.model += ((tfms >> 16) & 0xf) << 4;
+ }
+
+ cpuid(0x80000000, &max_amd_level, &ignored, &ignored,
+ &ignored);
+
+ if (max_amd_level >= 0x80000001 &&
+ max_amd_level <= 0x8000ffff) {
+ cpuid(0x80000001, &ignored, &ignored, &cpu.flags[6],
+ &cpu.flags[1]);
+ }
+ }
+}
diff --git a/arch/x86/boot/cpuflags.h b/arch/x86/boot/cpuflags.h
new file mode 100644
index 000000000000..ea97697e51e4
--- /dev/null
+++ b/arch/x86/boot/cpuflags.h
@@ -0,0 +1,19 @@
+#ifndef BOOT_CPUFLAGS_H
+#define BOOT_CPUFLAGS_H
+
+#include <asm/cpufeature.h>
+#include <asm/processor-flags.h>
+
+struct cpu_features {
+ int level; /* Family, or 64 for x86-64 */
+ int model;
+ u32 flags[NCAPINTS];
+};
+
+extern struct cpu_features cpu;
+extern u32 cpu_vendor[3];
+
+int has_eflag(unsigned long mask);
+void get_cpuflags(void);
+
+#endif
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c
index c941d6a8887f..8e15b22391fc 100644
--- a/arch/x86/boot/tools/build.c
+++ b/arch/x86/boot/tools/build.c
@@ -5,14 +5,15 @@
*/
/*
- * This file builds a disk-image from two different files:
+ * This file builds a disk-image from three different files:
*
* - setup: 8086 machine code, sets up system parm
* - system: 80386 code for actual system
+ * - zoffset.h: header with ZO_* defines
*
- * It does some checking that all files are of the correct type, and
- * just writes the result to stdout, removing headers and padding to
- * the right amount. It also writes some system data to stderr.
+ * It does some checking that all files are of the correct type, and writes
+ * the result to the specified destination, removing headers and padding to
+ * the right amount. It also writes some system data to stdout.
*/
/*
@@ -136,7 +137,7 @@ static void die(const char * str, ...)
static void usage(void)
{
- die("Usage: build setup system [zoffset.h] [> image]");
+ die("Usage: build setup system zoffset.h image");
}
#ifdef CONFIG_EFI_STUB
@@ -265,7 +266,7 @@ int main(int argc, char ** argv)
int c;
u32 sys_size;
struct stat sb;
- FILE *file;
+ FILE *file, *dest;
int fd;
void *kernel;
u32 crc = 0xffffffffUL;
@@ -280,10 +281,13 @@ int main(int argc, char ** argv)
startup_64 = 0x200;
#endif
- if (argc == 4)
- parse_zoffset(argv[3]);
- else if (argc != 3)
+ if (argc != 5)
usage();
+ parse_zoffset(argv[3]);
+
+ dest = fopen(argv[4], "w");
+ if (!dest)
+ die("Unable to write `%s': %m", argv[4]);
/* Copy the setup code */
file = fopen(argv[1], "r");
@@ -318,7 +322,7 @@ int main(int argc, char ** argv)
/* Set the default root device */
put_unaligned_le16(DEFAULT_ROOT_DEV, &buf[508]);
- fprintf(stderr, "Setup is %d bytes (padded to %d bytes).\n", c, i);
+ printf("Setup is %d bytes (padded to %d bytes).\n", c, i);
/* Open and stat the kernel file */
fd = open(argv[2], O_RDONLY);
@@ -327,7 +331,7 @@ int main(int argc, char ** argv)
if (fstat(fd, &sb))
die("Unable to stat `%s': %m", argv[2]);
sz = sb.st_size;
- fprintf (stderr, "System is %d kB\n", (sz+1023)/1024);
+ printf("System is %d kB\n", (sz+1023)/1024);
kernel = mmap(NULL, sz, PROT_READ, MAP_SHARED, fd, 0);
if (kernel == MAP_FAILED)
die("Unable to mmap '%s': %m", argv[2]);
@@ -348,27 +352,31 @@ int main(int argc, char ** argv)
#endif
crc = partial_crc32(buf, i, crc);
- if (fwrite(buf, 1, i, stdout) != i)
+ if (fwrite(buf, 1, i, dest) != i)
die("Writing setup failed");
/* Copy the kernel code */
crc = partial_crc32(kernel, sz, crc);
- if (fwrite(kernel, 1, sz, stdout) != sz)
+ if (fwrite(kernel, 1, sz, dest) != sz)
die("Writing kernel failed");
/* Add padding leaving 4 bytes for the checksum */
while (sz++ < (sys_size*16) - 4) {
crc = partial_crc32_one('\0', crc);
- if (fwrite("\0", 1, 1, stdout) != 1)
+ if (fwrite("\0", 1, 1, dest) != 1)
die("Writing padding failed");
}
/* Write the CRC */
- fprintf(stderr, "CRC %x\n", crc);
+ printf("CRC %x\n", crc);
put_unaligned_le32(crc, buf);
- if (fwrite(buf, 1, 4, stdout) != 4)
+ if (fwrite(buf, 1, 4, dest) != 4)
die("Writing CRC failed");
+ /* Catch any delayed write failures */
+ if (fclose(dest))
+ die("Writing image failed");
+
close(fd);
/* Everything is OK */
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 7d6ba9db1be9..e0fc24db234a 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -3,8 +3,9 @@
#
avx_supported := $(call as-instr,vpxor %xmm0$(comma)%xmm0$(comma)%xmm0,yes,no)
+avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
+ $(comma)4)$(comma)%ymm2,yes,no)
-obj-$(CONFIG_CRYPTO_ABLK_HELPER_X86) += ablk_helper.o
obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o
obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
diff --git a/arch/x86/crypto/ablk_helper.c b/arch/x86/crypto/ablk_helper.c
deleted file mode 100644
index 43282fe04a8b..000000000000
--- a/arch/x86/crypto/ablk_helper.c
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
- * Shared async block cipher helpers
- *
- * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
- *
- * Based on aesni-intel_glue.c by:
- * Copyright (C) 2008, Intel Corp.
- * Author: Huang Ying <ying.huang@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
- * USA
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/crypto.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <crypto/algapi.h>
-#include <crypto/cryptd.h>
-#include <asm/i387.h>
-#include <asm/crypto/ablk_helper.h>
-
-int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
- unsigned int key_len)
-{
- struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
- struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base;
- int err;
-
- crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
- crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm)
- & CRYPTO_TFM_REQ_MASK);
- err = crypto_ablkcipher_setkey(child, key, key_len);
- crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child)
- & CRYPTO_TFM_RES_MASK);
- return err;
-}
-EXPORT_SYMBOL_GPL(ablk_set_key);
-
-int __ablk_encrypt(struct ablkcipher_request *req)
-{
- struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
- struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
- struct blkcipher_desc desc;
-
- desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
- desc.info = req->info;
- desc.flags = 0;
-
- return crypto_blkcipher_crt(desc.tfm)->encrypt(
- &desc, req->dst, req->src, req->nbytes);
-}
-EXPORT_SYMBOL_GPL(__ablk_encrypt);
-
-int ablk_encrypt(struct ablkcipher_request *req)
-{
- struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
- struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-
- if (!irq_fpu_usable()) {
- struct ablkcipher_request *cryptd_req =
- ablkcipher_request_ctx(req);
-
- memcpy(cryptd_req, req, sizeof(*req));
- ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
-
- return crypto_ablkcipher_encrypt(cryptd_req);
- } else {
- return __ablk_encrypt(req);
- }
-}
-EXPORT_SYMBOL_GPL(ablk_encrypt);
-
-int ablk_decrypt(struct ablkcipher_request *req)
-{
- struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
- struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-
- if (!irq_fpu_usable()) {
- struct ablkcipher_request *cryptd_req =
- ablkcipher_request_ctx(req);
-
- memcpy(cryptd_req, req, sizeof(*req));
- ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
-
- return crypto_ablkcipher_decrypt(cryptd_req);
- } else {
- struct blkcipher_desc desc;
-
- desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
- desc.info = req->info;
- desc.flags = 0;
-
- return crypto_blkcipher_crt(desc.tfm)->decrypt(
- &desc, req->dst, req->src, req->nbytes);
- }
-}
-EXPORT_SYMBOL_GPL(ablk_decrypt);
-
-void ablk_exit(struct crypto_tfm *tfm)
-{
- struct async_helper_ctx *ctx = crypto_tfm_ctx(tfm);
-
- cryptd_free_ablkcipher(ctx->cryptd_tfm);
-}
-EXPORT_SYMBOL_GPL(ablk_exit);
-
-int ablk_init_common(struct crypto_tfm *tfm, const char *drv_name)
-{
- struct async_helper_ctx *ctx = crypto_tfm_ctx(tfm);
- struct cryptd_ablkcipher *cryptd_tfm;
-
- cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0);
- if (IS_ERR(cryptd_tfm))
- return PTR_ERR(cryptd_tfm);
-
- ctx->cryptd_tfm = cryptd_tfm;
- tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) +
- crypto_ablkcipher_reqsize(&cryptd_tfm->base);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(ablk_init_common);
-
-int ablk_init(struct crypto_tfm *tfm)
-{
- char drv_name[CRYPTO_MAX_ALG_NAME];
-
- snprintf(drv_name, sizeof(drv_name), "__driver-%s",
- crypto_tfm_alg_driver_name(tfm));
-
- return ablk_init_common(tfm, drv_name);
-}
-EXPORT_SYMBOL_GPL(ablk_init);
-
-MODULE_LICENSE("GPL");
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index f80e668785c0..835488b745ee 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -34,7 +34,7 @@
#include <asm/cpu_device_id.h>
#include <asm/i387.h>
#include <asm/crypto/aes.h>
-#include <asm/crypto/ablk_helper.h>
+#include <crypto/ablk_helper.h>
#include <crypto/scatterwalk.h>
#include <crypto/internal/aead.h>
#include <linux/workqueue.h>
diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c
index 414fe5d7946b..4209a76fcdaa 100644
--- a/arch/x86/crypto/camellia_aesni_avx2_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c
@@ -14,6 +14,7 @@
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/err.h>
+#include <crypto/ablk_helper.h>
#include <crypto/algapi.h>
#include <crypto/ctr.h>
#include <crypto/lrw.h>
@@ -21,7 +22,6 @@
#include <asm/xcr.h>
#include <asm/xsave.h>
#include <asm/crypto/camellia.h>
-#include <asm/crypto/ablk_helper.h>
#include <asm/crypto/glue_helper.h>
#define CAMELLIA_AESNI_PARALLEL_BLOCKS 16
diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c
index 37fd0c0a81ea..87a041a10f4a 100644
--- a/arch/x86/crypto/camellia_aesni_avx_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx_glue.c
@@ -14,6 +14,7 @@
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/err.h>
+#include <crypto/ablk_helper.h>
#include <crypto/algapi.h>
#include <crypto/ctr.h>
#include <crypto/lrw.h>
@@ -21,7 +22,6 @@
#include <asm/xcr.h>
#include <asm/xsave.h>
#include <asm/crypto/camellia.h>
-#include <asm/crypto/ablk_helper.h>
#include <asm/crypto/glue_helper.h>
#define CAMELLIA_AESNI_PARALLEL_BLOCKS 16
diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c
index c6631813dc11..e6a3700489b9 100644
--- a/arch/x86/crypto/cast5_avx_glue.c
+++ b/arch/x86/crypto/cast5_avx_glue.c
@@ -26,13 +26,13 @@
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/err.h>
+#include <crypto/ablk_helper.h>
#include <crypto/algapi.h>
#include <crypto/cast5.h>
#include <crypto/cryptd.h>
#include <crypto/ctr.h>
#include <asm/xcr.h>
#include <asm/xsave.h>
-#include <asm/crypto/ablk_helper.h>
#include <asm/crypto/glue_helper.h>
#define CAST5_PARALLEL_BLOCKS 16
diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c
index 8d0dfb86a559..09f3677393e4 100644
--- a/arch/x86/crypto/cast6_avx_glue.c
+++ b/arch/x86/crypto/cast6_avx_glue.c
@@ -28,6 +28,7 @@
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/err.h>
+#include <crypto/ablk_helper.h>
#include <crypto/algapi.h>
#include <crypto/cast6.h>
#include <crypto/cryptd.h>
@@ -37,7 +38,6 @@
#include <crypto/xts.h>
#include <asm/xcr.h>
#include <asm/xsave.h>
-#include <asm/crypto/ablk_helper.h>
#include <asm/crypto/glue_helper.h>
#define CAST6_PARALLEL_BLOCKS 8
diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c
index 23aabc6c20a5..2fae489b1524 100644
--- a/arch/x86/crypto/serpent_avx2_glue.c
+++ b/arch/x86/crypto/serpent_avx2_glue.c
@@ -14,6 +14,7 @@
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/err.h>
+#include <crypto/ablk_helper.h>
#include <crypto/algapi.h>
#include <crypto/ctr.h>
#include <crypto/lrw.h>
@@ -22,7 +23,6 @@
#include <asm/xcr.h>
#include <asm/xsave.h>
#include <asm/crypto/serpent-avx.h>
-#include <asm/crypto/ablk_helper.h>
#include <asm/crypto/glue_helper.h>
#define SERPENT_AVX2_PARALLEL_BLOCKS 16
diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c
index 9ae83cf8d21e..ff4870870972 100644
--- a/arch/x86/crypto/serpent_avx_glue.c
+++ b/arch/x86/crypto/serpent_avx_glue.c
@@ -28,6 +28,7 @@
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/err.h>
+#include <crypto/ablk_helper.h>
#include <crypto/algapi.h>
#include <crypto/serpent.h>
#include <crypto/cryptd.h>
@@ -38,7 +39,6 @@
#include <asm/xcr.h>
#include <asm/xsave.h>
#include <asm/crypto/serpent-avx.h>
-#include <asm/crypto/ablk_helper.h>
#include <asm/crypto/glue_helper.h>
/* 8-way parallel cipher functions */
diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c
index 97a356ece24d..8c95f8637306 100644
--- a/arch/x86/crypto/serpent_sse2_glue.c
+++ b/arch/x86/crypto/serpent_sse2_glue.c
@@ -34,6 +34,7 @@
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/err.h>
+#include <crypto/ablk_helper.h>
#include <crypto/algapi.h>
#include <crypto/serpent.h>
#include <crypto/cryptd.h>
@@ -42,7 +43,6 @@
#include <crypto/lrw.h>
#include <crypto/xts.h>
#include <asm/crypto/serpent-sse2.h>
-#include <asm/crypto/ablk_helper.h>
#include <asm/crypto/glue_helper.h>
static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src)
diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c
index 50226c4b86ed..f248546da1ca 100644
--- a/arch/x86/crypto/sha256_ssse3_glue.c
+++ b/arch/x86/crypto/sha256_ssse3_glue.c
@@ -281,7 +281,7 @@ static int __init sha256_ssse3_mod_init(void)
/* allow AVX to override SSSE3, it's a little faster */
if (avx_usable()) {
#ifdef CONFIG_AS_AVX2
- if (boot_cpu_has(X86_FEATURE_AVX2))
+ if (boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_BMI2))
sha256_transform_asm = sha256_transform_rorx;
else
#endif
@@ -319,4 +319,4 @@ MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm, Supplemental SSE3 accelerated");
MODULE_ALIAS("sha256");
-MODULE_ALIAS("sha384");
+MODULE_ALIAS("sha224");
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c
index a62ba541884e..4e3c665be129 100644
--- a/arch/x86/crypto/twofish_avx_glue.c
+++ b/arch/x86/crypto/twofish_avx_glue.c
@@ -28,6 +28,7 @@
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/err.h>
+#include <crypto/ablk_helper.h>
#include <crypto/algapi.h>
#include <crypto/twofish.h>
#include <crypto/cryptd.h>
@@ -39,7 +40,6 @@
#include <asm/xcr.h>
#include <asm/xsave.h>
#include <asm/crypto/twofish.h>
-#include <asm/crypto/ablk_helper.h>
#include <asm/crypto/glue_helper.h>
#include <crypto/scatterwalk.h>
#include <linux/workqueue.h>
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index b1977bad5435..c8c1e700c26e 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -26,6 +26,7 @@
#include <acpi/pdc_intel.h>
#include <asm/numa.h>
+#include <asm/fixmap.h>
#include <asm/processor.h>
#include <asm/mmu.h>
#include <asm/mpspec.h>
diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h
index 0d9ec770f2f8..e6a92455740e 100644
--- a/arch/x86/include/asm/archrandom.h
+++ b/arch/x86/include/asm/archrandom.h
@@ -39,6 +39,20 @@
#ifdef CONFIG_ARCH_RANDOM
+/* Instead of arch_get_random_long() when alternatives haven't run. */
+static inline int rdrand_long(unsigned long *v)
+{
+ int ok;
+ asm volatile("1: " RDRAND_LONG "\n\t"
+ "jc 2f\n\t"
+ "decl %0\n\t"
+ "jnz 1b\n\t"
+ "2:"
+ : "=r" (ok), "=a" (*v)
+ : "0" (RDRAND_RETRY_LOOPS));
+ return ok;
+}
+
#define GET_RANDOM(name, type, rdrand, nop) \
static inline int name(type *v) \
{ \
@@ -68,6 +82,13 @@ GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP3);
#endif /* CONFIG_X86_64 */
+#else
+
+static inline int rdrand_long(unsigned long *v)
+{
+ return 0;
+}
+
#endif /* CONFIG_ARCH_RANDOM */
extern void x86_init_rdrand(struct cpuinfo_x86 *c);
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 722aa3b04624..da31c8b8a92d 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -6,6 +6,7 @@
#include <asm/processor.h>
#include <asm/alternative.h>
#include <asm/cmpxchg.h>
+#include <asm/rmwcc.h>
/*
* Atomic operations that C can't guarantee us. Useful for
@@ -76,12 +77,7 @@ static inline void atomic_sub(int i, atomic_t *v)
*/
static inline int atomic_sub_and_test(int i, atomic_t *v)
{
- unsigned char c;
-
- asm volatile(LOCK_PREFIX "subl %2,%0; sete %1"
- : "+m" (v->counter), "=qm" (c)
- : "ir" (i) : "memory");
- return c;
+ GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, i, "%0", "e");
}
/**
@@ -118,12 +114,7 @@ static inline void atomic_dec(atomic_t *v)
*/
static inline int atomic_dec_and_test(atomic_t *v)
{
- unsigned char c;
-
- asm volatile(LOCK_PREFIX "decl %0; sete %1"
- : "+m" (v->counter), "=qm" (c)
- : : "memory");
- return c != 0;
+ GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", "e");
}
/**
@@ -136,12 +127,7 @@ static inline int atomic_dec_and_test(atomic_t *v)
*/
static inline int atomic_inc_and_test(atomic_t *v)
{
- unsigned char c;
-
- asm volatile(LOCK_PREFIX "incl %0; sete %1"
- : "+m" (v->counter), "=qm" (c)
- : : "memory");
- return c != 0;
+ GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", "e");
}
/**
@@ -155,12 +141,7 @@ static inline int atomic_inc_and_test(atomic_t *v)
*/
static inline int atomic_add_negative(int i, atomic_t *v)
{
- unsigned char c;
-
- asm volatile(LOCK_PREFIX "addl %2,%0; sets %1"
- : "+m" (v->counter), "=qm" (c)
- : "ir" (i) : "memory");
- return c;
+ GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, i, "%0", "s");
}
/**
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 0e1cbfc8ee06..3f065c985aee 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -72,12 +72,7 @@ static inline void atomic64_sub(long i, atomic64_t *v)
*/
static inline int atomic64_sub_and_test(long i, atomic64_t *v)
{
- unsigned char c;
-
- asm volatile(LOCK_PREFIX "subq %2,%0; sete %1"
- : "=m" (v->counter), "=qm" (c)
- : "er" (i), "m" (v->counter) : "memory");
- return c;
+ GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, i, "%0", "e");
}
/**
@@ -116,12 +111,7 @@ static inline void atomic64_dec(atomic64_t *v)
*/
static inline int atomic64_dec_and_test(atomic64_t *v)
{
- unsigned char c;
-
- asm volatile(LOCK_PREFIX "decq %0; sete %1"
- : "=m" (v->counter), "=qm" (c)
- : "m" (v->counter) : "memory");
- return c != 0;
+ GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", "e");
}
/**
@@ -134,12 +124,7 @@ static inline int atomic64_dec_and_test(atomic64_t *v)
*/
static inline int atomic64_inc_and_test(atomic64_t *v)
{
- unsigned char c;
-
- asm volatile(LOCK_PREFIX "incq %0; sete %1"
- : "=m" (v->counter), "=qm" (c)
- : "m" (v->counter) : "memory");
- return c != 0;
+ GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", "e");
}
/**
@@ -153,12 +138,7 @@ static inline int atomic64_inc_and_test(atomic64_t *v)
*/
static inline int atomic64_add_negative(long i, atomic64_t *v)
{
- unsigned char c;
-
- asm volatile(LOCK_PREFIX "addq %2,%0; sets %1"
- : "=m" (v->counter), "=qm" (c)
- : "er" (i), "m" (v->counter) : "memory");
- return c;
+ GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, i, "%0", "s");
}
/**
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 41639ce8fd63..6d76d0935989 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -14,6 +14,7 @@
#include <linux/compiler.h>
#include <asm/alternative.h>
+#include <asm/rmwcc.h>
#if BITS_PER_LONG == 32
# define _BITOPS_LONG_SHIFT 5
@@ -204,12 +205,7 @@ static inline void change_bit(long nr, volatile unsigned long *addr)
*/
static inline int test_and_set_bit(long nr, volatile unsigned long *addr)
{
- int oldbit;
-
- asm volatile(LOCK_PREFIX "bts %2,%1\n\t"
- "sbb %0,%0" : "=r" (oldbit), ADDR : "Ir" (nr) : "memory");
-
- return oldbit;
+ GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, nr, "%0", "c");
}
/**
@@ -255,13 +251,7 @@ static inline int __test_and_set_bit(long nr, volatile unsigned long *addr)
*/
static inline int test_and_clear_bit(long nr, volatile unsigned long *addr)
{
- int oldbit;
-
- asm volatile(LOCK_PREFIX "btr %2,%1\n\t"
- "sbb %0,%0"
- : "=r" (oldbit), ADDR : "Ir" (nr) : "memory");
-
- return oldbit;
+ GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, nr, "%0", "c");
}
/**
@@ -314,13 +304,7 @@ static inline int __test_and_change_bit(long nr, volatile unsigned long *addr)
*/
static inline int test_and_change_bit(long nr, volatile unsigned long *addr)
{
- int oldbit;
-
- asm volatile(LOCK_PREFIX "btc %2,%1\n\t"
- "sbb %0,%0"
- : "=r" (oldbit), ADDR : "Ir" (nr) : "memory");
-
- return oldbit;
+ GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, nr, "%0", "c");
}
static __always_inline int constant_test_bit(long nr, const volatile unsigned long *addr)
diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h
index 0fa675033912..cb4c73bfeb48 100644
--- a/arch/x86/include/asm/calling.h
+++ b/arch/x86/include/asm/calling.h
@@ -48,6 +48,8 @@ For 32-bit we have the following conventions - kernel is built with
#include <asm/dwarf2.h>
+#ifdef CONFIG_X86_64
+
/*
* 64-bit system call stack frame layout defines and helpers,
* for assembly code:
@@ -192,3 +194,51 @@ For 32-bit we have the following conventions - kernel is built with
.macro icebp
.byte 0xf1
.endm
+
+#else /* CONFIG_X86_64 */
+
+/*
+ * For 32bit only simplified versions of SAVE_ALL/RESTORE_ALL. These
+ * are different from the entry_32.S versions in not changing the segment
+ * registers. So only suitable for in kernel use, not when transitioning
+ * from or to user space. The resulting stack frame is not a standard
+ * pt_regs frame. The main use case is calling C code from assembler
+ * when all the registers need to be preserved.
+ */
+
+ .macro SAVE_ALL
+ pushl_cfi %eax
+ CFI_REL_OFFSET eax, 0
+ pushl_cfi %ebp
+ CFI_REL_OFFSET ebp, 0
+ pushl_cfi %edi
+ CFI_REL_OFFSET edi, 0
+ pushl_cfi %esi
+ CFI_REL_OFFSET esi, 0
+ pushl_cfi %edx
+ CFI_REL_OFFSET edx, 0
+ pushl_cfi %ecx
+ CFI_REL_OFFSET ecx, 0
+ pushl_cfi %ebx
+ CFI_REL_OFFSET ebx, 0
+ .endm
+
+ .macro RESTORE_ALL
+ popl_cfi %ebx
+ CFI_RESTORE ebx
+ popl_cfi %ecx
+ CFI_RESTORE ecx
+ popl_cfi %edx
+ CFI_RESTORE edx
+ popl_cfi %esi
+ CFI_RESTORE esi
+ popl_cfi %edi
+ CFI_RESTORE edi
+ popl_cfi %ebp
+ CFI_RESTORE ebp
+ popl_cfi %eax
+ CFI_RESTORE eax
+ .endm
+
+#endif /* CONFIG_X86_64 */
+
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index d3f5c63078d8..89270b4318db 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -374,7 +374,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
* Catch too early usage of this before alternatives
* have run.
*/
- asm goto("1: jmp %l[t_warn]\n"
+ asm_volatile_goto("1: jmp %l[t_warn]\n"
"2:\n"
".section .altinstructions,\"a\"\n"
" .long 1b - .\n"
@@ -388,7 +388,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
#endif
- asm goto("1: jmp %l[t_no]\n"
+ asm_volatile_goto("1: jmp %l[t_no]\n"
"2:\n"
".section .altinstructions,\"a\"\n"
" .long 1b - .\n"
@@ -453,7 +453,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
* have. Thus, we force the jump to the widest, 4-byte, signed relative
* offset even though the last would often fit in less bytes.
*/
- asm goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n"
+ asm_volatile_goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n"
"2:\n"
".section .altinstructions,\"a\"\n"
" .long 1b - .\n" /* src offset */
diff --git a/arch/x86/include/asm/crypto/ablk_helper.h b/arch/x86/include/asm/crypto/ablk_helper.h
deleted file mode 100644
index 4f93df50c23e..000000000000
--- a/arch/x86/include/asm/crypto/ablk_helper.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Shared async block cipher helpers
- */
-
-#ifndef _CRYPTO_ABLK_HELPER_H
-#define _CRYPTO_ABLK_HELPER_H
-
-#include <linux/crypto.h>
-#include <linux/kernel.h>
-#include <crypto/cryptd.h>
-
-struct async_helper_ctx {
- struct cryptd_ablkcipher *cryptd_tfm;
-};
-
-extern int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
- unsigned int key_len);
-
-extern int __ablk_encrypt(struct ablkcipher_request *req);
-
-extern int ablk_encrypt(struct ablkcipher_request *req);
-
-extern int ablk_decrypt(struct ablkcipher_request *req);
-
-extern void ablk_exit(struct crypto_tfm *tfm);
-
-extern int ablk_init_common(struct crypto_tfm *tfm, const char *drv_name);
-
-extern int ablk_init(struct crypto_tfm *tfm);
-
-#endif /* _CRYPTO_ABLK_HELPER_H */
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 0062a0125041..65c6e6e3a552 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -109,6 +109,8 @@ static inline bool efi_is_native(void)
return IS_ENABLED(CONFIG_X86_64) == efi_enabled(EFI_64BIT);
}
+extern struct console early_efi_console;
+
#else
/*
* IF EFI is not configured, have the EFI calls return -ENOSYS.
diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h
new file mode 100644
index 000000000000..459769d39263
--- /dev/null
+++ b/arch/x86/include/asm/intel-mid.h
@@ -0,0 +1,113 @@
+/*
+ * intel-mid.h: Intel MID specific setup code
+ *
+ * (C) Copyright 2009 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+#ifndef _ASM_X86_INTEL_MID_H
+#define _ASM_X86_INTEL_MID_H
+
+#include <linux/sfi.h>
+#include <linux/platform_device.h>
+
+extern int intel_mid_pci_init(void);
+extern int get_gpio_by_name(const char *name);
+extern void intel_scu_device_register(struct platform_device *pdev);
+extern int __init sfi_parse_mrtc(struct sfi_table_header *table);
+extern int __init sfi_parse_mtmr(struct sfi_table_header *table);
+extern int sfi_mrtc_num;
+extern struct sfi_rtc_table_entry sfi_mrtc_array[];
+
+/*
+ * Here defines the array of devices platform data that IAFW would export
+ * through SFI "DEVS" table, we use name and type to match the device and
+ * its platform data.
+ */
+struct devs_id {
+ char name[SFI_NAME_LEN + 1];
+ u8 type;
+ u8 delay;
+ void *(*get_platform_data)(void *info);
+ /* Custom handler for devices */
+ void (*device_handler)(struct sfi_device_table_entry *pentry,
+ struct devs_id *dev);
+};
+
+#define sfi_device(i) \
+ static const struct devs_id *const __intel_mid_sfi_##i##_dev __used \
+ __attribute__((__section__(".x86_intel_mid_dev.init"))) = &i
+
+/*
+ * Medfield is the follow-up of Moorestown, it combines two chip solution into
+ * one. Other than that it also added always-on and constant tsc and lapic
+ * timers. Medfield is the platform name, and the chip name is called Penwell
+ * we treat Medfield/Penwell as a variant of Moorestown. Penwell can be
+ * identified via MSRs.
+ */
+enum intel_mid_cpu_type {
+ /* 1 was Moorestown */
+ INTEL_MID_CPU_CHIP_PENWELL = 2,
+};
+
+extern enum intel_mid_cpu_type __intel_mid_cpu_chip;
+
+#ifdef CONFIG_X86_INTEL_MID
+
+static inline enum intel_mid_cpu_type intel_mid_identify_cpu(void)
+{
+ return __intel_mid_cpu_chip;
+}
+
+static inline bool intel_mid_has_msic(void)
+{
+ return (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_PENWELL);
+}
+
+#else /* !CONFIG_X86_INTEL_MID */
+
+#define intel_mid_identify_cpu() (0)
+#define intel_mid_has_msic() (0)
+
+#endif /* !CONFIG_X86_INTEL_MID */
+
+enum intel_mid_timer_options {
+ INTEL_MID_TIMER_DEFAULT,
+ INTEL_MID_TIMER_APBT_ONLY,
+ INTEL_MID_TIMER_LAPIC_APBT,
+};
+
+extern enum intel_mid_timer_options intel_mid_timer_options;
+
+/*
+ * Penwell uses spread spectrum clock, so the freq number is not exactly
+ * the same as reported by MSR based on SDM.
+ */
+#define PENWELL_FSB_FREQ_83SKU 83200
+#define PENWELL_FSB_FREQ_100SKU 99840
+
+#define SFI_MTMR_MAX_NUM 8
+#define SFI_MRTC_MAX 8
+
+extern struct console early_mrst_console;
+extern void mrst_early_console_init(void);
+
+extern struct console early_hsu_console;
+extern void hsu_early_console_init(const char *);
+
+extern void intel_scu_devices_create(void);
+extern void intel_scu_devices_destroy(void);
+
+/* VRTC timer */
+#define MRST_VRTC_MAP_SZ (1024)
+/*#define MRST_VRTC_PGOFFSET (0xc00) */
+
+extern void intel_mid_rtc_init(void);
+
+/* the offset for the mapping of global gpio pin to irq */
+#define INTEL_MID_IRQ_OFFSET 0x100
+
+#endif /* _ASM_X86_INTEL_MID_H */
diff --git a/arch/x86/include/asm/mrst-vrtc.h b/arch/x86/include/asm/intel_mid_vrtc.h
index 1e69a75412a4..86ff4685c409 100644
--- a/arch/x86/include/asm/mrst-vrtc.h
+++ b/arch/x86/include/asm/intel_mid_vrtc.h
@@ -1,5 +1,5 @@
-#ifndef _MRST_VRTC_H
-#define _MRST_VRTC_H
+#ifndef _INTEL_MID_VRTC_H
+#define _INTEL_MID_VRTC_H
extern unsigned char vrtc_cmos_read(unsigned char reg);
extern void vrtc_cmos_write(unsigned char val, unsigned char reg);
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 64507f35800c..6a2cefb4395a 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -18,7 +18,7 @@
static __always_inline bool arch_static_branch(struct static_key *key)
{
- asm goto("1:"
+ asm_volatile_goto("1:"
".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t"
".pushsection __jump_table, \"aw\" \n\t"
_ASM_ALIGN "\n\t"
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index 2c37aadcbc35..32ce71375b21 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -21,7 +21,7 @@ enum die_val {
DIE_NMIUNKNOWN,
};
-extern void printk_address(unsigned long address, int reliable);
+extern void printk_address(unsigned long address);
extern void die(const char *, struct pt_regs *,long);
extern int __must_check __die(const char *, struct pt_regs *, long);
extern void show_trace(struct task_struct *t, struct pt_regs *regs,
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c76ff74a98f2..5cbf3166257c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -79,6 +79,13 @@
#define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1))
#define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE)
+static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
+{
+ /* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */
+ return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
+ (base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
+}
+
#define SELECTOR_TI_MASK (1 << 2)
#define SELECTOR_RPL_MASK 0x03
@@ -253,7 +260,6 @@ struct kvm_pio_request {
* mode.
*/
struct kvm_mmu {
- void (*new_cr3)(struct kvm_vcpu *vcpu);
void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root);
unsigned long (*get_cr3)(struct kvm_vcpu *vcpu);
u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index);
@@ -261,7 +267,6 @@ struct kvm_mmu {
bool prefault);
void (*inject_page_fault)(struct kvm_vcpu *vcpu,
struct x86_exception *fault);
- void (*free)(struct kvm_vcpu *vcpu);
gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access,
struct x86_exception *exception);
gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access);
@@ -389,6 +394,8 @@ struct kvm_vcpu_arch {
struct fpu guest_fpu;
u64 xcr0;
+ u64 guest_supported_xcr0;
+ u32 guest_xstate_size;
struct kvm_pio_request pio;
void *pio_data;
@@ -780,11 +787,11 @@ void kvm_mmu_module_exit(void);
void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
int kvm_mmu_create(struct kvm_vcpu *vcpu);
-int kvm_mmu_setup(struct kvm_vcpu *vcpu);
+void kvm_mmu_setup(struct kvm_vcpu *vcpu);
void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
u64 dirty_mask, u64 nx_mask, u64 x_mask);
-int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
+void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
struct kvm_memory_slot *slot,
@@ -922,6 +929,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code,
void *insn, int insn_len);
void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
+void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu);
void kvm_enable_tdp(void);
void kvm_disable_tdp(void);
diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h
index 2d89e3980cbd..5b23e605e707 100644
--- a/arch/x86/include/asm/local.h
+++ b/arch/x86/include/asm/local.h
@@ -52,12 +52,7 @@ static inline void local_sub(long i, local_t *l)
*/
static inline int local_sub_and_test(long i, local_t *l)
{
- unsigned char c;
-
- asm volatile(_ASM_SUB "%2,%0; sete %1"
- : "+m" (l->a.counter), "=qm" (c)
- : "ir" (i) : "memory");
- return c;
+ GEN_BINARY_RMWcc(_ASM_SUB, l->a.counter, i, "%0", "e");
}
/**
@@ -70,12 +65,7 @@ static inline int local_sub_and_test(long i, local_t *l)
*/
static inline int local_dec_and_test(local_t *l)
{
- unsigned char c;
-
- asm volatile(_ASM_DEC "%0; sete %1"
- : "+m" (l->a.counter), "=qm" (c)
- : : "memory");
- return c != 0;
+ GEN_UNARY_RMWcc(_ASM_DEC, l->a.counter, "%0", "e");
}
/**
@@ -88,12 +78,7 @@ static inline int local_dec_and_test(local_t *l)
*/
static inline int local_inc_and_test(local_t *l)
{
- unsigned char c;
-
- asm volatile(_ASM_INC "%0; sete %1"
- : "+m" (l->a.counter), "=qm" (c)
- : : "memory");
- return c != 0;
+ GEN_UNARY_RMWcc(_ASM_INC, l->a.counter, "%0", "e");
}
/**
@@ -107,12 +92,7 @@ static inline int local_inc_and_test(local_t *l)
*/
static inline int local_add_negative(long i, local_t *l)
{
- unsigned char c;
-
- asm volatile(_ASM_ADD "%2,%0; sets %1"
- : "+m" (l->a.counter), "=qm" (c)
- : "ir" (i) : "memory");
- return c;
+ GEN_BINARY_RMWcc(_ASM_ADD, l->a.counter, i, "%0", "s");
}
/**
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index cbe6b9e404ce..c696a8687567 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -16,6 +16,7 @@
#define MCG_EXT_CNT_SHIFT 16
#define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT)
#define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */
+#define MCG_ELOG_P (1ULL<<26) /* Extended error log supported */
/* MCG_STATUS register defines */
#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */
diff --git a/arch/x86/include/asm/misc.h b/arch/x86/include/asm/misc.h
new file mode 100644
index 000000000000..475f5bbc7f53
--- /dev/null
+++ b/arch/x86/include/asm/misc.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_X86_MISC_H
+#define _ASM_X86_MISC_H
+
+int num_digits(int val);
+
+#endif /* _ASM_X86_MISC_H */
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index 626cf70082d7..3142a94c7b4b 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -94,7 +94,7 @@ static inline void early_reserve_e820_mpc_new(void) { }
#define default_get_smp_config x86_init_uint_noop
#endif
-void generic_processor_info(int apicid, int version);
+int generic_processor_info(int apicid, int version);
#ifdef CONFIG_ACPI
extern void mp_register_ioapic(int id, u32 address, u32 gsi_base);
extern void mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
diff --git a/arch/x86/include/asm/mrst.h b/arch/x86/include/asm/mrst.h
deleted file mode 100644
index fc18bf3ce7c8..000000000000
--- a/arch/x86/include/asm/mrst.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * mrst.h: Intel Moorestown platform specific setup code
- *
- * (C) Copyright 2009 Intel Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
- */
-#ifndef _ASM_X86_MRST_H
-#define _ASM_X86_MRST_H
-
-#include <linux/sfi.h>
-
-extern int pci_mrst_init(void);
-extern int __init sfi_parse_mrtc(struct sfi_table_header *table);
-extern int sfi_mrtc_num;
-extern struct sfi_rtc_table_entry sfi_mrtc_array[];
-
-/*
- * Medfield is the follow-up of Moorestown, it combines two chip solution into
- * one. Other than that it also added always-on and constant tsc and lapic
- * timers. Medfield is the platform name, and the chip name is called Penwell
- * we treat Medfield/Penwell as a variant of Moorestown. Penwell can be
- * identified via MSRs.
- */
-enum mrst_cpu_type {
- /* 1 was Moorestown */
- MRST_CPU_CHIP_PENWELL = 2,
-};
-
-extern enum mrst_cpu_type __mrst_cpu_chip;
-
-#ifdef CONFIG_X86_INTEL_MID
-
-static inline enum mrst_cpu_type mrst_identify_cpu(void)
-{
- return __mrst_cpu_chip;
-}
-
-#else /* !CONFIG_X86_INTEL_MID */
-
-#define mrst_identify_cpu() (0)
-
-#endif /* !CONFIG_X86_INTEL_MID */
-
-enum mrst_timer_options {
- MRST_TIMER_DEFAULT,
- MRST_TIMER_APBT_ONLY,
- MRST_TIMER_LAPIC_APBT,
-};
-
-extern enum mrst_timer_options mrst_timer_options;
-
-/*
- * Penwell uses spread spectrum clock, so the freq number is not exactly
- * the same as reported by MSR based on SDM.
- */
-#define PENWELL_FSB_FREQ_83SKU 83200
-#define PENWELL_FSB_FREQ_100SKU 99840
-
-#define SFI_MTMR_MAX_NUM 8
-#define SFI_MRTC_MAX 8
-
-extern struct console early_mrst_console;
-extern void mrst_early_console_init(void);
-
-extern struct console early_hsu_console;
-extern void hsu_early_console_init(const char *);
-
-extern void intel_scu_devices_create(void);
-extern void intel_scu_devices_destroy(void);
-
-/* VRTC timer */
-#define MRST_VRTC_MAP_SZ (1024)
-/*#define MRST_VRTC_PGOFFSET (0xc00) */
-
-extern void mrst_rtc_init(void);
-
-#endif /* _ASM_X86_MRST_H */
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index cb7502852acb..e139b13f2a33 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -218,10 +218,14 @@ void msrs_free(struct msr *msrs);
#ifdef CONFIG_SMP
int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
+int rdmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 *q);
+int wrmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 q);
void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs);
void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs);
int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
+int rdmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 *q);
+int wrmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 q);
int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]);
int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]);
#else /* CONFIG_SMP */
@@ -235,6 +239,16 @@ static inline int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
wrmsr(msr_no, l, h);
return 0;
}
+static inline int rdmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 *q)
+{
+ rdmsrl(msr_no, *q);
+ return 0;
+}
+static inline int wrmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 q)
+{
+ wrmsrl(msr_no, q);
+ return 0;
+}
static inline void rdmsr_on_cpus(const struct cpumask *m, u32 msr_no,
struct msr *msrs)
{
@@ -254,6 +268,14 @@ static inline int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
{
return wrmsr_safe(msr_no, l, h);
}
+static inline int rdmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 *q)
+{
+ return rdmsrl_safe(msr_no, q);
+}
+static inline int wrmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 q)
+{
+ return wrmsrl_safe(msr_no, q);
+}
static inline int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8])
{
return rdmsr_safe_regs(regs);
diff --git a/arch/x86/include/asm/mutex_64.h b/arch/x86/include/asm/mutex_64.h
index e7e6751648ed..07537a44216e 100644
--- a/arch/x86/include/asm/mutex_64.h
+++ b/arch/x86/include/asm/mutex_64.h
@@ -20,7 +20,7 @@
static inline void __mutex_fastpath_lock(atomic_t *v,
void (*fail_fn)(atomic_t *))
{
- asm volatile goto(LOCK_PREFIX " decl %0\n"
+ asm_volatile_goto(LOCK_PREFIX " decl %0\n"
" jns %l[exit]\n"
: : "m" (v->counter)
: "memory", "cc"
@@ -75,7 +75,7 @@ static inline int __mutex_fastpath_lock_retval(atomic_t *count)
static inline void __mutex_fastpath_unlock(atomic_t *v,
void (*fail_fn)(atomic_t *))
{
- asm volatile goto(LOCK_PREFIX " incl %0\n"
+ asm_volatile_goto(LOCK_PREFIX " incl %0\n"
" jg %l[exit]\n"
: : "m" (v->counter)
: "memory", "cc"
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 43dcd804ebd5..8de6d9cf3b95 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -39,9 +39,18 @@
#define __VIRTUAL_MASK_SHIFT 47
/*
- * Kernel image size is limited to 512 MB (see level2_kernel_pgt in
- * arch/x86/kernel/head_64.S), and it is mapped here:
+ * Kernel image size is limited to 1GiB due to the fixmap living in the
+ * next 1GiB (see level2_kernel_pgt in arch/x86/kernel/head_64.S). Use
+ * 512MiB by default, leaving 1.5GiB for modules once the page tables
+ * are fully set up. If kernel ASLR is configured, it can extend the
+ * kernel page table mapping, reducing the size of the modules area.
*/
-#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024)
+#define KERNEL_IMAGE_SIZE_DEFAULT (512 * 1024 * 1024)
+#if defined(CONFIG_RANDOMIZE_BASE) && \
+ CONFIG_RANDOMIZE_BASE_MAX_OFFSET > KERNEL_IMAGE_SIZE_DEFAULT
+#define KERNEL_IMAGE_SIZE CONFIG_RANDOMIZE_BASE_MAX_OFFSET
+#else
+#define KERNEL_IMAGE_SIZE KERNEL_IMAGE_SIZE_DEFAULT
+#endif
#endif /* _ASM_X86_PAGE_64_DEFS_H */
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 0da5200ee79d..94220d14d5cc 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -128,7 +128,8 @@ do { \
do { \
typedef typeof(var) pao_T__; \
const int pao_ID__ = (__builtin_constant_p(val) && \
- ((val) == 1 || (val) == -1)) ? (val) : 0; \
+ ((val) == 1 || (val) == -1)) ? \
+ (int)(val) : 0; \
if (0) { \
pao_T__ pao_tmp__; \
pao_tmp__ = (val); \
@@ -377,9 +378,6 @@ do { \
#define __this_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val)
#define __this_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val)
#define __this_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val)
-#define __this_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val)
-#define __this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val)
-#define __this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val)
#define __this_cpu_xchg_1(pcp, val) percpu_xchg_op(pcp, val)
#define __this_cpu_xchg_2(pcp, val) percpu_xchg_op(pcp, val)
#define __this_cpu_xchg_4(pcp, val) percpu_xchg_op(pcp, val)
@@ -399,9 +397,6 @@ do { \
#define this_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val)
#define this_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val)
#define this_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val)
-#define this_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val)
-#define this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val)
-#define this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val)
#define this_cpu_xchg_1(pcp, nval) percpu_xchg_op(pcp, nval)
#define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval)
#define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval)
@@ -446,7 +441,6 @@ do { \
#define __this_cpu_add_8(pcp, val) percpu_add_op((pcp), val)
#define __this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
#define __this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
-#define __this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val)
#define __this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)
#define __this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
#define __this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
@@ -456,7 +450,6 @@ do { \
#define this_cpu_add_8(pcp, val) percpu_add_op((pcp), val)
#define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
#define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
-#define this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val)
#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)
#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
#define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 2d883440cb9a..c883bf726398 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -58,7 +58,7 @@ typedef struct { pteval_t pte; } pte_t;
#define VMALLOC_START _AC(0xffffc90000000000, UL)
#define VMALLOC_END _AC(0xffffe8ffffffffff, UL)
#define VMEMMAP_START _AC(0xffffea0000000000, UL)
-#define MODULES_VADDR _AC(0xffffffffa0000000, UL)
+#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
#define MODULES_END _AC(0xffffffffff000000, UL)
#define MODULES_LEN (MODULES_END - MODULES_VADDR)
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
new file mode 100644
index 000000000000..8729723636fd
--- /dev/null
+++ b/arch/x86/include/asm/preempt.h
@@ -0,0 +1,100 @@
+#ifndef __ASM_PREEMPT_H
+#define __ASM_PREEMPT_H
+
+#include <asm/rmwcc.h>
+#include <asm/percpu.h>
+#include <linux/thread_info.h>
+
+DECLARE_PER_CPU(int, __preempt_count);
+
+/*
+ * We mask the PREEMPT_NEED_RESCHED bit so as not to confuse all current users
+ * that think a non-zero value indicates we cannot preempt.
+ */
+static __always_inline int preempt_count(void)
+{
+ return __this_cpu_read_4(__preempt_count) & ~PREEMPT_NEED_RESCHED;
+}
+
+static __always_inline void preempt_count_set(int pc)
+{
+ __this_cpu_write_4(__preempt_count, pc);
+}
+
+/*
+ * must be macros to avoid header recursion hell
+ */
+#define task_preempt_count(p) \
+ (task_thread_info(p)->saved_preempt_count & ~PREEMPT_NEED_RESCHED)
+
+#define init_task_preempt_count(p) do { \
+ task_thread_info(p)->saved_preempt_count = PREEMPT_DISABLED; \
+} while (0)
+
+#define init_idle_preempt_count(p, cpu) do { \
+ task_thread_info(p)->saved_preempt_count = PREEMPT_ENABLED; \
+ per_cpu(__preempt_count, (cpu)) = PREEMPT_ENABLED; \
+} while (0)
+
+/*
+ * We fold the NEED_RESCHED bit into the preempt count such that
+ * preempt_enable() can decrement and test for needing to reschedule with a
+ * single instruction.
+ *
+ * We invert the actual bit, so that when the decrement hits 0 we know we both
+ * need to resched (the bit is cleared) and can resched (no preempt count).
+ */
+
+static __always_inline void set_preempt_need_resched(void)
+{
+ __this_cpu_and_4(__preempt_count, ~PREEMPT_NEED_RESCHED);
+}
+
+static __always_inline void clear_preempt_need_resched(void)
+{
+ __this_cpu_or_4(__preempt_count, PREEMPT_NEED_RESCHED);
+}
+
+static __always_inline bool test_preempt_need_resched(void)
+{
+ return !(__this_cpu_read_4(__preempt_count) & PREEMPT_NEED_RESCHED);
+}
+
+/*
+ * The various preempt_count add/sub methods
+ */
+
+static __always_inline void __preempt_count_add(int val)
+{
+ __this_cpu_add_4(__preempt_count, val);
+}
+
+static __always_inline void __preempt_count_sub(int val)
+{
+ __this_cpu_add_4(__preempt_count, -val);
+}
+
+static __always_inline bool __preempt_count_dec_and_test(void)
+{
+ GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), "e");
+}
+
+/*
+ * Returns true when we need to resched and can (barring IRQ state).
+ */
+static __always_inline bool should_resched(void)
+{
+ return unlikely(!__this_cpu_read_4(__preempt_count));
+}
+
+#ifdef CONFIG_PREEMPT
+ extern asmlinkage void ___preempt_schedule(void);
+# define __preempt_schedule() asm ("call ___preempt_schedule")
+ extern asmlinkage void preempt_schedule(void);
+# ifdef CONFIG_CONTEXT_TRACKING
+ extern asmlinkage void ___preempt_schedule_context(void);
+# define __preempt_schedule_context() asm ("call ___preempt_schedule_context")
+# endif
+#endif
+
+#endif /* __ASM_PREEMPT_H */
diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h
index bade6ac3b14f..fbeb06ed0eaa 100644
--- a/arch/x86/include/asm/prom.h
+++ b/arch/x86/include/asm/prom.h
@@ -39,10 +39,5 @@ static inline void x86_dtb_init(void) { }
extern char cmd_line[COMMAND_LINE_SIZE];
-#define pci_address_to_pio pci_address_to_pio
-unsigned long pci_address_to_pio(phys_addr_t addr);
-
-#define HAVE_ARCH_DEVTREE_FIXUPS
-
#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h
new file mode 100644
index 000000000000..1ff990f1de8e
--- /dev/null
+++ b/arch/x86/include/asm/rmwcc.h
@@ -0,0 +1,41 @@
+#ifndef _ASM_X86_RMWcc
+#define _ASM_X86_RMWcc
+
+#ifdef CC_HAVE_ASM_GOTO
+
+#define __GEN_RMWcc(fullop, var, cc, ...) \
+do { \
+ asm_volatile_goto (fullop "; j" cc " %l[cc_label]" \
+ : : "m" (var), ## __VA_ARGS__ \
+ : "memory" : cc_label); \
+ return 0; \
+cc_label: \
+ return 1; \
+} while (0)
+
+#define GEN_UNARY_RMWcc(op, var, arg0, cc) \
+ __GEN_RMWcc(op " " arg0, var, cc)
+
+#define GEN_BINARY_RMWcc(op, var, val, arg0, cc) \
+ __GEN_RMWcc(op " %1, " arg0, var, cc, "er" (val))
+
+#else /* !CC_HAVE_ASM_GOTO */
+
+#define __GEN_RMWcc(fullop, var, cc, ...) \
+do { \
+ char c; \
+ asm volatile (fullop "; set" cc " %1" \
+ : "+m" (var), "=qm" (c) \
+ : __VA_ARGS__ : "memory"); \
+ return c != 0; \
+} while (0)
+
+#define GEN_UNARY_RMWcc(op, var, arg0, cc) \
+ __GEN_RMWcc(op " " arg0, var, cc)
+
+#define GEN_BINARY_RMWcc(op, var, val, arg0, cc) \
+ __GEN_RMWcc(op " %2, " arg0, var, cc, "er" (val))
+
+#endif /* CC_HAVE_ASM_GOTO */
+
+#endif /* _ASM_X86_RMWcc */
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index 347555492dad..59bcf4e22418 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -51,9 +51,9 @@ extern void i386_reserve_resources(void);
extern void setup_default_timer_irq(void);
#ifdef CONFIG_X86_INTEL_MID
-extern void x86_mrst_early_setup(void);
+extern void x86_intel_mid_early_setup(void);
#else
-static inline void x86_mrst_early_setup(void) { }
+static inline void x86_intel_mid_early_setup(void) { }
#endif
#ifdef CONFIG_X86_INTEL_CE
diff --git a/arch/x86/include/asm/simd.h b/arch/x86/include/asm/simd.h
new file mode 100644
index 000000000000..ee80b92f0096
--- /dev/null
+++ b/arch/x86/include/asm/simd.h
@@ -0,0 +1,11 @@
+
+#include <asm/i387.h>
+
+/*
+ * may_use_simd - whether it is allowable at this time to issue SIMD
+ * instructions or access the SIMD register file
+ */
+static __must_check inline bool may_use_simd(void)
+{
+ return irq_fpu_usable();
+}
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 27811190cbd7..c46a46be1ec6 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -28,8 +28,7 @@ struct thread_info {
__u32 flags; /* low level flags */
__u32 status; /* thread synchronous flags */
__u32 cpu; /* current CPU */
- int preempt_count; /* 0 => preemptable,
- <0 => BUG */
+ int saved_preempt_count;
mm_segment_t addr_limit;
struct restart_block restart_block;
void __user *sysenter_return;
@@ -49,7 +48,7 @@ struct thread_info {
.exec_domain = &default_exec_domain, \
.flags = 0, \
.cpu = 0, \
- .preempt_count = INIT_PREEMPT_COUNT, \
+ .saved_preempt_count = INIT_PREEMPT_COUNT, \
.addr_limit = KERNEL_DS, \
.restart_block = { \
.fn = do_no_restart_syscall, \
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 5838fa911aa0..8ec57c07b125 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -542,5 +542,103 @@ extern struct movsl_mask {
# include <asm/uaccess_64.h>
#endif
+unsigned long __must_check _copy_from_user(void *to, const void __user *from,
+ unsigned n);
+unsigned long __must_check _copy_to_user(void __user *to, const void *from,
+ unsigned n);
+
+#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS
+# define copy_user_diag __compiletime_error
+#else
+# define copy_user_diag __compiletime_warning
+#endif
+
+extern void copy_user_diag("copy_from_user() buffer size is too small")
+copy_from_user_overflow(void);
+extern void copy_user_diag("copy_to_user() buffer size is too small")
+copy_to_user_overflow(void) __asm__("copy_from_user_overflow");
+
+#undef copy_user_diag
+
+#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS
+
+extern void
+__compiletime_warning("copy_from_user() buffer size is not provably correct")
+__copy_from_user_overflow(void) __asm__("copy_from_user_overflow");
+#define __copy_from_user_overflow(size, count) __copy_from_user_overflow()
+
+extern void
+__compiletime_warning("copy_to_user() buffer size is not provably correct")
+__copy_to_user_overflow(void) __asm__("copy_from_user_overflow");
+#define __copy_to_user_overflow(size, count) __copy_to_user_overflow()
+
+#else
+
+static inline void
+__copy_from_user_overflow(int size, unsigned long count)
+{
+ WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count);
+}
+
+#define __copy_to_user_overflow __copy_from_user_overflow
+
+#endif
+
+static inline unsigned long __must_check
+copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+ int sz = __compiletime_object_size(to);
+
+ might_fault();
+
+ /*
+ * While we would like to have the compiler do the checking for us
+ * even in the non-constant size case, any false positives there are
+ * a problem (especially when DEBUG_STRICT_USER_COPY_CHECKS, but even
+ * without - the [hopefully] dangerous looking nature of the warning
+ * would make people go look at the respecitive call sites over and
+ * over again just to find that there's no problem).
+ *
+ * And there are cases where it's just not realistic for the compiler
+ * to prove the count to be in range. For example when multiple call
+ * sites of a helper function - perhaps in different source files -
+ * all doing proper range checking, yet the helper function not doing
+ * so again.
+ *
+ * Therefore limit the compile time checking to the constant size
+ * case, and do only runtime checking for non-constant sizes.
+ */
+
+ if (likely(sz < 0 || sz >= n))
+ n = _copy_from_user(to, from, n);
+ else if(__builtin_constant_p(n))
+ copy_from_user_overflow();
+ else
+ __copy_from_user_overflow(sz, n);
+
+ return n;
+}
+
+static inline unsigned long __must_check
+copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+ int sz = __compiletime_object_size(from);
+
+ might_fault();
+
+ /* See the comment in copy_from_user() above. */
+ if (likely(sz < 0 || sz >= n))
+ n = _copy_to_user(to, from, n);
+ else if(__builtin_constant_p(n))
+ copy_to_user_overflow();
+ else
+ __copy_to_user_overflow(sz, n);
+
+ return n;
+}
+
+#undef __copy_from_user_overflow
+#undef __copy_to_user_overflow
+
#endif /* _ASM_X86_UACCESS_H */
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 7f760a9f1f61..3c03a5de64d3 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -184,33 +184,4 @@ __copy_from_user_inatomic_nocache(void *to, const void __user *from,
return __copy_from_user_ll_nocache_nozero(to, from, n);
}
-unsigned long __must_check copy_to_user(void __user *to,
- const void *from, unsigned long n);
-unsigned long __must_check _copy_from_user(void *to,
- const void __user *from,
- unsigned long n);
-
-
-extern void copy_from_user_overflow(void)
-#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS
- __compiletime_error("copy_from_user() buffer size is not provably correct")
-#else
- __compiletime_warning("copy_from_user() buffer size is not provably correct")
-#endif
-;
-
-static inline unsigned long __must_check copy_from_user(void *to,
- const void __user *from,
- unsigned long n)
-{
- int sz = __compiletime_object_size(to);
-
- if (likely(sz == -1 || sz >= n))
- n = _copy_from_user(to, from, n);
- else
- copy_from_user_overflow();
-
- return n;
-}
-
#endif /* _ASM_X86_UACCESS_32_H */
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 4f7923dd0007..190413d0de57 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -46,42 +46,13 @@ copy_user_generic(void *to, const void *from, unsigned len)
}
__must_check unsigned long
-_copy_to_user(void __user *to, const void *from, unsigned len);
-__must_check unsigned long
-_copy_from_user(void *to, const void __user *from, unsigned len);
-__must_check unsigned long
copy_in_user(void __user *to, const void __user *from, unsigned len);
-static inline unsigned long __must_check copy_from_user(void *to,
- const void __user *from,
- unsigned long n)
-{
- int sz = __compiletime_object_size(to);
-
- might_fault();
- if (likely(sz == -1 || sz >= n))
- n = _copy_from_user(to, from, n);
-#ifdef CONFIG_DEBUG_VM
- else
- WARN(1, "Buffer overflow detected!\n");
-#endif
- return n;
-}
-
static __always_inline __must_check
-int copy_to_user(void __user *dst, const void *src, unsigned size)
-{
- might_fault();
-
- return _copy_to_user(dst, src, size);
-}
-
-static __always_inline __must_check
-int __copy_from_user(void *dst, const void __user *src, unsigned size)
+int __copy_from_user_nocheck(void *dst, const void __user *src, unsigned size)
{
int ret = 0;
- might_fault();
if (!__builtin_constant_p(size))
return copy_user_generic(dst, (__force void *)src, size);
switch (size) {
@@ -121,11 +92,17 @@ int __copy_from_user(void *dst, const void __user *src, unsigned size)
}
static __always_inline __must_check
-int __copy_to_user(void __user *dst, const void *src, unsigned size)
+int __copy_from_user(void *dst, const void __user *src, unsigned size)
+{
+ might_fault();
+ return __copy_from_user_nocheck(dst, src, size);
+}
+
+static __always_inline __must_check
+int __copy_to_user_nocheck(void __user *dst, const void *src, unsigned size)
{
int ret = 0;
- might_fault();
if (!__builtin_constant_p(size))
return copy_user_generic((__force void *)dst, src, size);
switch (size) {
@@ -165,6 +142,13 @@ int __copy_to_user(void __user *dst, const void *src, unsigned size)
}
static __always_inline __must_check
+int __copy_to_user(void __user *dst, const void *src, unsigned size)
+{
+ might_fault();
+ return __copy_to_user_nocheck(dst, src, size);
+}
+
+static __always_inline __must_check
int __copy_in_user(void __user *dst, const void __user *src, unsigned size)
{
int ret = 0;
@@ -220,13 +204,13 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size)
static __must_check __always_inline int
__copy_from_user_inatomic(void *dst, const void __user *src, unsigned size)
{
- return copy_user_generic(dst, (__force const void *)src, size);
+ return __copy_from_user_nocheck(dst, (__force const void *)src, size);
}
static __must_check __always_inline int
__copy_to_user_inatomic(void __user *dst, const void *src, unsigned size)
{
- return copy_user_generic((__force void *)dst, src, size);
+ return __copy_to_user_nocheck((__force void *)dst, src, size);
}
extern long __copy_user_nocache(void *dst, const void __user *src,
diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h
index 062921ef34e9..8b1283daa332 100644
--- a/arch/x86/include/asm/uv/uv.h
+++ b/arch/x86/include/asm/uv/uv.h
@@ -12,7 +12,15 @@ extern enum uv_system_type get_uv_system_type(void);
extern int is_uv_system(void);
extern void uv_cpu_init(void);
extern void uv_nmi_init(void);
+extern void uv_register_nmi_notifier(void);
extern void uv_system_init(void);
+extern void (*uv_trace_nmi_func)(unsigned int reason, struct pt_regs *regs);
+extern void (*uv_trace_func)(const char *f, const int l, const char *fmt, ...);
+#define uv_trace(fmt, ...) \
+do { \
+ if (unlikely(uv_trace_func)) \
+ (uv_trace_func)(__func__, __LINE__, fmt, ##__VA_ARGS__);\
+} while (0)
extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
struct mm_struct *mm,
unsigned long start,
@@ -25,6 +33,8 @@ static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; }
static inline int is_uv_system(void) { return 0; }
static inline void uv_cpu_init(void) { }
static inline void uv_system_init(void) { }
+static inline void uv_trace(void *fmt, ...) { }
+static inline void uv_register_nmi_notifier(void) { }
static inline const struct cpumask *
uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm,
unsigned long start, unsigned long end, unsigned int cpu)
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 2c32df95bb78..a30836c8ac4d 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -502,8 +502,8 @@ struct uv_blade_info {
unsigned short nr_online_cpus;
unsigned short pnode;
short memory_nid;
- spinlock_t nmi_lock;
- unsigned long nmi_count;
+ spinlock_t nmi_lock; /* obsolete, see uv_hub_nmi */
+ unsigned long nmi_count; /* obsolete, see uv_hub_nmi */
};
extern struct uv_blade_info *uv_blade_info;
extern short *uv_node_to_blade;
@@ -576,6 +576,59 @@ static inline int uv_num_possible_blades(void)
return uv_possible_blades;
}
+/* Per Hub NMI support */
+extern void uv_nmi_setup(void);
+
+/* BMC sets a bit this MMR non-zero before sending an NMI */
+#define UVH_NMI_MMR UVH_SCRATCH5
+#define UVH_NMI_MMR_CLEAR UVH_SCRATCH5_ALIAS
+#define UVH_NMI_MMR_SHIFT 63
+#define UVH_NMI_MMR_TYPE "SCRATCH5"
+
+/* Newer SMM NMI handler, not present in all systems */
+#define UVH_NMI_MMRX UVH_EVENT_OCCURRED0
+#define UVH_NMI_MMRX_CLEAR UVH_EVENT_OCCURRED0_ALIAS
+#define UVH_NMI_MMRX_SHIFT (is_uv1_hub() ? \
+ UV1H_EVENT_OCCURRED0_EXTIO_INT0_SHFT :\
+ UVXH_EVENT_OCCURRED0_EXTIO_INT0_SHFT)
+#define UVH_NMI_MMRX_TYPE "EXTIO_INT0"
+
+/* Non-zero indicates newer SMM NMI handler present */
+#define UVH_NMI_MMRX_SUPPORTED UVH_EXTIO_INT0_BROADCAST
+
+/* Indicates to BIOS that we want to use the newer SMM NMI handler */
+#define UVH_NMI_MMRX_REQ UVH_SCRATCH5_ALIAS_2
+#define UVH_NMI_MMRX_REQ_SHIFT 62
+
+struct uv_hub_nmi_s {
+ raw_spinlock_t nmi_lock;
+ atomic_t in_nmi; /* flag this node in UV NMI IRQ */
+ atomic_t cpu_owner; /* last locker of this struct */
+ atomic_t read_mmr_count; /* count of MMR reads */
+ atomic_t nmi_count; /* count of true UV NMIs */
+ unsigned long nmi_value; /* last value read from NMI MMR */
+};
+
+struct uv_cpu_nmi_s {
+ struct uv_hub_nmi_s *hub;
+ atomic_t state;
+ atomic_t pinging;
+ int queries;
+ int pings;
+};
+
+DECLARE_PER_CPU(struct uv_cpu_nmi_s, __uv_cpu_nmi);
+#define uv_cpu_nmi (__get_cpu_var(__uv_cpu_nmi))
+#define uv_hub_nmi (uv_cpu_nmi.hub)
+#define uv_cpu_nmi_per(cpu) (per_cpu(__uv_cpu_nmi, cpu))
+#define uv_hub_nmi_per(cpu) (uv_cpu_nmi_per(cpu).hub)
+
+/* uv_cpu_nmi_states */
+#define UV_NMI_STATE_OUT 0
+#define UV_NMI_STATE_IN 1
+#define UV_NMI_STATE_DUMP 2
+#define UV_NMI_STATE_DUMP_DONE 3
+
/* Update SCIR state */
static inline void uv_set_scir_bits(unsigned char value)
{
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h
index bd5f80e58a23..e42249bcf7e1 100644
--- a/arch/x86/include/asm/uv/uv_mmrs.h
+++ b/arch/x86/include/asm/uv/uv_mmrs.h
@@ -461,6 +461,23 @@ union uvh_event_occurred0_u {
/* ========================================================================= */
+/* UVH_EXTIO_INT0_BROADCAST */
+/* ========================================================================= */
+#define UVH_EXTIO_INT0_BROADCAST 0x61448UL
+#define UVH_EXTIO_INT0_BROADCAST_32 0x3f0
+
+#define UVH_EXTIO_INT0_BROADCAST_ENABLE_SHFT 0
+#define UVH_EXTIO_INT0_BROADCAST_ENABLE_MASK 0x0000000000000001UL
+
+union uvh_extio_int0_broadcast_u {
+ unsigned long v;
+ struct uvh_extio_int0_broadcast_s {
+ unsigned long enable:1; /* RW */
+ unsigned long rsvd_1_63:63;
+ } s;
+};
+
+/* ========================================================================= */
/* UVH_GR0_TLB_INT0_CONFIG */
/* ========================================================================= */
#define UVH_GR0_TLB_INT0_CONFIG 0x61b00UL
@@ -2606,6 +2623,20 @@ union uvh_scratch5_u {
};
/* ========================================================================= */
+/* UVH_SCRATCH5_ALIAS */
+/* ========================================================================= */
+#define UVH_SCRATCH5_ALIAS 0x2d0208UL
+#define UVH_SCRATCH5_ALIAS_32 0x780
+
+
+/* ========================================================================= */
+/* UVH_SCRATCH5_ALIAS_2 */
+/* ========================================================================= */
+#define UVH_SCRATCH5_ALIAS_2 0x2d0210UL
+#define UVH_SCRATCH5_ALIAS_2_32 0x788
+
+
+/* ========================================================================= */
/* UVXH_EVENT_OCCURRED2 */
/* ========================================================================= */
#define UVXH_EVENT_OCCURRED2 0x70100UL
diff --git a/arch/x86/include/asm/xen/page-coherent.h b/arch/x86/include/asm/xen/page-coherent.h
new file mode 100644
index 000000000000..7f02fe4e2c7b
--- /dev/null
+++ b/arch/x86/include/asm/xen/page-coherent.h
@@ -0,0 +1,38 @@
+#ifndef _ASM_X86_XEN_PAGE_COHERENT_H
+#define _ASM_X86_XEN_PAGE_COHERENT_H
+
+#include <asm/page.h>
+#include <linux/dma-attrs.h>
+#include <linux/dma-mapping.h>
+
+static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flags,
+ struct dma_attrs *attrs)
+{
+ void *vstart = (void*)__get_free_pages(flags, get_order(size));
+ *dma_handle = virt_to_phys(vstart);
+ return vstart;
+}
+
+static inline void xen_free_coherent_pages(struct device *hwdev, size_t size,
+ void *cpu_addr, dma_addr_t dma_handle,
+ struct dma_attrs *attrs)
+{
+ free_pages((unsigned long) cpu_addr, get_order(size));
+}
+
+static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
+ unsigned long offset, size_t size, enum dma_data_direction dir,
+ struct dma_attrs *attrs) { }
+
+static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
+ size_t size, enum dma_data_direction dir,
+ struct dma_attrs *attrs) { }
+
+static inline void xen_dma_sync_single_for_cpu(struct device *hwdev,
+ dma_addr_t handle, size_t size, enum dma_data_direction dir) { }
+
+static inline void xen_dma_sync_single_for_device(struct device *hwdev,
+ dma_addr_t handle, size_t size, enum dma_data_direction dir) { }
+
+#endif /* _ASM_X86_XEN_PAGE_COHERENT_H */
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 6aef9fbc09b7..b913915e8e63 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -79,30 +79,38 @@ static inline int phys_to_machine_mapping_valid(unsigned long pfn)
return get_phys_to_machine(pfn) != INVALID_P2M_ENTRY;
}
-static inline unsigned long mfn_to_pfn(unsigned long mfn)
+static inline unsigned long mfn_to_pfn_no_overrides(unsigned long mfn)
{
unsigned long pfn;
- int ret = 0;
+ int ret;
if (xen_feature(XENFEAT_auto_translated_physmap))
return mfn;
- if (unlikely(mfn >= machine_to_phys_nr)) {
- pfn = ~0;
- goto try_override;
- }
- pfn = 0;
+ if (unlikely(mfn >= machine_to_phys_nr))
+ return ~0;
+
/*
* The array access can fail (e.g., device space beyond end of RAM).
* In such cases it doesn't matter what we return (we return garbage),
* but we must handle the fault without crashing!
*/
ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
-try_override:
- /* ret might be < 0 if there are no entries in the m2p for mfn */
if (ret < 0)
- pfn = ~0;
- else if (get_phys_to_machine(pfn) != mfn)
+ return ~0;
+
+ return pfn;
+}
+
+static inline unsigned long mfn_to_pfn(unsigned long mfn)
+{
+ unsigned long pfn;
+
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return mfn;
+
+ pfn = mfn_to_pfn_no_overrides(mfn);
+ if (get_phys_to_machine(pfn) != mfn) {
/*
* If this appears to be a foreign mfn (because the pfn
* doesn't map back to the mfn), then check the local override
@@ -111,6 +119,7 @@ try_override:
* m2p_find_override_pfn returns ~0 if it doesn't find anything.
*/
pfn = m2p_find_override_pfn(mfn, ~0);
+ }
/*
* pfn is ~0 if there are no entries in the m2p for mfn or if the
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index c15ddaf90710..9c3733c5f8f7 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -158,7 +158,7 @@ enum {
X86_SUBARCH_PC = 0,
X86_SUBARCH_LGUEST,
X86_SUBARCH_XEN,
- X86_SUBARCH_MRST,
+ X86_SUBARCH_INTEL_MID,
X86_SUBARCH_CE4100,
X86_NR_SUBARCHS,
};
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index bb0465090ae5..37813b5ddc37 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -147,6 +147,8 @@
#define MSR_PP1_ENERGY_STATUS 0x00000641
#define MSR_PP1_POLICY 0x00000642
+#define MSR_CORE_C1_RES 0x00000660
+
#define MSR_AMD64_MC0_MASK 0xc0010044
#define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x))
@@ -536,6 +538,7 @@
/* MSR_IA32_VMX_MISC bits */
#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
+#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F
/* AMD-V MSRs */
#define MSR_VM_CR 0xc0010114
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index a5408b965c9d..9b0a34e2cd79 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -36,6 +36,8 @@ obj-y += tsc.o io_delay.o rtc.o
obj-y += pci-iommu_table.o
obj-y += resource.o
+obj-$(CONFIG_PREEMPT) += preempt.o
+
obj-y += process.o
obj-y += i387.o xsave.o
obj-y += ptrace.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 40c76604199f..6c0b43bd024b 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -189,24 +189,31 @@ static int __init acpi_parse_madt(struct acpi_table_header *table)
return 0;
}
-static void acpi_register_lapic(int id, u8 enabled)
+/**
+ * acpi_register_lapic - register a local apic and generates a logic cpu number
+ * @id: local apic id to register
+ * @enabled: this cpu is enabled or not
+ *
+ * Returns the logic cpu number which maps to the local apic
+ */
+static int acpi_register_lapic(int id, u8 enabled)
{
unsigned int ver = 0;
if (id >= MAX_LOCAL_APIC) {
printk(KERN_INFO PREFIX "skipped apicid that is too big\n");
- return;
+ return -EINVAL;
}
if (!enabled) {
++disabled_cpus;
- return;
+ return -EINVAL;
}
if (boot_cpu_physical_apicid != -1U)
ver = apic_version[boot_cpu_physical_apicid];
- generic_processor_info(id, ver);
+ return generic_processor_info(id, ver);
}
static int __init
@@ -614,84 +621,27 @@ static void acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
#endif
}
-static int _acpi_map_lsapic(acpi_handle handle, int *pcpu)
+static int _acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu)
{
- struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
- union acpi_object *obj;
- struct acpi_madt_local_apic *lapic;
- cpumask_var_t tmp_map, new_map;
- u8 physid;
int cpu;
- int retval = -ENOMEM;
-
- if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
- return -EINVAL;
-
- if (!buffer.length || !buffer.pointer)
- return -EINVAL;
-
- obj = buffer.pointer;
- if (obj->type != ACPI_TYPE_BUFFER ||
- obj->buffer.length < sizeof(*lapic)) {
- kfree(buffer.pointer);
- return -EINVAL;
- }
- lapic = (struct acpi_madt_local_apic *)obj->buffer.pointer;
-
- if (lapic->header.type != ACPI_MADT_TYPE_LOCAL_APIC ||
- !(lapic->lapic_flags & ACPI_MADT_ENABLED)) {
- kfree(buffer.pointer);
- return -EINVAL;
- }
-
- physid = lapic->id;
-
- kfree(buffer.pointer);
- buffer.length = ACPI_ALLOCATE_BUFFER;
- buffer.pointer = NULL;
- lapic = NULL;
-
- if (!alloc_cpumask_var(&tmp_map, GFP_KERNEL))
- goto out;
-
- if (!alloc_cpumask_var(&new_map, GFP_KERNEL))
- goto free_tmp_map;
-
- cpumask_copy(tmp_map, cpu_present_mask);
- acpi_register_lapic(physid, ACPI_MADT_ENABLED);
-
- /*
- * If acpi_register_lapic successfully generates a new logical cpu
- * number, then the following will get us exactly what was mapped
- */
- cpumask_andnot(new_map, cpu_present_mask, tmp_map);
- if (cpumask_empty(new_map)) {
- printk ("Unable to map lapic to logical cpu number\n");
- retval = -EINVAL;
- goto free_new_map;
+ cpu = acpi_register_lapic(physid, ACPI_MADT_ENABLED);
+ if (cpu < 0) {
+ pr_info(PREFIX "Unable to map lapic to logical cpu number\n");
+ return cpu;
}
acpi_processor_set_pdc(handle);
-
- cpu = cpumask_first(new_map);
acpi_map_cpu2node(handle, cpu, physid);
*pcpu = cpu;
- retval = 0;
-
-free_new_map:
- free_cpumask_var(new_map);
-free_tmp_map:
- free_cpumask_var(tmp_map);
-out:
- return retval;
+ return 0;
}
/* wrapper to silence section mismatch warning */
-int __ref acpi_map_lsapic(acpi_handle handle, int *pcpu)
+int __ref acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu)
{
- return _acpi_map_lsapic(handle, pcpu);
+ return _acpi_map_lsapic(handle, physid, pcpu);
}
EXPORT_SYMBOL(acpi_map_lsapic);
@@ -745,7 +695,7 @@ static int __init acpi_parse_sbf(struct acpi_table_header *table)
#ifdef CONFIG_HPET_TIMER
#include <asm/hpet.h>
-static struct __initdata resource *hpet_res;
+static struct resource *hpet_res __initdata;
static int __init acpi_parse_hpet(struct acpi_table_header *table)
{
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 15e8563e5c24..df94598ad05a 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -402,17 +402,6 @@ void alternatives_enable_smp(void)
{
struct smp_alt_module *mod;
-#ifdef CONFIG_LOCKDEP
- /*
- * Older binutils section handling bug prevented
- * alternatives-replacement from working reliably.
- *
- * If this still occurs then you should see a hang
- * or crash shortly after this line:
- */
- pr_info("lockdep: fixing up alternatives\n");
-#endif
-
/* Why bother if there are no other CPUs? */
BUG_ON(num_possible_cpus() == 1);
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index c9876efecafb..af5b08ab3b71 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -40,7 +40,7 @@
#include <asm/fixmap.h>
#include <asm/apb_timer.h>
-#include <asm/mrst.h>
+#include <asm/intel-mid.h>
#include <asm/time.h>
#define APBT_CLOCKEVENT_RATING 110
@@ -157,13 +157,13 @@ static int __init apbt_clockevent_register(void)
adev->num = smp_processor_id();
adev->timer = dw_apb_clockevent_init(smp_processor_id(), "apbt0",
- mrst_timer_options == MRST_TIMER_LAPIC_APBT ?
+ intel_mid_timer_options == INTEL_MID_TIMER_LAPIC_APBT ?
APBT_CLOCKEVENT_RATING - 100 : APBT_CLOCKEVENT_RATING,
adev_virt_addr(adev), 0, apbt_freq);
/* Firmware does EOI handling for us. */
adev->timer->eoi = NULL;
- if (mrst_timer_options == MRST_TIMER_LAPIC_APBT) {
+ if (intel_mid_timer_options == INTEL_MID_TIMER_LAPIC_APBT) {
global_clock_event = &adev->timer->ced;
printk(KERN_DEBUG "%s clockevent registered as global\n",
global_clock_event->name);
@@ -253,7 +253,7 @@ static int apbt_cpuhp_notify(struct notifier_block *n,
static __init int apbt_late_init(void)
{
- if (mrst_timer_options == MRST_TIMER_LAPIC_APBT ||
+ if (intel_mid_timer_options == INTEL_MID_TIMER_LAPIC_APBT ||
!apb_timer_block_enabled)
return 0;
/* This notifier should be called after workqueue is ready */
@@ -340,7 +340,7 @@ void __init apbt_time_init(void)
}
#ifdef CONFIG_SMP
/* kernel cmdline disable apb timer, so we will use lapic timers */
- if (mrst_timer_options == MRST_TIMER_LAPIC_APBT) {
+ if (intel_mid_timer_options == INTEL_MID_TIMER_LAPIC_APBT) {
printk(KERN_INFO "apbt: disabled per cpu timer\n");
return;
}
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index a7eb82d9b012..ed165d657380 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2107,7 +2107,7 @@ void disconnect_bsp_APIC(int virt_wire_setup)
apic_write(APIC_LVT1, value);
}
-void generic_processor_info(int apicid, int version)
+int generic_processor_info(int apicid, int version)
{
int cpu, max = nr_cpu_ids;
bool boot_cpu_detected = physid_isset(boot_cpu_physical_apicid,
@@ -2127,7 +2127,7 @@ void generic_processor_info(int apicid, int version)
" Processor %d/0x%x ignored.\n", max, thiscpu, apicid);
disabled_cpus++;
- return;
+ return -ENODEV;
}
if (num_processors >= nr_cpu_ids) {
@@ -2138,7 +2138,7 @@ void generic_processor_info(int apicid, int version)
" Processor %d/0x%x ignored.\n", max, thiscpu, apicid);
disabled_cpus++;
- return;
+ return -EINVAL;
}
num_processors++;
@@ -2183,6 +2183,8 @@ void generic_processor_info(int apicid, int version)
#endif
set_cpu_possible(cpu, true);
set_cpu_present(cpu, true);
+
+ return cpu;
}
int hard_smp_processor_id(void)
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 1191ac1c9d25..ad0dc0428baf 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -39,12 +39,6 @@
#include <asm/x86_init.h>
#include <asm/nmi.h>
-/* BMC sets a bit this MMR non-zero before sending an NMI */
-#define UVH_NMI_MMR UVH_SCRATCH5
-#define UVH_NMI_MMR_CLEAR (UVH_NMI_MMR + 8)
-#define UV_NMI_PENDING_MASK (1UL << 63)
-DEFINE_PER_CPU(unsigned long, cpu_last_nmi_count);
-
DEFINE_PER_CPU(int, x2apic_extra_bits);
#define PR_DEVEL(fmt, args...) pr_devel("%s: " fmt, __func__, args)
@@ -58,7 +52,6 @@ int uv_min_hub_revision_id;
EXPORT_SYMBOL_GPL(uv_min_hub_revision_id);
unsigned int uv_apicid_hibits;
EXPORT_SYMBOL_GPL(uv_apicid_hibits);
-static DEFINE_SPINLOCK(uv_nmi_lock);
static struct apic apic_x2apic_uv_x;
@@ -113,7 +106,7 @@ static int __init early_get_pnodeid(void)
break;
case UV3_HUB_PART_NUMBER:
case UV3_HUB_PART_NUMBER_X:
- uv_min_hub_revision_id += UV3_HUB_REVISION_BASE - 1;
+ uv_min_hub_revision_id += UV3_HUB_REVISION_BASE;
break;
}
@@ -847,68 +840,6 @@ void uv_cpu_init(void)
set_x2apic_extra_bits(uv_hub_info->pnode);
}
-/*
- * When NMI is received, print a stack trace.
- */
-int uv_handle_nmi(unsigned int reason, struct pt_regs *regs)
-{
- unsigned long real_uv_nmi;
- int bid;
-
- /*
- * Each blade has an MMR that indicates when an NMI has been sent
- * to cpus on the blade. If an NMI is detected, atomically
- * clear the MMR and update a per-blade NMI count used to
- * cause each cpu on the blade to notice a new NMI.
- */
- bid = uv_numa_blade_id();
- real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK);
-
- if (unlikely(real_uv_nmi)) {
- spin_lock(&uv_blade_info[bid].nmi_lock);
- real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK);
- if (real_uv_nmi) {
- uv_blade_info[bid].nmi_count++;
- uv_write_local_mmr(UVH_NMI_MMR_CLEAR, UV_NMI_PENDING_MASK);
- }
- spin_unlock(&uv_blade_info[bid].nmi_lock);
- }
-
- if (likely(__get_cpu_var(cpu_last_nmi_count) == uv_blade_info[bid].nmi_count))
- return NMI_DONE;
-
- __get_cpu_var(cpu_last_nmi_count) = uv_blade_info[bid].nmi_count;
-
- /*
- * Use a lock so only one cpu prints at a time.
- * This prevents intermixed output.
- */
- spin_lock(&uv_nmi_lock);
- pr_info("UV NMI stack dump cpu %u:\n", smp_processor_id());
- dump_stack();
- spin_unlock(&uv_nmi_lock);
-
- return NMI_HANDLED;
-}
-
-void uv_register_nmi_notifier(void)
-{
- if (register_nmi_handler(NMI_UNKNOWN, uv_handle_nmi, 0, "uv"))
- printk(KERN_WARNING "UV NMI handler failed to register\n");
-}
-
-void uv_nmi_init(void)
-{
- unsigned int value;
-
- /*
- * Unmask NMI on all cpus
- */
- value = apic_read(APIC_LVT1) | APIC_DM_NMI;
- value &= ~APIC_LVT_MASKED;
- apic_write(APIC_LVT1, value);
-}
-
void __init uv_system_init(void)
{
union uvh_rh_gam_config_mmr_u m_n_config;
@@ -1046,6 +977,7 @@ void __init uv_system_init(void)
map_mmr_high(max_pnode);
map_mmioh_high(min_pnode, max_pnode);
+ uv_nmi_setup();
uv_cpu_init();
uv_scir_register_cpu_notifier();
uv_register_nmi_notifier();
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 28610822fb3c..9f6b9341950f 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -32,7 +32,6 @@ void common(void) {
OFFSET(TI_flags, thread_info, flags);
OFFSET(TI_status, thread_info, status);
OFFSET(TI_addr_limit, thread_info, addr_limit);
- OFFSET(TI_preempt_count, thread_info, preempt_count);
BLANK();
OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 903a264af981..bca023bdd6b2 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -339,7 +339,7 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
#endif
/*
- * On a AMD dual core setup the lower bits of the APIC id distingush the cores.
+ * On a AMD dual core setup the lower bits of the APIC id distinguish the cores.
* Assumes number of cores is a power of two.
*/
static void amd_detect_cmp(struct cpuinfo_x86 *c)
@@ -823,8 +823,8 @@ static const struct cpu_dev amd_cpu_dev = {
.c_vendor = "AMD",
.c_ident = { "AuthenticAMD" },
#ifdef CONFIG_X86_32
- .c_models = {
- { .vendor = X86_VENDOR_AMD, .family = 4, .model_names =
+ .legacy_models = {
+ { .family = 4, .model_names =
{
[3] = "486 DX/2",
[7] = "486 DX/2-WB",
@@ -835,7 +835,7 @@ static const struct cpu_dev amd_cpu_dev = {
}
},
},
- .c_size_cache = amd_size_cache,
+ .legacy_cache_size = amd_size_cache,
#endif
.c_early_init = early_init_amd,
.c_detect_tlb = cpu_detect_tlb_amd,
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index fbf6c3bc2400..8d5652dc99dd 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -468,10 +468,10 @@ static void init_centaur(struct cpuinfo_x86 *c)
#endif
}
+#ifdef CONFIG_X86_32
static unsigned int
centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size)
{
-#ifdef CONFIG_X86_32
/* VIA C3 CPUs (670-68F) need further shifting. */
if ((c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8)))
size >>= 8;
@@ -484,16 +484,18 @@ centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size)
if ((c->x86 == 6) && (c->x86_model == 9) &&
(c->x86_mask == 1) && (size == 65))
size -= 1;
-#endif
return size;
}
+#endif
static const struct cpu_dev centaur_cpu_dev = {
.c_vendor = "Centaur",
.c_ident = { "CentaurHauls" },
.c_early_init = early_init_centaur,
.c_init = init_centaur,
- .c_size_cache = centaur_size_cache,
+#ifdef CONFIG_X86_32
+ .legacy_cache_size = centaur_size_cache,
+#endif
.c_x86_vendor = X86_VENDOR_CENTAUR,
};
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 2793d1f095a2..6abc172b8258 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -346,7 +346,8 @@ static void filter_cpuid_features(struct cpuinfo_x86 *c, bool warn)
/* Look up CPU names by table lookup. */
static const char *table_lookup_model(struct cpuinfo_x86 *c)
{
- const struct cpu_model_info *info;
+#ifdef CONFIG_X86_32
+ const struct legacy_cpu_model_info *info;
if (c->x86_model >= 16)
return NULL; /* Range check */
@@ -354,13 +355,14 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c)
if (!this_cpu)
return NULL;
- info = this_cpu->c_models;
+ info = this_cpu->legacy_models;
- while (info && info->family) {
+ while (info->family) {
if (info->family == c->x86)
return info->model_names[c->x86_model];
info++;
}
+#endif
return NULL; /* Not found */
}
@@ -450,8 +452,8 @@ void cpu_detect_cache_sizes(struct cpuinfo_x86 *c)
c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff);
#else
/* do processor-specific cache resizing */
- if (this_cpu->c_size_cache)
- l2size = this_cpu->c_size_cache(c, l2size);
+ if (this_cpu->legacy_cache_size)
+ l2size = this_cpu->legacy_cache_size(c, l2size);
/* Allow user to override all this if necessary. */
if (cachesize_override != -1)
@@ -1095,6 +1097,9 @@ DEFINE_PER_CPU(char *, irq_stack_ptr) =
DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
+DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
+EXPORT_PER_CPU_SYMBOL(__preempt_count);
+
DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
/*
@@ -1169,6 +1174,8 @@ void debug_stack_reset(void)
DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
EXPORT_PER_CPU_SYMBOL(current_task);
+DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
+EXPORT_PER_CPU_SYMBOL(__preempt_count);
DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
#ifdef CONFIG_CC_STACKPROTECTOR
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 4041c24ae7db..c37dc37e8317 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -1,12 +1,6 @@
#ifndef ARCH_X86_CPU_H
#define ARCH_X86_CPU_H
-struct cpu_model_info {
- int vendor;
- int family;
- const char *model_names[16];
-};
-
/* attempt to consolidate cpu attributes */
struct cpu_dev {
const char *c_vendor;
@@ -14,15 +8,23 @@ struct cpu_dev {
/* some have two possibilities for cpuid string */
const char *c_ident[2];
- struct cpu_model_info c_models[4];
-
void (*c_early_init)(struct cpuinfo_x86 *);
void (*c_bsp_init)(struct cpuinfo_x86 *);
void (*c_init)(struct cpuinfo_x86 *);
void (*c_identify)(struct cpuinfo_x86 *);
void (*c_detect_tlb)(struct cpuinfo_x86 *);
- unsigned int (*c_size_cache)(struct cpuinfo_x86 *, unsigned int);
int c_x86_vendor;
+#ifdef CONFIG_X86_32
+ /* Optional vendor specific routine to obtain the cache size. */
+ unsigned int (*legacy_cache_size)(struct cpuinfo_x86 *,
+ unsigned int);
+
+ /* Family/stepping-based lookup table for model names. */
+ struct legacy_cpu_model_info {
+ int family;
+ const char *model_names[16];
+ } legacy_models[5];
+#endif
};
struct _tlb_table {
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index ec7299566f79..dc1ec0dff939 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -665,8 +665,8 @@ static const struct cpu_dev intel_cpu_dev = {
.c_vendor = "Intel",
.c_ident = { "GenuineIntel" },
#ifdef CONFIG_X86_32
- .c_models = {
- { .vendor = X86_VENDOR_INTEL, .family = 4, .model_names =
+ .legacy_models = {
+ { .family = 4, .model_names =
{
[0] = "486 DX-25/33",
[1] = "486 DX-50",
@@ -679,7 +679,7 @@ static const struct cpu_dev intel_cpu_dev = {
[9] = "486 DX/4-WB"
}
},
- { .vendor = X86_VENDOR_INTEL, .family = 5, .model_names =
+ { .family = 5, .model_names =
{
[0] = "Pentium 60/66 A-step",
[1] = "Pentium 60/66",
@@ -690,7 +690,7 @@ static const struct cpu_dev intel_cpu_dev = {
[8] = "Mobile Pentium MMX"
}
},
- { .vendor = X86_VENDOR_INTEL, .family = 6, .model_names =
+ { .family = 6, .model_names =
{
[0] = "Pentium Pro A-step",
[1] = "Pentium Pro",
@@ -704,7 +704,7 @@ static const struct cpu_dev intel_cpu_dev = {
[11] = "Pentium III (Tualatin)",
}
},
- { .vendor = X86_VENDOR_INTEL, .family = 15, .model_names =
+ { .family = 15, .model_names =
{
[0] = "Pentium 4 (Unknown)",
[1] = "Pentium 4 (Willamette)",
@@ -714,7 +714,7 @@ static const struct cpu_dev intel_cpu_dev = {
}
},
},
- .c_size_cache = intel_size_cache,
+ .legacy_cache_size = intel_size_cache,
#endif
.c_detect_tlb = intel_detect_tlb,
.c_early_init = early_init_intel,
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 1414c90feaba..0641113e2965 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -1,5 +1,5 @@
/*
- * Routines to indentify caches on Intel CPU.
+ * Routines to identify caches on Intel CPU.
*
* Changes:
* Venkatesh Pallipadi : Adding cache identification through cpuid(4)
diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c
index cd8b166a1735..de8b60a53f69 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-apei.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c
@@ -42,8 +42,7 @@ void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err)
struct mce m;
/* Only corrected MC is reported */
- if (!corrected || !(mem_err->validation_bits &
- CPER_MEM_VALID_PHYSICAL_ADDRESS))
+ if (!corrected || !(mem_err->validation_bits & CPER_MEM_VALID_PA))
return;
mce_setup(&m);
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 8355c84b9729..8a87a3224121 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1276,16 +1276,16 @@ void perf_events_lapic_init(void)
static int __kprobes
perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
{
- int ret;
u64 start_clock;
u64 finish_clock;
+ int ret;
if (!atomic_read(&active_events))
return NMI_DONE;
- start_clock = local_clock();
+ start_clock = sched_clock();
ret = x86_pmu.handle_irq(regs);
- finish_clock = local_clock();
+ finish_clock = sched_clock();
perf_sample_event_took(finish_clock - start_clock);
@@ -1506,7 +1506,7 @@ static int __init init_hw_perf_events(void)
err = amd_pmu_init();
break;
default:
- return 0;
+ err = -ENOTSUPP;
}
if (err != 0) {
pr_cont("no PMU driver, software events only.\n");
@@ -1883,26 +1883,21 @@ static struct pmu pmu = {
void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
{
- userpg->cap_usr_time = 0;
- userpg->cap_usr_time_zero = 0;
- userpg->cap_usr_rdpmc = x86_pmu.attr_rdpmc;
+ userpg->cap_user_time = 0;
+ userpg->cap_user_time_zero = 0;
+ userpg->cap_user_rdpmc = x86_pmu.attr_rdpmc;
userpg->pmc_width = x86_pmu.cntval_bits;
- if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
+ if (!sched_clock_stable)
return;
- if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
- return;
-
- userpg->cap_usr_time = 1;
+ userpg->cap_user_time = 1;
userpg->time_mult = this_cpu_read(cyc2ns);
userpg->time_shift = CYC2NS_SCALE_FACTOR;
userpg->time_offset = this_cpu_read(cyc2ns_offset) - now;
- if (sched_clock_stable && !check_tsc_disabled()) {
- userpg->cap_usr_time_zero = 1;
- userpg->time_zero = this_cpu_read(cyc2ns_offset);
- }
+ userpg->cap_user_time_zero = 1;
+ userpg->time_zero = this_cpu_read(cyc2ns_offset);
}
/*
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index cc16faae0538..fd00bb29425d 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -164,6 +164,11 @@ struct cpu_hw_events {
struct perf_guest_switch_msr guest_switch_msrs[X86_PMC_IDX_MAX];
/*
+ * Intel checkpoint mask
+ */
+ u64 intel_cp_status;
+
+ /*
* manage shared (per-core, per-cpu) registers
* used on Intel NHM/WSM/SNB
*/
@@ -440,6 +445,7 @@ struct x86_pmu {
int lbr_nr; /* hardware stack size */
u64 lbr_sel_mask; /* LBR_SELECT valid bits */
const int *lbr_sel_map; /* lbr_select mappings */
+ bool lbr_double_abort; /* duplicated lbr aborts */
/*
* Extra registers for events
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index c62d88396ad5..0fa4f242f050 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -190,9 +190,9 @@ static struct extra_reg intel_snbep_extra_regs[] __read_mostly = {
EVENT_EXTRA_END
};
-EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
-EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
-EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2");
+EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
+EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
+EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2");
struct attribute *nhm_events_attrs[] = {
EVENT_PTR(mem_ld_nhm),
@@ -899,8 +899,8 @@ static __initconst const u64 atom_hw_cache_event_ids
static struct extra_reg intel_slm_extra_regs[] __read_mostly =
{
/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
- INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x768005ffff, RSP_0),
- INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x768005ffff, RSP_1),
+ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x768005ffffull, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x768005ffffull, RSP_1),
EVENT_EXTRA_END
};
@@ -1184,6 +1184,11 @@ static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
wrmsrl(hwc->config_base, ctrl_val);
}
+static inline bool event_is_checkpointed(struct perf_event *event)
+{
+ return (event->hw.config & HSW_IN_TX_CHECKPOINTED) != 0;
+}
+
static void intel_pmu_disable_event(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
@@ -1197,6 +1202,7 @@ static void intel_pmu_disable_event(struct perf_event *event)
cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);
cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
+ cpuc->intel_cp_status &= ~(1ull << hwc->idx);
/*
* must disable before any actual event
@@ -1271,6 +1277,9 @@ static void intel_pmu_enable_event(struct perf_event *event)
if (event->attr.exclude_guest)
cpuc->intel_ctrl_host_mask |= (1ull << hwc->idx);
+ if (unlikely(event_is_checkpointed(event)))
+ cpuc->intel_cp_status |= (1ull << hwc->idx);
+
if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
intel_pmu_enable_fixed(hwc);
return;
@@ -1289,6 +1298,17 @@ static void intel_pmu_enable_event(struct perf_event *event)
int intel_pmu_save_and_restart(struct perf_event *event)
{
x86_perf_event_update(event);
+ /*
+ * For a checkpointed counter always reset back to 0. This
+ * avoids a situation where the counter overflows, aborts the
+ * transaction and is then set back to shortly before the
+ * overflow, and overflows and aborts again.
+ */
+ if (unlikely(event_is_checkpointed(event))) {
+ /* No race with NMIs because the counter should not be armed */
+ wrmsrl(event->hw.event_base, 0);
+ local64_set(&event->hw.prev_count, 0);
+ }
return x86_perf_event_set_period(event);
}
@@ -1372,6 +1392,13 @@ again:
x86_pmu.drain_pebs(regs);
}
+ /*
+ * Checkpointed counters can lead to 'spurious' PMIs because the
+ * rollback caused by the PMI will have cleared the overflow status
+ * bit. Therefore always force probe these counters.
+ */
+ status |= cpuc->intel_cp_status;
+
for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
struct perf_event *event = cpuc->events[bit];
@@ -1837,6 +1864,20 @@ static int hsw_hw_config(struct perf_event *event)
event->attr.precise_ip > 0))
return -EOPNOTSUPP;
+ if (event_is_checkpointed(event)) {
+ /*
+ * Sampling of checkpointed events can cause situations where
+ * the CPU constantly aborts because of a overflow, which is
+ * then checkpointed back and ignored. Forbid checkpointing
+ * for sampling.
+ *
+ * But still allow a long sampling period, so that perf stat
+ * from KVM works.
+ */
+ if (event->attr.sample_period > 0 &&
+ event->attr.sample_period < 0x7fffffff)
+ return -EOPNOTSUPP;
+ }
return 0;
}
@@ -2182,10 +2223,36 @@ static __init void intel_nehalem_quirk(void)
}
}
-EVENT_ATTR_STR(mem-loads, mem_ld_hsw, "event=0xcd,umask=0x1,ldlat=3");
-EVENT_ATTR_STR(mem-stores, mem_st_hsw, "event=0xd0,umask=0x82")
+EVENT_ATTR_STR(mem-loads, mem_ld_hsw, "event=0xcd,umask=0x1,ldlat=3");
+EVENT_ATTR_STR(mem-stores, mem_st_hsw, "event=0xd0,umask=0x82")
+
+/* Haswell special events */
+EVENT_ATTR_STR(tx-start, tx_start, "event=0xc9,umask=0x1");
+EVENT_ATTR_STR(tx-commit, tx_commit, "event=0xc9,umask=0x2");
+EVENT_ATTR_STR(tx-abort, tx_abort, "event=0xc9,umask=0x4");
+EVENT_ATTR_STR(tx-capacity, tx_capacity, "event=0x54,umask=0x2");
+EVENT_ATTR_STR(tx-conflict, tx_conflict, "event=0x54,umask=0x1");
+EVENT_ATTR_STR(el-start, el_start, "event=0xc8,umask=0x1");
+EVENT_ATTR_STR(el-commit, el_commit, "event=0xc8,umask=0x2");
+EVENT_ATTR_STR(el-abort, el_abort, "event=0xc8,umask=0x4");
+EVENT_ATTR_STR(el-capacity, el_capacity, "event=0x54,umask=0x2");
+EVENT_ATTR_STR(el-conflict, el_conflict, "event=0x54,umask=0x1");
+EVENT_ATTR_STR(cycles-t, cycles_t, "event=0x3c,in_tx=1");
+EVENT_ATTR_STR(cycles-ct, cycles_ct, "event=0x3c,in_tx=1,in_tx_cp=1");
static struct attribute *hsw_events_attrs[] = {
+ EVENT_PTR(tx_start),
+ EVENT_PTR(tx_commit),
+ EVENT_PTR(tx_abort),
+ EVENT_PTR(tx_capacity),
+ EVENT_PTR(tx_conflict),
+ EVENT_PTR(el_start),
+ EVENT_PTR(el_commit),
+ EVENT_PTR(el_abort),
+ EVENT_PTR(el_capacity),
+ EVENT_PTR(el_conflict),
+ EVENT_PTR(cycles_t),
+ EVENT_PTR(cycles_ct),
EVENT_PTR(mem_ld_hsw),
EVENT_PTR(mem_st_hsw),
NULL
@@ -2325,6 +2392,7 @@ __init int intel_pmu_init(void)
break;
case 55: /* Atom 22nm "Silvermont" */
+ case 77: /* Avoton "Silvermont" */
memcpy(hw_cache_event_ids, slm_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
@@ -2451,6 +2519,7 @@ __init int intel_pmu_init(void)
x86_pmu.hw_config = hsw_hw_config;
x86_pmu.get_event_constraints = hsw_get_event_constraints;
x86_pmu.cpu_events = hsw_events_attrs;
+ x86_pmu.lbr_double_abort = true;
pr_cont("Haswell events, ");
break;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 63438aad177f..c1760ff3c757 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -12,6 +12,7 @@
#define BTS_BUFFER_SIZE (PAGE_SIZE << 4)
#define PEBS_BUFFER_SIZE PAGE_SIZE
+#define PEBS_FIXUP_SIZE PAGE_SIZE
/*
* pebs_record_32 for p4 and core not supported
@@ -182,18 +183,32 @@ struct pebs_record_nhm {
* Same as pebs_record_nhm, with two additional fields.
*/
struct pebs_record_hsw {
- struct pebs_record_nhm nhm;
- /*
- * Real IP of the event. In the Intel documentation this
- * is called eventingrip.
- */
- u64 real_ip;
- /*
- * TSX tuning information field: abort cycles and abort flags.
- */
- u64 tsx_tuning;
+ u64 flags, ip;
+ u64 ax, bx, cx, dx;
+ u64 si, di, bp, sp;
+ u64 r8, r9, r10, r11;
+ u64 r12, r13, r14, r15;
+ u64 status, dla, dse, lat;
+ u64 real_ip, tsx_tuning;
+};
+
+union hsw_tsx_tuning {
+ struct {
+ u32 cycles_last_block : 32,
+ hle_abort : 1,
+ rtm_abort : 1,
+ instruction_abort : 1,
+ non_instruction_abort : 1,
+ retry : 1,
+ data_conflict : 1,
+ capacity_writes : 1,
+ capacity_reads : 1;
+ };
+ u64 value;
};
+#define PEBS_HSW_TSX_FLAGS 0xff00000000ULL
+
void init_debug_store_on_cpu(int cpu)
{
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
@@ -214,12 +229,14 @@ void fini_debug_store_on_cpu(int cpu)
wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
}
+static DEFINE_PER_CPU(void *, insn_buffer);
+
static int alloc_pebs_buffer(int cpu)
{
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
int node = cpu_to_node(cpu);
int max, thresh = 1; /* always use a single PEBS record */
- void *buffer;
+ void *buffer, *ibuffer;
if (!x86_pmu.pebs)
return 0;
@@ -228,6 +245,19 @@ static int alloc_pebs_buffer(int cpu)
if (unlikely(!buffer))
return -ENOMEM;
+ /*
+ * HSW+ already provides us the eventing ip; no need to allocate this
+ * buffer then.
+ */
+ if (x86_pmu.intel_cap.pebs_format < 2) {
+ ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
+ if (!ibuffer) {
+ kfree(buffer);
+ return -ENOMEM;
+ }
+ per_cpu(insn_buffer, cpu) = ibuffer;
+ }
+
max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
ds->pebs_buffer_base = (u64)(unsigned long)buffer;
@@ -248,6 +278,9 @@ static void release_pebs_buffer(int cpu)
if (!ds || !x86_pmu.pebs)
return;
+ kfree(per_cpu(insn_buffer, cpu));
+ per_cpu(insn_buffer, cpu) = NULL;
+
kfree((void *)(unsigned long)ds->pebs_buffer_base);
ds->pebs_buffer_base = 0;
}
@@ -584,6 +617,7 @@ struct event_constraint intel_snb_pebs_event_constraints[] = {
INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */
EVENT_CONSTRAINT_END
};
@@ -714,6 +748,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
unsigned long old_to, to = cpuc->lbr_entries[0].to;
unsigned long ip = regs->ip;
int is_64bit = 0;
+ void *kaddr;
/*
* We don't need to fixup if the PEBS assist is fault like
@@ -737,7 +772,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
* unsigned math, either ip is before the start (impossible) or
* the basic block is larger than 1 page (sanity)
*/
- if ((ip - to) > PAGE_SIZE)
+ if ((ip - to) > PEBS_FIXUP_SIZE)
return 0;
/*
@@ -748,29 +783,33 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
return 1;
}
+ if (!kernel_ip(ip)) {
+ int size, bytes;
+ u8 *buf = this_cpu_read(insn_buffer);
+
+ size = ip - to; /* Must fit our buffer, see above */
+ bytes = copy_from_user_nmi(buf, (void __user *)to, size);
+ if (bytes != size)
+ return 0;
+
+ kaddr = buf;
+ } else {
+ kaddr = (void *)to;
+ }
+
do {
struct insn insn;
- u8 buf[MAX_INSN_SIZE];
- void *kaddr;
old_to = to;
- if (!kernel_ip(ip)) {
- int bytes, size = MAX_INSN_SIZE;
-
- bytes = copy_from_user_nmi(buf, (void __user *)to, size);
- if (bytes != size)
- return 0;
-
- kaddr = buf;
- } else
- kaddr = (void *)to;
#ifdef CONFIG_X86_64
is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32);
#endif
insn_init(&insn, kaddr, is_64bit);
insn_get_length(&insn);
+
to += insn.length;
+ kaddr += insn.length;
} while (to < ip);
if (to == ip) {
@@ -785,16 +824,34 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
return 0;
}
+static inline u64 intel_hsw_weight(struct pebs_record_hsw *pebs)
+{
+ if (pebs->tsx_tuning) {
+ union hsw_tsx_tuning tsx = { .value = pebs->tsx_tuning };
+ return tsx.cycles_last_block;
+ }
+ return 0;
+}
+
+static inline u64 intel_hsw_transaction(struct pebs_record_hsw *pebs)
+{
+ u64 txn = (pebs->tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;
+
+ /* For RTM XABORTs also log the abort code from AX */
+ if ((txn & PERF_TXN_TRANSACTION) && (pebs->ax & 1))
+ txn |= ((pebs->ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
+ return txn;
+}
+
static void __intel_pmu_pebs_event(struct perf_event *event,
struct pt_regs *iregs, void *__pebs)
{
/*
- * We cast to pebs_record_nhm to get the load latency data
- * if extra_reg MSR_PEBS_LD_LAT_THRESHOLD used
+ * We cast to the biggest pebs_record but are careful not to
+ * unconditionally access the 'extra' entries.
*/
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
- struct pebs_record_nhm *pebs = __pebs;
- struct pebs_record_hsw *pebs_hsw = __pebs;
+ struct pebs_record_hsw *pebs = __pebs;
struct perf_sample_data data;
struct pt_regs regs;
u64 sample_type;
@@ -853,7 +910,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
regs.sp = pebs->sp;
if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
- regs.ip = pebs_hsw->real_ip;
+ regs.ip = pebs->real_ip;
regs.flags |= PERF_EFLAGS_EXACT;
} else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
regs.flags |= PERF_EFLAGS_EXACT;
@@ -861,9 +918,18 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
regs.flags &= ~PERF_EFLAGS_EXACT;
if ((event->attr.sample_type & PERF_SAMPLE_ADDR) &&
- x86_pmu.intel_cap.pebs_format >= 1)
+ x86_pmu.intel_cap.pebs_format >= 1)
data.addr = pebs->dla;
+ if (x86_pmu.intel_cap.pebs_format >= 2) {
+ /* Only set the TSX weight when no memory weight. */
+ if ((event->attr.sample_type & PERF_SAMPLE_WEIGHT) && !fll)
+ data.weight = intel_hsw_weight(pebs);
+
+ if (event->attr.sample_type & PERF_SAMPLE_TRANSACTION)
+ data.txn = intel_hsw_transaction(pebs);
+ }
+
if (has_branch_stack(event))
data.br_stack = &cpuc->lbr_stack;
@@ -912,17 +978,34 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
__intel_pmu_pebs_event(event, iregs, at);
}
-static void __intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, void *at,
- void *top)
+static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct debug_store *ds = cpuc->ds;
struct perf_event *event = NULL;
+ void *at, *top;
u64 status = 0;
int bit;
+ if (!x86_pmu.pebs_active)
+ return;
+
+ at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
+ top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
+
ds->pebs_index = ds->pebs_buffer_base;
+ if (unlikely(at > top))
+ return;
+
+ /*
+ * Should not happen, we program the threshold at 1 and do not
+ * set a reset value.
+ */
+ WARN_ONCE(top - at > x86_pmu.max_pebs_events * x86_pmu.pebs_record_size,
+ "Unexpected number of pebs records %ld\n",
+ (long)(top - at) / x86_pmu.pebs_record_size);
+
for (; at < top; at += x86_pmu.pebs_record_size) {
struct pebs_record_nhm *p = at;
@@ -950,61 +1033,6 @@ static void __intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, void *at,
}
}
-static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
-{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
- struct debug_store *ds = cpuc->ds;
- struct pebs_record_nhm *at, *top;
- int n;
-
- if (!x86_pmu.pebs_active)
- return;
-
- at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
- top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
-
- ds->pebs_index = ds->pebs_buffer_base;
-
- n = top - at;
- if (n <= 0)
- return;
-
- /*
- * Should not happen, we program the threshold at 1 and do not
- * set a reset value.
- */
- WARN_ONCE(n > x86_pmu.max_pebs_events,
- "Unexpected number of pebs records %d\n", n);
-
- return __intel_pmu_drain_pebs_nhm(iregs, at, top);
-}
-
-static void intel_pmu_drain_pebs_hsw(struct pt_regs *iregs)
-{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
- struct debug_store *ds = cpuc->ds;
- struct pebs_record_hsw *at, *top;
- int n;
-
- if (!x86_pmu.pebs_active)
- return;
-
- at = (struct pebs_record_hsw *)(unsigned long)ds->pebs_buffer_base;
- top = (struct pebs_record_hsw *)(unsigned long)ds->pebs_index;
-
- n = top - at;
- if (n <= 0)
- return;
- /*
- * Should not happen, we program the threshold at 1 and do not
- * set a reset value.
- */
- WARN_ONCE(n > x86_pmu.max_pebs_events,
- "Unexpected number of pebs records %d\n", n);
-
- return __intel_pmu_drain_pebs_nhm(iregs, at, top);
-}
-
/*
* BTS, PEBS probe and setup
*/
@@ -1039,7 +1067,7 @@ void intel_ds_init(void)
case 2:
pr_cont("PEBS fmt2%c, ", pebs_type);
x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
- x86_pmu.drain_pebs = intel_pmu_drain_pebs_hsw;
+ x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
break;
default:
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index d5be06a5005e..90ee6c1d0542 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -284,6 +284,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
int lbr_format = x86_pmu.intel_cap.lbr_format;
u64 tos = intel_pmu_lbr_tos();
int i;
+ int out = 0;
for (i = 0; i < x86_pmu.lbr_nr; i++) {
unsigned long lbr_idx = (tos - i) & mask;
@@ -306,15 +307,27 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
}
from = (u64)((((s64)from) << skip) >> skip);
- cpuc->lbr_entries[i].from = from;
- cpuc->lbr_entries[i].to = to;
- cpuc->lbr_entries[i].mispred = mis;
- cpuc->lbr_entries[i].predicted = pred;
- cpuc->lbr_entries[i].in_tx = in_tx;
- cpuc->lbr_entries[i].abort = abort;
- cpuc->lbr_entries[i].reserved = 0;
+ /*
+ * Some CPUs report duplicated abort records,
+ * with the second entry not having an abort bit set.
+ * Skip them here. This loop runs backwards,
+ * so we need to undo the previous record.
+ * If the abort just happened outside the window
+ * the extra entry cannot be removed.
+ */
+ if (abort && x86_pmu.lbr_double_abort && out > 0)
+ out--;
+
+ cpuc->lbr_entries[out].from = from;
+ cpuc->lbr_entries[out].to = to;
+ cpuc->lbr_entries[out].mispred = mis;
+ cpuc->lbr_entries[out].predicted = pred;
+ cpuc->lbr_entries[out].in_tx = in_tx;
+ cpuc->lbr_entries[out].abort = abort;
+ cpuc->lbr_entries[out].reserved = 0;
+ out++;
}
- cpuc->lbr_stack.nr = i;
+ cpuc->lbr_stack.nr = out;
}
void intel_pmu_lbr_read(void)
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 8ed44589b0e4..4118f9f68315 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -2706,14 +2706,14 @@ static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
box->hrtimer.function = uncore_pmu_hrtimer;
}
-struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int cpu)
+static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int node)
{
struct intel_uncore_box *box;
int i, size;
size = sizeof(*box) + type->num_shared_regs * sizeof(struct intel_uncore_extra_reg);
- box = kzalloc_node(size, GFP_KERNEL, cpu_to_node(cpu));
+ box = kzalloc_node(size, GFP_KERNEL, node);
if (!box)
return NULL;
@@ -3031,7 +3031,7 @@ static int uncore_validate_group(struct intel_uncore_pmu *pmu,
struct intel_uncore_box *fake_box;
int ret = -EINVAL, n;
- fake_box = uncore_alloc_box(pmu->type, smp_processor_id());
+ fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE);
if (!fake_box)
return -ENOMEM;
@@ -3294,7 +3294,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
}
type = pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
- box = uncore_alloc_box(type, 0);
+ box = uncore_alloc_box(type, NUMA_NO_NODE);
if (!box)
return -ENOMEM;
@@ -3499,7 +3499,7 @@ static int uncore_cpu_prepare(int cpu, int phys_id)
if (pmu->func_id < 0)
pmu->func_id = j;
- box = uncore_alloc_box(type, cpu);
+ box = uncore_alloc_box(type, cpu_to_node(cpu));
if (!box)
return -ENOMEM;
diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c
index 88db010845cb..384df5105fbc 100644
--- a/arch/x86/kernel/cpu/rdrand.c
+++ b/arch/x86/kernel/cpu/rdrand.c
@@ -31,20 +31,6 @@ static int __init x86_rdrand_setup(char *s)
}
__setup("nordrand", x86_rdrand_setup);
-/* We can't use arch_get_random_long() here since alternatives haven't run */
-static inline int rdrand_long(unsigned long *v)
-{
- int ok;
- asm volatile("1: " RDRAND_LONG "\n\t"
- "jc 2f\n\t"
- "decl %0\n\t"
- "jnz 1b\n\t"
- "2:"
- : "=r" (ok), "=a" (*v)
- : "0" (RDRAND_RETRY_LOOPS));
- return ok;
-}
-
/*
* Force a reseed cycle; we are architecturally guaranteed a reseed
* after no more than 512 128-bit chunks of random data. This also
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index f2cc63e9cf08..b6f794aa1693 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -1,5 +1,5 @@
/*
- * Routines to indentify additional cpu features that are scattered in
+ * Routines to identify additional cpu features that are scattered in
* cpuid space.
*/
#include <linux/cpu.h>
diff --git a/arch/x86/kernel/cpu/umc.c b/arch/x86/kernel/cpu/umc.c
index 202759a14121..75c5ad5d35cc 100644
--- a/arch/x86/kernel/cpu/umc.c
+++ b/arch/x86/kernel/cpu/umc.c
@@ -11,8 +11,8 @@
static const struct cpu_dev umc_cpu_dev = {
.c_vendor = "UMC",
.c_ident = { "UMC UMC UMC" },
- .c_models = {
- { .vendor = X86_VENDOR_UMC, .family = 4, .model_names =
+ .legacy_models = {
+ { .family = 4, .model_names =
{
[1] = "U5D",
[2] = "U5S",
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 376dc7873447..d35078ea1446 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -20,22 +20,13 @@
#include <asm/hpet.h>
#include <asm/apic.h>
#include <asm/pci_x86.h>
+#include <asm/setup.h>
__initdata u64 initial_dtb;
char __initdata cmd_line[COMMAND_LINE_SIZE];
int __initdata of_ioapic;
-unsigned long pci_address_to_pio(phys_addr_t address)
-{
- /*
- * The ioport address can be directly used by inX / outX
- */
- BUG_ON(address >= (1 << 16));
- return (unsigned long)address;
-}
-EXPORT_SYMBOL_GPL(pci_address_to_pio);
-
void __init early_init_dt_scan_chosen_arch(unsigned long node)
{
BUG();
@@ -51,15 +42,6 @@ void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
return __alloc_bootmem(size, align, __pa(MAX_DMA_ADDRESS));
}
-#ifdef CONFIG_BLK_DEV_INITRD
-void __init early_init_dt_setup_initrd_arch(u64 start, u64 end)
-{
- initrd_start = (unsigned long)__va(start);
- initrd_end = (unsigned long)__va(end);
- initrd_below_start_ok = 1;
-}
-#endif
-
void __init add_dtb(u64 data)
{
initial_dtb = data + offsetof(struct setup_data, data);
@@ -105,7 +87,6 @@ struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus)
static int x86_of_pci_irq_enable(struct pci_dev *dev)
{
- struct of_irq oirq;
u32 virq;
int ret;
u8 pin;
@@ -116,12 +97,7 @@ static int x86_of_pci_irq_enable(struct pci_dev *dev)
if (!pin)
return 0;
- ret = of_irq_map_pci(dev, &oirq);
- if (ret)
- return ret;
-
- virq = irq_create_of_mapping(oirq.controller, oirq.specifier,
- oirq.size);
+ virq = of_irq_parse_and_map_pci(dev, 0, 0);
if (virq == 0)
return -EINVAL;
dev->irq = virq;
@@ -230,7 +206,7 @@ static void __init dtb_apic_setup(void)
static void __init x86_flattree_get_config(void)
{
u32 size, map_len;
- void *new_dtb;
+ struct boot_param_header *dt;
if (!initial_dtb)
return;
@@ -238,24 +214,17 @@ static void __init x86_flattree_get_config(void)
map_len = max(PAGE_SIZE - (initial_dtb & ~PAGE_MASK),
(u64)sizeof(struct boot_param_header));
- initial_boot_params = early_memremap(initial_dtb, map_len);
- size = be32_to_cpu(initial_boot_params->totalsize);
+ dt = early_memremap(initial_dtb, map_len);
+ size = be32_to_cpu(dt->totalsize);
if (map_len < size) {
- early_iounmap(initial_boot_params, map_len);
- initial_boot_params = early_memremap(initial_dtb, size);
+ early_iounmap(dt, map_len);
+ dt = early_memremap(initial_dtb, size);
map_len = size;
}
- new_dtb = alloc_bootmem(size);
- memcpy(new_dtb, initial_boot_params, size);
- early_iounmap(initial_boot_params, map_len);
-
- initial_boot_params = new_dtb;
-
- /* root level address cells */
- of_scan_flat_dt(early_init_dt_scan_root, NULL);
-
- unflatten_device_tree();
+ initial_boot_params = dt;
+ unflatten_and_copy_device_tree();
+ early_iounmap(dt, map_len);
}
#else
static inline void x86_flattree_get_config(void) { }
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index deb6421c9e69..d9c12d3022a7 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -25,12 +25,17 @@ unsigned int code_bytes = 64;
int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
static int die_counter;
-void printk_address(unsigned long address, int reliable)
+static void printk_stack_address(unsigned long address, int reliable)
{
pr_cont(" [<%p>] %s%pB\n",
(void *)address, reliable ? "" : "? ", (void *)address);
}
+void printk_address(unsigned long address)
+{
+ pr_cont(" [<%p>] %pS\n", (void *)address, (void *)address);
+}
+
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
static void
print_ftrace_graph_addr(unsigned long addr, void *data,
@@ -151,7 +156,7 @@ static void print_trace_address(void *data, unsigned long addr, int reliable)
{
touch_nmi_watchdog();
printk(data);
- printk_address(addr, reliable);
+ printk_stack_address(addr, reliable);
}
static const struct stacktrace_ops print_trace_ops = {
@@ -281,7 +286,7 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err)
#else
/* Executive summary in case the oops scrolled away */
printk(KERN_ALERT "RIP ");
- printk_address(regs->ip, 1);
+ printk_address(regs->ip);
printk(" RSP <%016lx>\n", regs->sp);
#endif
return 0;
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index d15f575a861b..01d1c187c9f9 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -14,9 +14,11 @@
#include <xen/hvc-console.h>
#include <asm/pci-direct.h>
#include <asm/fixmap.h>
-#include <asm/mrst.h>
+#include <asm/intel-mid.h>
#include <asm/pgtable.h>
#include <linux/usb/ehci_def.h>
+#include <linux/efi.h>
+#include <asm/efi.h>
/* Simple VGA output */
#define VGABASE (__ISA_IO_base + 0xb8000)
@@ -234,6 +236,11 @@ static int __init setup_early_printk(char *buf)
early_console_register(&early_hsu_console, keep);
}
#endif
+#ifdef CONFIG_EARLY_PRINTK_EFI
+ if (!strncmp(buf, "efi", 3))
+ early_console_register(&early_efi_console, keep);
+#endif
+
buf++;
}
return 0;
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index f0dcb0ceb6a2..fd1bc1b15e6d 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -362,12 +362,9 @@ END(ret_from_exception)
#ifdef CONFIG_PREEMPT
ENTRY(resume_kernel)
DISABLE_INTERRUPTS(CLBR_ANY)
- cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
- jnz restore_all
need_resched:
- movl TI_flags(%ebp), %ecx # need_resched set ?
- testb $_TIF_NEED_RESCHED, %cl
- jz restore_all
+ cmpl $0,PER_CPU_VAR(__preempt_count)
+ jnz restore_all
testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off (exception path) ?
jz restore_all
call preempt_schedule_irq
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 1b69951a81e2..603be7c70675 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -487,21 +487,6 @@ ENDPROC(native_usergs_sysret64)
TRACE_IRQS_OFF
.endm
-ENTRY(save_rest)
- PARTIAL_FRAME 1 (REST_SKIP+8)
- movq 5*8+16(%rsp), %r11 /* save return address */
- movq_cfi rbx, RBX+16
- movq_cfi rbp, RBP+16
- movq_cfi r12, R12+16
- movq_cfi r13, R13+16
- movq_cfi r14, R14+16
- movq_cfi r15, R15+16
- movq %r11, 8(%rsp) /* return address */
- FIXUP_TOP_OF_STACK %r11, 16
- ret
- CFI_ENDPROC
-END(save_rest)
-
/* save complete stack frame */
.pushsection .kprobes.text, "ax"
ENTRY(save_paranoid)
@@ -1118,10 +1103,8 @@ retint_signal:
/* Returning to kernel space. Check if we need preemption */
/* rcx: threadinfo. interrupts off. */
ENTRY(retint_kernel)
- cmpl $0,TI_preempt_count(%rcx)
+ cmpl $0,PER_CPU_VAR(__preempt_count)
jnz retint_restore_args
- bt $TIF_NEED_RESCHED,TI_flags(%rcx)
- jnc retint_restore_args
bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
jnc retint_restore_args
call preempt_schedule_irq
@@ -1357,7 +1340,7 @@ bad_gs:
.previous
/* Call softirq on interrupt stack. Interrupts are off. */
-ENTRY(call_softirq)
+ENTRY(do_softirq_own_stack)
CFI_STARTPROC
pushq_cfi %rbp
CFI_REL_OFFSET rbp,0
@@ -1374,7 +1357,7 @@ ENTRY(call_softirq)
decl PER_CPU_VAR(irq_count)
ret
CFI_ENDPROC
-END(call_softirq)
+END(do_softirq_own_stack)
#ifdef CONFIG_XEN
zeroentry xen_hypervisor_callback xen_do_hypervisor_callback
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 06f87bece92a..c61a14a4a310 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -35,8 +35,8 @@ asmlinkage void __init i386_start_kernel(void)
/* Call the subarch specific early setup function */
switch (boot_params.hdr.hardware_subarch) {
- case X86_SUBARCH_MRST:
- x86_mrst_early_setup();
+ case X86_SUBARCH_INTEL_MID:
+ x86_intel_mid_early_setup();
break;
case X86_SUBARCH_CE4100:
x86_ce4100_early_setup();
diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c
index 0fa69127209a..05fd74f537d6 100644
--- a/arch/x86/kernel/i386_ksyms_32.c
+++ b/arch/x86/kernel/i386_ksyms_32.c
@@ -37,3 +37,10 @@ EXPORT_SYMBOL(strstr);
EXPORT_SYMBOL(csum_partial);
EXPORT_SYMBOL(empty_zero_page);
+
+#ifdef CONFIG_PREEMPT
+EXPORT_SYMBOL(___preempt_schedule);
+#ifdef CONFIG_CONTEXT_TRACKING
+EXPORT_SYMBOL(___preempt_schedule_context);
+#endif
+#endif
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 9a5c460404dc..2e977b5d61dd 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -312,8 +312,7 @@ static void init_8259A(int auto_eoi)
*/
outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */
- /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 on x86-64,
- to 0x20-0x27 on i386 */
+ /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */
outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR);
/* 8259A-1 (the master) has a slave on IR2 */
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 4186755f1d7c..d7fcbedc9c43 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -100,9 +100,6 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
irqctx->tinfo.task = curctx->tinfo.task;
irqctx->tinfo.previous_esp = current_stack_pointer;
- /* Copy the preempt_count so that the [soft]irq checks work. */
- irqctx->tinfo.preempt_count = curctx->tinfo.preempt_count;
-
if (unlikely(overflow))
call_on_stack(print_stack_overflow, isp);
@@ -131,7 +128,6 @@ void irq_ctx_init(int cpu)
THREAD_SIZE_ORDER));
memset(&irqctx->tinfo, 0, sizeof(struct thread_info));
irqctx->tinfo.cpu = cpu;
- irqctx->tinfo.preempt_count = HARDIRQ_OFFSET;
irqctx->tinfo.addr_limit = MAKE_MM_SEG(0);
per_cpu(hardirq_ctx, cpu) = irqctx;
@@ -149,35 +145,21 @@ void irq_ctx_init(int cpu)
cpu, per_cpu(hardirq_ctx, cpu), per_cpu(softirq_ctx, cpu));
}
-asmlinkage void do_softirq(void)
+void do_softirq_own_stack(void)
{
- unsigned long flags;
struct thread_info *curctx;
union irq_ctx *irqctx;
u32 *isp;
- if (in_interrupt())
- return;
-
- local_irq_save(flags);
-
- if (local_softirq_pending()) {
- curctx = current_thread_info();
- irqctx = __this_cpu_read(softirq_ctx);
- irqctx->tinfo.task = curctx->task;
- irqctx->tinfo.previous_esp = current_stack_pointer;
-
- /* build the stack frame on the softirq stack */
- isp = (u32 *) ((char *)irqctx + sizeof(*irqctx));
+ curctx = current_thread_info();
+ irqctx = __this_cpu_read(softirq_ctx);
+ irqctx->tinfo.task = curctx->task;
+ irqctx->tinfo.previous_esp = current_stack_pointer;
- call_on_stack(__do_softirq, isp);
- /*
- * Shouldn't happen, we returned above if in_interrupt():
- */
- WARN_ON_ONCE(softirq_count());
- }
+ /* build the stack frame on the softirq stack */
+ isp = (u32 *) ((char *)irqctx + sizeof(*irqctx));
- local_irq_restore(flags);
+ call_on_stack(__do_softirq, isp);
}
bool handle_irq(unsigned irq, struct pt_regs *regs)
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index d04d3ecded62..4d1c746892eb 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -87,24 +87,3 @@ bool handle_irq(unsigned irq, struct pt_regs *regs)
generic_handle_irq_desc(irq, desc);
return true;
}
-
-
-extern void call_softirq(void);
-
-asmlinkage void do_softirq(void)
-{
- __u32 pending;
- unsigned long flags;
-
- if (in_interrupt())
- return;
-
- local_irq_save(flags);
- pending = local_softirq_pending();
- /* Switch to interrupt stack */
- if (pending) {
- call_softirq();
- WARN_ON_ONCE(softirq_count());
- }
- local_irq_restore(flags);
-}
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index ee11b7dfbfbb..26d5a55a2736 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -42,15 +42,27 @@ static void __jump_label_transform(struct jump_entry *entry,
int init)
{
union jump_code_union code;
+ const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5];
if (type == JUMP_LABEL_ENABLE) {
- /*
- * We are enabling this jump label. If it is not a nop
- * then something must have gone wrong.
- */
- if (unlikely(memcmp((void *)entry->code, ideal_nop, 5) != 0))
- bug_at((void *)entry->code, __LINE__);
+ if (init) {
+ /*
+ * Jump label is enabled for the first time.
+ * So we expect a default_nop...
+ */
+ if (unlikely(memcmp((void *)entry->code, default_nop, 5)
+ != 0))
+ bug_at((void *)entry->code, __LINE__);
+ } else {
+ /*
+ * ...otherwise expect an ideal_nop. Otherwise
+ * something went horribly wrong.
+ */
+ if (unlikely(memcmp((void *)entry->code, ideal_nop, 5)
+ != 0))
+ bug_at((void *)entry->code, __LINE__);
+ }
code.jump = 0xe9;
code.offset = entry->target -
@@ -63,7 +75,6 @@ static void __jump_label_transform(struct jump_entry *entry,
* are converting the default nop to the ideal nop.
*/
if (init) {
- const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
if (unlikely(memcmp((void *)entry->code, default_nop, 5) != 0))
bug_at((void *)entry->code, __LINE__);
} else {
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 697b93af02dd..b2046e4d0b59 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -609,7 +609,7 @@ static struct dentry *d_kvm_debug;
struct dentry *kvm_init_debugfs(void)
{
- d_kvm_debug = debugfs_create_dir("kvm", NULL);
+ d_kvm_debug = debugfs_create_dir("kvm-guest", NULL);
if (!d_kvm_debug)
printk(KERN_WARNING "Could not create 'kvm' debugfs directory\n");
@@ -775,11 +775,22 @@ void __init kvm_spinlock_init(void)
if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
return;
- printk(KERN_INFO "KVM setup paravirtual spinlock\n");
+ pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning);
+ pv_lock_ops.unlock_kick = kvm_unlock_kick;
+}
+
+static __init int kvm_spinlock_init_jump(void)
+{
+ if (!kvm_para_available())
+ return 0;
+ if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
+ return 0;
static_key_slow_inc(&paravirt_ticketlocks_enabled);
+ printk(KERN_INFO "KVM setup paravirtual spinlock\n");
- pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning);
- pv_lock_ops.unlock_kick = kvm_unlock_kick;
+ return 0;
}
+early_initcall(kvm_spinlock_init_jump);
+
#endif /* CONFIG_PARAVIRT_SPINLOCKS */
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 7123b5df479d..af99f71aeb7f 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -216,6 +216,7 @@ int apply_microcode_amd(int cpu)
/* need to apply patch? */
if (rev >= mc_amd->hdr.patch_id) {
c->microcode = rev;
+ uci->cpu_sig.rev = rev;
return 0;
}
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 88458faea2f8..05266b5aae22 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -46,7 +46,7 @@ static struct class *msr_class;
static loff_t msr_seek(struct file *file, loff_t offset, int orig)
{
loff_t ret;
- struct inode *inode = file->f_mapping->host;
+ struct inode *inode = file_inode(file);
mutex_lock(&inode->i_mutex);
switch (orig) {
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index ba77ebc2c353..6fcb49ce50a1 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -113,10 +113,10 @@ static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2
u64 before, delta, whole_msecs;
int remainder_ns, decimal_msecs, thishandled;
- before = local_clock();
+ before = sched_clock();
thishandled = a->handler(type, regs);
handled += thishandled;
- delta = local_clock() - before;
+ delta = sched_clock() - before;
trace_nmi_handler(a->handler, (int)delta, thishandled);
if (delta < nmi_longest_ns)
diff --git a/arch/x86/kernel/preempt.S b/arch/x86/kernel/preempt.S
new file mode 100644
index 000000000000..ca7f0d58a87d
--- /dev/null
+++ b/arch/x86/kernel/preempt.S
@@ -0,0 +1,25 @@
+
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+#include <asm/asm.h>
+#include <asm/calling.h>
+
+ENTRY(___preempt_schedule)
+ CFI_STARTPROC
+ SAVE_ALL
+ call preempt_schedule
+ RESTORE_ALL
+ ret
+ CFI_ENDPROC
+
+#ifdef CONFIG_CONTEXT_TRACKING
+
+ENTRY(___preempt_schedule_context)
+ CFI_STARTPROC
+ SAVE_ALL
+ call preempt_schedule_context
+ RESTORE_ALL
+ ret
+ CFI_ENDPROC
+
+#endif
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index c83516be1052..3fb8d95ab8b5 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -391,9 +391,9 @@ static void amd_e400_idle(void)
* The switch back from broadcast mode needs to be
* called with interrupts disabled.
*/
- local_irq_disable();
- clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
- local_irq_enable();
+ local_irq_disable();
+ clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
+ local_irq_enable();
} else
default_idle();
}
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 884f98f69354..c2ec1aa6d454 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -292,6 +292,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
set_iopl_mask(next->iopl);
/*
+ * If it were not for PREEMPT_ACTIVE we could guarantee that the
+ * preempt_count of all tasks was equal here and this would not be
+ * needed.
+ */
+ task_thread_info(prev_p)->saved_preempt_count = this_cpu_read(__preempt_count);
+ this_cpu_write(__preempt_count, task_thread_info(next_p)->saved_preempt_count);
+
+ /*
* Now maybe handle debug registers and/or IO bitmaps
*/
if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV ||
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index bb1dc51bab05..176ad94e1d57 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -63,7 +63,7 @@ void __show_regs(struct pt_regs *regs, int all)
unsigned int ds, cs, es;
printk(KERN_DEFAULT "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
- printk_address(regs->ip, 1);
+ printk_address(regs->ip);
printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss,
regs->sp, regs->flags);
printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n",
@@ -363,6 +363,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
this_cpu_write(old_rsp, next->usersp);
this_cpu_write(current_task, next_p);
+ /*
+ * If it were not for PREEMPT_ACTIVE we could guarantee that the
+ * preempt_count of all tasks was equal here and this would not be
+ * needed.
+ */
+ task_thread_info(prev_p)->saved_preempt_count = this_cpu_read(__preempt_count);
+ this_cpu_write(__preempt_count, task_thread_info(next_p)->saved_preempt_count);
+
this_cpu_write(kernel_stack,
(unsigned long)task_stack_page(next_p) +
THREAD_SIZE - KERNEL_STACK_OFFSET);
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 563ed91e6faa..0f958e19573e 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -61,7 +61,7 @@ static int __init set_bios_reboot(const struct dmi_system_id *d)
if (reboot_type != BOOT_BIOS) {
reboot_type = BOOT_BIOS;
pr_info("%s series board detected. Selecting %s-method for reboots.\n",
- "BIOS", d->ident);
+ d->ident, "BIOS");
}
return 0;
}
@@ -117,7 +117,7 @@ static int __init set_pci_reboot(const struct dmi_system_id *d)
if (reboot_type != BOOT_CF9) {
reboot_type = BOOT_CF9;
pr_info("%s series board detected. Selecting %s-method for reboots.\n",
- "PCI", d->ident);
+ d->ident, "PCI");
}
return 0;
}
@@ -127,7 +127,7 @@ static int __init set_kbd_reboot(const struct dmi_system_id *d)
if (reboot_type != BOOT_KBD) {
reboot_type = BOOT_KBD;
pr_info("%s series board detected. Selecting %s-method for reboot.\n",
- "KBD", d->ident);
+ d->ident, "KBD");
}
return 0;
}
@@ -136,54 +136,100 @@ static int __init set_kbd_reboot(const struct dmi_system_id *d)
* This is a single dmi_table handling all reboot quirks.
*/
static struct dmi_system_id __initdata reboot_dmi_table[] = {
- { /* Handle problems with rebooting on Dell E520's */
- .callback = set_bios_reboot,
- .ident = "Dell E520",
+
+ /* Acer */
+ { /* Handle reboot issue on Acer Aspire one */
+ .callback = set_kbd_reboot,
+ .ident = "Acer Aspire One A110",
.matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "Dell DM061"),
+ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "AOA110"),
},
},
- { /* Handle problems with rebooting on Dell 1300's */
- .callback = set_bios_reboot,
- .ident = "Dell PowerEdge 1300",
+
+ /* Apple */
+ { /* Handle problems with rebooting on Apple MacBook5 */
+ .callback = set_pci_reboot,
+ .ident = "Apple MacBook5",
.matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
- DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 1300/"),
+ DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "MacBook5"),
},
},
- { /* Handle problems with rebooting on Dell 300's */
+ { /* Handle problems with rebooting on Apple MacBookPro5 */
+ .callback = set_pci_reboot,
+ .ident = "Apple MacBookPro5",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro5"),
+ },
+ },
+ { /* Handle problems with rebooting on Apple Macmini3,1 */
+ .callback = set_pci_reboot,
+ .ident = "Apple Macmini3,1",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Macmini3,1"),
+ },
+ },
+ { /* Handle problems with rebooting on the iMac9,1. */
+ .callback = set_pci_reboot,
+ .ident = "Apple iMac9,1",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "iMac9,1"),
+ },
+ },
+
+ /* ASUS */
+ { /* Handle problems with rebooting on ASUS P4S800 */
.callback = set_bios_reboot,
- .ident = "Dell PowerEdge 300",
+ .ident = "ASUS P4S800",
.matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
- DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 300/"),
+ DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
+ DMI_MATCH(DMI_BOARD_NAME, "P4S800"),
},
},
- { /* Handle problems with rebooting on Dell Optiplex 745's SFF */
+
+ /* Dell */
+ { /* Handle problems with rebooting on Dell DXP061 */
.callback = set_bios_reboot,
- .ident = "Dell OptiPlex 745",
+ .ident = "Dell DXP061",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Dell DXP061"),
},
},
- { /* Handle problems with rebooting on Dell Optiplex 745's DFF */
+ { /* Handle problems with rebooting on Dell E520's */
.callback = set_bios_reboot,
- .ident = "Dell OptiPlex 745",
+ .ident = "Dell E520",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"),
- DMI_MATCH(DMI_BOARD_NAME, "0MM599"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Dell DM061"),
},
},
- { /* Handle problems with rebooting on Dell Optiplex 745 with 0KW626 */
- .callback = set_bios_reboot,
- .ident = "Dell OptiPlex 745",
+ { /* Handle problems with rebooting on the Latitude E5420. */
+ .callback = set_pci_reboot,
+ .ident = "Dell Latitude E5420",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"),
- DMI_MATCH(DMI_BOARD_NAME, "0KW626"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E5420"),
+ },
+ },
+ { /* Handle problems with rebooting on the Latitude E6320. */
+ .callback = set_pci_reboot,
+ .ident = "Dell Latitude E6320",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6320"),
+ },
+ },
+ { /* Handle problems with rebooting on the Latitude E6420. */
+ .callback = set_pci_reboot,
+ .ident = "Dell Latitude E6420",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6420"),
},
},
{ /* Handle problems with rebooting on Dell Optiplex 330 with 0KP561 */
@@ -204,160 +250,142 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
DMI_MATCH(DMI_BOARD_NAME, "0T656F"),
},
},
- { /* Handle problems with rebooting on Dell OptiPlex 760 with 0G919G */
+ { /* Handle problems with rebooting on Dell Optiplex 745's SFF */
.callback = set_bios_reboot,
- .ident = "Dell OptiPlex 760",
+ .ident = "Dell OptiPlex 745",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 760"),
- DMI_MATCH(DMI_BOARD_NAME, "0G919G"),
- },
- },
- { /* Handle problems with rebooting on Dell 2400's */
- .callback = set_bios_reboot,
- .ident = "Dell PowerEdge 2400",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
- DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2400"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"),
},
},
- { /* Handle problems with rebooting on Dell T5400's */
+ { /* Handle problems with rebooting on Dell Optiplex 745's DFF */
.callback = set_bios_reboot,
- .ident = "Dell Precision T5400",
+ .ident = "Dell OptiPlex 745",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "Precision WorkStation T5400"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"),
+ DMI_MATCH(DMI_BOARD_NAME, "0MM599"),
},
},
- { /* Handle problems with rebooting on Dell T7400's */
+ { /* Handle problems with rebooting on Dell Optiplex 745 with 0KW626 */
.callback = set_bios_reboot,
- .ident = "Dell Precision T7400",
+ .ident = "Dell OptiPlex 745",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "Precision WorkStation T7400"),
- },
- },
- { /* Handle problems with rebooting on HP laptops */
- .callback = set_bios_reboot,
- .ident = "HP Compaq Laptop",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
- DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"),
+ DMI_MATCH(DMI_BOARD_NAME, "0KW626"),
},
},
- { /* Handle problems with rebooting on Dell XPS710 */
+ { /* Handle problems with rebooting on Dell OptiPlex 760 with 0G919G */
.callback = set_bios_reboot,
- .ident = "Dell XPS710",
+ .ident = "Dell OptiPlex 760",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "Dell XPS710"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 760"),
+ DMI_MATCH(DMI_BOARD_NAME, "0G919G"),
},
},
- { /* Handle problems with rebooting on Dell DXP061 */
- .callback = set_bios_reboot,
- .ident = "Dell DXP061",
+ { /* Handle problems with rebooting on the OptiPlex 990. */
+ .callback = set_pci_reboot,
+ .ident = "Dell OptiPlex 990",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "Dell DXP061"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 990"),
},
},
- { /* Handle problems with rebooting on Sony VGN-Z540N */
+ { /* Handle problems with rebooting on Dell 300's */
.callback = set_bios_reboot,
- .ident = "Sony VGN-Z540N",
+ .ident = "Dell PowerEdge 300",
.matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"),
- DMI_MATCH(DMI_PRODUCT_NAME, "VGN-Z540N"),
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 300/"),
},
},
- { /* Handle problems with rebooting on ASUS P4S800 */
+ { /* Handle problems with rebooting on Dell 1300's */
.callback = set_bios_reboot,
- .ident = "ASUS P4S800",
- .matches = {
- DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
- DMI_MATCH(DMI_BOARD_NAME, "P4S800"),
- },
- },
-
- { /* Handle reboot issue on Acer Aspire one */
- .callback = set_kbd_reboot,
- .ident = "Acer Aspire One A110",
+ .ident = "Dell PowerEdge 1300",
.matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
- DMI_MATCH(DMI_PRODUCT_NAME, "AOA110"),
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 1300/"),
},
},
- { /* Handle problems with rebooting on Apple MacBook5 */
- .callback = set_pci_reboot,
- .ident = "Apple MacBook5",
+ { /* Handle problems with rebooting on Dell 2400's */
+ .callback = set_bios_reboot,
+ .ident = "Dell PowerEdge 2400",
.matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "MacBook5"),
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2400"),
},
},
- { /* Handle problems with rebooting on Apple MacBookPro5 */
+ { /* Handle problems with rebooting on the Dell PowerEdge C6100. */
.callback = set_pci_reboot,
- .ident = "Apple MacBookPro5",
+ .ident = "Dell PowerEdge C6100",
.matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro5"),
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "C6100"),
},
},
- { /* Handle problems with rebooting on Apple Macmini3,1 */
+ { /* Handle problems with rebooting on the Latitude E5410. */
.callback = set_pci_reboot,
- .ident = "Apple Macmini3,1",
+ .ident = "Dell Latitude E5410",
.matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "Macmini3,1"),
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E5410"),
},
},
- { /* Handle problems with rebooting on the iMac9,1. */
+ { /* Handle problems with rebooting on the Precision M6600. */
.callback = set_pci_reboot,
- .ident = "Apple iMac9,1",
+ .ident = "Dell Precision M6600",
.matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "iMac9,1"),
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Precision M6600"),
},
},
- { /* Handle problems with rebooting on the Latitude E6320. */
- .callback = set_pci_reboot,
- .ident = "Dell Latitude E6320",
+ { /* Handle problems with rebooting on Dell T5400's */
+ .callback = set_bios_reboot,
+ .ident = "Dell Precision T5400",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6320"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Precision WorkStation T5400"),
},
},
- { /* Handle problems with rebooting on the Latitude E5420. */
- .callback = set_pci_reboot,
- .ident = "Dell Latitude E5420",
+ { /* Handle problems with rebooting on Dell T7400's */
+ .callback = set_bios_reboot,
+ .ident = "Dell Precision T7400",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E5420"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Precision WorkStation T7400"),
},
},
- { /* Handle problems with rebooting on the Latitude E6420. */
- .callback = set_pci_reboot,
- .ident = "Dell Latitude E6420",
+ { /* Handle problems with rebooting on Dell XPS710 */
+ .callback = set_bios_reboot,
+ .ident = "Dell XPS710",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6420"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Dell XPS710"),
},
},
- { /* Handle problems with rebooting on the OptiPlex 990. */
- .callback = set_pci_reboot,
- .ident = "Dell OptiPlex 990",
+
+ /* Hewlett-Packard */
+ { /* Handle problems with rebooting on HP laptops */
+ .callback = set_bios_reboot,
+ .ident = "HP Compaq Laptop",
.matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 990"),
+ DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq"),
},
},
- { /* Handle problems with rebooting on the Precision M6600. */
- .callback = set_pci_reboot,
- .ident = "Dell OptiPlex 990",
+
+ /* Sony */
+ { /* Handle problems with rebooting on Sony VGN-Z540N */
+ .callback = set_bios_reboot,
+ .ident = "Sony VGN-Z540N",
.matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "Precision M6600"),
+ DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "VGN-Z540N"),
},
},
+
{ }
};
@@ -511,10 +539,13 @@ static void native_machine_emergency_restart(void)
case BOOT_CF9_COND:
if (port_cf9_safe) {
- u8 cf9 = inb(0xcf9) & ~6;
+ u8 reboot_code = reboot_mode == REBOOT_WARM ?
+ 0x06 : 0x0E;
+ u8 cf9 = inb(0xcf9) & ~reboot_code;
outb(cf9|2, 0xcf9); /* Request hard reset */
udelay(50);
- outb(cf9|6, 0xcf9); /* Actually do the reset */
+ /* Actually do the reset */
+ outb(cf9|reboot_code, 0xcf9);
udelay(50);
}
reboot_type = BOOT_KBD;
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 0aa29394ed6f..ca9622a25e95 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -12,7 +12,7 @@
#include <asm/vsyscall.h>
#include <asm/x86_init.h>
#include <asm/time.h>
-#include <asm/mrst.h>
+#include <asm/intel-mid.h>
#include <asm/rtc.h>
#ifdef CONFIG_X86_32
@@ -189,9 +189,17 @@ static __init int add_rtc_cmos(void)
return 0;
/* Intel MID platforms don't have ioport rtc */
- if (mrst_identify_cpu())
+ if (intel_mid_identify_cpu())
return -ENODEV;
+#ifdef CONFIG_ACPI
+ if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_CMOS_RTC) {
+ /* This warning can likely go away again in a year or two. */
+ pr_info("ACPI: not registering RTC platform device\n");
+ return -ENODEV;
+ }
+#endif
+
platform_device_register(&rtc_device);
dev_info(&rtc_device.dev,
"registered platform RTC device (no PNP device found)\n");
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index f0de6294b955..4ad8968d6106 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -824,6 +824,20 @@ static void __init trim_low_memory_range(void)
}
/*
+ * Dump out kernel offset information on panic.
+ */
+static int
+dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p)
+{
+ pr_emerg("Kernel Offset: 0x%lx from 0x%lx "
+ "(relocation range: 0x%lx-0x%lx)\n",
+ (unsigned long)&_text - __START_KERNEL, __START_KERNEL,
+ __START_KERNEL_map, MODULES_VADDR-1);
+
+ return 0;
+}
+
+/*
* Determine if we were loaded by an EFI loader. If so, then we have also been
* passed the efi memmap, systab, etc., so we should use these data structures
* for initialization. Note, the efi init code path is determined by the
@@ -993,6 +1007,7 @@ void __init setup_arch(char **cmdline_p)
efi_init();
dmi_scan_machine();
+ dmi_memdev_walk();
dmi_set_dump_stack_arch_desc();
/*
@@ -1242,3 +1257,15 @@ void __init i386_reserve_resources(void)
}
#endif /* CONFIG_X86_32 */
+
+static struct notifier_block kernel_offset_notifier = {
+ .notifier_call = dump_kernel_offset
+};
+
+static int __init register_kernel_offset_dumper(void)
+{
+ atomic_notifier_chain_register(&panic_notifier_list,
+ &kernel_offset_notifier);
+ return 0;
+}
+__initcall(register_kernel_offset_dumper);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index aecc98a93d1b..85dc05a3aa02 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -73,36 +73,14 @@
#include <asm/setup.h>
#include <asm/uv/uv.h>
#include <linux/mc146818rtc.h>
-
#include <asm/smpboot_hooks.h>
#include <asm/i8259.h>
-
#include <asm/realmode.h>
+#include <asm/misc.h>
/* State of each CPU */
DEFINE_PER_CPU(int, cpu_state) = { 0 };
-#ifdef CONFIG_HOTPLUG_CPU
-/*
- * We need this for trampoline_base protection from concurrent accesses when
- * off- and onlining cores wildly.
- */
-static DEFINE_MUTEX(x86_cpu_hotplug_driver_mutex);
-
-void cpu_hotplug_driver_lock(void)
-{
- mutex_lock(&x86_cpu_hotplug_driver_mutex);
-}
-
-void cpu_hotplug_driver_unlock(void)
-{
- mutex_unlock(&x86_cpu_hotplug_driver_mutex);
-}
-
-ssize_t arch_cpu_probe(const char *buf, size_t count) { return -1; }
-ssize_t arch_cpu_release(const char *buf, size_t count) { return -1; }
-#endif
-
/* Number of siblings per CPU package */
int smp_num_siblings = 1;
EXPORT_SYMBOL(smp_num_siblings);
@@ -648,21 +626,46 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
return (send_status | accept_status);
}
+void smp_announce(void)
+{
+ int num_nodes = num_online_nodes();
+
+ printk(KERN_INFO "x86: Booted up %d node%s, %d CPUs\n",
+ num_nodes, (num_nodes > 1 ? "s" : ""), num_online_cpus());
+}
+
/* reduce the number of lines printed when booting a large cpu count system */
static void announce_cpu(int cpu, int apicid)
{
static int current_node = -1;
int node = early_cpu_to_node(cpu);
+ static int width, node_width;
+
+ if (!width)
+ width = num_digits(num_possible_cpus()) + 1; /* + '#' sign */
+
+ if (!node_width)
+ node_width = num_digits(num_possible_nodes()) + 1; /* + '#' */
+
+ if (cpu == 1)
+ printk(KERN_INFO "x86: Booting SMP configuration:\n");
if (system_state == SYSTEM_BOOTING) {
if (node != current_node) {
if (current_node > (-1))
- pr_cont(" OK\n");
+ pr_cont("\n");
current_node = node;
- pr_info("Booting Node %3d, Processors ", node);
+
+ printk(KERN_INFO ".... node %*s#%d, CPUs: ",
+ node_width - num_digits(node), " ", node);
}
- pr_cont(" #%d%s", cpu, cpu == (nr_cpu_ids - 1) ? " OK\n" : "");
- return;
+
+ /* Add padding for the BSP */
+ if (cpu == 1)
+ pr_cont("%*s", width + 1, " ");
+
+ pr_cont("%*s#%d", width - num_digits(cpu), " ", cpu);
+
} else
pr_info("Booting Node %d Processor %d APIC 0x%x\n",
node, cpu, apicid);
diff --git a/arch/x86/kernel/sysfb_simplefb.c b/arch/x86/kernel/sysfb_simplefb.c
index 22513e96b012..86179d409893 100644
--- a/arch/x86/kernel/sysfb_simplefb.c
+++ b/arch/x86/kernel/sysfb_simplefb.c
@@ -72,14 +72,14 @@ __init int create_simplefb(const struct screen_info *si,
* the part that is occupied by the framebuffer */
len = mode->height * mode->stride;
len = PAGE_ALIGN(len);
- if (len > si->lfb_size << 16) {
+ if (len > (u64)si->lfb_size << 16) {
printk(KERN_WARNING "sysfb: VRAM smaller than advertised\n");
return -EINVAL;
}
/* setup IORESOURCE_MEM as framebuffer memory */
memset(&res, 0, sizeof(res));
- res.flags = IORESOURCE_MEM;
+ res.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
res.name = simplefb_resname;
res.start = si->lfb_base;
res.end = si->lfb_base + len - 1;
diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c
index 6e60b5fe2244..649b010da00b 100644
--- a/arch/x86/kernel/topology.c
+++ b/arch/x86/kernel/topology.c
@@ -65,29 +65,32 @@ int __ref _debug_hotplug_cpu(int cpu, int action)
if (!cpu_is_hotpluggable(cpu))
return -EINVAL;
- cpu_hotplug_driver_lock();
+ lock_device_hotplug();
switch (action) {
case 0:
ret = cpu_down(cpu);
if (!ret) {
pr_info("CPU %u is now offline\n", cpu);
+ dev->offline = true;
kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
} else
pr_debug("Can't offline CPU%d.\n", cpu);
break;
case 1:
ret = cpu_up(cpu);
- if (!ret)
+ if (!ret) {
+ dev->offline = false;
kobject_uevent(&dev->kobj, KOBJ_ONLINE);
- else
+ } else {
pr_debug("Can't online CPU%d.\n", cpu);
+ }
break;
default:
ret = -EINVAL;
}
- cpu_hotplug_driver_unlock();
+ unlock_device_hotplug();
return ret;
}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 8c8093b146ca..729aa779ff75 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -88,7 +88,7 @@ static inline void conditional_sti(struct pt_regs *regs)
static inline void preempt_conditional_sti(struct pt_regs *regs)
{
- inc_preempt_count();
+ preempt_count_inc();
if (regs->flags & X86_EFLAGS_IF)
local_irq_enable();
}
@@ -103,7 +103,7 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
{
if (regs->flags & X86_EFLAGS_IF)
local_irq_disable();
- dec_preempt_count();
+ preempt_count_dec();
}
static int __kprobes
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 10c4f3006afd..da6b35a98260 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -199,6 +199,15 @@ SECTIONS
__x86_cpu_dev_end = .;
}
+#ifdef CONFIG_X86_INTEL_MID
+ .x86_intel_mid_dev.init : AT(ADDR(.x86_intel_mid_dev.init) - \
+ LOAD_OFFSET) {
+ __x86_intel_mid_dev_start = .;
+ *(.x86_intel_mid_dev.init)
+ __x86_intel_mid_dev_end = .;
+ }
+#endif
+
/*
* start address and size of operations which during runtime
* can be patched with virtualization friendly instructions or
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index b014d9414d08..040681928e9d 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -66,3 +66,10 @@ EXPORT_SYMBOL(empty_zero_page);
#ifndef CONFIG_PARAVIRT
EXPORT_SYMBOL(native_load_gs_index);
#endif
+
+#ifdef CONFIG_PREEMPT
+EXPORT_SYMBOL(___preempt_schedule);
+#ifdef CONFIG_CONTEXT_TRACKING
+EXPORT_SYMBOL(___preempt_schedule_context);
+#endif
+#endif
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index b110fe6c03d4..0a1e3b8b964d 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -23,6 +23,26 @@
#include "mmu.h"
#include "trace.h"
+static u32 xstate_required_size(u64 xstate_bv)
+{
+ int feature_bit = 0;
+ u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
+
+ xstate_bv &= ~XSTATE_FPSSE;
+ while (xstate_bv) {
+ if (xstate_bv & 0x1) {
+ u32 eax, ebx, ecx, edx;
+ cpuid_count(0xD, feature_bit, &eax, &ebx, &ecx, &edx);
+ ret = max(ret, eax + ebx);
+ }
+
+ xstate_bv >>= 1;
+ feature_bit++;
+ }
+
+ return ret;
+}
+
void kvm_update_cpuid(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
@@ -46,6 +66,18 @@ void kvm_update_cpuid(struct kvm_vcpu *vcpu)
apic->lapic_timer.timer_mode_mask = 1 << 17;
}
+ best = kvm_find_cpuid_entry(vcpu, 0xD, 0);
+ if (!best) {
+ vcpu->arch.guest_supported_xcr0 = 0;
+ vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
+ } else {
+ vcpu->arch.guest_supported_xcr0 =
+ (best->eax | ((u64)best->edx << 32)) &
+ host_xcr0 & KVM_SUPPORTED_XCR0;
+ vcpu->arch.guest_xstate_size =
+ xstate_required_size(vcpu->arch.guest_supported_xcr0);
+ }
+
kvm_pmu_cpuid_update(vcpu);
}
@@ -182,7 +214,7 @@ static bool supported_xcr0_bit(unsigned bit)
{
u64 mask = ((u64)1 << bit);
- return mask & (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) & host_xcr0;
+ return mask & KVM_SUPPORTED_XCR0 & host_xcr0;
}
#define F(x) bit(X86_FEATURE_##x)
@@ -383,6 +415,8 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
case 0xd: {
int idx, i;
+ entry->eax &= host_xcr0 & KVM_SUPPORTED_XCR0;
+ entry->edx &= (host_xcr0 & KVM_SUPPORTED_XCR0) >> 32;
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
for (idx = 1, i = 1; idx < 64; ++idx) {
if (*nent >= maxnent)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 2bc1e81045b0..ddc3f3d2afdb 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2025,6 +2025,17 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt)
return rc;
}
+static int em_ret_far_imm(struct x86_emulate_ctxt *ctxt)
+{
+ int rc;
+
+ rc = em_ret_far(ctxt);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+ rsp_increment(ctxt, ctxt->src.val);
+ return X86EMUL_CONTINUE;
+}
+
static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
{
/* Save real source value, then compare EAX against destination. */
@@ -3763,7 +3774,8 @@ static const struct opcode opcode_table[256] = {
G(ByteOp, group11), G(0, group11),
/* 0xC8 - 0xCF */
I(Stack | SrcImmU16 | Src2ImmByte, em_enter), I(Stack, em_leave),
- N, I(ImplicitOps | Stack, em_ret_far),
+ I(ImplicitOps | Stack | SrcImmU16, em_ret_far_imm),
+ I(ImplicitOps | Stack, em_ret_far),
D(ImplicitOps), DI(SrcImmByte, intn),
D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret),
/* 0xD0 - 0xD7 */
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index dce0df8150df..40772ef0f2b1 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2570,11 +2570,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
kvm_release_pfn_clean(pfn);
}
-static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
-{
- mmu_free_roots(vcpu);
-}
-
static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
bool no_dirty_log)
{
@@ -3424,18 +3419,11 @@ out_unlock:
return 0;
}
-static void nonpaging_free(struct kvm_vcpu *vcpu)
-{
- mmu_free_roots(vcpu);
-}
-
-static int nonpaging_init_context(struct kvm_vcpu *vcpu,
- struct kvm_mmu *context)
+static void nonpaging_init_context(struct kvm_vcpu *vcpu,
+ struct kvm_mmu *context)
{
- context->new_cr3 = nonpaging_new_cr3;
context->page_fault = nonpaging_page_fault;
context->gva_to_gpa = nonpaging_gva_to_gpa;
- context->free = nonpaging_free;
context->sync_page = nonpaging_sync_page;
context->invlpg = nonpaging_invlpg;
context->update_pte = nonpaging_update_pte;
@@ -3444,7 +3432,6 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu,
context->root_hpa = INVALID_PAGE;
context->direct_map = true;
context->nx = false;
- return 0;
}
void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
@@ -3454,9 +3441,8 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_mmu_flush_tlb);
-static void paging_new_cr3(struct kvm_vcpu *vcpu)
+void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu)
{
- pgprintk("%s: cr3 %lx\n", __func__, kvm_read_cr3(vcpu));
mmu_free_roots(vcpu);
}
@@ -3471,11 +3457,6 @@ static void inject_page_fault(struct kvm_vcpu *vcpu,
vcpu->arch.mmu.inject_page_fault(vcpu, fault);
}
-static void paging_free(struct kvm_vcpu *vcpu)
-{
- nonpaging_free(vcpu);
-}
-
static bool sync_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn,
unsigned access, int *nr_present)
{
@@ -3665,9 +3646,9 @@ static void update_last_pte_bitmap(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
mmu->last_pte_bitmap = map;
}
-static int paging64_init_context_common(struct kvm_vcpu *vcpu,
- struct kvm_mmu *context,
- int level)
+static void paging64_init_context_common(struct kvm_vcpu *vcpu,
+ struct kvm_mmu *context,
+ int level)
{
context->nx = is_nx(vcpu);
context->root_level = level;
@@ -3677,27 +3658,24 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu,
update_last_pte_bitmap(vcpu, context);
ASSERT(is_pae(vcpu));
- context->new_cr3 = paging_new_cr3;
context->page_fault = paging64_page_fault;
context->gva_to_gpa = paging64_gva_to_gpa;
context->sync_page = paging64_sync_page;
context->invlpg = paging64_invlpg;
context->update_pte = paging64_update_pte;
- context->free = paging_free;
context->shadow_root_level = level;
context->root_hpa = INVALID_PAGE;
context->direct_map = false;
- return 0;
}
-static int paging64_init_context(struct kvm_vcpu *vcpu,
- struct kvm_mmu *context)
+static void paging64_init_context(struct kvm_vcpu *vcpu,
+ struct kvm_mmu *context)
{
- return paging64_init_context_common(vcpu, context, PT64_ROOT_LEVEL);
+ paging64_init_context_common(vcpu, context, PT64_ROOT_LEVEL);
}
-static int paging32_init_context(struct kvm_vcpu *vcpu,
- struct kvm_mmu *context)
+static void paging32_init_context(struct kvm_vcpu *vcpu,
+ struct kvm_mmu *context)
{
context->nx = false;
context->root_level = PT32_ROOT_LEVEL;
@@ -3706,33 +3684,28 @@ static int paging32_init_context(struct kvm_vcpu *vcpu,
update_permission_bitmask(vcpu, context, false);
update_last_pte_bitmap(vcpu, context);
- context->new_cr3 = paging_new_cr3;
context->page_fault = paging32_page_fault;
context->gva_to_gpa = paging32_gva_to_gpa;
- context->free = paging_free;
context->sync_page = paging32_sync_page;
context->invlpg = paging32_invlpg;
context->update_pte = paging32_update_pte;
context->shadow_root_level = PT32E_ROOT_LEVEL;
context->root_hpa = INVALID_PAGE;
context->direct_map = false;
- return 0;
}
-static int paging32E_init_context(struct kvm_vcpu *vcpu,
- struct kvm_mmu *context)
+static void paging32E_init_context(struct kvm_vcpu *vcpu,
+ struct kvm_mmu *context)
{
- return paging64_init_context_common(vcpu, context, PT32E_ROOT_LEVEL);
+ paging64_init_context_common(vcpu, context, PT32E_ROOT_LEVEL);
}
-static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
+static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
{
struct kvm_mmu *context = vcpu->arch.walk_mmu;
context->base_role.word = 0;
- context->new_cr3 = nonpaging_new_cr3;
context->page_fault = tdp_page_fault;
- context->free = nonpaging_free;
context->sync_page = nonpaging_sync_page;
context->invlpg = nonpaging_invlpg;
context->update_pte = nonpaging_update_pte;
@@ -3767,37 +3740,32 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
update_permission_bitmask(vcpu, context, false);
update_last_pte_bitmap(vcpu, context);
-
- return 0;
}
-int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
+void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
{
- int r;
bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
ASSERT(vcpu);
ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
if (!is_paging(vcpu))
- r = nonpaging_init_context(vcpu, context);
+ nonpaging_init_context(vcpu, context);
else if (is_long_mode(vcpu))
- r = paging64_init_context(vcpu, context);
+ paging64_init_context(vcpu, context);
else if (is_pae(vcpu))
- r = paging32E_init_context(vcpu, context);
+ paging32E_init_context(vcpu, context);
else
- r = paging32_init_context(vcpu, context);
+ paging32_init_context(vcpu, context);
vcpu->arch.mmu.base_role.nxe = is_nx(vcpu);
vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu);
vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu);
vcpu->arch.mmu.base_role.smep_andnot_wp
= smep && !is_write_protection(vcpu);
-
- return r;
}
EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
-int kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
+void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
bool execonly)
{
ASSERT(vcpu);
@@ -3806,37 +3774,30 @@ int kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
context->shadow_root_level = kvm_x86_ops->get_tdp_level();
context->nx = true;
- context->new_cr3 = paging_new_cr3;
context->page_fault = ept_page_fault;
context->gva_to_gpa = ept_gva_to_gpa;
context->sync_page = ept_sync_page;
context->invlpg = ept_invlpg;
context->update_pte = ept_update_pte;
- context->free = paging_free;
context->root_level = context->shadow_root_level;
context->root_hpa = INVALID_PAGE;
context->direct_map = false;
update_permission_bitmask(vcpu, context, true);
reset_rsvds_bits_mask_ept(vcpu, context, execonly);
-
- return 0;
}
EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu);
-static int init_kvm_softmmu(struct kvm_vcpu *vcpu)
+static void init_kvm_softmmu(struct kvm_vcpu *vcpu)
{
- int r = kvm_init_shadow_mmu(vcpu, vcpu->arch.walk_mmu);
-
+ kvm_init_shadow_mmu(vcpu, vcpu->arch.walk_mmu);
vcpu->arch.walk_mmu->set_cr3 = kvm_x86_ops->set_cr3;
vcpu->arch.walk_mmu->get_cr3 = get_cr3;
vcpu->arch.walk_mmu->get_pdptr = kvm_pdptr_read;
vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
-
- return r;
}
-static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
+static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
{
struct kvm_mmu *g_context = &vcpu->arch.nested_mmu;
@@ -3873,11 +3834,9 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
update_permission_bitmask(vcpu, g_context, false);
update_last_pte_bitmap(vcpu, g_context);
-
- return 0;
}
-static int init_kvm_mmu(struct kvm_vcpu *vcpu)
+static void init_kvm_mmu(struct kvm_vcpu *vcpu)
{
if (mmu_is_nested(vcpu))
return init_kvm_nested_mmu(vcpu);
@@ -3887,18 +3846,12 @@ static int init_kvm_mmu(struct kvm_vcpu *vcpu)
return init_kvm_softmmu(vcpu);
}
-static void destroy_kvm_mmu(struct kvm_vcpu *vcpu)
+void kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
{
ASSERT(vcpu);
- if (VALID_PAGE(vcpu->arch.mmu.root_hpa))
- /* mmu.free() should set root_hpa = INVALID_PAGE */
- vcpu->arch.mmu.free(vcpu);
-}
-int kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
-{
- destroy_kvm_mmu(vcpu);
- return init_kvm_mmu(vcpu);
+ kvm_mmu_unload(vcpu);
+ init_kvm_mmu(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_mmu_reset_context);
@@ -3923,6 +3876,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_load);
void kvm_mmu_unload(struct kvm_vcpu *vcpu)
{
mmu_free_roots(vcpu);
+ WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa));
}
EXPORT_SYMBOL_GPL(kvm_mmu_unload);
@@ -4281,12 +4235,12 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
return alloc_mmu_pages(vcpu);
}
-int kvm_mmu_setup(struct kvm_vcpu *vcpu)
+void kvm_mmu_setup(struct kvm_vcpu *vcpu)
{
ASSERT(vcpu);
ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
- return init_kvm_mmu(vcpu);
+ init_kvm_mmu(vcpu);
}
void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
@@ -4428,7 +4382,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
int nr_to_scan = sc->nr_to_scan;
unsigned long freed = 0;
- raw_spin_lock(&kvm_lock);
+ spin_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
int idx;
@@ -4478,9 +4432,8 @@ unlock:
break;
}
- raw_spin_unlock(&kvm_lock);
+ spin_unlock(&kvm_lock);
return freed;
-
}
static unsigned long
@@ -4574,7 +4527,7 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
{
ASSERT(vcpu);
- destroy_kvm_mmu(vcpu);
+ kvm_mmu_unload(vcpu);
free_mmu_pages(vcpu);
mmu_free_memory_caches(vcpu);
}
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 77e044a0f5f7..292615274358 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -70,8 +70,8 @@ enum {
};
int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct);
-int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
-int kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
+void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
+void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
bool execonly);
static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 043330159179..ad75d77999d0 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -99,6 +99,7 @@ struct guest_walker {
pt_element_t prefetch_ptes[PTE_PREFETCH_NUM];
gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
pt_element_t __user *ptep_user[PT_MAX_FULL_LEVELS];
+ bool pte_writable[PT_MAX_FULL_LEVELS];
unsigned pt_access;
unsigned pte_access;
gfn_t gfn;
@@ -235,6 +236,22 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
if (pte == orig_pte)
continue;
+ /*
+ * If the slot is read-only, simply do not process the accessed
+ * and dirty bits. This is the correct thing to do if the slot
+ * is ROM, and page tables in read-as-ROM/write-as-MMIO slots
+ * are only supported if the accessed and dirty bits are already
+ * set in the ROM (so that MMIO writes are never needed).
+ *
+ * Note that NPT does not allow this at all and faults, since
+ * it always wants nested page table entries for the guest
+ * page tables to be writable. And EPT works but will simply
+ * overwrite the read-only memory to set the accessed and dirty
+ * bits.
+ */
+ if (unlikely(!walker->pte_writable[level - 1]))
+ continue;
+
ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, orig_pte, pte);
if (ret)
return ret;
@@ -309,7 +326,8 @@ retry_walk:
goto error;
real_gfn = gpa_to_gfn(real_gfn);
- host_addr = gfn_to_hva(vcpu->kvm, real_gfn);
+ host_addr = gfn_to_hva_prot(vcpu->kvm, real_gfn,
+ &walker->pte_writable[walker->level - 1]);
if (unlikely(kvm_is_error_hva(host_addr)))
goto error;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index c0bc80391e40..c7168a5cff1b 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1959,11 +1959,9 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
nested_svm_vmexit(svm);
}
-static int nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
+static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
{
- int r;
-
- r = kvm_init_shadow_mmu(vcpu, &vcpu->arch.mmu);
+ kvm_init_shadow_mmu(vcpu, &vcpu->arch.mmu);
vcpu->arch.mmu.set_cr3 = nested_svm_set_tdp_cr3;
vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3;
@@ -1971,8 +1969,6 @@ static int nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit;
vcpu->arch.mmu.shadow_root_level = get_npt_level();
vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
-
- return r;
}
static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1f1da43ff2a2..06fd7629068a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1898,16 +1898,12 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
/*
* KVM wants to inject page-faults which it got to the guest. This function
* checks whether in a nested guest, we need to inject them to L1 or L2.
- * This function assumes it is called with the exit reason in vmcs02 being
- * a #PF exception (this is the only case in which KVM injects a #PF when L2
- * is running).
*/
-static int nested_pf_handled(struct kvm_vcpu *vcpu)
+static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned nr)
{
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
- /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */
- if (!(vmcs12->exception_bitmap & (1u << PF_VECTOR)))
+ if (!(vmcs12->exception_bitmap & (1u << nr)))
return 0;
nested_vmx_vmexit(vcpu);
@@ -1921,8 +1917,8 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 intr_info = nr | INTR_INFO_VALID_MASK;
- if (nr == PF_VECTOR && is_guest_mode(vcpu) &&
- !vmx->nested.nested_run_pending && nested_pf_handled(vcpu))
+ if (!reinject && is_guest_mode(vcpu) &&
+ nested_vmx_check_exception(vcpu, nr))
return;
if (has_error_code) {
@@ -2204,9 +2200,15 @@ static __init void nested_vmx_setup_ctls_msrs(void)
#ifdef CONFIG_X86_64
VM_EXIT_HOST_ADDR_SPACE_SIZE |
#endif
- VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT;
+ VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT |
+ VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
+ if (!(nested_vmx_pinbased_ctls_high & PIN_BASED_VMX_PREEMPTION_TIMER) ||
+ !(nested_vmx_exit_ctls_high & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) {
+ nested_vmx_exit_ctls_high &= ~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
+ nested_vmx_pinbased_ctls_high &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
+ }
nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
- VM_EXIT_LOAD_IA32_EFER);
+ VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER);
/* entry controls */
rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
@@ -2252,6 +2254,7 @@ static __init void nested_vmx_setup_ctls_msrs(void)
nested_vmx_secondary_ctls_low = 0;
nested_vmx_secondary_ctls_high &=
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+ SECONDARY_EXEC_UNRESTRICTED_GUEST |
SECONDARY_EXEC_WBINVD_EXITING;
if (enable_ept) {
@@ -3255,25 +3258,29 @@ static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
{
+ struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
+
if (!test_bit(VCPU_EXREG_PDPTR,
(unsigned long *)&vcpu->arch.regs_dirty))
return;
if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
- vmcs_write64(GUEST_PDPTR0, vcpu->arch.mmu.pdptrs[0]);
- vmcs_write64(GUEST_PDPTR1, vcpu->arch.mmu.pdptrs[1]);
- vmcs_write64(GUEST_PDPTR2, vcpu->arch.mmu.pdptrs[2]);
- vmcs_write64(GUEST_PDPTR3, vcpu->arch.mmu.pdptrs[3]);
+ vmcs_write64(GUEST_PDPTR0, mmu->pdptrs[0]);
+ vmcs_write64(GUEST_PDPTR1, mmu->pdptrs[1]);
+ vmcs_write64(GUEST_PDPTR2, mmu->pdptrs[2]);
+ vmcs_write64(GUEST_PDPTR3, mmu->pdptrs[3]);
}
}
static void ept_save_pdptrs(struct kvm_vcpu *vcpu)
{
+ struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
+
if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
- vcpu->arch.mmu.pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
- vcpu->arch.mmu.pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
- vcpu->arch.mmu.pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
- vcpu->arch.mmu.pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
+ mmu->pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
+ mmu->pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
+ mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
+ mmu->pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
}
__set_bit(VCPU_EXREG_PDPTR,
@@ -3376,8 +3383,10 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
if (enable_ept) {
eptp = construct_eptp(cr3);
vmcs_write64(EPT_POINTER, eptp);
- guest_cr3 = is_paging(vcpu) ? kvm_read_cr3(vcpu) :
- vcpu->kvm->arch.ept_identity_map_addr;
+ if (is_paging(vcpu) || is_guest_mode(vcpu))
+ guest_cr3 = kvm_read_cr3(vcpu);
+ else
+ guest_cr3 = vcpu->kvm->arch.ept_identity_map_addr;
ept_load_pdptrs(vcpu);
}
@@ -4875,6 +4884,17 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
hypercall[2] = 0xc1;
}
+static bool nested_cr0_valid(struct vmcs12 *vmcs12, unsigned long val)
+{
+ unsigned long always_on = VMXON_CR0_ALWAYSON;
+
+ if (nested_vmx_secondary_ctls_high &
+ SECONDARY_EXEC_UNRESTRICTED_GUEST &&
+ nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST))
+ always_on &= ~(X86_CR0_PE | X86_CR0_PG);
+ return (val & always_on) == always_on;
+}
+
/* called to set cr0 as appropriate for a mov-to-cr0 exit. */
static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
{
@@ -4893,9 +4913,7 @@ static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
val = (val & ~vmcs12->cr0_guest_host_mask) |
(vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask);
- /* TODO: will have to take unrestricted guest mode into
- * account */
- if ((val & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON)
+ if (!nested_cr0_valid(vmcs12, val))
return 1;
if (kvm_set_cr0(vcpu, val))
@@ -5339,6 +5357,17 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
return 0;
}
+ /*
+ * EPT violation happened while executing iret from NMI,
+ * "blocked by NMI" bit has to be set before next VM entry.
+ * There are errata that may cause this bit to not be set:
+ * AAK134, BY25.
+ */
+ if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
+ cpu_has_virtual_nmis() &&
+ (exit_qualification & INTR_INFO_UNBLOCK_NMI))
+ vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI);
+
gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
trace_kvm_page_fault(gpa, exit_qualification);
@@ -6707,6 +6736,27 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
*info2 = vmcs_read32(VM_EXIT_INTR_INFO);
}
+static void nested_adjust_preemption_timer(struct kvm_vcpu *vcpu)
+{
+ u64 delta_tsc_l1;
+ u32 preempt_val_l1, preempt_val_l2, preempt_scale;
+
+ if (!(get_vmcs12(vcpu)->pin_based_vm_exec_control &
+ PIN_BASED_VMX_PREEMPTION_TIMER))
+ return;
+ preempt_scale = native_read_msr(MSR_IA32_VMX_MISC) &
+ MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE;
+ preempt_val_l2 = vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
+ delta_tsc_l1 = vmx_read_l1_tsc(vcpu, native_read_tsc())
+ - vcpu->arch.last_guest_tsc;
+ preempt_val_l1 = delta_tsc_l1 >> preempt_scale;
+ if (preempt_val_l2 <= preempt_val_l1)
+ preempt_val_l2 = 0;
+ else
+ preempt_val_l2 -= preempt_val_l1;
+ vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val_l2);
+}
+
/*
* The guest has exited. See if we can fix it or if we need userspace
* assistance.
@@ -6721,20 +6771,6 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
if (vmx->emulation_required)
return handle_invalid_guest_state(vcpu);
- /*
- * the KVM_REQ_EVENT optimization bit is only on for one entry, and if
- * we did not inject a still-pending event to L1 now because of
- * nested_run_pending, we need to re-enable this bit.
- */
- if (vmx->nested.nested_run_pending)
- kvm_make_request(KVM_REQ_EVENT, vcpu);
-
- if (!is_guest_mode(vcpu) && (exit_reason == EXIT_REASON_VMLAUNCH ||
- exit_reason == EXIT_REASON_VMRESUME))
- vmx->nested.nested_run_pending = 1;
- else
- vmx->nested.nested_run_pending = 0;
-
if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) {
nested_vmx_vmexit(vcpu);
return 1;
@@ -7046,9 +7082,9 @@ static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
case INTR_TYPE_HARD_EXCEPTION:
if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
u32 err = vmcs_read32(error_code_field);
- kvm_queue_exception_e(vcpu, vector, err);
+ kvm_requeue_exception_e(vcpu, vector, err);
} else
- kvm_queue_exception(vcpu, vector);
+ kvm_requeue_exception(vcpu, vector);
break;
case INTR_TYPE_SOFT_INTR:
vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
@@ -7131,6 +7167,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
atomic_switch_perf_msrs(vmx);
debugctlmsr = get_debugctlmsr();
+ if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending)
+ nested_adjust_preemption_timer(vcpu);
vmx->__launched = vmx->loaded_vmcs->launched;
asm(
/* Store host registers */
@@ -7269,6 +7307,16 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
trace_kvm_exit(vmx->exit_reason, vcpu, KVM_ISA_VMX);
+ /*
+ * the KVM_REQ_EVENT optimization bit is only on for one entry, and if
+ * we did not inject a still-pending event to L1 now because of
+ * nested_run_pending, we need to re-enable this bit.
+ */
+ if (vmx->nested.nested_run_pending)
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+
+ vmx->nested.nested_run_pending = 0;
+
vmx_complete_atomic_exit(vmx);
vmx_recover_nmi_blocking(vmx);
vmx_complete_interrupts(vmx);
@@ -7486,9 +7534,9 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu)
return get_vmcs12(vcpu)->ept_pointer;
}
-static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
+static void nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
{
- int r = kvm_init_shadow_ept_mmu(vcpu, &vcpu->arch.mmu,
+ kvm_init_shadow_ept_mmu(vcpu, &vcpu->arch.mmu,
nested_vmx_ept_caps & VMX_EPT_EXECUTE_ONLY_BIT);
vcpu->arch.mmu.set_cr3 = vmx_set_cr3;
@@ -7496,8 +7544,6 @@ static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault;
vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
-
- return r;
}
static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
@@ -7505,6 +7551,20 @@ static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
vcpu->arch.walk_mmu = &vcpu->arch.mmu;
}
+static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
+ struct x86_exception *fault)
+{
+ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+
+ WARN_ON(!is_guest_mode(vcpu));
+
+ /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */
+ if (vmcs12->exception_bitmap & (1u << PF_VECTOR))
+ nested_vmx_vmexit(vcpu);
+ else
+ kvm_inject_page_fault(vcpu, fault);
+}
+
/*
* prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
* L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
@@ -7518,6 +7578,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 exec_control;
+ u32 exit_control;
vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
@@ -7691,7 +7752,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
* we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
* bits are further modified by vmx_set_efer() below.
*/
- vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
+ exit_control = vmcs_config.vmexit_ctrl;
+ if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER)
+ exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
+ vmcs_write32(VM_EXIT_CONTROLS, exit_control);
/* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
* emulated by vmx_set_efer(), below.
@@ -7758,6 +7822,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
kvm_set_cr3(vcpu, vmcs12->guest_cr3);
kvm_mmu_reset_context(vcpu);
+ if (!enable_ept)
+ vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested;
+
/*
* L1 may access the L2's PDPTR, so save them to construct vmcs12
*/
@@ -7861,7 +7928,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
return 1;
}
- if (((vmcs12->guest_cr0 & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON) ||
+ if (!nested_cr0_valid(vmcs12, vmcs12->guest_cr0) ||
((vmcs12->guest_cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON)) {
nested_vmx_entry_failure(vcpu, vmcs12,
EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT);
@@ -7923,6 +7990,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
enter_guest_mode(vcpu);
+ vmx->nested.nested_run_pending = 1;
+
vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET);
cpu = get_cpu();
@@ -7990,7 +8059,7 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
u32 idt_vectoring;
unsigned int nr;
- if (vcpu->arch.exception.pending) {
+ if (vcpu->arch.exception.pending && vcpu->arch.exception.reinject) {
nr = vcpu->arch.exception.nr;
idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
@@ -8090,6 +8159,11 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmcs12->guest_pending_dbg_exceptions =
vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
+ if ((vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) &&
+ (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
+ vmcs12->vmx_preemption_timer_value =
+ vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
+
/*
* In some cases (usually, nested EPT), L2 is allowed to change its
* own CR3 without exiting. If it has changed it, we must keep it.
@@ -8115,6 +8189,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT)
vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT);
+ if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER)
+ vmcs12->guest_ia32_efer = vcpu->arch.efer;
vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP);
vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
@@ -8186,7 +8262,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
* fpu_active (which may have changed).
* Note that vmx_set_cr0 refers to efer set above.
*/
- kvm_set_cr0(vcpu, vmcs12->host_cr0);
+ vmx_set_cr0(vcpu, vmcs12->host_cr0);
/*
* If we did fpu_activate()/fpu_deactivate() during L2's run, we need
* to apply the same changes to L1's vmcs. We just set cr0 correctly,
@@ -8209,6 +8285,9 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
kvm_set_cr3(vcpu, vmcs12->host_cr3);
kvm_mmu_reset_context(vcpu);
+ if (!enable_ept)
+ vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
+
if (enable_vpid) {
/*
* Trivially support vpid by letting L2s share their parent
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e5ca72a5cdb6..edf2a07df3a3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -586,7 +586,7 @@ int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
return 1;
if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE))
return 1;
- if (xcr0 & ~host_xcr0)
+ if (xcr0 & ~vcpu->arch.guest_supported_xcr0)
return 1;
kvm_put_guest_xcr0(vcpu);
vcpu->arch.xcr0 = xcr0;
@@ -684,7 +684,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
vcpu->arch.cr3 = cr3;
__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
- vcpu->arch.mmu.new_cr3(vcpu);
+ kvm_mmu_new_cr3(vcpu);
return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_cr3);
@@ -2984,11 +2984,13 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
struct kvm_xsave *guest_xsave)
{
- if (cpu_has_xsave)
+ if (cpu_has_xsave) {
memcpy(guest_xsave->region,
&vcpu->arch.guest_fpu.state->xsave,
- xstate_size);
- else {
+ vcpu->arch.guest_xstate_size);
+ *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] &=
+ vcpu->arch.guest_supported_xcr0 | XSTATE_FPSSE;
+ } else {
memcpy(guest_xsave->region,
&vcpu->arch.guest_fpu.state->fxsave,
sizeof(struct i387_fxsave_struct));
@@ -3003,10 +3005,19 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
u64 xstate_bv =
*(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
- if (cpu_has_xsave)
+ if (cpu_has_xsave) {
+ /*
+ * Here we allow setting states that are not present in
+ * CPUID leaf 0xD, index 0, EDX:EAX. This is for compatibility
+ * with old userspace.
+ */
+ if (xstate_bv & ~KVM_SUPPORTED_XCR0)
+ return -EINVAL;
+ if (xstate_bv & ~host_xcr0)
+ return -EINVAL;
memcpy(&vcpu->arch.guest_fpu.state->xsave,
- guest_xsave->region, xstate_size);
- else {
+ guest_xsave->region, vcpu->arch.guest_xstate_size);
+ } else {
if (xstate_bv & ~XSTATE_FPSSE)
return -EINVAL;
memcpy(&vcpu->arch.guest_fpu.state->fxsave,
@@ -5263,7 +5274,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
- raw_spin_lock(&kvm_lock);
+ spin_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
kvm_for_each_vcpu(i, vcpu, kvm) {
if (vcpu->cpu != freq->cpu)
@@ -5273,7 +5284,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
send_ipi = 1;
}
}
- raw_spin_unlock(&kvm_lock);
+ spin_unlock(&kvm_lock);
if (freq->old < freq->new && send_ipi) {
/*
@@ -5426,12 +5437,12 @@ static void pvclock_gtod_update_fn(struct work_struct *work)
struct kvm_vcpu *vcpu;
int i;
- raw_spin_lock(&kvm_lock);
+ spin_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list)
kvm_for_each_vcpu(i, vcpu, kvm)
set_bit(KVM_REQ_MASTERCLOCK_UPDATE, &vcpu->requests);
atomic_set(&kvm_guest_has_master_clock, 0);
- raw_spin_unlock(&kvm_lock);
+ spin_unlock(&kvm_lock);
}
static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
@@ -6688,7 +6699,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
if (r)
return r;
kvm_vcpu_reset(vcpu);
- r = kvm_mmu_setup(vcpu);
+ kvm_mmu_setup(vcpu);
vcpu_put(vcpu);
return r;
@@ -6940,6 +6951,10 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
vcpu->arch.ia32_tsc_adjust_msr = 0x0;
vcpu->arch.pv_time_enabled = false;
+
+ vcpu->arch.guest_supported_xcr0 = 0;
+ vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
+
kvm_async_pf_hash_reset(vcpu);
kvm_pmu_init(vcpu);
@@ -7283,7 +7298,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
int r;
if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) ||
- is_error_page(work->page))
+ work->wakeup_all)
return;
r = kvm_mmu_reload(vcpu);
@@ -7393,7 +7408,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
struct x86_exception fault;
trace_kvm_async_pf_ready(work->arch.token, work->gva);
- if (is_error_page(work->page))
+ if (work->wakeup_all)
work->arch.token = ~0; /* broadcast wakeup */
else
kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index e224f7a671b6..587fb9ede436 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -122,6 +122,7 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
gva_t addr, void *val, unsigned int bytes,
struct x86_exception *exception);
+#define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM)
extern u64 host_xcr0;
extern struct static_key kvm_no_apic_vcpu;
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 96b2c6697c9d..992d63bb154f 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -16,7 +16,7 @@ clean-files := inat-tables.c
obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o
-lib-y := delay.o
+lib-y := delay.o misc.o
lib-y += thunk_$(BITS).o
lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o
lib-y += memcpy_$(BITS).o
diff --git a/arch/x86/lib/misc.c b/arch/x86/lib/misc.c
new file mode 100644
index 000000000000..76b373af03f0
--- /dev/null
+++ b/arch/x86/lib/misc.c
@@ -0,0 +1,21 @@
+/*
+ * Count the digits of @val including a possible sign.
+ *
+ * (Typed on and submitted from hpa's mobile phone.)
+ */
+int num_digits(int val)
+{
+ int m = 10;
+ int d = 1;
+
+ if (val < 0) {
+ d++;
+ val = -val;
+ }
+
+ while (val >= m) {
+ m *= 10;
+ d++;
+ }
+ return d;
+}
diff --git a/arch/x86/lib/msr-smp.c b/arch/x86/lib/msr-smp.c
index a6b1b86d2253..518532e6a3fa 100644
--- a/arch/x86/lib/msr-smp.c
+++ b/arch/x86/lib/msr-smp.c
@@ -47,6 +47,21 @@ int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
}
EXPORT_SYMBOL(rdmsr_on_cpu);
+int rdmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 *q)
+{
+ int err;
+ struct msr_info rv;
+
+ memset(&rv, 0, sizeof(rv));
+
+ rv.msr_no = msr_no;
+ err = smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1);
+ *q = rv.reg.q;
+
+ return err;
+}
+EXPORT_SYMBOL(rdmsrl_on_cpu);
+
int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
{
int err;
@@ -63,6 +78,22 @@ int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
}
EXPORT_SYMBOL(wrmsr_on_cpu);
+int wrmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 q)
+{
+ int err;
+ struct msr_info rv;
+
+ memset(&rv, 0, sizeof(rv));
+
+ rv.msr_no = msr_no;
+ rv.reg.q = q;
+
+ err = smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1);
+
+ return err;
+}
+EXPORT_SYMBOL(wrmsrl_on_cpu);
+
static void __rwmsr_on_cpus(const struct cpumask *mask, u32 msr_no,
struct msr *msrs,
void (*msr_func) (void *info))
@@ -159,6 +190,37 @@ int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
}
EXPORT_SYMBOL(wrmsr_safe_on_cpu);
+int wrmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 q)
+{
+ int err;
+ struct msr_info rv;
+
+ memset(&rv, 0, sizeof(rv));
+
+ rv.msr_no = msr_no;
+ rv.reg.q = q;
+
+ err = smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 1);
+
+ return err ? err : rv.err;
+}
+EXPORT_SYMBOL(wrmsrl_safe_on_cpu);
+
+int rdmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 *q)
+{
+ int err;
+ struct msr_info rv;
+
+ memset(&rv, 0, sizeof(rv));
+
+ rv.msr_no = msr_no;
+ err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1);
+ *q = rv.reg.q;
+
+ return err ? err : rv.err;
+}
+EXPORT_SYMBOL(rdmsrl_safe_on_cpu);
+
/*
* These variants are significantly slower, but allows control over
* the entire 32-bit GPR set.
diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c
index 4f74d94c8d97..5465b8613944 100644
--- a/arch/x86/lib/usercopy.c
+++ b/arch/x86/lib/usercopy.c
@@ -11,39 +11,26 @@
#include <linux/sched.h>
/*
- * best effort, GUP based copy_from_user() that is NMI-safe
+ * We rely on the nested NMI work to allow atomic faults from the NMI path; the
+ * nested NMI paths are careful to preserve CR2.
*/
unsigned long
copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
{
- unsigned long offset, addr = (unsigned long)from;
- unsigned long size, len = 0;
- struct page *page;
- void *map;
- int ret;
+ unsigned long ret;
if (__range_not_ok(from, n, TASK_SIZE))
- return len;
-
- do {
- ret = __get_user_pages_fast(addr, 1, 0, &page);
- if (!ret)
- break;
-
- offset = addr & (PAGE_SIZE - 1);
- size = min(PAGE_SIZE - offset, n - len);
-
- map = kmap_atomic(page);
- memcpy(to, map+offset, size);
- kunmap_atomic(map);
- put_page(page);
-
- len += size;
- to += size;
- addr += size;
-
- } while (len < n);
-
- return len;
+ return 0;
+
+ /*
+ * Even though this function is typically called from NMI/IRQ context
+ * disable pagefaults so that its behaviour is consistent even when
+ * called form other contexts.
+ */
+ pagefault_disable();
+ ret = __copy_from_user_inatomic(to, from, n);
+ pagefault_enable();
+
+ return n - ret;
}
EXPORT_SYMBOL_GPL(copy_from_user_nmi);
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 3eb18acd0e40..e2f5e21c03b3 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -654,14 +654,13 @@ EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero);
* Returns number of bytes that could not be copied.
* On success, this will be zero.
*/
-unsigned long
-copy_to_user(void __user *to, const void *from, unsigned long n)
+unsigned long _copy_to_user(void __user *to, const void *from, unsigned n)
{
if (access_ok(VERIFY_WRITE, to, n))
n = __copy_to_user(to, from, n);
return n;
}
-EXPORT_SYMBOL(copy_to_user);
+EXPORT_SYMBOL(_copy_to_user);
/**
* copy_from_user: - Copy a block of data from user space.
@@ -679,8 +678,7 @@ EXPORT_SYMBOL(copy_to_user);
* If some data could not be copied, this function will pad the copied
* data to the requested size using zero bytes.
*/
-unsigned long
-_copy_from_user(void *to, const void __user *from, unsigned long n)
+unsigned long _copy_from_user(void *to, const void __user *from, unsigned n)
{
if (access_ok(VERIFY_READ, from, n))
n = __copy_from_user(to, from, n);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 3aaeffcfd67a..e7e1cac74e8d 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -51,7 +51,7 @@ kmmio_fault(struct pt_regs *regs, unsigned long addr)
return 0;
}
-static inline int __kprobes notify_page_fault(struct pt_regs *regs)
+static inline int __kprobes kprobes_fault(struct pt_regs *regs)
{
int ret = 0;
@@ -596,7 +596,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
printk(KERN_CONT " at %p\n", (void *) address);
printk(KERN_ALERT "IP:");
- printk_address(regs->ip, 1);
+ printk_address(regs->ip);
dump_pagetable(address);
}
@@ -1048,7 +1048,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
return;
/* kprobes don't want to hook the spurious faults: */
- if (notify_page_fault(regs))
+ if (kprobes_fault(regs))
return;
/*
* Don't take the mm semaphore here. If we fixup a prefetch
@@ -1060,23 +1060,8 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
}
/* kprobes don't want to hook the spurious faults: */
- if (unlikely(notify_page_fault(regs)))
+ if (unlikely(kprobes_fault(regs)))
return;
- /*
- * It's safe to allow irq's after cr2 has been saved and the
- * vmalloc fault has been handled.
- *
- * User-mode registers count as a user access even for any
- * potential system fault or CPU buglet:
- */
- if (user_mode_vm(regs)) {
- local_irq_enable();
- error_code |= PF_USER;
- flags |= FAULT_FLAG_USER;
- } else {
- if (regs->flags & X86_EFLAGS_IF)
- local_irq_enable();
- }
if (unlikely(error_code & PF_RSVD))
pgtable_bad(regs, error_code, address);
@@ -1088,8 +1073,6 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
}
}
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
-
/*
* If we're in an interrupt, have no user context or are running
* in an atomic region then we must not take the fault:
@@ -1099,6 +1082,24 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
return;
}
+ /*
+ * It's safe to allow irq's after cr2 has been saved and the
+ * vmalloc fault has been handled.
+ *
+ * User-mode registers count as a user access even for any
+ * potential system fault or CPU buglet:
+ */
+ if (user_mode_vm(regs)) {
+ local_irq_enable();
+ error_code |= PF_USER;
+ flags |= FAULT_FLAG_USER;
+ } else {
+ if (regs->flags & X86_EFLAGS_IF)
+ local_irq_enable();
+ }
+
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+
if (error_code & PF_WRITE)
flags |= FAULT_FLAG_WRITE;
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 04664cdb7fda..ce32017c5e38 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -399,8 +399,25 @@ static unsigned long __init init_range_memory_mapping(
return mapped_ram_size;
}
-/* (PUD_SHIFT-PMD_SHIFT)/2 */
-#define STEP_SIZE_SHIFT 5
+static unsigned long __init get_new_step_size(unsigned long step_size)
+{
+ /*
+ * Explain why we shift by 5 and why we don't have to worry about
+ * 'step_size << 5' overflowing:
+ *
+ * initial mapped size is PMD_SIZE (2M).
+ * We can not set step_size to be PUD_SIZE (1G) yet.
+ * In worse case, when we cross the 1G boundary, and
+ * PG_LEVEL_2M is not set, we will need 1+1+512 pages (2M + 8k)
+ * to map 1G range with PTE. Use 5 as shift for now.
+ *
+ * Don't need to worry about overflow, on 32bit, when step_size
+ * is 0, round_down() returns 0 for start, and that turns it
+ * into 0x100000000ULL.
+ */
+ return step_size << 5;
+}
+
void __init init_mem_mapping(void)
{
unsigned long end, real_end, start, last_start;
@@ -449,7 +466,7 @@ void __init init_mem_mapping(void)
min_pfn_mapped = last_start >> PAGE_SHIFT;
/* only increase step_size after big range get mapped */
if (new_mapped_ram_size > mapped_ram_size)
- step_size <<= STEP_SIZE_SHIFT;
+ step_size = get_new_step_size(step_size);
mapped_ram_size += new_mapped_ram_size;
}
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 4287f1ffba7e..5bdc5430597c 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -806,6 +806,9 @@ void __init mem_init(void)
BUILD_BUG_ON(VMALLOC_START >= VMALLOC_END);
#undef high_memory
#undef __FIXADDR_TOP
+#ifdef CONFIG_RANDOMIZE_BASE
+ BUILD_BUG_ON(CONFIG_RANDOMIZE_BASE_MAX_OFFSET > KERNEL_IMAGE_SIZE);
+#endif
#ifdef CONFIG_HIGHMEM
BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 79c216aa0e2b..516593e1ce33 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -772,13 +772,21 @@ out:
return;
}
+static void bpf_jit_free_deferred(struct work_struct *work)
+{
+ struct sk_filter *fp = container_of(work, struct sk_filter, work);
+ unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
+ struct bpf_binary_header *header = (void *)addr;
+
+ set_memory_rw(addr, header->pages);
+ module_free(NULL, header);
+ kfree(fp);
+}
+
void bpf_jit_free(struct sk_filter *fp)
{
if (fp->bpf_func != sk_run_filter) {
- unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
- struct bpf_binary_header *header = (void *)addr;
-
- set_memory_rw(addr, header->pages);
- module_free(NULL, header);
+ INIT_WORK(&fp->work, bpf_jit_free_deferred);
+ schedule_work(&fp->work);
}
}
diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile
index ee0af58ca5bd..e063eed0f912 100644
--- a/arch/x86/pci/Makefile
+++ b/arch/x86/pci/Makefile
@@ -18,7 +18,7 @@ obj-$(CONFIG_X86_VISWS) += visws.o
obj-$(CONFIG_X86_NUMAQ) += numaq_32.o
obj-$(CONFIG_X86_NUMACHIP) += numachip.o
-obj-$(CONFIG_X86_INTEL_MID) += mrst.o
+obj-$(CONFIG_X86_INTEL_MID) += intel_mid_pci.o
obj-y += common.o early.o
obj-y += bus_numa.o
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index f5809fa2753e..b046e070e088 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -231,7 +231,7 @@ static int quirk_pcie_aspm_write(struct pci_bus *bus, unsigned int devfn, int wh
offset = quirk_aspm_offset[GET_INDEX(bus->self->device, devfn)];
if ((offset) && (where == offset))
- value = value & 0xfffffffc;
+ value = value & ~PCI_EXP_LNKCTL_ASPMC;
return raw_pci_write(pci_domain_nr(bus), bus->number,
devfn, where, size, value);
@@ -252,7 +252,7 @@ static struct pci_ops quirk_pcie_aspm_ops = {
*/
static void pcie_rootport_aspm_quirk(struct pci_dev *pdev)
{
- int cap_base, i;
+ int i;
struct pci_bus *pbus;
struct pci_dev *dev;
@@ -278,7 +278,7 @@ static void pcie_rootport_aspm_quirk(struct pci_dev *pdev)
for (i = GET_INDEX(pdev->device, 0); i <= GET_INDEX(pdev->device, 7); ++i)
quirk_aspm_offset[i] = 0;
- pbus->ops = pbus->parent->ops;
+ pci_bus_set_ops(pbus, pbus->parent->ops);
} else {
/*
* If devices are attached to the root port at power-up or
@@ -286,13 +286,15 @@ static void pcie_rootport_aspm_quirk(struct pci_dev *pdev)
* each root port to save the register offsets and replace the
* bus ops.
*/
- list_for_each_entry(dev, &pbus->devices, bus_list) {
+ list_for_each_entry(dev, &pbus->devices, bus_list)
/* There are 0 to 8 devices attached to this bus */
- cap_base = pci_find_capability(dev, PCI_CAP_ID_EXP);
- quirk_aspm_offset[GET_INDEX(pdev->device, dev->devfn)] = cap_base + 0x10;
- }
- pbus->ops = &quirk_pcie_aspm_ops;
+ quirk_aspm_offset[GET_INDEX(pdev->device, dev->devfn)] =
+ dev->pcie_cap + PCI_EXP_LNKCTL;
+
+ pci_bus_set_ops(pbus, &quirk_pcie_aspm_ops);
+ dev_info(&pbus->dev, "writes to ASPM control bits will be ignored\n");
}
+
}
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PA, pcie_rootport_aspm_quirk);
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PA1, pcie_rootport_aspm_quirk);
diff --git a/arch/x86/pci/mrst.c b/arch/x86/pci/intel_mid_pci.c
index 903fded50786..51384ca727ad 100644
--- a/arch/x86/pci/mrst.c
+++ b/arch/x86/pci/intel_mid_pci.c
@@ -1,5 +1,5 @@
/*
- * Moorestown PCI support
+ * Intel MID PCI support
* Copyright (c) 2008 Intel Corporation
* Jesse Barnes <jesse.barnes@intel.com>
*
@@ -150,12 +150,12 @@ static bool type1_access_ok(unsigned int bus, unsigned int devfn, int reg)
* shim. Therefore, use the header type in shim instead.
*/
if (reg >= 0x100 || reg == PCI_STATUS || reg == PCI_HEADER_TYPE)
- return 0;
+ return false;
if (bus == 0 && (devfn == PCI_DEVFN(2, 0)
|| devfn == PCI_DEVFN(0, 0)
|| devfn == PCI_DEVFN(3, 0)))
- return 1;
- return 0; /* Langwell on others */
+ return true;
+ return false; /* Langwell on others */
}
static int pci_read(struct pci_bus *bus, unsigned int devfn, int where,
@@ -205,7 +205,7 @@ static int pci_write(struct pci_bus *bus, unsigned int devfn, int where,
where, size, value);
}
-static int mrst_pci_irq_enable(struct pci_dev *dev)
+static int intel_mid_pci_irq_enable(struct pci_dev *dev)
{
u8 pin;
struct io_apic_irq_attr irq_attr;
@@ -225,23 +225,23 @@ static int mrst_pci_irq_enable(struct pci_dev *dev)
return 0;
}
-struct pci_ops pci_mrst_ops = {
+struct pci_ops intel_mid_pci_ops = {
.read = pci_read,
.write = pci_write,
};
/**
- * pci_mrst_init - installs pci_mrst_ops
+ * intel_mid_pci_init - installs intel_mid_pci_ops
*
* Moorestown has an interesting PCI implementation (see above).
* Called when the early platform detection installs it.
*/
-int __init pci_mrst_init(void)
+int __init intel_mid_pci_init(void)
{
pr_info("Intel MID platform detected, using MID PCI ops\n");
pci_mmcfg_late_init();
- pcibios_enable_irq = mrst_pci_irq_enable;
- pci_root_ops = pci_mrst_ops;
+ pcibios_enable_irq = intel_mid_pci_irq_enable;
+ pci_root_ops = intel_mid_pci_ops;
pci_soc_mode = 1;
/* Continue with standard init */
return 1;
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 5596c7bdd327..082e88129712 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -700,7 +700,7 @@ int pci_mmconfig_insert(struct device *dev, u16 seg, u8 start, u8 end,
if (!(pci_probe & PCI_PROBE_MMCONF) || pci_mmcfg_arch_init_failed)
return -ENODEV;
- if (start > end || !addr)
+ if (start > end)
return -EINVAL;
mutex_lock(&pci_mmcfg_lock);
@@ -716,6 +716,11 @@ int pci_mmconfig_insert(struct device *dev, u16 seg, u8 start, u8 end,
return -EEXIST;
}
+ if (!addr) {
+ mutex_unlock(&pci_mmcfg_lock);
+ return -EINVAL;
+ }
+
rc = -EBUSY;
cfg = pci_mmconfig_alloc(seg, start, end, addr);
if (cfg == NULL) {
diff --git a/arch/x86/platform/Makefile b/arch/x86/platform/Makefile
index 01e0231a113e..20342d4c82ce 100644
--- a/arch/x86/platform/Makefile
+++ b/arch/x86/platform/Makefile
@@ -4,7 +4,7 @@ obj-y += efi/
obj-y += geode/
obj-y += goldfish/
obj-y += iris/
-obj-y += mrst/
+obj-y += intel-mid/
obj-y += olpc/
obj-y += scx200/
obj-y += sfi/
diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile
index 6db1cc4c7534..b7b0b35c1981 100644
--- a/arch/x86/platform/efi/Makefile
+++ b/arch/x86/platform/efi/Makefile
@@ -1,2 +1,3 @@
obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o
obj-$(CONFIG_ACPI_BGRT) += efi-bgrt.o
+obj-$(CONFIG_EARLY_PRINTK_EFI) += early_printk.o
diff --git a/arch/x86/platform/efi/early_printk.c b/arch/x86/platform/efi/early_printk.c
new file mode 100644
index 000000000000..6599a0027b76
--- /dev/null
+++ b/arch/x86/platform/efi/early_printk.c
@@ -0,0 +1,191 @@
+/*
+ * Copyright (C) 2013 Intel Corporation; author Matt Fleming
+ *
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2.
+ */
+
+#include <linux/console.h>
+#include <linux/efi.h>
+#include <linux/font.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <asm/setup.h>
+
+static const struct font_desc *font;
+static u32 efi_x, efi_y;
+
+static __init void early_efi_clear_scanline(unsigned int y)
+{
+ unsigned long base, *dst;
+ u16 len;
+
+ base = boot_params.screen_info.lfb_base;
+ len = boot_params.screen_info.lfb_linelength;
+
+ dst = early_ioremap(base + y*len, len);
+ if (!dst)
+ return;
+
+ memset(dst, 0, len);
+ early_iounmap(dst, len);
+}
+
+static __init void early_efi_scroll_up(void)
+{
+ unsigned long base, *dst, *src;
+ u16 len;
+ u32 i, height;
+
+ base = boot_params.screen_info.lfb_base;
+ len = boot_params.screen_info.lfb_linelength;
+ height = boot_params.screen_info.lfb_height;
+
+ for (i = 0; i < height - font->height; i++) {
+ dst = early_ioremap(base + i*len, len);
+ if (!dst)
+ return;
+
+ src = early_ioremap(base + (i + font->height) * len, len);
+ if (!src) {
+ early_iounmap(dst, len);
+ return;
+ }
+
+ memmove(dst, src, len);
+
+ early_iounmap(src, len);
+ early_iounmap(dst, len);
+ }
+}
+
+static void early_efi_write_char(u32 *dst, unsigned char c, unsigned int h)
+{
+ const u32 color_black = 0x00000000;
+ const u32 color_white = 0x00ffffff;
+ const u8 *src;
+ u8 s8;
+ int m;
+
+ src = font->data + c * font->height;
+ s8 = *(src + h);
+
+ for (m = 0; m < 8; m++) {
+ if ((s8 >> (7 - m)) & 1)
+ *dst = color_white;
+ else
+ *dst = color_black;
+ dst++;
+ }
+}
+
+static __init void
+early_efi_write(struct console *con, const char *str, unsigned int num)
+{
+ struct screen_info *si;
+ unsigned long base;
+ unsigned int len;
+ const char *s;
+ void *dst;
+
+ base = boot_params.screen_info.lfb_base;
+ si = &boot_params.screen_info;
+ len = si->lfb_linelength;
+
+ while (num) {
+ unsigned int linemax;
+ unsigned int h, count = 0;
+
+ for (s = str; *s && *s != '\n'; s++) {
+ if (count == num)
+ break;
+ count++;
+ }
+
+ linemax = (si->lfb_width - efi_x) / font->width;
+ if (count > linemax)
+ count = linemax;
+
+ for (h = 0; h < font->height; h++) {
+ unsigned int n, x;
+
+ dst = early_ioremap(base + (efi_y + h) * len, len);
+ if (!dst)
+ return;
+
+ s = str;
+ n = count;
+ x = efi_x;
+
+ while (n-- > 0) {
+ early_efi_write_char(dst + x*4, *s, h);
+ x += font->width;
+ s++;
+ }
+
+ early_iounmap(dst, len);
+ }
+
+ num -= count;
+ efi_x += count * font->width;
+ str += count;
+
+ if (num > 0 && *s == '\n') {
+ efi_x = 0;
+ efi_y += font->height;
+ str++;
+ num--;
+ }
+
+ if (efi_x >= si->lfb_width) {
+ efi_x = 0;
+ efi_y += font->height;
+ }
+
+ if (efi_y + font->height >= si->lfb_height) {
+ u32 i;
+
+ efi_y -= font->height;
+ early_efi_scroll_up();
+
+ for (i = 0; i < font->height; i++)
+ early_efi_clear_scanline(efi_y + i);
+ }
+ }
+}
+
+static __init int early_efi_setup(struct console *con, char *options)
+{
+ struct screen_info *si;
+ u16 xres, yres;
+ u32 i;
+
+ si = &boot_params.screen_info;
+ xres = si->lfb_width;
+ yres = si->lfb_height;
+
+ /*
+ * early_efi_write_char() implicitly assumes a framebuffer with
+ * 32-bits per pixel.
+ */
+ if (si->lfb_depth != 32)
+ return -ENODEV;
+
+ font = get_default_font(xres, yres, -1, -1);
+ if (!font)
+ return -ENODEV;
+
+ efi_y = rounddown(yres, font->height) - font->height;
+ for (i = 0; i < (yres - efi_y) / font->height; i++)
+ early_efi_scroll_up();
+
+ return 0;
+}
+
+struct console early_efi_console = {
+ .name = "earlyefi",
+ .write = early_efi_write,
+ .setup = early_efi_setup,
+ .flags = CON_PRINTBUFFER,
+ .index = -1,
+};
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 90f6ed127096..92c02344a060 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -60,19 +60,6 @@
static efi_char16_t efi_dummy_name[6] = { 'D', 'U', 'M', 'M', 'Y', 0 };
-struct efi __read_mostly efi = {
- .mps = EFI_INVALID_TABLE_ADDR,
- .acpi = EFI_INVALID_TABLE_ADDR,
- .acpi20 = EFI_INVALID_TABLE_ADDR,
- .smbios = EFI_INVALID_TABLE_ADDR,
- .sal_systab = EFI_INVALID_TABLE_ADDR,
- .boot_info = EFI_INVALID_TABLE_ADDR,
- .hcdp = EFI_INVALID_TABLE_ADDR,
- .uga = EFI_INVALID_TABLE_ADDR,
- .uv_systab = EFI_INVALID_TABLE_ADDR,
-};
-EXPORT_SYMBOL(efi);
-
struct efi_memory_map memmap;
static struct efi efi_phys __initdata;
@@ -80,6 +67,13 @@ static efi_system_table_t efi_systab __initdata;
unsigned long x86_efi_facility;
+static __initdata efi_config_table_type_t arch_tables[] = {
+#ifdef CONFIG_X86_UV
+ {UV_SYSTEM_TABLE_GUID, "UVsystab", &efi.uv_systab},
+#endif
+ {NULL_GUID, NULL, NULL},
+};
+
/*
* Returns 1 if 'facility' is enabled, 0 otherwise.
*/
@@ -399,6 +393,8 @@ int __init efi_memblock_x86_reserve_range(void)
memblock_reserve(pmap, memmap.nr_map * memmap.desc_size);
+ efi.memmap = &memmap;
+
return 0;
}
@@ -578,80 +574,6 @@ static int __init efi_systab_init(void *phys)
return 0;
}
-static int __init efi_config_init(u64 tables, int nr_tables)
-{
- void *config_tables, *tablep;
- int i, sz;
-
- if (efi_enabled(EFI_64BIT))
- sz = sizeof(efi_config_table_64_t);
- else
- sz = sizeof(efi_config_table_32_t);
-
- /*
- * Let's see what config tables the firmware passed to us.
- */
- config_tables = early_ioremap(tables, nr_tables * sz);
- if (config_tables == NULL) {
- pr_err("Could not map Configuration table!\n");
- return -ENOMEM;
- }
-
- tablep = config_tables;
- pr_info("");
- for (i = 0; i < efi.systab->nr_tables; i++) {
- efi_guid_t guid;
- unsigned long table;
-
- if (efi_enabled(EFI_64BIT)) {
- u64 table64;
- guid = ((efi_config_table_64_t *)tablep)->guid;
- table64 = ((efi_config_table_64_t *)tablep)->table;
- table = table64;
-#ifdef CONFIG_X86_32
- if (table64 >> 32) {
- pr_cont("\n");
- pr_err("Table located above 4GB, disabling EFI.\n");
- early_iounmap(config_tables,
- efi.systab->nr_tables * sz);
- return -EINVAL;
- }
-#endif
- } else {
- guid = ((efi_config_table_32_t *)tablep)->guid;
- table = ((efi_config_table_32_t *)tablep)->table;
- }
- if (!efi_guidcmp(guid, MPS_TABLE_GUID)) {
- efi.mps = table;
- pr_cont(" MPS=0x%lx ", table);
- } else if (!efi_guidcmp(guid, ACPI_20_TABLE_GUID)) {
- efi.acpi20 = table;
- pr_cont(" ACPI 2.0=0x%lx ", table);
- } else if (!efi_guidcmp(guid, ACPI_TABLE_GUID)) {
- efi.acpi = table;
- pr_cont(" ACPI=0x%lx ", table);
- } else if (!efi_guidcmp(guid, SMBIOS_TABLE_GUID)) {
- efi.smbios = table;
- pr_cont(" SMBIOS=0x%lx ", table);
-#ifdef CONFIG_X86_UV
- } else if (!efi_guidcmp(guid, UV_SYSTEM_TABLE_GUID)) {
- efi.uv_systab = table;
- pr_cont(" UVsystab=0x%lx ", table);
-#endif
- } else if (!efi_guidcmp(guid, HCDP_TABLE_GUID)) {
- efi.hcdp = table;
- pr_cont(" HCDP=0x%lx ", table);
- } else if (!efi_guidcmp(guid, UGA_IO_PROTOCOL_GUID)) {
- efi.uga = table;
- pr_cont(" UGA=0x%lx ", table);
- }
- tablep += sz;
- }
- pr_cont("\n");
- early_iounmap(config_tables, efi.systab->nr_tables * sz);
- return 0;
-}
-
static int __init efi_runtime_init(void)
{
efi_runtime_services_t *runtime;
@@ -745,7 +667,7 @@ void __init efi_init(void)
efi.systab->hdr.revision >> 16,
efi.systab->hdr.revision & 0xffff, vendor);
- if (efi_config_init(efi.systab->tables, efi.systab->nr_tables))
+ if (efi_config_init(arch_tables))
return;
set_bit(EFI_CONFIG_TABLES, &x86_efi_facility);
@@ -816,34 +738,6 @@ static void __init runtime_code_page_mkexec(void)
}
}
-/*
- * We can't ioremap data in EFI boot services RAM, because we've already mapped
- * it as RAM. So, look it up in the existing EFI memory map instead. Only
- * callable after efi_enter_virtual_mode and before efi_free_boot_services.
- */
-void __iomem *efi_lookup_mapped_addr(u64 phys_addr)
-{
- void *p;
- if (WARN_ON(!memmap.map))
- return NULL;
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
- efi_memory_desc_t *md = p;
- u64 size = md->num_pages << EFI_PAGE_SHIFT;
- u64 end = md->phys_addr + size;
- if (!(md->attribute & EFI_MEMORY_RUNTIME) &&
- md->type != EFI_BOOT_SERVICES_CODE &&
- md->type != EFI_BOOT_SERVICES_DATA)
- continue;
- if (!md->virt_addr)
- continue;
- if (phys_addr >= md->phys_addr && phys_addr < end) {
- phys_addr += md->virt_addr - md->phys_addr;
- return (__force void __iomem *)(unsigned long)phys_addr;
- }
- }
- return NULL;
-}
-
void efi_memory_uc(u64 addr, unsigned long size)
{
unsigned long page_shift = 1UL << EFI_PAGE_SHIFT;
@@ -912,10 +806,13 @@ void __init efi_enter_virtual_mode(void)
for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
md = p;
- if (!(md->attribute & EFI_MEMORY_RUNTIME) &&
- md->type != EFI_BOOT_SERVICES_CODE &&
- md->type != EFI_BOOT_SERVICES_DATA)
- continue;
+ if (!(md->attribute & EFI_MEMORY_RUNTIME)) {
+#ifdef CONFIG_X86_64
+ if (md->type != EFI_BOOT_SERVICES_CODE &&
+ md->type != EFI_BOOT_SERVICES_DATA)
+#endif
+ continue;
+ }
size = md->num_pages << EFI_PAGE_SHIFT;
end = md->phys_addr + size;
diff --git a/arch/x86/platform/geode/alix.c b/arch/x86/platform/geode/alix.c
index 90e23e7679a5..76b6632d3143 100644
--- a/arch/x86/platform/geode/alix.c
+++ b/arch/x86/platform/geode/alix.c
@@ -98,7 +98,7 @@ static struct platform_device alix_leds_dev = {
.dev.platform_data = &alix_leds_data,
};
-static struct __initdata platform_device *alix_devs[] = {
+static struct platform_device *alix_devs[] __initdata = {
&alix_buttons_dev,
&alix_leds_dev,
};
diff --git a/arch/x86/platform/geode/geos.c b/arch/x86/platform/geode/geos.c
index c2e6d53558be..aa733fba2471 100644
--- a/arch/x86/platform/geode/geos.c
+++ b/arch/x86/platform/geode/geos.c
@@ -87,7 +87,7 @@ static struct platform_device geos_leds_dev = {
.dev.platform_data = &geos_leds_data,
};
-static struct __initdata platform_device *geos_devs[] = {
+static struct platform_device *geos_devs[] __initdata = {
&geos_buttons_dev,
&geos_leds_dev,
};
diff --git a/arch/x86/platform/geode/net5501.c b/arch/x86/platform/geode/net5501.c
index 646e3b5b4bb6..927e38c0089f 100644
--- a/arch/x86/platform/geode/net5501.c
+++ b/arch/x86/platform/geode/net5501.c
@@ -78,7 +78,7 @@ static struct platform_device net5501_leds_dev = {
.dev.platform_data = &net5501_leds_data,
};
-static struct __initdata platform_device *net5501_devs[] = {
+static struct platform_device *net5501_devs[] __initdata = {
&net5501_buttons_dev,
&net5501_leds_dev,
};
diff --git a/arch/x86/platform/intel-mid/Makefile b/arch/x86/platform/intel-mid/Makefile
new file mode 100644
index 000000000000..01cc29ea5ff7
--- /dev/null
+++ b/arch/x86/platform/intel-mid/Makefile
@@ -0,0 +1,7 @@
+obj-$(CONFIG_X86_INTEL_MID) += intel-mid.o
+obj-$(CONFIG_X86_INTEL_MID) += intel_mid_vrtc.o
+obj-$(CONFIG_EARLY_PRINTK_INTEL_MID) += early_printk_intel_mid.o
+# SFI specific code
+ifdef CONFIG_X86_INTEL_MID
+obj-$(CONFIG_SFI) += sfi.o device_libs/
+endif
diff --git a/arch/x86/platform/intel-mid/device_libs/Makefile b/arch/x86/platform/intel-mid/device_libs/Makefile
new file mode 100644
index 000000000000..097e7a7940d8
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/Makefile
@@ -0,0 +1,22 @@
+# IPC Devices
+obj-y += platform_ipc.o
+obj-$(subst m,y,$(CONFIG_MFD_INTEL_MSIC)) += platform_msic.o
+obj-$(subst m,y,$(CONFIG_SND_MFLD_MACHINE)) += platform_msic_audio.o
+obj-$(subst m,y,$(CONFIG_GPIO_MSIC)) += platform_msic_gpio.o
+obj-$(subst m,y,$(CONFIG_MFD_INTEL_MSIC)) += platform_msic_ocd.o
+obj-$(subst m,y,$(CONFIG_MFD_INTEL_MSIC)) += platform_msic_battery.o
+obj-$(subst m,y,$(CONFIG_INTEL_MID_POWER_BUTTON)) += platform_msic_power_btn.o
+obj-$(subst m,y,$(CONFIG_GPIO_INTEL_PMIC)) += platform_pmic_gpio.o
+obj-$(subst m,y,$(CONFIG_INTEL_MFLD_THERMAL)) += platform_msic_thermal.o
+# I2C Devices
+obj-$(subst m,y,$(CONFIG_SENSORS_EMC1403)) += platform_emc1403.o
+obj-$(subst m,y,$(CONFIG_SENSORS_LIS3LV02D)) += platform_lis331.o
+obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_max7315.o
+obj-$(subst m,y,$(CONFIG_INPUT_MPU3050)) += platform_mpu3050.o
+obj-$(subst m,y,$(CONFIG_INPUT_BMA150)) += platform_bma023.o
+obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_tca6416.o
+obj-$(subst m,y,$(CONFIG_DRM_MEDFIELD)) += platform_tc35876x.o
+# SPI Devices
+obj-$(subst m,y,$(CONFIG_SERIAL_MRST_MAX3110)) += platform_max3111.o
+# MISC Devices
+obj-$(subst m,y,$(CONFIG_KEYBOARD_GPIO)) += platform_gpio_keys.o
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_bma023.c b/arch/x86/platform/intel-mid/device_libs/platform_bma023.c
new file mode 100644
index 000000000000..0ae7f2ae2296
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_bma023.c
@@ -0,0 +1,20 @@
+/*
+ * platform_bma023.c: bma023 platform data initilization file
+ *
+ * (C) Copyright 2013 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <asm/intel-mid.h>
+
+static const struct devs_id bma023_dev_id __initconst = {
+ .name = "bma023",
+ .type = SFI_DEV_TYPE_I2C,
+ .delay = 1,
+};
+
+sfi_device(bma023_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_emc1403.c b/arch/x86/platform/intel-mid/device_libs/platform_emc1403.c
new file mode 100644
index 000000000000..0d942c1d26d5
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_emc1403.c
@@ -0,0 +1,41 @@
+/*
+ * platform_emc1403.c: emc1403 platform data initilization file
+ *
+ * (C) Copyright 2013 Intel Corporation
+ * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/init.h>
+#include <linux/gpio.h>
+#include <linux/i2c.h>
+#include <asm/intel-mid.h>
+
+static void __init *emc1403_platform_data(void *info)
+{
+ static short intr2nd_pdata;
+ struct i2c_board_info *i2c_info = info;
+ int intr = get_gpio_by_name("thermal_int");
+ int intr2nd = get_gpio_by_name("thermal_alert");
+
+ if (intr == -1 || intr2nd == -1)
+ return NULL;
+
+ i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET;
+ intr2nd_pdata = intr2nd + INTEL_MID_IRQ_OFFSET;
+
+ return &intr2nd_pdata;
+}
+
+static const struct devs_id emc1403_dev_id __initconst = {
+ .name = "emc1403",
+ .type = SFI_DEV_TYPE_I2C,
+ .delay = 1,
+ .get_platform_data = &emc1403_platform_data,
+};
+
+sfi_device(emc1403_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c b/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c
new file mode 100644
index 000000000000..a013a4834bbe
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c
@@ -0,0 +1,83 @@
+/*
+ * platform_gpio_keys.c: gpio_keys platform data initilization file
+ *
+ * (C) Copyright 2013 Intel Corporation
+ * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/input.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/gpio.h>
+#include <linux/gpio_keys.h>
+#include <linux/platform_device.h>
+#include <asm/intel-mid.h>
+
+#define DEVICE_NAME "gpio-keys"
+
+/*
+ * we will search these buttons in SFI GPIO table (by name)
+ * and register them dynamically. Please add all possible
+ * buttons here, we will shrink them if no GPIO found.
+ */
+static struct gpio_keys_button gpio_button[] = {
+ {KEY_POWER, -1, 1, "power_btn", EV_KEY, 0, 3000},
+ {KEY_PROG1, -1, 1, "prog_btn1", EV_KEY, 0, 20},
+ {KEY_PROG2, -1, 1, "prog_btn2", EV_KEY, 0, 20},
+ {SW_LID, -1, 1, "lid_switch", EV_SW, 0, 20},
+ {KEY_VOLUMEUP, -1, 1, "vol_up", EV_KEY, 0, 20},
+ {KEY_VOLUMEDOWN, -1, 1, "vol_down", EV_KEY, 0, 20},
+ {KEY_CAMERA, -1, 1, "camera_full", EV_KEY, 0, 20},
+ {KEY_CAMERA_FOCUS, -1, 1, "camera_half", EV_KEY, 0, 20},
+ {SW_KEYPAD_SLIDE, -1, 1, "MagSw1", EV_SW, 0, 20},
+ {SW_KEYPAD_SLIDE, -1, 1, "MagSw2", EV_SW, 0, 20},
+};
+
+static struct gpio_keys_platform_data gpio_keys = {
+ .buttons = gpio_button,
+ .rep = 1,
+ .nbuttons = -1, /* will fill it after search */
+};
+
+static struct platform_device pb_device = {
+ .name = DEVICE_NAME,
+ .id = -1,
+ .dev = {
+ .platform_data = &gpio_keys,
+ },
+};
+
+/*
+ * Shrink the non-existent buttons, register the gpio button
+ * device if there is some
+ */
+static int __init pb_keys_init(void)
+{
+ struct gpio_keys_button *gb = gpio_button;
+ int i, num, good = 0;
+
+ num = sizeof(gpio_button) / sizeof(struct gpio_keys_button);
+ for (i = 0; i < num; i++) {
+ gb[i].gpio = get_gpio_by_name(gb[i].desc);
+ pr_debug("info[%2d]: name = %s, gpio = %d\n", i, gb[i].desc,
+ gb[i].gpio);
+ if (gb[i].gpio == -1)
+ continue;
+
+ if (i != good)
+ gb[good] = gb[i];
+ good++;
+ }
+
+ if (good) {
+ gpio_keys.nbuttons = good;
+ return platform_device_register(&pb_device);
+ }
+ return 0;
+}
+late_initcall(pb_keys_init);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_ipc.c b/arch/x86/platform/intel-mid/device_libs/platform_ipc.c
new file mode 100644
index 000000000000..a84b73d6c4a0
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_ipc.c
@@ -0,0 +1,68 @@
+/*
+ * platform_ipc.c: IPC platform library file
+ *
+ * (C) Copyright 2013 Intel Corporation
+ * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/sfi.h>
+#include <linux/gpio.h>
+#include <asm/intel-mid.h>
+#include "platform_ipc.h"
+
+void __init ipc_device_handler(struct sfi_device_table_entry *pentry,
+ struct devs_id *dev)
+{
+ struct platform_device *pdev;
+ void *pdata = NULL;
+ static struct resource res __initdata = {
+ .name = "IRQ",
+ .flags = IORESOURCE_IRQ,
+ };
+
+ pr_debug("IPC bus, name = %16.16s, irq = 0x%2x\n",
+ pentry->name, pentry->irq);
+
+ /*
+ * We need to call platform init of IPC devices to fill misc_pdata
+ * structure. It will be used in msic_init for initialization.
+ */
+ if (dev != NULL)
+ pdata = dev->get_platform_data(pentry);
+
+ /*
+ * On Medfield the platform device creation is handled by the MSIC
+ * MFD driver so we don't need to do it here.
+ */
+ if (intel_mid_has_msic())
+ return;
+
+ pdev = platform_device_alloc(pentry->name, 0);
+ if (pdev == NULL) {
+ pr_err("out of memory for SFI platform device '%s'.\n",
+ pentry->name);
+ return;
+ }
+ res.start = pentry->irq;
+ platform_device_add_resources(pdev, &res, 1);
+
+ pdev->dev.platform_data = pdata;
+ intel_scu_device_register(pdev);
+}
+
+static const struct devs_id pmic_audio_dev_id __initconst = {
+ .name = "pmic_audio",
+ .type = SFI_DEV_TYPE_IPC,
+ .delay = 1,
+ .device_handler = &ipc_device_handler,
+};
+
+sfi_device(pmic_audio_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_ipc.h b/arch/x86/platform/intel-mid/device_libs/platform_ipc.h
new file mode 100644
index 000000000000..8f568dd79605
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_ipc.h
@@ -0,0 +1,17 @@
+/*
+ * platform_ipc.h: IPC platform library header file
+ *
+ * (C) Copyright 2013 Intel Corporation
+ * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+#ifndef _PLATFORM_IPC_H_
+#define _PLATFORM_IPC_H_
+
+extern void __init ipc_device_handler(struct sfi_device_table_entry *pentry,
+ struct devs_id *dev) __attribute__((weak));
+#endif
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_lis331.c b/arch/x86/platform/intel-mid/device_libs/platform_lis331.c
new file mode 100644
index 000000000000..15278c11f714
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_lis331.c
@@ -0,0 +1,39 @@
+/*
+ * platform_lis331.c: lis331 platform data initilization file
+ *
+ * (C) Copyright 2013 Intel Corporation
+ * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/i2c.h>
+#include <linux/gpio.h>
+#include <asm/intel-mid.h>
+
+static void __init *lis331dl_platform_data(void *info)
+{
+ static short intr2nd_pdata;
+ struct i2c_board_info *i2c_info = info;
+ int intr = get_gpio_by_name("accel_int");
+ int intr2nd = get_gpio_by_name("accel_2");
+
+ if (intr == -1 || intr2nd == -1)
+ return NULL;
+
+ i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET;
+ intr2nd_pdata = intr2nd + INTEL_MID_IRQ_OFFSET;
+
+ return &intr2nd_pdata;
+}
+
+static const struct devs_id lis331dl_dev_id __initconst = {
+ .name = "i2c_accel",
+ .type = SFI_DEV_TYPE_I2C,
+ .get_platform_data = &lis331dl_platform_data,
+};
+
+sfi_device(lis331dl_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_max3111.c b/arch/x86/platform/intel-mid/device_libs/platform_max3111.c
new file mode 100644
index 000000000000..afd1df94e0e5
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_max3111.c
@@ -0,0 +1,35 @@
+/*
+ * platform_max3111.c: max3111 platform data initilization file
+ *
+ * (C) Copyright 2013 Intel Corporation
+ * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/gpio.h>
+#include <linux/spi/spi.h>
+#include <asm/intel-mid.h>
+
+static void __init *max3111_platform_data(void *info)
+{
+ struct spi_board_info *spi_info = info;
+ int intr = get_gpio_by_name("max3111_int");
+
+ spi_info->mode = SPI_MODE_0;
+ if (intr == -1)
+ return NULL;
+ spi_info->irq = intr + INTEL_MID_IRQ_OFFSET;
+ return NULL;
+}
+
+static const struct devs_id max3111_dev_id __initconst = {
+ .name = "spi_max3111",
+ .type = SFI_DEV_TYPE_SPI,
+ .get_platform_data = &max3111_platform_data,
+};
+
+sfi_device(max3111_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_max7315.c b/arch/x86/platform/intel-mid/device_libs/platform_max7315.c
new file mode 100644
index 000000000000..94ade10024ae
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_max7315.c
@@ -0,0 +1,79 @@
+/*
+ * platform_max7315.c: max7315 platform data initilization file
+ *
+ * (C) Copyright 2013 Intel Corporation
+ * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/init.h>
+#include <linux/gpio.h>
+#include <linux/i2c.h>
+#include <linux/platform_data/pca953x.h>
+#include <asm/intel-mid.h>
+
+#define MAX7315_NUM 2
+
+static void __init *max7315_platform_data(void *info)
+{
+ static struct pca953x_platform_data max7315_pdata[MAX7315_NUM];
+ static int nr;
+ struct pca953x_platform_data *max7315 = &max7315_pdata[nr];
+ struct i2c_board_info *i2c_info = info;
+ int gpio_base, intr;
+ char base_pin_name[SFI_NAME_LEN + 1];
+ char intr_pin_name[SFI_NAME_LEN + 1];
+
+ if (nr == MAX7315_NUM) {
+ pr_err("too many max7315s, we only support %d\n",
+ MAX7315_NUM);
+ return NULL;
+ }
+ /* we have several max7315 on the board, we only need load several
+ * instances of the same pca953x driver to cover them
+ */
+ strcpy(i2c_info->type, "max7315");
+ if (nr++) {
+ sprintf(base_pin_name, "max7315_%d_base", nr);
+ sprintf(intr_pin_name, "max7315_%d_int", nr);
+ } else {
+ strcpy(base_pin_name, "max7315_base");
+ strcpy(intr_pin_name, "max7315_int");
+ }
+
+ gpio_base = get_gpio_by_name(base_pin_name);
+ intr = get_gpio_by_name(intr_pin_name);
+
+ if (gpio_base == -1)
+ return NULL;
+ max7315->gpio_base = gpio_base;
+ if (intr != -1) {
+ i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET;
+ max7315->irq_base = gpio_base + INTEL_MID_IRQ_OFFSET;
+ } else {
+ i2c_info->irq = -1;
+ max7315->irq_base = -1;
+ }
+ return max7315;
+}
+
+static const struct devs_id max7315_dev_id __initconst = {
+ .name = "i2c_max7315",
+ .type = SFI_DEV_TYPE_I2C,
+ .delay = 1,
+ .get_platform_data = &max7315_platform_data,
+};
+
+static const struct devs_id max7315_2_dev_id __initconst = {
+ .name = "i2c_max7315_2",
+ .type = SFI_DEV_TYPE_I2C,
+ .delay = 1,
+ .get_platform_data = &max7315_platform_data,
+};
+
+sfi_device(max7315_dev_id);
+sfi_device(max7315_2_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mpu3050.c b/arch/x86/platform/intel-mid/device_libs/platform_mpu3050.c
new file mode 100644
index 000000000000..dd28d63c84fb
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_mpu3050.c
@@ -0,0 +1,36 @@
+/*
+ * platform_mpu3050.c: mpu3050 platform data initilization file
+ *
+ * (C) Copyright 2013 Intel Corporation
+ * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/gpio.h>
+#include <linux/i2c.h>
+#include <asm/intel-mid.h>
+
+static void *mpu3050_platform_data(void *info)
+{
+ struct i2c_board_info *i2c_info = info;
+ int intr = get_gpio_by_name("mpu3050_int");
+
+ if (intr == -1)
+ return NULL;
+
+ i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET;
+ return NULL;
+}
+
+static const struct devs_id mpu3050_dev_id __initconst = {
+ .name = "mpu3050",
+ .type = SFI_DEV_TYPE_I2C,
+ .delay = 1,
+ .get_platform_data = &mpu3050_platform_data,
+};
+
+sfi_device(mpu3050_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic.c b/arch/x86/platform/intel-mid/device_libs/platform_msic.c
new file mode 100644
index 000000000000..9f4a775a69d6
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_msic.c
@@ -0,0 +1,87 @@
+/*
+ * platform_msic.c: MSIC platform data initilization file
+ *
+ * (C) Copyright 2013 Intel Corporation
+ * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/scatterlist.h>
+#include <linux/init.h>
+#include <linux/sfi.h>
+#include <linux/mfd/intel_msic.h>
+#include <asm/intel_scu_ipc.h>
+#include <asm/intel-mid.h>
+#include "platform_msic.h"
+
+struct intel_msic_platform_data msic_pdata;
+
+static struct resource msic_resources[] = {
+ {
+ .start = INTEL_MSIC_IRQ_PHYS_BASE,
+ .end = INTEL_MSIC_IRQ_PHYS_BASE + 64 - 1,
+ .flags = IORESOURCE_MEM,
+ },
+};
+
+static struct platform_device msic_device = {
+ .name = "intel_msic",
+ .id = -1,
+ .dev = {
+ .platform_data = &msic_pdata,
+ },
+ .num_resources = ARRAY_SIZE(msic_resources),
+ .resource = msic_resources,
+};
+
+static int msic_scu_status_change(struct notifier_block *nb,
+ unsigned long code, void *data)
+{
+ if (code == SCU_DOWN) {
+ platform_device_unregister(&msic_device);
+ return 0;
+ }
+
+ return platform_device_register(&msic_device);
+}
+
+static int __init msic_init(void)
+{
+ static struct notifier_block msic_scu_notifier = {
+ .notifier_call = msic_scu_status_change,
+ };
+
+ /*
+ * We need to be sure that the SCU IPC is ready before MSIC device
+ * can be registered.
+ */
+ if (intel_mid_has_msic())
+ intel_scu_notifier_add(&msic_scu_notifier);
+
+ return 0;
+}
+arch_initcall(msic_init);
+
+/*
+ * msic_generic_platform_data - sets generic platform data for the block
+ * @info: pointer to the SFI device table entry for this block
+ * @block: MSIC block
+ *
+ * Function sets IRQ number from the SFI table entry for given device to
+ * the MSIC platform data.
+ */
+void *msic_generic_platform_data(void *info, enum intel_msic_block block)
+{
+ struct sfi_device_table_entry *entry = info;
+
+ BUG_ON(block < 0 || block >= INTEL_MSIC_BLOCK_LAST);
+ msic_pdata.irq[block] = entry->irq;
+
+ return NULL;
+}
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic.h b/arch/x86/platform/intel-mid/device_libs/platform_msic.h
new file mode 100644
index 000000000000..917eb56d77da
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_msic.h
@@ -0,0 +1,19 @@
+/*
+ * platform_msic.h: MSIC platform data header file
+ *
+ * (C) Copyright 2013 Intel Corporation
+ * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+#ifndef _PLATFORM_MSIC_H_
+#define _PLATFORM_MSIC_H_
+
+extern struct intel_msic_platform_data msic_pdata;
+
+extern void *msic_generic_platform_data(void *info,
+ enum intel_msic_block block) __attribute__((weak));
+#endif
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_audio.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_audio.c
new file mode 100644
index 000000000000..29629397d2b3
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_audio.c
@@ -0,0 +1,47 @@
+/*
+ * platform_msic_audio.c: MSIC audio platform data initilization file
+ *
+ * (C) Copyright 2013 Intel Corporation
+ * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/scatterlist.h>
+#include <linux/init.h>
+#include <linux/sfi.h>
+#include <linux/platform_device.h>
+#include <linux/mfd/intel_msic.h>
+#include <asm/intel-mid.h>
+
+#include "platform_msic.h"
+#include "platform_ipc.h"
+
+static void *msic_audio_platform_data(void *info)
+{
+ struct platform_device *pdev;
+
+ pdev = platform_device_register_simple("sst-platform", -1, NULL, 0);
+
+ if (IS_ERR(pdev)) {
+ pr_err("failed to create audio platform device\n");
+ return NULL;
+ }
+
+ return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_AUDIO);
+}
+
+static const struct devs_id msic_audio_dev_id __initconst = {
+ .name = "msic_audio",
+ .type = SFI_DEV_TYPE_IPC,
+ .delay = 1,
+ .get_platform_data = &msic_audio_platform_data,
+ .device_handler = &ipc_device_handler,
+};
+
+sfi_device(msic_audio_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_battery.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_battery.c
new file mode 100644
index 000000000000..f446c33df1a8
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_battery.c
@@ -0,0 +1,37 @@
+/*
+ * platform_msic_battery.c: MSIC battery platform data initilization file
+ *
+ * (C) Copyright 2013 Intel Corporation
+ * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/scatterlist.h>
+#include <linux/init.h>
+#include <linux/sfi.h>
+#include <linux/mfd/intel_msic.h>
+#include <asm/intel-mid.h>
+
+#include "platform_msic.h"
+#include "platform_ipc.h"
+
+static void __init *msic_battery_platform_data(void *info)
+{
+ return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_BATTERY);
+}
+
+static const struct devs_id msic_battery_dev_id __initconst = {
+ .name = "msic_battery",
+ .type = SFI_DEV_TYPE_IPC,
+ .delay = 1,
+ .get_platform_data = &msic_battery_platform_data,
+ .device_handler = &ipc_device_handler,
+};
+
+sfi_device(msic_battery_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_gpio.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_gpio.c
new file mode 100644
index 000000000000..2a4f7b1dd917
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_gpio.c
@@ -0,0 +1,48 @@
+/*
+ * platform_msic_gpio.c: MSIC GPIO platform data initilization file
+ *
+ * (C) Copyright 2013 Intel Corporation
+ * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/scatterlist.h>
+#include <linux/sfi.h>
+#include <linux/init.h>
+#include <linux/gpio.h>
+#include <linux/mfd/intel_msic.h>
+#include <asm/intel-mid.h>
+
+#include "platform_msic.h"
+#include "platform_ipc.h"
+
+static void __init *msic_gpio_platform_data(void *info)
+{
+ static struct intel_msic_gpio_pdata msic_gpio_pdata;
+
+ int gpio = get_gpio_by_name("msic_gpio_base");
+
+ if (gpio < 0)
+ return NULL;
+
+ msic_gpio_pdata.gpio_base = gpio;
+ msic_pdata.gpio = &msic_gpio_pdata;
+
+ return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_GPIO);
+}
+
+static const struct devs_id msic_gpio_dev_id __initconst = {
+ .name = "msic_gpio",
+ .type = SFI_DEV_TYPE_IPC,
+ .delay = 1,
+ .get_platform_data = &msic_gpio_platform_data,
+ .device_handler = &ipc_device_handler,
+};
+
+sfi_device(msic_gpio_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_ocd.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_ocd.c
new file mode 100644
index 000000000000..6497111ddb54
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_ocd.c
@@ -0,0 +1,49 @@
+/*
+ * platform_msic_ocd.c: MSIC OCD platform data initilization file
+ *
+ * (C) Copyright 2013 Intel Corporation
+ * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/scatterlist.h>
+#include <linux/sfi.h>
+#include <linux/init.h>
+#include <linux/gpio.h>
+#include <linux/mfd/intel_msic.h>
+#include <asm/intel-mid.h>
+
+#include "platform_msic.h"
+#include "platform_ipc.h"
+
+static void __init *msic_ocd_platform_data(void *info)
+{
+ static struct intel_msic_ocd_pdata msic_ocd_pdata;
+ int gpio;
+
+ gpio = get_gpio_by_name("ocd_gpio");
+
+ if (gpio < 0)
+ return NULL;
+
+ msic_ocd_pdata.gpio = gpio;
+ msic_pdata.ocd = &msic_ocd_pdata;
+
+ return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_OCD);
+}
+
+static const struct devs_id msic_ocd_dev_id __initconst = {
+ .name = "msic_ocd",
+ .type = SFI_DEV_TYPE_IPC,
+ .delay = 1,
+ .get_platform_data = &msic_ocd_platform_data,
+ .device_handler = &ipc_device_handler,
+};
+
+sfi_device(msic_ocd_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_power_btn.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_power_btn.c
new file mode 100644
index 000000000000..83a3459bc337
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_power_btn.c
@@ -0,0 +1,36 @@
+/*
+ * platform_msic_power_btn.c: MSIC power btn platform data initilization file
+ *
+ * (C) Copyright 2013 Intel Corporation
+ * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/scatterlist.h>
+#include <linux/sfi.h>
+#include <linux/init.h>
+#include <linux/mfd/intel_msic.h>
+#include <asm/intel-mid.h>
+
+#include "platform_msic.h"
+#include "platform_ipc.h"
+
+static void __init *msic_power_btn_platform_data(void *info)
+{
+ return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_POWER_BTN);
+}
+
+static const struct devs_id msic_power_btn_dev_id __initconst = {
+ .name = "msic_power_btn",
+ .type = SFI_DEV_TYPE_IPC,
+ .delay = 1,
+ .get_platform_data = &msic_power_btn_platform_data,
+ .device_handler = &ipc_device_handler,
+};
+
+sfi_device(msic_power_btn_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_thermal.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_thermal.c
new file mode 100644
index 000000000000..a351878b96bc
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_thermal.c
@@ -0,0 +1,37 @@
+/*
+ * platform_msic_thermal.c: msic_thermal platform data initilization file
+ *
+ * (C) Copyright 2013 Intel Corporation
+ * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/input.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/gpio.h>
+#include <linux/platform_device.h>
+#include <linux/mfd/intel_msic.h>
+#include <asm/intel-mid.h>
+
+#include "platform_msic.h"
+#include "platform_ipc.h"
+
+static void __init *msic_thermal_platform_data(void *info)
+{
+ return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_THERMAL);
+}
+
+static const struct devs_id msic_thermal_dev_id __initconst = {
+ .name = "msic_thermal",
+ .type = SFI_DEV_TYPE_IPC,
+ .delay = 1,
+ .get_platform_data = &msic_thermal_platform_data,
+ .device_handler = &ipc_device_handler,
+};
+
+sfi_device(msic_thermal_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_pmic_gpio.c b/arch/x86/platform/intel-mid/device_libs/platform_pmic_gpio.c
new file mode 100644
index 000000000000..d87182a09263
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_pmic_gpio.c
@@ -0,0 +1,54 @@
+/*
+ * platform_pmic_gpio.c: PMIC GPIO platform data initilization file
+ *
+ * (C) Copyright 2013 Intel Corporation
+ * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/scatterlist.h>
+#include <linux/gpio.h>
+#include <linux/init.h>
+#include <linux/sfi.h>
+#include <linux/intel_pmic_gpio.h>
+#include <asm/intel-mid.h>
+
+#include "platform_ipc.h"
+
+static void __init *pmic_gpio_platform_data(void *info)
+{
+ static struct intel_pmic_gpio_platform_data pmic_gpio_pdata;
+ int gpio_base = get_gpio_by_name("pmic_gpio_base");
+
+ if (gpio_base == -1)
+ gpio_base = 64;
+ pmic_gpio_pdata.gpio_base = gpio_base;
+ pmic_gpio_pdata.irq_base = gpio_base + INTEL_MID_IRQ_OFFSET;
+ pmic_gpio_pdata.gpiointr = 0xffffeff8;
+
+ return &pmic_gpio_pdata;
+}
+
+static const struct devs_id pmic_gpio_spi_dev_id __initconst = {
+ .name = "pmic_gpio",
+ .type = SFI_DEV_TYPE_SPI,
+ .delay = 1,
+ .get_platform_data = &pmic_gpio_platform_data,
+};
+
+static const struct devs_id pmic_gpio_ipc_dev_id __initconst = {
+ .name = "pmic_gpio",
+ .type = SFI_DEV_TYPE_IPC,
+ .delay = 1,
+ .get_platform_data = &pmic_gpio_platform_data,
+ .device_handler = &ipc_device_handler
+};
+
+sfi_device(pmic_gpio_spi_dev_id);
+sfi_device(pmic_gpio_ipc_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_tc35876x.c b/arch/x86/platform/intel-mid/device_libs/platform_tc35876x.c
new file mode 100644
index 000000000000..740fc757050c
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_tc35876x.c
@@ -0,0 +1,36 @@
+/*
+ * platform_tc35876x.c: tc35876x platform data initilization file
+ *
+ * (C) Copyright 2013 Intel Corporation
+ * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/gpio.h>
+#include <linux/i2c/tc35876x.h>
+#include <asm/intel-mid.h>
+
+/*tc35876x DSI_LVDS bridge chip and panel platform data*/
+static void *tc35876x_platform_data(void *data)
+{
+ static struct tc35876x_platform_data pdata;
+
+ /* gpio pins set to -1 will not be used by the driver */
+ pdata.gpio_bridge_reset = get_gpio_by_name("LCMB_RXEN");
+ pdata.gpio_panel_bl_en = get_gpio_by_name("6S6P_BL_EN");
+ pdata.gpio_panel_vadd = get_gpio_by_name("EN_VREG_LCD_V3P3");
+
+ return &pdata;
+}
+
+static const struct devs_id tc35876x_dev_id __initconst = {
+ .name = "i2c_disp_brig",
+ .type = SFI_DEV_TYPE_I2C,
+ .get_platform_data = &tc35876x_platform_data,
+};
+
+sfi_device(tc35876x_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_tca6416.c b/arch/x86/platform/intel-mid/device_libs/platform_tca6416.c
new file mode 100644
index 000000000000..22881c9a6737
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_tca6416.c
@@ -0,0 +1,57 @@
+/*
+ * platform_tca6416.c: tca6416 platform data initilization file
+ *
+ * (C) Copyright 2013 Intel Corporation
+ * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/platform_data/pca953x.h>
+#include <linux/i2c.h>
+#include <linux/gpio.h>
+#include <asm/intel-mid.h>
+
+#define TCA6416_NAME "tca6416"
+#define TCA6416_BASE "tca6416_base"
+#define TCA6416_INTR "tca6416_int"
+
+static void *tca6416_platform_data(void *info)
+{
+ static struct pca953x_platform_data tca6416;
+ struct i2c_board_info *i2c_info = info;
+ int gpio_base, intr;
+ char base_pin_name[SFI_NAME_LEN + 1];
+ char intr_pin_name[SFI_NAME_LEN + 1];
+
+ strcpy(i2c_info->type, TCA6416_NAME);
+ strcpy(base_pin_name, TCA6416_BASE);
+ strcpy(intr_pin_name, TCA6416_INTR);
+
+ gpio_base = get_gpio_by_name(base_pin_name);
+ intr = get_gpio_by_name(intr_pin_name);
+
+ if (gpio_base == -1)
+ return NULL;
+ tca6416.gpio_base = gpio_base;
+ if (intr != -1) {
+ i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET;
+ tca6416.irq_base = gpio_base + INTEL_MID_IRQ_OFFSET;
+ } else {
+ i2c_info->irq = -1;
+ tca6416.irq_base = -1;
+ }
+ return &tca6416;
+}
+
+static const struct devs_id tca6416_dev_id __initconst = {
+ .name = "tca6416",
+ .type = SFI_DEV_TYPE_I2C,
+ .delay = 1,
+ .get_platform_data = &tca6416_platform_data,
+};
+
+sfi_device(tca6416_dev_id);
diff --git a/arch/x86/platform/mrst/early_printk_mrst.c b/arch/x86/platform/intel-mid/early_printk_intel_mid.c
index 028454f0c3a5..4f702f554f6e 100644
--- a/arch/x86/platform/mrst/early_printk_mrst.c
+++ b/arch/x86/platform/intel-mid/early_printk_intel_mid.c
@@ -1,5 +1,5 @@
/*
- * early_printk_mrst.c - early consoles for Intel MID platforms
+ * early_printk_intel_mid.c - early consoles for Intel MID platforms
*
* Copyright (c) 2008-2010, Intel Corporation
*
@@ -27,7 +27,7 @@
#include <asm/fixmap.h>
#include <asm/pgtable.h>
-#include <asm/mrst.h>
+#include <asm/intel-mid.h>
#define MRST_SPI_TIMEOUT 0x200000
#define MRST_REGBASE_SPI0 0xff128000
@@ -152,7 +152,7 @@ void mrst_early_console_init(void)
spi0_cdiv = ((*pclk_spi0) & 0xe00) >> 9;
freq = 100000000 / (spi0_cdiv + 1);
- if (mrst_identify_cpu() == MRST_CPU_CHIP_PENWELL)
+ if (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_PENWELL)
mrst_spi_paddr = MRST_REGBASE_SPI1;
pspi = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE,
@@ -213,13 +213,14 @@ static void early_mrst_spi_putc(char c)
}
if (!timeout)
- pr_warning("MRST earlycon: timed out\n");
+ pr_warn("MRST earlycon: timed out\n");
else
max3110_write_data(c);
}
/* Early SPI only uses polling mode */
-static void early_mrst_spi_write(struct console *con, const char *str, unsigned n)
+static void early_mrst_spi_write(struct console *con, const char *str,
+ unsigned n)
{
int i;
diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c
new file mode 100644
index 000000000000..523a1c8f7f07
--- /dev/null
+++ b/arch/x86/platform/intel-mid/intel-mid.c
@@ -0,0 +1,213 @@
+/*
+ * intel-mid.c: Intel MID platform setup code
+ *
+ * (C) Copyright 2008, 2012 Intel Corporation
+ * Author: Jacob Pan (jacob.jun.pan@intel.com)
+ * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#define pr_fmt(fmt) "intel_mid: " fmt
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/scatterlist.h>
+#include <linux/sfi.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+
+#include <asm/setup.h>
+#include <asm/mpspec_def.h>
+#include <asm/hw_irq.h>
+#include <asm/apic.h>
+#include <asm/io_apic.h>
+#include <asm/intel-mid.h>
+#include <asm/intel_mid_vrtc.h>
+#include <asm/io.h>
+#include <asm/i8259.h>
+#include <asm/intel_scu_ipc.h>
+#include <asm/apb_timer.h>
+#include <asm/reboot.h>
+
+/*
+ * the clockevent devices on Moorestown/Medfield can be APBT or LAPIC clock,
+ * cmdline option x86_intel_mid_timer can be used to override the configuration
+ * to prefer one or the other.
+ * at runtime, there are basically three timer configurations:
+ * 1. per cpu apbt clock only
+ * 2. per cpu always-on lapic clocks only, this is Penwell/Medfield only
+ * 3. per cpu lapic clock (C3STOP) and one apbt clock, with broadcast.
+ *
+ * by default (without cmdline option), platform code first detects cpu type
+ * to see if we are on lincroft or penwell, then set up both lapic or apbt
+ * clocks accordingly.
+ * i.e. by default, medfield uses configuration #2, moorestown uses #1.
+ * config #3 is supported but not recommended on medfield.
+ *
+ * rating and feature summary:
+ * lapic (with C3STOP) --------- 100
+ * apbt (always-on) ------------ 110
+ * lapic (always-on,ARAT) ------ 150
+ */
+
+enum intel_mid_timer_options intel_mid_timer_options;
+
+enum intel_mid_cpu_type __intel_mid_cpu_chip;
+EXPORT_SYMBOL_GPL(__intel_mid_cpu_chip);
+
+static void intel_mid_power_off(void)
+{
+}
+
+static void intel_mid_reboot(void)
+{
+ intel_scu_ipc_simple_command(IPCMSG_COLD_BOOT, 0);
+}
+
+static unsigned long __init intel_mid_calibrate_tsc(void)
+{
+ unsigned long fast_calibrate;
+ u32 lo, hi, ratio, fsb;
+
+ rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
+ pr_debug("IA32 perf status is 0x%x, 0x%0x\n", lo, hi);
+ ratio = (hi >> 8) & 0x1f;
+ pr_debug("ratio is %d\n", ratio);
+ if (!ratio) {
+ pr_err("read a zero ratio, should be incorrect!\n");
+ pr_err("force tsc ratio to 16 ...\n");
+ ratio = 16;
+ }
+ rdmsr(MSR_FSB_FREQ, lo, hi);
+ if ((lo & 0x7) == 0x7)
+ fsb = PENWELL_FSB_FREQ_83SKU;
+ else
+ fsb = PENWELL_FSB_FREQ_100SKU;
+ fast_calibrate = ratio * fsb;
+ pr_debug("read penwell tsc %lu khz\n", fast_calibrate);
+ lapic_timer_frequency = fsb * 1000 / HZ;
+ /* mark tsc clocksource as reliable */
+ set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE);
+
+ if (fast_calibrate)
+ return fast_calibrate;
+
+ return 0;
+}
+
+static void __init intel_mid_time_init(void)
+{
+ sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr);
+ switch (intel_mid_timer_options) {
+ case INTEL_MID_TIMER_APBT_ONLY:
+ break;
+ case INTEL_MID_TIMER_LAPIC_APBT:
+ x86_init.timers.setup_percpu_clockev = setup_boot_APIC_clock;
+ x86_cpuinit.setup_percpu_clockev = setup_secondary_APIC_clock;
+ break;
+ default:
+ if (!boot_cpu_has(X86_FEATURE_ARAT))
+ break;
+ x86_init.timers.setup_percpu_clockev = setup_boot_APIC_clock;
+ x86_cpuinit.setup_percpu_clockev = setup_secondary_APIC_clock;
+ return;
+ }
+ /* we need at least one APB timer */
+ pre_init_apic_IRQ0();
+ apbt_time_init();
+}
+
+static void __cpuinit intel_mid_arch_setup(void)
+{
+ if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27)
+ __intel_mid_cpu_chip = INTEL_MID_CPU_CHIP_PENWELL;
+ else {
+ pr_err("Unknown Intel MID CPU (%d:%d), default to Penwell\n",
+ boot_cpu_data.x86, boot_cpu_data.x86_model);
+ __intel_mid_cpu_chip = INTEL_MID_CPU_CHIP_PENWELL;
+ }
+}
+
+/* MID systems don't have i8042 controller */
+static int intel_mid_i8042_detect(void)
+{
+ return 0;
+}
+
+/*
+ * Moorestown does not have external NMI source nor port 0x61 to report
+ * NMI status. The possible NMI sources are from pmu as a result of NMI
+ * watchdog or lock debug. Reading io port 0x61 results in 0xff which
+ * misled NMI handler.
+ */
+static unsigned char intel_mid_get_nmi_reason(void)
+{
+ return 0;
+}
+
+/*
+ * Moorestown specific x86_init function overrides and early setup
+ * calls.
+ */
+void __init x86_intel_mid_early_setup(void)
+{
+ x86_init.resources.probe_roms = x86_init_noop;
+ x86_init.resources.reserve_resources = x86_init_noop;
+
+ x86_init.timers.timer_init = intel_mid_time_init;
+ x86_init.timers.setup_percpu_clockev = x86_init_noop;
+
+ x86_init.irqs.pre_vector_init = x86_init_noop;
+
+ x86_init.oem.arch_setup = intel_mid_arch_setup;
+
+ x86_cpuinit.setup_percpu_clockev = apbt_setup_secondary_clock;
+
+ x86_platform.calibrate_tsc = intel_mid_calibrate_tsc;
+ x86_platform.i8042_detect = intel_mid_i8042_detect;
+ x86_init.timers.wallclock_init = intel_mid_rtc_init;
+ x86_platform.get_nmi_reason = intel_mid_get_nmi_reason;
+
+ x86_init.pci.init = intel_mid_pci_init;
+ x86_init.pci.fixup_irqs = x86_init_noop;
+
+ legacy_pic = &null_legacy_pic;
+
+ pm_power_off = intel_mid_power_off;
+ machine_ops.emergency_restart = intel_mid_reboot;
+
+ /* Avoid searching for BIOS MP tables */
+ x86_init.mpparse.find_smp_config = x86_init_noop;
+ x86_init.mpparse.get_smp_config = x86_init_uint_noop;
+ set_bit(MP_BUS_ISA, mp_bus_not_pci);
+}
+
+/*
+ * if user does not want to use per CPU apb timer, just give it a lower rating
+ * than local apic timer and skip the late per cpu timer init.
+ */
+static inline int __init setup_x86_intel_mid_timer(char *arg)
+{
+ if (!arg)
+ return -EINVAL;
+
+ if (strcmp("apbt_only", arg) == 0)
+ intel_mid_timer_options = INTEL_MID_TIMER_APBT_ONLY;
+ else if (strcmp("lapic_and_apbt", arg) == 0)
+ intel_mid_timer_options = INTEL_MID_TIMER_LAPIC_APBT;
+ else {
+ pr_warn("X86 INTEL_MID timer option %s not recognised"
+ " use x86_intel_mid_timer=apbt_only or lapic_and_apbt\n",
+ arg);
+ return -EINVAL;
+ }
+ return 0;
+}
+__setup("x86_intel_mid_timer=", setup_x86_intel_mid_timer);
+
diff --git a/arch/x86/platform/mrst/vrtc.c b/arch/x86/platform/intel-mid/intel_mid_vrtc.c
index 5e355b134ba4..4762cff7facd 100644
--- a/arch/x86/platform/mrst/vrtc.c
+++ b/arch/x86/platform/intel-mid/intel_mid_vrtc.c
@@ -1,5 +1,5 @@
/*
- * vrtc.c: Driver for virtual RTC device on Intel MID platform
+ * intel_mid_vrtc.c: Driver for virtual RTC device on Intel MID platform
*
* (C) Copyright 2009 Intel Corporation
*
@@ -23,8 +23,8 @@
#include <linux/sfi.h>
#include <linux/platform_device.h>
-#include <asm/mrst.h>
-#include <asm/mrst-vrtc.h>
+#include <asm/intel-mid.h>
+#include <asm/intel_mid_vrtc.h>
#include <asm/time.h>
#include <asm/fixmap.h>
@@ -79,7 +79,7 @@ void vrtc_get_time(struct timespec *now)
/* vRTC YEAR reg contains the offset to 1972 */
year += 1972;
- printk(KERN_INFO "vRTC: sec: %d min: %d hour: %d day: %d "
+ pr_info("vRTC: sec: %d min: %d hour: %d day: %d "
"mon: %d year: %d\n", sec, min, hour, mday, mon, year);
now->tv_sec = mktime(year, mon, mday, hour, min, sec);
@@ -109,15 +109,14 @@ int vrtc_set_mmss(const struct timespec *now)
vrtc_cmos_write(tm.tm_sec, RTC_SECONDS);
spin_unlock_irqrestore(&rtc_lock, flags);
} else {
- printk(KERN_ERR
- "%s: Invalid vRTC value: write of %lx to vRTC failed\n",
+ pr_err("%s: Invalid vRTC value: write of %lx to vRTC failed\n",
__FUNCTION__, now->tv_sec);
retval = -EINVAL;
}
return retval;
}
-void __init mrst_rtc_init(void)
+void __init intel_mid_rtc_init(void)
{
unsigned long vrtc_paddr;
@@ -155,10 +154,10 @@ static struct platform_device vrtc_device = {
};
/* Register the RTC device if appropriate */
-static int __init mrst_device_create(void)
+static int __init intel_mid_device_create(void)
{
/* No Moorestown, no device */
- if (!mrst_identify_cpu())
+ if (!intel_mid_identify_cpu())
return -ENODEV;
/* No timer, no device */
if (!sfi_mrtc_num)
@@ -175,4 +174,4 @@ static int __init mrst_device_create(void)
return platform_device_register(&vrtc_device);
}
-module_init(mrst_device_create);
+module_init(intel_mid_device_create);
diff --git a/arch/x86/platform/intel-mid/sfi.c b/arch/x86/platform/intel-mid/sfi.c
new file mode 100644
index 000000000000..c84c1ca396bf
--- /dev/null
+++ b/arch/x86/platform/intel-mid/sfi.c
@@ -0,0 +1,488 @@
+/*
+ * intel_mid_sfi.c: Intel MID SFI initialization code
+ *
+ * (C) Copyright 2013 Intel Corporation
+ * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/scatterlist.h>
+#include <linux/sfi.h>
+#include <linux/intel_pmic_gpio.h>
+#include <linux/spi/spi.h>
+#include <linux/i2c.h>
+#include <linux/skbuff.h>
+#include <linux/gpio.h>
+#include <linux/gpio_keys.h>
+#include <linux/input.h>
+#include <linux/platform_device.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+#include <linux/mmc/core.h>
+#include <linux/mmc/card.h>
+#include <linux/blkdev.h>
+
+#include <asm/setup.h>
+#include <asm/mpspec_def.h>
+#include <asm/hw_irq.h>
+#include <asm/apic.h>
+#include <asm/io_apic.h>
+#include <asm/intel-mid.h>
+#include <asm/intel_mid_vrtc.h>
+#include <asm/io.h>
+#include <asm/i8259.h>
+#include <asm/intel_scu_ipc.h>
+#include <asm/apb_timer.h>
+#include <asm/reboot.h>
+
+#define SFI_SIG_OEM0 "OEM0"
+#define MAX_IPCDEVS 24
+#define MAX_SCU_SPI 24
+#define MAX_SCU_I2C 24
+
+static struct platform_device *ipc_devs[MAX_IPCDEVS];
+static struct spi_board_info *spi_devs[MAX_SCU_SPI];
+static struct i2c_board_info *i2c_devs[MAX_SCU_I2C];
+static struct sfi_gpio_table_entry *gpio_table;
+static struct sfi_timer_table_entry sfi_mtimer_array[SFI_MTMR_MAX_NUM];
+static int ipc_next_dev;
+static int spi_next_dev;
+static int i2c_next_dev;
+static int i2c_bus[MAX_SCU_I2C];
+static int gpio_num_entry;
+static u32 sfi_mtimer_usage[SFI_MTMR_MAX_NUM];
+int sfi_mrtc_num;
+int sfi_mtimer_num;
+
+struct sfi_rtc_table_entry sfi_mrtc_array[SFI_MRTC_MAX];
+EXPORT_SYMBOL_GPL(sfi_mrtc_array);
+
+struct blocking_notifier_head intel_scu_notifier =
+ BLOCKING_NOTIFIER_INIT(intel_scu_notifier);
+EXPORT_SYMBOL_GPL(intel_scu_notifier);
+
+#define intel_mid_sfi_get_pdata(dev, priv) \
+ ((dev)->get_platform_data ? (dev)->get_platform_data(priv) : NULL)
+
+/* parse all the mtimer info to a static mtimer array */
+int __init sfi_parse_mtmr(struct sfi_table_header *table)
+{
+ struct sfi_table_simple *sb;
+ struct sfi_timer_table_entry *pentry;
+ struct mpc_intsrc mp_irq;
+ int totallen;
+
+ sb = (struct sfi_table_simple *)table;
+ if (!sfi_mtimer_num) {
+ sfi_mtimer_num = SFI_GET_NUM_ENTRIES(sb,
+ struct sfi_timer_table_entry);
+ pentry = (struct sfi_timer_table_entry *) sb->pentry;
+ totallen = sfi_mtimer_num * sizeof(*pentry);
+ memcpy(sfi_mtimer_array, pentry, totallen);
+ }
+
+ pr_debug("SFI MTIMER info (num = %d):\n", sfi_mtimer_num);
+ pentry = sfi_mtimer_array;
+ for (totallen = 0; totallen < sfi_mtimer_num; totallen++, pentry++) {
+ pr_debug("timer[%d]: paddr = 0x%08x, freq = %dHz, irq = %d\n",
+ totallen, (u32)pentry->phys_addr,
+ pentry->freq_hz, pentry->irq);
+ if (!pentry->irq)
+ continue;
+ mp_irq.type = MP_INTSRC;
+ mp_irq.irqtype = mp_INT;
+/* triggering mode edge bit 2-3, active high polarity bit 0-1 */
+ mp_irq.irqflag = 5;
+ mp_irq.srcbus = MP_BUS_ISA;
+ mp_irq.srcbusirq = pentry->irq; /* IRQ */
+ mp_irq.dstapic = MP_APIC_ALL;
+ mp_irq.dstirq = pentry->irq;
+ mp_save_irq(&mp_irq);
+ }
+
+ return 0;
+}
+
+struct sfi_timer_table_entry *sfi_get_mtmr(int hint)
+{
+ int i;
+ if (hint < sfi_mtimer_num) {
+ if (!sfi_mtimer_usage[hint]) {
+ pr_debug("hint taken for timer %d irq %d\n",
+ hint, sfi_mtimer_array[hint].irq);
+ sfi_mtimer_usage[hint] = 1;
+ return &sfi_mtimer_array[hint];
+ }
+ }
+ /* take the first timer available */
+ for (i = 0; i < sfi_mtimer_num;) {
+ if (!sfi_mtimer_usage[i]) {
+ sfi_mtimer_usage[i] = 1;
+ return &sfi_mtimer_array[i];
+ }
+ i++;
+ }
+ return NULL;
+}
+
+void sfi_free_mtmr(struct sfi_timer_table_entry *mtmr)
+{
+ int i;
+ for (i = 0; i < sfi_mtimer_num;) {
+ if (mtmr->irq == sfi_mtimer_array[i].irq) {
+ sfi_mtimer_usage[i] = 0;
+ return;
+ }
+ i++;
+ }
+}
+
+/* parse all the mrtc info to a global mrtc array */
+int __init sfi_parse_mrtc(struct sfi_table_header *table)
+{
+ struct sfi_table_simple *sb;
+ struct sfi_rtc_table_entry *pentry;
+ struct mpc_intsrc mp_irq;
+
+ int totallen;
+
+ sb = (struct sfi_table_simple *)table;
+ if (!sfi_mrtc_num) {
+ sfi_mrtc_num = SFI_GET_NUM_ENTRIES(sb,
+ struct sfi_rtc_table_entry);
+ pentry = (struct sfi_rtc_table_entry *)sb->pentry;
+ totallen = sfi_mrtc_num * sizeof(*pentry);
+ memcpy(sfi_mrtc_array, pentry, totallen);
+ }
+
+ pr_debug("SFI RTC info (num = %d):\n", sfi_mrtc_num);
+ pentry = sfi_mrtc_array;
+ for (totallen = 0; totallen < sfi_mrtc_num; totallen++, pentry++) {
+ pr_debug("RTC[%d]: paddr = 0x%08x, irq = %d\n",
+ totallen, (u32)pentry->phys_addr, pentry->irq);
+ mp_irq.type = MP_INTSRC;
+ mp_irq.irqtype = mp_INT;
+ mp_irq.irqflag = 0xf; /* level trigger and active low */
+ mp_irq.srcbus = MP_BUS_ISA;
+ mp_irq.srcbusirq = pentry->irq; /* IRQ */
+ mp_irq.dstapic = MP_APIC_ALL;
+ mp_irq.dstirq = pentry->irq;
+ mp_save_irq(&mp_irq);
+ }
+ return 0;
+}
+
+
+/*
+ * Parsing GPIO table first, since the DEVS table will need this table
+ * to map the pin name to the actual pin.
+ */
+static int __init sfi_parse_gpio(struct sfi_table_header *table)
+{
+ struct sfi_table_simple *sb;
+ struct sfi_gpio_table_entry *pentry;
+ int num, i;
+
+ if (gpio_table)
+ return 0;
+ sb = (struct sfi_table_simple *)table;
+ num = SFI_GET_NUM_ENTRIES(sb, struct sfi_gpio_table_entry);
+ pentry = (struct sfi_gpio_table_entry *)sb->pentry;
+
+ gpio_table = kmalloc(num * sizeof(*pentry), GFP_KERNEL);
+ if (!gpio_table)
+ return -1;
+ memcpy(gpio_table, pentry, num * sizeof(*pentry));
+ gpio_num_entry = num;
+
+ pr_debug("GPIO pin info:\n");
+ for (i = 0; i < num; i++, pentry++)
+ pr_debug("info[%2d]: controller = %16.16s, pin_name = %16.16s,"
+ " pin = %d\n", i,
+ pentry->controller_name,
+ pentry->pin_name,
+ pentry->pin_no);
+ return 0;
+}
+
+int get_gpio_by_name(const char *name)
+{
+ struct sfi_gpio_table_entry *pentry = gpio_table;
+ int i;
+
+ if (!pentry)
+ return -1;
+ for (i = 0; i < gpio_num_entry; i++, pentry++) {
+ if (!strncmp(name, pentry->pin_name, SFI_NAME_LEN))
+ return pentry->pin_no;
+ }
+ return -1;
+}
+
+void __init intel_scu_device_register(struct platform_device *pdev)
+{
+ if (ipc_next_dev == MAX_IPCDEVS)
+ pr_err("too many SCU IPC devices");
+ else
+ ipc_devs[ipc_next_dev++] = pdev;
+}
+
+static void __init intel_scu_spi_device_register(struct spi_board_info *sdev)
+{
+ struct spi_board_info *new_dev;
+
+ if (spi_next_dev == MAX_SCU_SPI) {
+ pr_err("too many SCU SPI devices");
+ return;
+ }
+
+ new_dev = kzalloc(sizeof(*sdev), GFP_KERNEL);
+ if (!new_dev) {
+ pr_err("failed to alloc mem for delayed spi dev %s\n",
+ sdev->modalias);
+ return;
+ }
+ memcpy(new_dev, sdev, sizeof(*sdev));
+
+ spi_devs[spi_next_dev++] = new_dev;
+}
+
+static void __init intel_scu_i2c_device_register(int bus,
+ struct i2c_board_info *idev)
+{
+ struct i2c_board_info *new_dev;
+
+ if (i2c_next_dev == MAX_SCU_I2C) {
+ pr_err("too many SCU I2C devices");
+ return;
+ }
+
+ new_dev = kzalloc(sizeof(*idev), GFP_KERNEL);
+ if (!new_dev) {
+ pr_err("failed to alloc mem for delayed i2c dev %s\n",
+ idev->type);
+ return;
+ }
+ memcpy(new_dev, idev, sizeof(*idev));
+
+ i2c_bus[i2c_next_dev] = bus;
+ i2c_devs[i2c_next_dev++] = new_dev;
+}
+
+/* Called by IPC driver */
+void intel_scu_devices_create(void)
+{
+ int i;
+
+ for (i = 0; i < ipc_next_dev; i++)
+ platform_device_add(ipc_devs[i]);
+
+ for (i = 0; i < spi_next_dev; i++)
+ spi_register_board_info(spi_devs[i], 1);
+
+ for (i = 0; i < i2c_next_dev; i++) {
+ struct i2c_adapter *adapter;
+ struct i2c_client *client;
+
+ adapter = i2c_get_adapter(i2c_bus[i]);
+ if (adapter) {
+ client = i2c_new_device(adapter, i2c_devs[i]);
+ if (!client)
+ pr_err("can't create i2c device %s\n",
+ i2c_devs[i]->type);
+ } else
+ i2c_register_board_info(i2c_bus[i], i2c_devs[i], 1);
+ }
+ intel_scu_notifier_post(SCU_AVAILABLE, NULL);
+}
+EXPORT_SYMBOL_GPL(intel_scu_devices_create);
+
+/* Called by IPC driver */
+void intel_scu_devices_destroy(void)
+{
+ int i;
+
+ intel_scu_notifier_post(SCU_DOWN, NULL);
+
+ for (i = 0; i < ipc_next_dev; i++)
+ platform_device_del(ipc_devs[i]);
+}
+EXPORT_SYMBOL_GPL(intel_scu_devices_destroy);
+
+static void __init install_irq_resource(struct platform_device *pdev, int irq)
+{
+ /* Single threaded */
+ static struct resource res __initdata = {
+ .name = "IRQ",
+ .flags = IORESOURCE_IRQ,
+ };
+ res.start = irq;
+ platform_device_add_resources(pdev, &res, 1);
+}
+
+static void __init sfi_handle_ipc_dev(struct sfi_device_table_entry *pentry,
+ struct devs_id *dev)
+{
+ struct platform_device *pdev;
+ void *pdata = NULL;
+
+ pr_debug("IPC bus, name = %16.16s, irq = 0x%2x\n",
+ pentry->name, pentry->irq);
+ pdata = intel_mid_sfi_get_pdata(dev, pentry);
+
+ pdev = platform_device_alloc(pentry->name, 0);
+ if (pdev == NULL) {
+ pr_err("out of memory for SFI platform device '%s'.\n",
+ pentry->name);
+ return;
+ }
+ install_irq_resource(pdev, pentry->irq);
+
+ pdev->dev.platform_data = pdata;
+ platform_device_add(pdev);
+}
+
+static void __init sfi_handle_spi_dev(struct sfi_device_table_entry *pentry,
+ struct devs_id *dev)
+{
+ struct spi_board_info spi_info;
+ void *pdata = NULL;
+
+ memset(&spi_info, 0, sizeof(spi_info));
+ strncpy(spi_info.modalias, pentry->name, SFI_NAME_LEN);
+ spi_info.irq = ((pentry->irq == (u8)0xff) ? 0 : pentry->irq);
+ spi_info.bus_num = pentry->host_num;
+ spi_info.chip_select = pentry->addr;
+ spi_info.max_speed_hz = pentry->max_freq;
+ pr_debug("SPI bus=%d, name=%16.16s, irq=0x%2x, max_freq=%d, cs=%d\n",
+ spi_info.bus_num,
+ spi_info.modalias,
+ spi_info.irq,
+ spi_info.max_speed_hz,
+ spi_info.chip_select);
+
+ pdata = intel_mid_sfi_get_pdata(dev, &spi_info);
+
+ spi_info.platform_data = pdata;
+ if (dev->delay)
+ intel_scu_spi_device_register(&spi_info);
+ else
+ spi_register_board_info(&spi_info, 1);
+}
+
+static void __init sfi_handle_i2c_dev(struct sfi_device_table_entry *pentry,
+ struct devs_id *dev)
+{
+ struct i2c_board_info i2c_info;
+ void *pdata = NULL;
+
+ memset(&i2c_info, 0, sizeof(i2c_info));
+ strncpy(i2c_info.type, pentry->name, SFI_NAME_LEN);
+ i2c_info.irq = ((pentry->irq == (u8)0xff) ? 0 : pentry->irq);
+ i2c_info.addr = pentry->addr;
+ pr_debug("I2C bus = %d, name = %16.16s, irq = 0x%2x, addr = 0x%x\n",
+ pentry->host_num,
+ i2c_info.type,
+ i2c_info.irq,
+ i2c_info.addr);
+ pdata = intel_mid_sfi_get_pdata(dev, &i2c_info);
+ i2c_info.platform_data = pdata;
+
+ if (dev->delay)
+ intel_scu_i2c_device_register(pentry->host_num, &i2c_info);
+ else
+ i2c_register_board_info(pentry->host_num, &i2c_info, 1);
+}
+
+extern struct devs_id *const __x86_intel_mid_dev_start[],
+ *const __x86_intel_mid_dev_end[];
+
+static struct devs_id __init *get_device_id(u8 type, char *name)
+{
+ struct devs_id *const *dev_table;
+
+ for (dev_table = __x86_intel_mid_dev_start;
+ dev_table < __x86_intel_mid_dev_end; dev_table++) {
+ struct devs_id *dev = *dev_table;
+ if (dev->type == type &&
+ !strncmp(dev->name, name, SFI_NAME_LEN)) {
+ return dev;
+ }
+ }
+
+ return NULL;
+}
+
+static int __init sfi_parse_devs(struct sfi_table_header *table)
+{
+ struct sfi_table_simple *sb;
+ struct sfi_device_table_entry *pentry;
+ struct devs_id *dev = NULL;
+ int num, i;
+ int ioapic;
+ struct io_apic_irq_attr irq_attr;
+
+ sb = (struct sfi_table_simple *)table;
+ num = SFI_GET_NUM_ENTRIES(sb, struct sfi_device_table_entry);
+ pentry = (struct sfi_device_table_entry *)sb->pentry;
+
+ for (i = 0; i < num; i++, pentry++) {
+ int irq = pentry->irq;
+
+ if (irq != (u8)0xff) { /* native RTE case */
+ /* these SPI2 devices are not exposed to system as PCI
+ * devices, but they have separate RTE entry in IOAPIC
+ * so we have to enable them one by one here
+ */
+ ioapic = mp_find_ioapic(irq);
+ irq_attr.ioapic = ioapic;
+ irq_attr.ioapic_pin = irq;
+ irq_attr.trigger = 1;
+ irq_attr.polarity = 1;
+ io_apic_set_pci_routing(NULL, irq, &irq_attr);
+ } else
+ irq = 0; /* No irq */
+
+ dev = get_device_id(pentry->type, pentry->name);
+
+ if (!dev)
+ continue;
+
+ if (dev->device_handler) {
+ dev->device_handler(pentry, dev);
+ } else {
+ switch (pentry->type) {
+ case SFI_DEV_TYPE_IPC:
+ sfi_handle_ipc_dev(pentry, dev);
+ break;
+ case SFI_DEV_TYPE_SPI:
+ sfi_handle_spi_dev(pentry, dev);
+ break;
+ case SFI_DEV_TYPE_I2C:
+ sfi_handle_i2c_dev(pentry, dev);
+ break;
+ case SFI_DEV_TYPE_UART:
+ case SFI_DEV_TYPE_HSI:
+ default:
+ break;
+ }
+ }
+ }
+ return 0;
+}
+
+static int __init intel_mid_platform_init(void)
+{
+ sfi_table_parse(SFI_SIG_GPIO, NULL, NULL, sfi_parse_gpio);
+ sfi_table_parse(SFI_SIG_DEVS, NULL, NULL, sfi_parse_devs);
+ return 0;
+}
+arch_initcall(intel_mid_platform_init);
diff --git a/arch/x86/platform/mrst/Makefile b/arch/x86/platform/mrst/Makefile
deleted file mode 100644
index af1da7e623f9..000000000000
--- a/arch/x86/platform/mrst/Makefile
+++ /dev/null
@@ -1,3 +0,0 @@
-obj-$(CONFIG_X86_INTEL_MID) += mrst.o
-obj-$(CONFIG_X86_INTEL_MID) += vrtc.o
-obj-$(CONFIG_EARLY_PRINTK_INTEL_MID) += early_printk_mrst.o
diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c
deleted file mode 100644
index 3ca5957b7a34..000000000000
--- a/arch/x86/platform/mrst/mrst.c
+++ /dev/null
@@ -1,1052 +0,0 @@
-/*
- * mrst.c: Intel Moorestown platform specific setup code
- *
- * (C) Copyright 2008 Intel Corporation
- * Author: Jacob Pan (jacob.jun.pan@intel.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
- */
-
-#define pr_fmt(fmt) "mrst: " fmt
-
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/interrupt.h>
-#include <linux/scatterlist.h>
-#include <linux/sfi.h>
-#include <linux/intel_pmic_gpio.h>
-#include <linux/spi/spi.h>
-#include <linux/i2c.h>
-#include <linux/platform_data/pca953x.h>
-#include <linux/gpio_keys.h>
-#include <linux/input.h>
-#include <linux/platform_device.h>
-#include <linux/irq.h>
-#include <linux/module.h>
-#include <linux/notifier.h>
-#include <linux/mfd/intel_msic.h>
-#include <linux/gpio.h>
-#include <linux/i2c/tc35876x.h>
-
-#include <asm/setup.h>
-#include <asm/mpspec_def.h>
-#include <asm/hw_irq.h>
-#include <asm/apic.h>
-#include <asm/io_apic.h>
-#include <asm/mrst.h>
-#include <asm/mrst-vrtc.h>
-#include <asm/io.h>
-#include <asm/i8259.h>
-#include <asm/intel_scu_ipc.h>
-#include <asm/apb_timer.h>
-#include <asm/reboot.h>
-
-/*
- * the clockevent devices on Moorestown/Medfield can be APBT or LAPIC clock,
- * cmdline option x86_mrst_timer can be used to override the configuration
- * to prefer one or the other.
- * at runtime, there are basically three timer configurations:
- * 1. per cpu apbt clock only
- * 2. per cpu always-on lapic clocks only, this is Penwell/Medfield only
- * 3. per cpu lapic clock (C3STOP) and one apbt clock, with broadcast.
- *
- * by default (without cmdline option), platform code first detects cpu type
- * to see if we are on lincroft or penwell, then set up both lapic or apbt
- * clocks accordingly.
- * i.e. by default, medfield uses configuration #2, moorestown uses #1.
- * config #3 is supported but not recommended on medfield.
- *
- * rating and feature summary:
- * lapic (with C3STOP) --------- 100
- * apbt (always-on) ------------ 110
- * lapic (always-on,ARAT) ------ 150
- */
-
-enum mrst_timer_options mrst_timer_options;
-
-static u32 sfi_mtimer_usage[SFI_MTMR_MAX_NUM];
-static struct sfi_timer_table_entry sfi_mtimer_array[SFI_MTMR_MAX_NUM];
-enum mrst_cpu_type __mrst_cpu_chip;
-EXPORT_SYMBOL_GPL(__mrst_cpu_chip);
-
-int sfi_mtimer_num;
-
-struct sfi_rtc_table_entry sfi_mrtc_array[SFI_MRTC_MAX];
-EXPORT_SYMBOL_GPL(sfi_mrtc_array);
-int sfi_mrtc_num;
-
-static void mrst_power_off(void)
-{
-}
-
-static void mrst_reboot(void)
-{
- intel_scu_ipc_simple_command(IPCMSG_COLD_BOOT, 0);
-}
-
-/* parse all the mtimer info to a static mtimer array */
-static int __init sfi_parse_mtmr(struct sfi_table_header *table)
-{
- struct sfi_table_simple *sb;
- struct sfi_timer_table_entry *pentry;
- struct mpc_intsrc mp_irq;
- int totallen;
-
- sb = (struct sfi_table_simple *)table;
- if (!sfi_mtimer_num) {
- sfi_mtimer_num = SFI_GET_NUM_ENTRIES(sb,
- struct sfi_timer_table_entry);
- pentry = (struct sfi_timer_table_entry *) sb->pentry;
- totallen = sfi_mtimer_num * sizeof(*pentry);
- memcpy(sfi_mtimer_array, pentry, totallen);
- }
-
- pr_debug("SFI MTIMER info (num = %d):\n", sfi_mtimer_num);
- pentry = sfi_mtimer_array;
- for (totallen = 0; totallen < sfi_mtimer_num; totallen++, pentry++) {
- pr_debug("timer[%d]: paddr = 0x%08x, freq = %dHz,"
- " irq = %d\n", totallen, (u32)pentry->phys_addr,
- pentry->freq_hz, pentry->irq);
- if (!pentry->irq)
- continue;
- mp_irq.type = MP_INTSRC;
- mp_irq.irqtype = mp_INT;
-/* triggering mode edge bit 2-3, active high polarity bit 0-1 */
- mp_irq.irqflag = 5;
- mp_irq.srcbus = MP_BUS_ISA;
- mp_irq.srcbusirq = pentry->irq; /* IRQ */
- mp_irq.dstapic = MP_APIC_ALL;
- mp_irq.dstirq = pentry->irq;
- mp_save_irq(&mp_irq);
- }
-
- return 0;
-}
-
-struct sfi_timer_table_entry *sfi_get_mtmr(int hint)
-{
- int i;
- if (hint < sfi_mtimer_num) {
- if (!sfi_mtimer_usage[hint]) {
- pr_debug("hint taken for timer %d irq %d\n",\
- hint, sfi_mtimer_array[hint].irq);
- sfi_mtimer_usage[hint] = 1;
- return &sfi_mtimer_array[hint];
- }
- }
- /* take the first timer available */
- for (i = 0; i < sfi_mtimer_num;) {
- if (!sfi_mtimer_usage[i]) {
- sfi_mtimer_usage[i] = 1;
- return &sfi_mtimer_array[i];
- }
- i++;
- }
- return NULL;
-}
-
-void sfi_free_mtmr(struct sfi_timer_table_entry *mtmr)
-{
- int i;
- for (i = 0; i < sfi_mtimer_num;) {
- if (mtmr->irq == sfi_mtimer_array[i].irq) {
- sfi_mtimer_usage[i] = 0;
- return;
- }
- i++;
- }
-}
-
-/* parse all the mrtc info to a global mrtc array */
-int __init sfi_parse_mrtc(struct sfi_table_header *table)
-{
- struct sfi_table_simple *sb;
- struct sfi_rtc_table_entry *pentry;
- struct mpc_intsrc mp_irq;
-
- int totallen;
-
- sb = (struct sfi_table_simple *)table;
- if (!sfi_mrtc_num) {
- sfi_mrtc_num = SFI_GET_NUM_ENTRIES(sb,
- struct sfi_rtc_table_entry);
- pentry = (struct sfi_rtc_table_entry *)sb->pentry;
- totallen = sfi_mrtc_num * sizeof(*pentry);
- memcpy(sfi_mrtc_array, pentry, totallen);
- }
-
- pr_debug("SFI RTC info (num = %d):\n", sfi_mrtc_num);
- pentry = sfi_mrtc_array;
- for (totallen = 0; totallen < sfi_mrtc_num; totallen++, pentry++) {
- pr_debug("RTC[%d]: paddr = 0x%08x, irq = %d\n",
- totallen, (u32)pentry->phys_addr, pentry->irq);
- mp_irq.type = MP_INTSRC;
- mp_irq.irqtype = mp_INT;
- mp_irq.irqflag = 0xf; /* level trigger and active low */
- mp_irq.srcbus = MP_BUS_ISA;
- mp_irq.srcbusirq = pentry->irq; /* IRQ */
- mp_irq.dstapic = MP_APIC_ALL;
- mp_irq.dstirq = pentry->irq;
- mp_save_irq(&mp_irq);
- }
- return 0;
-}
-
-static unsigned long __init mrst_calibrate_tsc(void)
-{
- unsigned long fast_calibrate;
- u32 lo, hi, ratio, fsb;
-
- rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
- pr_debug("IA32 perf status is 0x%x, 0x%0x\n", lo, hi);
- ratio = (hi >> 8) & 0x1f;
- pr_debug("ratio is %d\n", ratio);
- if (!ratio) {
- pr_err("read a zero ratio, should be incorrect!\n");
- pr_err("force tsc ratio to 16 ...\n");
- ratio = 16;
- }
- rdmsr(MSR_FSB_FREQ, lo, hi);
- if ((lo & 0x7) == 0x7)
- fsb = PENWELL_FSB_FREQ_83SKU;
- else
- fsb = PENWELL_FSB_FREQ_100SKU;
- fast_calibrate = ratio * fsb;
- pr_debug("read penwell tsc %lu khz\n", fast_calibrate);
- lapic_timer_frequency = fsb * 1000 / HZ;
- /* mark tsc clocksource as reliable */
- set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE);
-
- if (fast_calibrate)
- return fast_calibrate;
-
- return 0;
-}
-
-static void __init mrst_time_init(void)
-{
- sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr);
- switch (mrst_timer_options) {
- case MRST_TIMER_APBT_ONLY:
- break;
- case MRST_TIMER_LAPIC_APBT:
- x86_init.timers.setup_percpu_clockev = setup_boot_APIC_clock;
- x86_cpuinit.setup_percpu_clockev = setup_secondary_APIC_clock;
- break;
- default:
- if (!boot_cpu_has(X86_FEATURE_ARAT))
- break;
- x86_init.timers.setup_percpu_clockev = setup_boot_APIC_clock;
- x86_cpuinit.setup_percpu_clockev = setup_secondary_APIC_clock;
- return;
- }
- /* we need at least one APB timer */
- pre_init_apic_IRQ0();
- apbt_time_init();
-}
-
-static void mrst_arch_setup(void)
-{
- if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27)
- __mrst_cpu_chip = MRST_CPU_CHIP_PENWELL;
- else {
- pr_err("Unknown Intel MID CPU (%d:%d), default to Penwell\n",
- boot_cpu_data.x86, boot_cpu_data.x86_model);
- __mrst_cpu_chip = MRST_CPU_CHIP_PENWELL;
- }
-}
-
-/* MID systems don't have i8042 controller */
-static int mrst_i8042_detect(void)
-{
- return 0;
-}
-
-/*
- * Moorestown does not have external NMI source nor port 0x61 to report
- * NMI status. The possible NMI sources are from pmu as a result of NMI
- * watchdog or lock debug. Reading io port 0x61 results in 0xff which
- * misled NMI handler.
- */
-static unsigned char mrst_get_nmi_reason(void)
-{
- return 0;
-}
-
-/*
- * Moorestown specific x86_init function overrides and early setup
- * calls.
- */
-void __init x86_mrst_early_setup(void)
-{
- x86_init.resources.probe_roms = x86_init_noop;
- x86_init.resources.reserve_resources = x86_init_noop;
-
- x86_init.timers.timer_init = mrst_time_init;
- x86_init.timers.setup_percpu_clockev = x86_init_noop;
-
- x86_init.irqs.pre_vector_init = x86_init_noop;
-
- x86_init.oem.arch_setup = mrst_arch_setup;
-
- x86_cpuinit.setup_percpu_clockev = apbt_setup_secondary_clock;
-
- x86_platform.calibrate_tsc = mrst_calibrate_tsc;
- x86_platform.i8042_detect = mrst_i8042_detect;
- x86_init.timers.wallclock_init = mrst_rtc_init;
- x86_platform.get_nmi_reason = mrst_get_nmi_reason;
-
- x86_init.pci.init = pci_mrst_init;
- x86_init.pci.fixup_irqs = x86_init_noop;
-
- legacy_pic = &null_legacy_pic;
-
- /* Moorestown specific power_off/restart method */
- pm_power_off = mrst_power_off;
- machine_ops.emergency_restart = mrst_reboot;
-
- /* Avoid searching for BIOS MP tables */
- x86_init.mpparse.find_smp_config = x86_init_noop;
- x86_init.mpparse.get_smp_config = x86_init_uint_noop;
- set_bit(MP_BUS_ISA, mp_bus_not_pci);
-}
-
-/*
- * if user does not want to use per CPU apb timer, just give it a lower rating
- * than local apic timer and skip the late per cpu timer init.
- */
-static inline int __init setup_x86_mrst_timer(char *arg)
-{
- if (!arg)
- return -EINVAL;
-
- if (strcmp("apbt_only", arg) == 0)
- mrst_timer_options = MRST_TIMER_APBT_ONLY;
- else if (strcmp("lapic_and_apbt", arg) == 0)
- mrst_timer_options = MRST_TIMER_LAPIC_APBT;
- else {
- pr_warning("X86 MRST timer option %s not recognised"
- " use x86_mrst_timer=apbt_only or lapic_and_apbt\n",
- arg);
- return -EINVAL;
- }
- return 0;
-}
-__setup("x86_mrst_timer=", setup_x86_mrst_timer);
-
-/*
- * Parsing GPIO table first, since the DEVS table will need this table
- * to map the pin name to the actual pin.
- */
-static struct sfi_gpio_table_entry *gpio_table;
-static int gpio_num_entry;
-
-static int __init sfi_parse_gpio(struct sfi_table_header *table)
-{
- struct sfi_table_simple *sb;
- struct sfi_gpio_table_entry *pentry;
- int num, i;
-
- if (gpio_table)
- return 0;
- sb = (struct sfi_table_simple *)table;
- num = SFI_GET_NUM_ENTRIES(sb, struct sfi_gpio_table_entry);
- pentry = (struct sfi_gpio_table_entry *)sb->pentry;
-
- gpio_table = kmalloc(num * sizeof(*pentry), GFP_KERNEL);
- if (!gpio_table)
- return -1;
- memcpy(gpio_table, pentry, num * sizeof(*pentry));
- gpio_num_entry = num;
-
- pr_debug("GPIO pin info:\n");
- for (i = 0; i < num; i++, pentry++)
- pr_debug("info[%2d]: controller = %16.16s, pin_name = %16.16s,"
- " pin = %d\n", i,
- pentry->controller_name,
- pentry->pin_name,
- pentry->pin_no);
- return 0;
-}
-
-static int get_gpio_by_name(const char *name)
-{
- struct sfi_gpio_table_entry *pentry = gpio_table;
- int i;
-
- if (!pentry)
- return -1;
- for (i = 0; i < gpio_num_entry; i++, pentry++) {
- if (!strncmp(name, pentry->pin_name, SFI_NAME_LEN))
- return pentry->pin_no;
- }
- return -1;
-}
-
-/*
- * Here defines the array of devices platform data that IAFW would export
- * through SFI "DEVS" table, we use name and type to match the device and
- * its platform data.
- */
-struct devs_id {
- char name[SFI_NAME_LEN + 1];
- u8 type;
- u8 delay;
- void *(*get_platform_data)(void *info);
-};
-
-/* the offset for the mapping of global gpio pin to irq */
-#define MRST_IRQ_OFFSET 0x100
-
-static void __init *pmic_gpio_platform_data(void *info)
-{
- static struct intel_pmic_gpio_platform_data pmic_gpio_pdata;
- int gpio_base = get_gpio_by_name("pmic_gpio_base");
-
- if (gpio_base == -1)
- gpio_base = 64;
- pmic_gpio_pdata.gpio_base = gpio_base;
- pmic_gpio_pdata.irq_base = gpio_base + MRST_IRQ_OFFSET;
- pmic_gpio_pdata.gpiointr = 0xffffeff8;
-
- return &pmic_gpio_pdata;
-}
-
-static void __init *max3111_platform_data(void *info)
-{
- struct spi_board_info *spi_info = info;
- int intr = get_gpio_by_name("max3111_int");
-
- spi_info->mode = SPI_MODE_0;
- if (intr == -1)
- return NULL;
- spi_info->irq = intr + MRST_IRQ_OFFSET;
- return NULL;
-}
-
-/* we have multiple max7315 on the board ... */
-#define MAX7315_NUM 2
-static void __init *max7315_platform_data(void *info)
-{
- static struct pca953x_platform_data max7315_pdata[MAX7315_NUM];
- static int nr;
- struct pca953x_platform_data *max7315 = &max7315_pdata[nr];
- struct i2c_board_info *i2c_info = info;
- int gpio_base, intr;
- char base_pin_name[SFI_NAME_LEN + 1];
- char intr_pin_name[SFI_NAME_LEN + 1];
-
- if (nr == MAX7315_NUM) {
- pr_err("too many max7315s, we only support %d\n",
- MAX7315_NUM);
- return NULL;
- }
- /* we have several max7315 on the board, we only need load several
- * instances of the same pca953x driver to cover them
- */
- strcpy(i2c_info->type, "max7315");
- if (nr++) {
- sprintf(base_pin_name, "max7315_%d_base", nr);
- sprintf(intr_pin_name, "max7315_%d_int", nr);
- } else {
- strcpy(base_pin_name, "max7315_base");
- strcpy(intr_pin_name, "max7315_int");
- }
-
- gpio_base = get_gpio_by_name(base_pin_name);
- intr = get_gpio_by_name(intr_pin_name);
-
- if (gpio_base == -1)
- return NULL;
- max7315->gpio_base = gpio_base;
- if (intr != -1) {
- i2c_info->irq = intr + MRST_IRQ_OFFSET;
- max7315->irq_base = gpio_base + MRST_IRQ_OFFSET;
- } else {
- i2c_info->irq = -1;
- max7315->irq_base = -1;
- }
- return max7315;
-}
-
-static void *tca6416_platform_data(void *info)
-{
- static struct pca953x_platform_data tca6416;
- struct i2c_board_info *i2c_info = info;
- int gpio_base, intr;
- char base_pin_name[SFI_NAME_LEN + 1];
- char intr_pin_name[SFI_NAME_LEN + 1];
-
- strcpy(i2c_info->type, "tca6416");
- strcpy(base_pin_name, "tca6416_base");
- strcpy(intr_pin_name, "tca6416_int");
-
- gpio_base = get_gpio_by_name(base_pin_name);
- intr = get_gpio_by_name(intr_pin_name);
-
- if (gpio_base == -1)
- return NULL;
- tca6416.gpio_base = gpio_base;
- if (intr != -1) {
- i2c_info->irq = intr + MRST_IRQ_OFFSET;
- tca6416.irq_base = gpio_base + MRST_IRQ_OFFSET;
- } else {
- i2c_info->irq = -1;
- tca6416.irq_base = -1;
- }
- return &tca6416;
-}
-
-static void *mpu3050_platform_data(void *info)
-{
- struct i2c_board_info *i2c_info = info;
- int intr = get_gpio_by_name("mpu3050_int");
-
- if (intr == -1)
- return NULL;
-
- i2c_info->irq = intr + MRST_IRQ_OFFSET;
- return NULL;
-}
-
-static void __init *emc1403_platform_data(void *info)
-{
- static short intr2nd_pdata;
- struct i2c_board_info *i2c_info = info;
- int intr = get_gpio_by_name("thermal_int");
- int intr2nd = get_gpio_by_name("thermal_alert");
-
- if (intr == -1 || intr2nd == -1)
- return NULL;
-
- i2c_info->irq = intr + MRST_IRQ_OFFSET;
- intr2nd_pdata = intr2nd + MRST_IRQ_OFFSET;
-
- return &intr2nd_pdata;
-}
-
-static void __init *lis331dl_platform_data(void *info)
-{
- static short intr2nd_pdata;
- struct i2c_board_info *i2c_info = info;
- int intr = get_gpio_by_name("accel_int");
- int intr2nd = get_gpio_by_name("accel_2");
-
- if (intr == -1 || intr2nd == -1)
- return NULL;
-
- i2c_info->irq = intr + MRST_IRQ_OFFSET;
- intr2nd_pdata = intr2nd + MRST_IRQ_OFFSET;
-
- return &intr2nd_pdata;
-}
-
-static void __init *no_platform_data(void *info)
-{
- return NULL;
-}
-
-static struct resource msic_resources[] = {
- {
- .start = INTEL_MSIC_IRQ_PHYS_BASE,
- .end = INTEL_MSIC_IRQ_PHYS_BASE + 64 - 1,
- .flags = IORESOURCE_MEM,
- },
-};
-
-static struct intel_msic_platform_data msic_pdata;
-
-static struct platform_device msic_device = {
- .name = "intel_msic",
- .id = -1,
- .dev = {
- .platform_data = &msic_pdata,
- },
- .num_resources = ARRAY_SIZE(msic_resources),
- .resource = msic_resources,
-};
-
-static inline bool mrst_has_msic(void)
-{
- return mrst_identify_cpu() == MRST_CPU_CHIP_PENWELL;
-}
-
-static int msic_scu_status_change(struct notifier_block *nb,
- unsigned long code, void *data)
-{
- if (code == SCU_DOWN) {
- platform_device_unregister(&msic_device);
- return 0;
- }
-
- return platform_device_register(&msic_device);
-}
-
-static int __init msic_init(void)
-{
- static struct notifier_block msic_scu_notifier = {
- .notifier_call = msic_scu_status_change,
- };
-
- /*
- * We need to be sure that the SCU IPC is ready before MSIC device
- * can be registered.
- */
- if (mrst_has_msic())
- intel_scu_notifier_add(&msic_scu_notifier);
-
- return 0;
-}
-arch_initcall(msic_init);
-
-/*
- * msic_generic_platform_data - sets generic platform data for the block
- * @info: pointer to the SFI device table entry for this block
- * @block: MSIC block
- *
- * Function sets IRQ number from the SFI table entry for given device to
- * the MSIC platform data.
- */
-static void *msic_generic_platform_data(void *info, enum intel_msic_block block)
-{
- struct sfi_device_table_entry *entry = info;
-
- BUG_ON(block < 0 || block >= INTEL_MSIC_BLOCK_LAST);
- msic_pdata.irq[block] = entry->irq;
-
- return no_platform_data(info);
-}
-
-static void *msic_battery_platform_data(void *info)
-{
- return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_BATTERY);
-}
-
-static void *msic_gpio_platform_data(void *info)
-{
- static struct intel_msic_gpio_pdata pdata;
- int gpio = get_gpio_by_name("msic_gpio_base");
-
- if (gpio < 0)
- return NULL;
-
- pdata.gpio_base = gpio;
- msic_pdata.gpio = &pdata;
-
- return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_GPIO);
-}
-
-static void *msic_audio_platform_data(void *info)
-{
- struct platform_device *pdev;
-
- pdev = platform_device_register_simple("sst-platform", -1, NULL, 0);
- if (IS_ERR(pdev)) {
- pr_err("failed to create audio platform device\n");
- return NULL;
- }
-
- return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_AUDIO);
-}
-
-static void *msic_power_btn_platform_data(void *info)
-{
- return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_POWER_BTN);
-}
-
-static void *msic_ocd_platform_data(void *info)
-{
- static struct intel_msic_ocd_pdata pdata;
- int gpio = get_gpio_by_name("ocd_gpio");
-
- if (gpio < 0)
- return NULL;
-
- pdata.gpio = gpio;
- msic_pdata.ocd = &pdata;
-
- return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_OCD);
-}
-
-static void *msic_thermal_platform_data(void *info)
-{
- return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_THERMAL);
-}
-
-/* tc35876x DSI-LVDS bridge chip and panel platform data */
-static void *tc35876x_platform_data(void *data)
-{
- static struct tc35876x_platform_data pdata;
-
- /* gpio pins set to -1 will not be used by the driver */
- pdata.gpio_bridge_reset = get_gpio_by_name("LCMB_RXEN");
- pdata.gpio_panel_bl_en = get_gpio_by_name("6S6P_BL_EN");
- pdata.gpio_panel_vadd = get_gpio_by_name("EN_VREG_LCD_V3P3");
-
- return &pdata;
-}
-
-static const struct devs_id __initconst device_ids[] = {
- {"bma023", SFI_DEV_TYPE_I2C, 1, &no_platform_data},
- {"pmic_gpio", SFI_DEV_TYPE_SPI, 1, &pmic_gpio_platform_data},
- {"pmic_gpio", SFI_DEV_TYPE_IPC, 1, &pmic_gpio_platform_data},
- {"spi_max3111", SFI_DEV_TYPE_SPI, 0, &max3111_platform_data},
- {"i2c_max7315", SFI_DEV_TYPE_I2C, 1, &max7315_platform_data},
- {"i2c_max7315_2", SFI_DEV_TYPE_I2C, 1, &max7315_platform_data},
- {"tca6416", SFI_DEV_TYPE_I2C, 1, &tca6416_platform_data},
- {"emc1403", SFI_DEV_TYPE_I2C, 1, &emc1403_platform_data},
- {"i2c_accel", SFI_DEV_TYPE_I2C, 0, &lis331dl_platform_data},
- {"pmic_audio", SFI_DEV_TYPE_IPC, 1, &no_platform_data},
- {"mpu3050", SFI_DEV_TYPE_I2C, 1, &mpu3050_platform_data},
- {"i2c_disp_brig", SFI_DEV_TYPE_I2C, 0, &tc35876x_platform_data},
-
- /* MSIC subdevices */
- {"msic_battery", SFI_DEV_TYPE_IPC, 1, &msic_battery_platform_data},
- {"msic_gpio", SFI_DEV_TYPE_IPC, 1, &msic_gpio_platform_data},
- {"msic_audio", SFI_DEV_TYPE_IPC, 1, &msic_audio_platform_data},
- {"msic_power_btn", SFI_DEV_TYPE_IPC, 1, &msic_power_btn_platform_data},
- {"msic_ocd", SFI_DEV_TYPE_IPC, 1, &msic_ocd_platform_data},
- {"msic_thermal", SFI_DEV_TYPE_IPC, 1, &msic_thermal_platform_data},
-
- {},
-};
-
-#define MAX_IPCDEVS 24
-static struct platform_device *ipc_devs[MAX_IPCDEVS];
-static int ipc_next_dev;
-
-#define MAX_SCU_SPI 24
-static struct spi_board_info *spi_devs[MAX_SCU_SPI];
-static int spi_next_dev;
-
-#define MAX_SCU_I2C 24
-static struct i2c_board_info *i2c_devs[MAX_SCU_I2C];
-static int i2c_bus[MAX_SCU_I2C];
-static int i2c_next_dev;
-
-static void __init intel_scu_device_register(struct platform_device *pdev)
-{
- if(ipc_next_dev == MAX_IPCDEVS)
- pr_err("too many SCU IPC devices");
- else
- ipc_devs[ipc_next_dev++] = pdev;
-}
-
-static void __init intel_scu_spi_device_register(struct spi_board_info *sdev)
-{
- struct spi_board_info *new_dev;
-
- if (spi_next_dev == MAX_SCU_SPI) {
- pr_err("too many SCU SPI devices");
- return;
- }
-
- new_dev = kzalloc(sizeof(*sdev), GFP_KERNEL);
- if (!new_dev) {
- pr_err("failed to alloc mem for delayed spi dev %s\n",
- sdev->modalias);
- return;
- }
- memcpy(new_dev, sdev, sizeof(*sdev));
-
- spi_devs[spi_next_dev++] = new_dev;
-}
-
-static void __init intel_scu_i2c_device_register(int bus,
- struct i2c_board_info *idev)
-{
- struct i2c_board_info *new_dev;
-
- if (i2c_next_dev == MAX_SCU_I2C) {
- pr_err("too many SCU I2C devices");
- return;
- }
-
- new_dev = kzalloc(sizeof(*idev), GFP_KERNEL);
- if (!new_dev) {
- pr_err("failed to alloc mem for delayed i2c dev %s\n",
- idev->type);
- return;
- }
- memcpy(new_dev, idev, sizeof(*idev));
-
- i2c_bus[i2c_next_dev] = bus;
- i2c_devs[i2c_next_dev++] = new_dev;
-}
-
-BLOCKING_NOTIFIER_HEAD(intel_scu_notifier);
-EXPORT_SYMBOL_GPL(intel_scu_notifier);
-
-/* Called by IPC driver */
-void intel_scu_devices_create(void)
-{
- int i;
-
- for (i = 0; i < ipc_next_dev; i++)
- platform_device_add(ipc_devs[i]);
-
- for (i = 0; i < spi_next_dev; i++)
- spi_register_board_info(spi_devs[i], 1);
-
- for (i = 0; i < i2c_next_dev; i++) {
- struct i2c_adapter *adapter;
- struct i2c_client *client;
-
- adapter = i2c_get_adapter(i2c_bus[i]);
- if (adapter) {
- client = i2c_new_device(adapter, i2c_devs[i]);
- if (!client)
- pr_err("can't create i2c device %s\n",
- i2c_devs[i]->type);
- } else
- i2c_register_board_info(i2c_bus[i], i2c_devs[i], 1);
- }
- intel_scu_notifier_post(SCU_AVAILABLE, NULL);
-}
-EXPORT_SYMBOL_GPL(intel_scu_devices_create);
-
-/* Called by IPC driver */
-void intel_scu_devices_destroy(void)
-{
- int i;
-
- intel_scu_notifier_post(SCU_DOWN, NULL);
-
- for (i = 0; i < ipc_next_dev; i++)
- platform_device_del(ipc_devs[i]);
-}
-EXPORT_SYMBOL_GPL(intel_scu_devices_destroy);
-
-static void __init install_irq_resource(struct platform_device *pdev, int irq)
-{
- /* Single threaded */
- static struct resource __initdata res = {
- .name = "IRQ",
- .flags = IORESOURCE_IRQ,
- };
- res.start = irq;
- platform_device_add_resources(pdev, &res, 1);
-}
-
-static void __init sfi_handle_ipc_dev(struct sfi_device_table_entry *entry)
-{
- const struct devs_id *dev = device_ids;
- struct platform_device *pdev;
- void *pdata = NULL;
-
- while (dev->name[0]) {
- if (dev->type == SFI_DEV_TYPE_IPC &&
- !strncmp(dev->name, entry->name, SFI_NAME_LEN)) {
- pdata = dev->get_platform_data(entry);
- break;
- }
- dev++;
- }
-
- /*
- * On Medfield the platform device creation is handled by the MSIC
- * MFD driver so we don't need to do it here.
- */
- if (mrst_has_msic())
- return;
-
- pdev = platform_device_alloc(entry->name, 0);
- if (pdev == NULL) {
- pr_err("out of memory for SFI platform device '%s'.\n",
- entry->name);
- return;
- }
- install_irq_resource(pdev, entry->irq);
-
- pdev->dev.platform_data = pdata;
- intel_scu_device_register(pdev);
-}
-
-static void __init sfi_handle_spi_dev(struct spi_board_info *spi_info)
-{
- const struct devs_id *dev = device_ids;
- void *pdata = NULL;
-
- while (dev->name[0]) {
- if (dev->type == SFI_DEV_TYPE_SPI &&
- !strncmp(dev->name, spi_info->modalias, SFI_NAME_LEN)) {
- pdata = dev->get_platform_data(spi_info);
- break;
- }
- dev++;
- }
- spi_info->platform_data = pdata;
- if (dev->delay)
- intel_scu_spi_device_register(spi_info);
- else
- spi_register_board_info(spi_info, 1);
-}
-
-static void __init sfi_handle_i2c_dev(int bus, struct i2c_board_info *i2c_info)
-{
- const struct devs_id *dev = device_ids;
- void *pdata = NULL;
-
- while (dev->name[0]) {
- if (dev->type == SFI_DEV_TYPE_I2C &&
- !strncmp(dev->name, i2c_info->type, SFI_NAME_LEN)) {
- pdata = dev->get_platform_data(i2c_info);
- break;
- }
- dev++;
- }
- i2c_info->platform_data = pdata;
-
- if (dev->delay)
- intel_scu_i2c_device_register(bus, i2c_info);
- else
- i2c_register_board_info(bus, i2c_info, 1);
- }
-
-
-static int __init sfi_parse_devs(struct sfi_table_header *table)
-{
- struct sfi_table_simple *sb;
- struct sfi_device_table_entry *pentry;
- struct spi_board_info spi_info;
- struct i2c_board_info i2c_info;
- int num, i, bus;
- int ioapic;
- struct io_apic_irq_attr irq_attr;
-
- sb = (struct sfi_table_simple *)table;
- num = SFI_GET_NUM_ENTRIES(sb, struct sfi_device_table_entry);
- pentry = (struct sfi_device_table_entry *)sb->pentry;
-
- for (i = 0; i < num; i++, pentry++) {
- int irq = pentry->irq;
-
- if (irq != (u8)0xff) { /* native RTE case */
- /* these SPI2 devices are not exposed to system as PCI
- * devices, but they have separate RTE entry in IOAPIC
- * so we have to enable them one by one here
- */
- ioapic = mp_find_ioapic(irq);
- irq_attr.ioapic = ioapic;
- irq_attr.ioapic_pin = irq;
- irq_attr.trigger = 1;
- irq_attr.polarity = 1;
- io_apic_set_pci_routing(NULL, irq, &irq_attr);
- } else
- irq = 0; /* No irq */
-
- switch (pentry->type) {
- case SFI_DEV_TYPE_IPC:
- pr_debug("info[%2d]: IPC bus, name = %16.16s, "
- "irq = 0x%2x\n", i, pentry->name, pentry->irq);
- sfi_handle_ipc_dev(pentry);
- break;
- case SFI_DEV_TYPE_SPI:
- memset(&spi_info, 0, sizeof(spi_info));
- strncpy(spi_info.modalias, pentry->name, SFI_NAME_LEN);
- spi_info.irq = irq;
- spi_info.bus_num = pentry->host_num;
- spi_info.chip_select = pentry->addr;
- spi_info.max_speed_hz = pentry->max_freq;
- pr_debug("info[%2d]: SPI bus = %d, name = %16.16s, "
- "irq = 0x%2x, max_freq = %d, cs = %d\n", i,
- spi_info.bus_num,
- spi_info.modalias,
- spi_info.irq,
- spi_info.max_speed_hz,
- spi_info.chip_select);
- sfi_handle_spi_dev(&spi_info);
- break;
- case SFI_DEV_TYPE_I2C:
- memset(&i2c_info, 0, sizeof(i2c_info));
- bus = pentry->host_num;
- strncpy(i2c_info.type, pentry->name, SFI_NAME_LEN);
- i2c_info.irq = irq;
- i2c_info.addr = pentry->addr;
- pr_debug("info[%2d]: I2C bus = %d, name = %16.16s, "
- "irq = 0x%2x, addr = 0x%x\n", i, bus,
- i2c_info.type,
- i2c_info.irq,
- i2c_info.addr);
- sfi_handle_i2c_dev(bus, &i2c_info);
- break;
- case SFI_DEV_TYPE_UART:
- case SFI_DEV_TYPE_HSI:
- default:
- ;
- }
- }
- return 0;
-}
-
-static int __init mrst_platform_init(void)
-{
- sfi_table_parse(SFI_SIG_GPIO, NULL, NULL, sfi_parse_gpio);
- sfi_table_parse(SFI_SIG_DEVS, NULL, NULL, sfi_parse_devs);
- return 0;
-}
-arch_initcall(mrst_platform_init);
-
-/*
- * we will search these buttons in SFI GPIO table (by name)
- * and register them dynamically. Please add all possible
- * buttons here, we will shrink them if no GPIO found.
- */
-static struct gpio_keys_button gpio_button[] = {
- {KEY_POWER, -1, 1, "power_btn", EV_KEY, 0, 3000},
- {KEY_PROG1, -1, 1, "prog_btn1", EV_KEY, 0, 20},
- {KEY_PROG2, -1, 1, "prog_btn2", EV_KEY, 0, 20},
- {SW_LID, -1, 1, "lid_switch", EV_SW, 0, 20},
- {KEY_VOLUMEUP, -1, 1, "vol_up", EV_KEY, 0, 20},
- {KEY_VOLUMEDOWN, -1, 1, "vol_down", EV_KEY, 0, 20},
- {KEY_CAMERA, -1, 1, "camera_full", EV_KEY, 0, 20},
- {KEY_CAMERA_FOCUS, -1, 1, "camera_half", EV_KEY, 0, 20},
- {SW_KEYPAD_SLIDE, -1, 1, "MagSw1", EV_SW, 0, 20},
- {SW_KEYPAD_SLIDE, -1, 1, "MagSw2", EV_SW, 0, 20},
-};
-
-static struct gpio_keys_platform_data mrst_gpio_keys = {
- .buttons = gpio_button,
- .rep = 1,
- .nbuttons = -1, /* will fill it after search */
-};
-
-static struct platform_device pb_device = {
- .name = "gpio-keys",
- .id = -1,
- .dev = {
- .platform_data = &mrst_gpio_keys,
- },
-};
-
-/*
- * Shrink the non-existent buttons, register the gpio button
- * device if there is some
- */
-static int __init pb_keys_init(void)
-{
- struct gpio_keys_button *gb = gpio_button;
- int i, num, good = 0;
-
- num = sizeof(gpio_button) / sizeof(struct gpio_keys_button);
- for (i = 0; i < num; i++) {
- gb[i].gpio = get_gpio_by_name(gb[i].desc);
- pr_debug("info[%2d]: name = %s, gpio = %d\n", i, gb[i].desc, gb[i].gpio);
- if (gb[i].gpio == -1)
- continue;
-
- if (i != good)
- gb[good] = gb[i];
- good++;
- }
-
- if (good) {
- mrst_gpio_keys.nbuttons = good;
- return platform_device_register(&pb_device);
- }
- return 0;
-}
-late_initcall(pb_keys_init);
diff --git a/arch/x86/platform/olpc/olpc-xo15-sci.c b/arch/x86/platform/olpc/olpc-xo15-sci.c
index fef7d0ba7e3a..649a12befba9 100644
--- a/arch/x86/platform/olpc/olpc-xo15-sci.c
+++ b/arch/x86/platform/olpc/olpc-xo15-sci.c
@@ -40,16 +40,9 @@ static bool lid_wake_on_close;
*/
static int set_lid_wake_behavior(bool wake_on_close)
{
- struct acpi_object_list arg_list;
- union acpi_object arg;
acpi_status status;
- arg_list.count = 1;
- arg_list.pointer = &arg;
- arg.type = ACPI_TYPE_INTEGER;
- arg.integer.value = wake_on_close;
-
- status = acpi_evaluate_object(NULL, "\\_SB.PCI0.LID.LIDW", &arg_list, NULL);
+ status = acpi_execute_simple_method(NULL, "\\_SB.PCI0.LID.LIDW", wake_on_close);
if (ACPI_FAILURE(status)) {
pr_warning(PFX "failed to set lid behavior\n");
return 1;
diff --git a/arch/x86/platform/uv/Makefile b/arch/x86/platform/uv/Makefile
index 6c40995fefb8..52079bebd014 100644
--- a/arch/x86/platform/uv/Makefile
+++ b/arch/x86/platform/uv/Makefile
@@ -1 +1 @@
-obj-$(CONFIG_X86_UV) += tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o uv_time.o
+obj-$(CONFIG_X86_UV) += tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o uv_time.o uv_nmi.o
diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c
new file mode 100644
index 000000000000..d3d02688f6f7
--- /dev/null
+++ b/arch/x86/platform/uv/uv_nmi.c
@@ -0,0 +1,711 @@
+/*
+ * SGI NMI/TRACE support routines
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Copyright (c) 2009-2013 Silicon Graphics, Inc. All Rights Reserved.
+ * Copyright (c) Mike Travis
+ */
+
+#include <linux/cpu.h>
+#include <linux/delay.h>
+#include <linux/kdb.h>
+#include <linux/kexec.h>
+#include <linux/kgdb.h>
+#include <linux/module.h>
+#include <linux/nmi.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#include <asm/apic.h>
+#include <asm/current.h>
+#include <asm/kdebug.h>
+#include <asm/local64.h>
+#include <asm/nmi.h>
+#include <asm/traps.h>
+#include <asm/uv/uv.h>
+#include <asm/uv/uv_hub.h>
+#include <asm/uv/uv_mmrs.h>
+
+void (*uv_trace_func)(const char *f, const int l, const char *fmt, ...);
+EXPORT_SYMBOL(uv_trace_func);
+
+void (*uv_trace_nmi_func)(unsigned int reason, struct pt_regs *regs);
+EXPORT_SYMBOL(uv_trace_nmi_func);
+
+
+/*
+ * UV handler for NMI
+ *
+ * Handle system-wide NMI events generated by the global 'power nmi' command.
+ *
+ * Basic operation is to field the NMI interrupt on each cpu and wait
+ * until all cpus have arrived into the nmi handler. If some cpus do not
+ * make it into the handler, try and force them in with the IPI(NMI) signal.
+ *
+ * We also have to lessen UV Hub MMR accesses as much as possible as this
+ * disrupts the UV Hub's primary mission of directing NumaLink traffic and
+ * can cause system problems to occur.
+ *
+ * To do this we register our primary NMI notifier on the NMI_UNKNOWN
+ * chain. This reduces the number of false NMI calls when the perf
+ * tools are running which generate an enormous number of NMIs per
+ * second (~4M/s for 1024 cpu threads). Our secondary NMI handler is
+ * very short as it only checks that if it has been "pinged" with the
+ * IPI(NMI) signal as mentioned above, and does not read the UV Hub's MMR.
+ *
+ */
+
+static struct uv_hub_nmi_s **uv_hub_nmi_list;
+
+DEFINE_PER_CPU(struct uv_cpu_nmi_s, __uv_cpu_nmi);
+EXPORT_PER_CPU_SYMBOL_GPL(__uv_cpu_nmi);
+
+static unsigned long nmi_mmr;
+static unsigned long nmi_mmr_clear;
+static unsigned long nmi_mmr_pending;
+
+static atomic_t uv_in_nmi;
+static atomic_t uv_nmi_cpu = ATOMIC_INIT(-1);
+static atomic_t uv_nmi_cpus_in_nmi = ATOMIC_INIT(-1);
+static atomic_t uv_nmi_slave_continue;
+static atomic_t uv_nmi_kexec_failed;
+static cpumask_var_t uv_nmi_cpu_mask;
+
+/* Values for uv_nmi_slave_continue */
+#define SLAVE_CLEAR 0
+#define SLAVE_CONTINUE 1
+#define SLAVE_EXIT 2
+
+/*
+ * Default is all stack dumps go to the console and buffer.
+ * Lower level to send to log buffer only.
+ */
+static int uv_nmi_loglevel = 7;
+module_param_named(dump_loglevel, uv_nmi_loglevel, int, 0644);
+
+/*
+ * The following values show statistics on how perf events are affecting
+ * this system.
+ */
+static int param_get_local64(char *buffer, const struct kernel_param *kp)
+{
+ return sprintf(buffer, "%lu\n", local64_read((local64_t *)kp->arg));
+}
+
+static int param_set_local64(const char *val, const struct kernel_param *kp)
+{
+ /* clear on any write */
+ local64_set((local64_t *)kp->arg, 0);
+ return 0;
+}
+
+static struct kernel_param_ops param_ops_local64 = {
+ .get = param_get_local64,
+ .set = param_set_local64,
+};
+#define param_check_local64(name, p) __param_check(name, p, local64_t)
+
+static local64_t uv_nmi_count;
+module_param_named(nmi_count, uv_nmi_count, local64, 0644);
+
+static local64_t uv_nmi_misses;
+module_param_named(nmi_misses, uv_nmi_misses, local64, 0644);
+
+static local64_t uv_nmi_ping_count;
+module_param_named(ping_count, uv_nmi_ping_count, local64, 0644);
+
+static local64_t uv_nmi_ping_misses;
+module_param_named(ping_misses, uv_nmi_ping_misses, local64, 0644);
+
+/*
+ * Following values allow tuning for large systems under heavy loading
+ */
+static int uv_nmi_initial_delay = 100;
+module_param_named(initial_delay, uv_nmi_initial_delay, int, 0644);
+
+static int uv_nmi_slave_delay = 100;
+module_param_named(slave_delay, uv_nmi_slave_delay, int, 0644);
+
+static int uv_nmi_loop_delay = 100;
+module_param_named(loop_delay, uv_nmi_loop_delay, int, 0644);
+
+static int uv_nmi_trigger_delay = 10000;
+module_param_named(trigger_delay, uv_nmi_trigger_delay, int, 0644);
+
+static int uv_nmi_wait_count = 100;
+module_param_named(wait_count, uv_nmi_wait_count, int, 0644);
+
+static int uv_nmi_retry_count = 500;
+module_param_named(retry_count, uv_nmi_retry_count, int, 0644);
+
+/*
+ * Valid NMI Actions:
+ * "dump" - dump process stack for each cpu
+ * "ips" - dump IP info for each cpu
+ * "kdump" - do crash dump
+ * "kdb" - enter KDB/KGDB (default)
+ */
+static char uv_nmi_action[8] = "kdb";
+module_param_string(action, uv_nmi_action, sizeof(uv_nmi_action), 0644);
+
+static inline bool uv_nmi_action_is(const char *action)
+{
+ return (strncmp(uv_nmi_action, action, strlen(action)) == 0);
+}
+
+/* Setup which NMI support is present in system */
+static void uv_nmi_setup_mmrs(void)
+{
+ if (uv_read_local_mmr(UVH_NMI_MMRX_SUPPORTED)) {
+ uv_write_local_mmr(UVH_NMI_MMRX_REQ,
+ 1UL << UVH_NMI_MMRX_REQ_SHIFT);
+ nmi_mmr = UVH_NMI_MMRX;
+ nmi_mmr_clear = UVH_NMI_MMRX_CLEAR;
+ nmi_mmr_pending = 1UL << UVH_NMI_MMRX_SHIFT;
+ pr_info("UV: SMI NMI support: %s\n", UVH_NMI_MMRX_TYPE);
+ } else {
+ nmi_mmr = UVH_NMI_MMR;
+ nmi_mmr_clear = UVH_NMI_MMR_CLEAR;
+ nmi_mmr_pending = 1UL << UVH_NMI_MMR_SHIFT;
+ pr_info("UV: SMI NMI support: %s\n", UVH_NMI_MMR_TYPE);
+ }
+}
+
+/* Read NMI MMR and check if NMI flag was set by BMC. */
+static inline int uv_nmi_test_mmr(struct uv_hub_nmi_s *hub_nmi)
+{
+ hub_nmi->nmi_value = uv_read_local_mmr(nmi_mmr);
+ atomic_inc(&hub_nmi->read_mmr_count);
+ return !!(hub_nmi->nmi_value & nmi_mmr_pending);
+}
+
+static inline void uv_local_mmr_clear_nmi(void)
+{
+ uv_write_local_mmr(nmi_mmr_clear, nmi_mmr_pending);
+}
+
+/*
+ * If first cpu in on this hub, set hub_nmi "in_nmi" and "owner" values and
+ * return true. If first cpu in on the system, set global "in_nmi" flag.
+ */
+static int uv_set_in_nmi(int cpu, struct uv_hub_nmi_s *hub_nmi)
+{
+ int first = atomic_add_unless(&hub_nmi->in_nmi, 1, 1);
+
+ if (first) {
+ atomic_set(&hub_nmi->cpu_owner, cpu);
+ if (atomic_add_unless(&uv_in_nmi, 1, 1))
+ atomic_set(&uv_nmi_cpu, cpu);
+
+ atomic_inc(&hub_nmi->nmi_count);
+ }
+ return first;
+}
+
+/* Check if this is a system NMI event */
+static int uv_check_nmi(struct uv_hub_nmi_s *hub_nmi)
+{
+ int cpu = smp_processor_id();
+ int nmi = 0;
+
+ local64_inc(&uv_nmi_count);
+ uv_cpu_nmi.queries++;
+
+ do {
+ nmi = atomic_read(&hub_nmi->in_nmi);
+ if (nmi)
+ break;
+
+ if (raw_spin_trylock(&hub_nmi->nmi_lock)) {
+
+ /* check hub MMR NMI flag */
+ if (uv_nmi_test_mmr(hub_nmi)) {
+ uv_set_in_nmi(cpu, hub_nmi);
+ nmi = 1;
+ break;
+ }
+
+ /* MMR NMI flag is clear */
+ raw_spin_unlock(&hub_nmi->nmi_lock);
+
+ } else {
+ /* wait a moment for the hub nmi locker to set flag */
+ cpu_relax();
+ udelay(uv_nmi_slave_delay);
+
+ /* re-check hub in_nmi flag */
+ nmi = atomic_read(&hub_nmi->in_nmi);
+ if (nmi)
+ break;
+ }
+
+ /* check if this BMC missed setting the MMR NMI flag */
+ if (!nmi) {
+ nmi = atomic_read(&uv_in_nmi);
+ if (nmi)
+ uv_set_in_nmi(cpu, hub_nmi);
+ }
+
+ } while (0);
+
+ if (!nmi)
+ local64_inc(&uv_nmi_misses);
+
+ return nmi;
+}
+
+/* Need to reset the NMI MMR register, but only once per hub. */
+static inline void uv_clear_nmi(int cpu)
+{
+ struct uv_hub_nmi_s *hub_nmi = uv_hub_nmi;
+
+ if (cpu == atomic_read(&hub_nmi->cpu_owner)) {
+ atomic_set(&hub_nmi->cpu_owner, -1);
+ atomic_set(&hub_nmi->in_nmi, 0);
+ uv_local_mmr_clear_nmi();
+ raw_spin_unlock(&hub_nmi->nmi_lock);
+ }
+}
+
+/* Print non-responding cpus */
+static void uv_nmi_nr_cpus_pr(char *fmt)
+{
+ static char cpu_list[1024];
+ int len = sizeof(cpu_list);
+ int c = cpumask_weight(uv_nmi_cpu_mask);
+ int n = cpulist_scnprintf(cpu_list, len, uv_nmi_cpu_mask);
+
+ if (n >= len-1)
+ strcpy(&cpu_list[len - 6], "...\n");
+
+ printk(fmt, c, cpu_list);
+}
+
+/* Ping non-responding cpus attemping to force them into the NMI handler */
+static void uv_nmi_nr_cpus_ping(void)
+{
+ int cpu;
+
+ for_each_cpu(cpu, uv_nmi_cpu_mask)
+ atomic_set(&uv_cpu_nmi_per(cpu).pinging, 1);
+
+ apic->send_IPI_mask(uv_nmi_cpu_mask, APIC_DM_NMI);
+}
+
+/* Clean up flags for cpus that ignored both NMI and ping */
+static void uv_nmi_cleanup_mask(void)
+{
+ int cpu;
+
+ for_each_cpu(cpu, uv_nmi_cpu_mask) {
+ atomic_set(&uv_cpu_nmi_per(cpu).pinging, 0);
+ atomic_set(&uv_cpu_nmi_per(cpu).state, UV_NMI_STATE_OUT);
+ cpumask_clear_cpu(cpu, uv_nmi_cpu_mask);
+ }
+}
+
+/* Loop waiting as cpus enter nmi handler */
+static int uv_nmi_wait_cpus(int first)
+{
+ int i, j, k, n = num_online_cpus();
+ int last_k = 0, waiting = 0;
+
+ if (first) {
+ cpumask_copy(uv_nmi_cpu_mask, cpu_online_mask);
+ k = 0;
+ } else {
+ k = n - cpumask_weight(uv_nmi_cpu_mask);
+ }
+
+ udelay(uv_nmi_initial_delay);
+ for (i = 0; i < uv_nmi_retry_count; i++) {
+ int loop_delay = uv_nmi_loop_delay;
+
+ for_each_cpu(j, uv_nmi_cpu_mask) {
+ if (atomic_read(&uv_cpu_nmi_per(j).state)) {
+ cpumask_clear_cpu(j, uv_nmi_cpu_mask);
+ if (++k >= n)
+ break;
+ }
+ }
+ if (k >= n) { /* all in? */
+ k = n;
+ break;
+ }
+ if (last_k != k) { /* abort if no new cpus coming in */
+ last_k = k;
+ waiting = 0;
+ } else if (++waiting > uv_nmi_wait_count)
+ break;
+
+ /* extend delay if waiting only for cpu 0 */
+ if (waiting && (n - k) == 1 &&
+ cpumask_test_cpu(0, uv_nmi_cpu_mask))
+ loop_delay *= 100;
+
+ udelay(loop_delay);
+ }
+ atomic_set(&uv_nmi_cpus_in_nmi, k);
+ return n - k;
+}
+
+/* Wait until all slave cpus have entered UV NMI handler */
+static void uv_nmi_wait(int master)
+{
+ /* indicate this cpu is in */
+ atomic_set(&uv_cpu_nmi.state, UV_NMI_STATE_IN);
+
+ /* if not the first cpu in (the master), then we are a slave cpu */
+ if (!master)
+ return;
+
+ do {
+ /* wait for all other cpus to gather here */
+ if (!uv_nmi_wait_cpus(1))
+ break;
+
+ /* if not all made it in, send IPI NMI to them */
+ uv_nmi_nr_cpus_pr(KERN_ALERT
+ "UV: Sending NMI IPI to %d non-responding CPUs: %s\n");
+ uv_nmi_nr_cpus_ping();
+
+ /* if all cpus are in, then done */
+ if (!uv_nmi_wait_cpus(0))
+ break;
+
+ uv_nmi_nr_cpus_pr(KERN_ALERT
+ "UV: %d CPUs not in NMI loop: %s\n");
+ } while (0);
+
+ pr_alert("UV: %d of %d CPUs in NMI\n",
+ atomic_read(&uv_nmi_cpus_in_nmi), num_online_cpus());
+}
+
+static void uv_nmi_dump_cpu_ip_hdr(void)
+{
+ printk(KERN_DEFAULT
+ "\nUV: %4s %6s %-32s %s (Note: PID 0 not listed)\n",
+ "CPU", "PID", "COMMAND", "IP");
+}
+
+static void uv_nmi_dump_cpu_ip(int cpu, struct pt_regs *regs)
+{
+ printk(KERN_DEFAULT "UV: %4d %6d %-32.32s ",
+ cpu, current->pid, current->comm);
+
+ printk_address(regs->ip);
+}
+
+/* Dump this cpu's state */
+static void uv_nmi_dump_state_cpu(int cpu, struct pt_regs *regs)
+{
+ const char *dots = " ................................. ";
+
+ if (uv_nmi_action_is("ips")) {
+ if (cpu == 0)
+ uv_nmi_dump_cpu_ip_hdr();
+
+ if (current->pid != 0)
+ uv_nmi_dump_cpu_ip(cpu, regs);
+
+ } else if (uv_nmi_action_is("dump")) {
+ printk(KERN_DEFAULT
+ "UV:%sNMI process trace for CPU %d\n", dots, cpu);
+ show_regs(regs);
+ }
+ atomic_set(&uv_cpu_nmi.state, UV_NMI_STATE_DUMP_DONE);
+}
+
+/* Trigger a slave cpu to dump it's state */
+static void uv_nmi_trigger_dump(int cpu)
+{
+ int retry = uv_nmi_trigger_delay;
+
+ if (atomic_read(&uv_cpu_nmi_per(cpu).state) != UV_NMI_STATE_IN)
+ return;
+
+ atomic_set(&uv_cpu_nmi_per(cpu).state, UV_NMI_STATE_DUMP);
+ do {
+ cpu_relax();
+ udelay(10);
+ if (atomic_read(&uv_cpu_nmi_per(cpu).state)
+ != UV_NMI_STATE_DUMP)
+ return;
+ } while (--retry > 0);
+
+ pr_crit("UV: CPU %d stuck in process dump function\n", cpu);
+ atomic_set(&uv_cpu_nmi_per(cpu).state, UV_NMI_STATE_DUMP_DONE);
+}
+
+/* Wait until all cpus ready to exit */
+static void uv_nmi_sync_exit(int master)
+{
+ atomic_dec(&uv_nmi_cpus_in_nmi);
+ if (master) {
+ while (atomic_read(&uv_nmi_cpus_in_nmi) > 0)
+ cpu_relax();
+ atomic_set(&uv_nmi_slave_continue, SLAVE_CLEAR);
+ } else {
+ while (atomic_read(&uv_nmi_slave_continue))
+ cpu_relax();
+ }
+}
+
+/* Walk through cpu list and dump state of each */
+static void uv_nmi_dump_state(int cpu, struct pt_regs *regs, int master)
+{
+ if (master) {
+ int tcpu;
+ int ignored = 0;
+ int saved_console_loglevel = console_loglevel;
+
+ pr_alert("UV: tracing %s for %d CPUs from CPU %d\n",
+ uv_nmi_action_is("ips") ? "IPs" : "processes",
+ atomic_read(&uv_nmi_cpus_in_nmi), cpu);
+
+ console_loglevel = uv_nmi_loglevel;
+ atomic_set(&uv_nmi_slave_continue, SLAVE_EXIT);
+ for_each_online_cpu(tcpu) {
+ if (cpumask_test_cpu(tcpu, uv_nmi_cpu_mask))
+ ignored++;
+ else if (tcpu == cpu)
+ uv_nmi_dump_state_cpu(tcpu, regs);
+ else
+ uv_nmi_trigger_dump(tcpu);
+ }
+ if (ignored)
+ printk(KERN_DEFAULT "UV: %d CPUs ignored NMI\n",
+ ignored);
+
+ console_loglevel = saved_console_loglevel;
+ pr_alert("UV: process trace complete\n");
+ } else {
+ while (!atomic_read(&uv_nmi_slave_continue))
+ cpu_relax();
+ while (atomic_read(&uv_cpu_nmi.state) != UV_NMI_STATE_DUMP)
+ cpu_relax();
+ uv_nmi_dump_state_cpu(cpu, regs);
+ }
+ uv_nmi_sync_exit(master);
+}
+
+static void uv_nmi_touch_watchdogs(void)
+{
+ touch_softlockup_watchdog_sync();
+ clocksource_touch_watchdog();
+ rcu_cpu_stall_reset();
+ touch_nmi_watchdog();
+}
+
+#if defined(CONFIG_KEXEC)
+static void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs)
+{
+ /* Call crash to dump system state */
+ if (master) {
+ pr_emerg("UV: NMI executing crash_kexec on CPU%d\n", cpu);
+ crash_kexec(regs);
+
+ pr_emerg("UV: crash_kexec unexpectedly returned, ");
+ if (!kexec_crash_image) {
+ pr_cont("crash kernel not loaded\n");
+ atomic_set(&uv_nmi_kexec_failed, 1);
+ uv_nmi_sync_exit(1);
+ return;
+ }
+ pr_cont("kexec busy, stalling cpus while waiting\n");
+ }
+
+ /* If crash exec fails the slaves should return, otherwise stall */
+ while (atomic_read(&uv_nmi_kexec_failed) == 0)
+ mdelay(10);
+
+ /* Crash kernel most likely not loaded, return in an orderly fashion */
+ uv_nmi_sync_exit(0);
+}
+
+#else /* !CONFIG_KEXEC */
+static inline void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs)
+{
+ if (master)
+ pr_err("UV: NMI kdump: KEXEC not supported in this kernel\n");
+}
+#endif /* !CONFIG_KEXEC */
+
+#ifdef CONFIG_KGDB_KDB
+/* Call KDB from NMI handler */
+static void uv_call_kdb(int cpu, struct pt_regs *regs, int master)
+{
+ int ret;
+
+ if (master) {
+ /* call KGDB NMI handler as MASTER */
+ ret = kgdb_nmicallin(cpu, X86_TRAP_NMI, regs,
+ &uv_nmi_slave_continue);
+ if (ret) {
+ pr_alert("KDB returned error, is kgdboc set?\n");
+ atomic_set(&uv_nmi_slave_continue, SLAVE_EXIT);
+ }
+ } else {
+ /* wait for KGDB signal that it's ready for slaves to enter */
+ int sig;
+
+ do {
+ cpu_relax();
+ sig = atomic_read(&uv_nmi_slave_continue);
+ } while (!sig);
+
+ /* call KGDB as slave */
+ if (sig == SLAVE_CONTINUE)
+ kgdb_nmicallback(cpu, regs);
+ }
+ uv_nmi_sync_exit(master);
+}
+
+#else /* !CONFIG_KGDB_KDB */
+static inline void uv_call_kdb(int cpu, struct pt_regs *regs, int master)
+{
+ pr_err("UV: NMI error: KGDB/KDB is not enabled in this kernel\n");
+}
+#endif /* !CONFIG_KGDB_KDB */
+
+/*
+ * UV NMI handler
+ */
+int uv_handle_nmi(unsigned int reason, struct pt_regs *regs)
+{
+ struct uv_hub_nmi_s *hub_nmi = uv_hub_nmi;
+ int cpu = smp_processor_id();
+ int master = 0;
+ unsigned long flags;
+
+ local_irq_save(flags);
+
+ /* If not a UV System NMI, ignore */
+ if (!atomic_read(&uv_cpu_nmi.pinging) && !uv_check_nmi(hub_nmi)) {
+ local_irq_restore(flags);
+ return NMI_DONE;
+ }
+
+ /* Call possible NMI trace function */
+ if (unlikely(uv_trace_nmi_func))
+ (uv_trace_nmi_func)(reason, regs);
+
+ /* Indicate we are the first CPU into the NMI handler */
+ master = (atomic_read(&uv_nmi_cpu) == cpu);
+
+ /* If NMI action is "kdump", then attempt to do it */
+ if (uv_nmi_action_is("kdump"))
+ uv_nmi_kdump(cpu, master, regs);
+
+ /* Pause as all cpus enter the NMI handler */
+ uv_nmi_wait(master);
+
+ /* Dump state of each cpu */
+ if (uv_nmi_action_is("ips") || uv_nmi_action_is("dump"))
+ uv_nmi_dump_state(cpu, regs, master);
+
+ /* Call KDB if enabled */
+ else if (uv_nmi_action_is("kdb"))
+ uv_call_kdb(cpu, regs, master);
+
+ /* Clear per_cpu "in nmi" flag */
+ atomic_set(&uv_cpu_nmi.state, UV_NMI_STATE_OUT);
+
+ /* Clear MMR NMI flag on each hub */
+ uv_clear_nmi(cpu);
+
+ /* Clear global flags */
+ if (master) {
+ if (cpumask_weight(uv_nmi_cpu_mask))
+ uv_nmi_cleanup_mask();
+ atomic_set(&uv_nmi_cpus_in_nmi, -1);
+ atomic_set(&uv_nmi_cpu, -1);
+ atomic_set(&uv_in_nmi, 0);
+ }
+
+ uv_nmi_touch_watchdogs();
+ local_irq_restore(flags);
+
+ return NMI_HANDLED;
+}
+
+/*
+ * NMI handler for pulling in CPUs when perf events are grabbing our NMI
+ */
+int uv_handle_nmi_ping(unsigned int reason, struct pt_regs *regs)
+{
+ int ret;
+
+ uv_cpu_nmi.queries++;
+ if (!atomic_read(&uv_cpu_nmi.pinging)) {
+ local64_inc(&uv_nmi_ping_misses);
+ return NMI_DONE;
+ }
+
+ uv_cpu_nmi.pings++;
+ local64_inc(&uv_nmi_ping_count);
+ ret = uv_handle_nmi(reason, regs);
+ atomic_set(&uv_cpu_nmi.pinging, 0);
+ return ret;
+}
+
+void uv_register_nmi_notifier(void)
+{
+ if (register_nmi_handler(NMI_UNKNOWN, uv_handle_nmi, 0, "uv"))
+ pr_warn("UV: NMI handler failed to register\n");
+
+ if (register_nmi_handler(NMI_LOCAL, uv_handle_nmi_ping, 0, "uvping"))
+ pr_warn("UV: PING NMI handler failed to register\n");
+}
+
+void uv_nmi_init(void)
+{
+ unsigned int value;
+
+ /*
+ * Unmask NMI on all cpus
+ */
+ value = apic_read(APIC_LVT1) | APIC_DM_NMI;
+ value &= ~APIC_LVT_MASKED;
+ apic_write(APIC_LVT1, value);
+}
+
+void uv_nmi_setup(void)
+{
+ int size = sizeof(void *) * (1 << NODES_SHIFT);
+ int cpu, nid;
+
+ /* Setup hub nmi info */
+ uv_nmi_setup_mmrs();
+ uv_hub_nmi_list = kzalloc(size, GFP_KERNEL);
+ pr_info("UV: NMI hub list @ 0x%p (%d)\n", uv_hub_nmi_list, size);
+ BUG_ON(!uv_hub_nmi_list);
+ size = sizeof(struct uv_hub_nmi_s);
+ for_each_present_cpu(cpu) {
+ nid = cpu_to_node(cpu);
+ if (uv_hub_nmi_list[nid] == NULL) {
+ uv_hub_nmi_list[nid] = kzalloc_node(size,
+ GFP_KERNEL, nid);
+ BUG_ON(!uv_hub_nmi_list[nid]);
+ raw_spin_lock_init(&(uv_hub_nmi_list[nid]->nmi_lock));
+ atomic_set(&uv_hub_nmi_list[nid]->cpu_owner, -1);
+ }
+ uv_hub_nmi_per(cpu) = uv_hub_nmi_list[nid];
+ }
+ BUG_ON(!alloc_cpumask_var(&uv_nmi_cpu_mask, GFP_KERNEL));
+}
+
+
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index f7bab68a4b83..11f9285a2ff6 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -722,15 +722,25 @@ static void percpu_init(void)
/*
* Check to see if a symbol lies in the .data..percpu section.
- * For some as yet not understood reason the "__init_begin"
- * symbol which immediately preceeds the .data..percpu section
- * also shows up as it it were part of it so we do an explict
- * check for that symbol name and ignore it.
+ *
+ * The linker incorrectly associates some symbols with the
+ * .data..percpu section so we also need to check the symbol
+ * name to make sure that we classify the symbol correctly.
+ *
+ * The GNU linker incorrectly associates:
+ * __init_begin
+ * __per_cpu_load
+ *
+ * The "gold" linker incorrectly associates:
+ * init_per_cpu__irq_stack_union
+ * init_per_cpu__gdt_page
*/
static int is_percpu_sym(ElfW(Sym) *sym, const char *symname)
{
return (sym->st_shndx == per_cpu_shndx) &&
- strcmp(symname, "__init_begin");
+ strcmp(symname, "__init_begin") &&
+ strcmp(symname, "__per_cpu_load") &&
+ strncmp(symname, "init_per_cpu_", 13);
}
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index fdc3ba28ca38..b3f36369e667 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -468,8 +468,8 @@ PV_CALLEE_SAVE_REGS_THUNK(xen_pgd_val);
* 3 PCD PWT UC UC UC
* 4 PAT WB WC WB
* 5 PAT PWT WC WP WT
- * 6 PAT PCD UC- UC UC-
- * 7 PAT PCD PWT UC UC UC
+ * 6 PAT PCD UC- rsv UC-
+ * 7 PAT PCD PWT UC rsv UC
*/
void xen_set_pat(u64 pat)
@@ -2328,12 +2328,14 @@ static int xen_exchange_memory(unsigned long extents_in, unsigned int order_in,
return success;
}
-int xen_create_contiguous_region(unsigned long vstart, unsigned int order,
- unsigned int address_bits)
+int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
+ unsigned int address_bits,
+ dma_addr_t *dma_handle)
{
unsigned long *in_frames = discontig_frames, out_frame;
unsigned long flags;
int success;
+ unsigned long vstart = (unsigned long)phys_to_virt(pstart);
/*
* Currently an auto-translated guest will not perform I/O, nor will
@@ -2368,15 +2370,17 @@ int xen_create_contiguous_region(unsigned long vstart, unsigned int order,
spin_unlock_irqrestore(&xen_reservation_lock, flags);
+ *dma_handle = virt_to_machine(vstart).maddr;
return success ? 0 : -ENOMEM;
}
EXPORT_SYMBOL_GPL(xen_create_contiguous_region);
-void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
+void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
{
unsigned long *out_frames = discontig_frames, in_frame;
unsigned long flags;
int success;
+ unsigned long vstart;
if (xen_feature(XENFEAT_auto_translated_physmap))
return;
@@ -2384,6 +2388,7 @@ void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
if (unlikely(order > MAX_CONTIG_ORDER))
return;
+ vstart = (unsigned long)phys_to_virt(pstart);
memset((void *) vstart, 0, PAGE_SIZE << order);
spin_lock_irqsave(&xen_reservation_lock, flags);
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 8b901e8d782d..2ae8699e8767 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -799,10 +799,10 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
unsigned topidx, mididx, idx;
- if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
- BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
+ /* don't track P2M changes in autotranslate guests */
+ if (unlikely(xen_feature(XENFEAT_auto_translated_physmap)))
return true;
- }
+
if (unlikely(pfn >= MAX_P2M_PFN)) {
BUG_ON(mfn != INVALID_P2M_ENTRY);
return true;
@@ -879,7 +879,6 @@ int m2p_add_override(unsigned long mfn, struct page *page,
unsigned long uninitialized_var(address);
unsigned level;
pte_t *ptep = NULL;
- int ret = 0;
pfn = page_to_pfn(page);
if (!PageHighMem(page)) {
@@ -926,8 +925,8 @@ int m2p_add_override(unsigned long mfn, struct page *page,
* frontend pages while they are being shared with the backend,
* because mfn_to_pfn (that ends up being called by GUPF) will
* return the backend pfn rather than the frontend pfn. */
- ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
- if (ret == 0 && get_phys_to_machine(pfn) == mfn)
+ pfn = mfn_to_pfn_no_overrides(mfn);
+ if (get_phys_to_machine(pfn) == mfn)
set_phys_to_machine(pfn, FOREIGN_FRAME(mfn));
return 0;
@@ -942,7 +941,6 @@ int m2p_remove_override(struct page *page,
unsigned long uninitialized_var(address);
unsigned level;
pte_t *ptep = NULL;
- int ret = 0;
pfn = page_to_pfn(page);
mfn = get_phys_to_machine(pfn);
@@ -1029,8 +1027,8 @@ int m2p_remove_override(struct page *page,
* the original pfn causes mfn_to_pfn(mfn) to return the frontend
* pfn again. */
mfn &= ~FOREIGN_FRAME_BIT;
- ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
- if (ret == 0 && get_phys_to_machine(pfn) == FOREIGN_FRAME(mfn) &&
+ pfn = mfn_to_pfn_no_overrides(mfn);
+ if (get_phys_to_machine(pfn) == FOREIGN_FRAME(mfn) &&
m2p_find_override(mfn) == NULL)
set_phys_to_machine(pfn, mfn);
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index d1e4777b4e75..31d04758b76f 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -278,6 +278,15 @@ static void __init xen_smp_prepare_boot_cpu(void)
old memory can be recycled */
make_lowmem_page_readwrite(xen_initial_gdt);
+#ifdef CONFIG_X86_32
+ /*
+ * Xen starts us with XEN_FLAT_RING1_DS, but linux code
+ * expects __USER_DS
+ */
+ loadsegment(ds, __USER_DS);
+ loadsegment(es, __USER_DS);
+#endif
+
xen_filter_cpu_maps();
xen_setup_vcpu_info_placement();
}
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 253f63fceea1..be6b86078957 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -259,6 +259,14 @@ void xen_uninit_lock_cpu(int cpu)
}
+/*
+ * Our init of PV spinlocks is split in two init functions due to us
+ * using paravirt patching and jump labels patching and having to do
+ * all of this before SMP code is invoked.
+ *
+ * The paravirt patching needs to be done _before_ the alternative asm code
+ * is started, otherwise we would not patch the core kernel code.
+ */
void __init xen_init_spinlocks(void)
{
@@ -267,12 +275,26 @@ void __init xen_init_spinlocks(void)
return;
}
- static_key_slow_inc(&paravirt_ticketlocks_enabled);
-
pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning);
pv_lock_ops.unlock_kick = xen_unlock_kick;
}
+/*
+ * While the jump_label init code needs to happend _after_ the jump labels are
+ * enabled and before SMP is started. Hence we use pre-SMP initcall level
+ * init. We cannot do it in xen_init_spinlocks as that is done before
+ * jump labels are activated.
+ */
+static __init int xen_init_spinlocks_jump(void)
+{
+ if (!xen_pvspin)
+ return 0;
+
+ static_key_slow_inc(&paravirt_ticketlocks_enabled);
+ return 0;
+}
+early_initcall(xen_init_spinlocks_jump);
+
static __init int xen_parse_nopvspin(char *arg)
{
xen_pvspin = false;