author    Stephen Rothwell <sfr@canb.auug.org.au>  2014-07-28 15:29:58 +1000
committer Stephen Rothwell <sfr@canb.auug.org.au>  2014-07-28 15:29:58 +1000
commit    54733a93a7ed04286df1654b156c567fee962a11 (patch)
tree      7b2b3ac53678599229931f461177589bd0c80b8c /arch
parent    e21379221c89358623ea474d2e52c470b7ab8065 (diff)
parent    09ea0f7492ff0d276a3dbd26f25ec37ec152ebc1 (diff)
next-20140724/tip
Diffstat (limited to 'arch')
-rw-r--r--  arch/arc/kernel/perf_event.c | 7
-rw-r--r--  arch/blackfin/kernel/perf_event.c | 15
-rw-r--r--  arch/metag/kernel/perf/perf_event.c | 19
-rw-r--r--  arch/openrisc/Kconfig | 1
-rw-r--r--  arch/openrisc/include/asm/irq.h | 3
-rw-r--r--  arch/openrisc/kernel/irq.c | 146
-rw-r--r--  arch/powerpc/perf/hv-24x7.c | 6
-rw-r--r--  arch/powerpc/perf/hv-gpci.c | 6
-rw-r--r--  arch/s390/kernel/perf_cpum_cf.c | 12
-rw-r--r--  arch/sh/kernel/perf_event.c | 15
-rw-r--r--  arch/x86/Kconfig | 2
-rw-r--r--  arch/x86/Makefile | 9
-rw-r--r--  arch/x86/boot/code16gcc.h | 24
-rw-r--r--  arch/x86/include/asm/alternative.h | 14
-rw-r--r--  arch/x86/include/asm/apic.h | 14
-rw-r--r--  arch/x86/include/asm/barrier.h | 2
-rw-r--r--  arch/x86/include/asm/cmpxchg.h | 4
-rw-r--r--  arch/x86/include/asm/cmpxchg_32.h | 2
-rw-r--r--  arch/x86/include/asm/cmpxchg_64.h | 2
-rw-r--r--  arch/x86/include/asm/cpufeature.h | 409
-rw-r--r--  arch/x86/include/asm/fpu-internal.h | 11
-rw-r--r--  arch/x86/include/asm/hardirq.h | 3
-rw-r--r--  arch/x86/include/asm/i8259.h | 5
-rw-r--r--  arch/x86/include/asm/io_apic.h | 56
-rw-r--r--  arch/x86/include/asm/mc146818rtc.h | 2
-rw-r--r--  arch/x86/include/asm/mpspec.h | 13
-rw-r--r--  arch/x86/include/asm/mutex_32.h | 16
-rw-r--r--  arch/x86/include/asm/mwait.h | 2
-rw-r--r--  arch/x86/include/asm/processor.h | 4
-rw-r--r--  arch/x86/include/asm/prom.h | 2
-rw-r--r--  arch/x86/include/asm/qrwlock.h | 2
-rw-r--r--  arch/x86/include/asm/uv/uv_bau.h | 19
-rw-r--r--  arch/x86/include/asm/vdso.h | 18
-rw-r--r--  arch/x86/include/asm/xsave.h | 223
-rw-r--r--  arch/x86/include/uapi/asm/msr-index.h | 2
-rw-r--r--  arch/x86/kernel/acpi/boot.c | 404
-rw-r--r--  arch/x86/kernel/apic/apic.c | 62
-rw-r--r--  arch/x86/kernel/apic/io_apic.c | 775
-rw-r--r--  arch/x86/kernel/cpu/amd.c | 341
-rw-r--r--  arch/x86/kernel/cpu/common.c | 17
-rw-r--r--  arch/x86/kernel/cpu/intel.c | 4
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c | 9
-rw-r--r--  arch/x86/kernel/cpu/mkcapflags.sh | 51
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd_uncore.c | 111
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_uncore.c | 5
-rw-r--r--  arch/x86/kernel/cpu/proc.c | 8
-rw-r--r--  arch/x86/kernel/cpu/scattered.c | 1
-rw-r--r--  arch/x86/kernel/devicetree.c | 207
-rw-r--r--  arch/x86/kernel/entry_64.S | 52
-rw-r--r--  arch/x86/kernel/i387.c | 2
-rw-r--r--  arch/x86/kernel/irqinit.c | 12
-rw-r--r--  arch/x86/kernel/mpparse.c | 108
-rw-r--r--  arch/x86/kernel/process.c | 1
-rw-r--r--  arch/x86/kernel/tsc.c | 7
-rw-r--r--  arch/x86/kernel/xsave.c | 118
-rw-r--r--  arch/x86/mm/fault.c | 6
-rw-r--r--  arch/x86/pci/acpi.c | 6
-rw-r--r--  arch/x86/pci/intel_mid_pci.c | 27
-rw-r--r--  arch/x86/pci/irq.c | 15
-rw-r--r--  arch/x86/pci/xen.c | 7
-rw-r--r--  arch/x86/platform/ce4100/ce4100.c | 11
-rw-r--r--  arch/x86/platform/intel-mid/device_libs/platform_wdt.c | 22
-rw-r--r--  arch/x86/platform/intel-mid/sfi.c | 56
-rw-r--r--  arch/x86/platform/sfi/sfi.c | 10
-rw-r--r--  arch/x86/platform/uv/tlb_uv.c | 69
-rw-r--r--  arch/x86/vdso/Makefile | 16
-rw-r--r--  arch/x86/vdso/vdso-fakesections.c | 21
-rw-r--r--  arch/x86/vdso/vdso-layout.lds.S | 44
-rw-r--r--  arch/x86/vdso/vdso2c.c | 128
-rw-r--r--  arch/x86/vdso/vdso2c.h | 227
-rw-r--r--  arch/x86/vdso/vma.c | 20
71 files changed, 2095 insertions, 1975 deletions
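
The recurring change in the perf_event drivers below is to stop rejecting sampling events in each driver's event_init() path and instead advertise PERF_PMU_CAP_NO_INTERRUPT when the PMU is registered, so the perf core refuses sampling centrally. A minimal sketch of that pattern follows; the "example" PMU name and the omitted callbacks are hypothetical, not part of this merge.

/*
 * Sketch only: a counting-only PMU with no overflow interrupt marks
 * itself PERF_PMU_CAP_NO_INTERRUPT before registration, so the core
 * rejects sampling events on its behalf.
 */
#include <linux/perf_event.h>

static struct pmu example_pmu = {
	/* .event_init, .add, .del, .start, .stop, .read, ... */
};

static int __init example_pmu_init(void)
{
	/* No overflow interrupt, so sampling cannot be supported */
	example_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;

	return perf_pmu_register(&example_pmu, "example", PERF_TYPE_RAW);
}
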
diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c
index 63177e4cb66d..b9a5685a990e 100644
--- a/arch/arc/kernel/perf_event.c
+++ b/arch/arc/kernel/perf_event.c
@@ -99,10 +99,6 @@ static int arc_pmu_event_init(struct perf_event *event)
struct hw_perf_event *hwc = &event->hw;
int ret;
- /* ARC 700 PMU does not support sampling events */
- if (is_sampling_event(event))
- return -ENOENT;
-
switch (event->attr.type) {
case PERF_TYPE_HARDWARE:
if (event->attr.config >= PERF_COUNT_HW_MAX)
@@ -298,6 +294,9 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
.read = arc_pmu_read,
};
+ /* ARC 700 PMU does not support sampling events */
+ arc_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+
ret = perf_pmu_register(&arc_pmu->pmu, pdev->name, PERF_TYPE_RAW);
return ret;
diff --git a/arch/blackfin/kernel/perf_event.c b/arch/blackfin/kernel/perf_event.c
index 974e55496db3..ea2032013cc2 100644
--- a/arch/blackfin/kernel/perf_event.c
+++ b/arch/blackfin/kernel/perf_event.c
@@ -389,14 +389,6 @@ static int bfin_pmu_event_init(struct perf_event *event)
if (attr->exclude_hv || attr->exclude_idle)
return -EPERM;
- /*
- * All of the on-chip counters are "limited", in that they have
- * no interrupts, and are therefore unable to do sampling without
- * further work and timer assistance.
- */
- if (hwc->sample_period)
- return -EINVAL;
-
ret = 0;
switch (attr->type) {
case PERF_TYPE_RAW:
@@ -490,6 +482,13 @@ static int __init bfin_pmu_init(void)
{
int ret;
+ /*
+ * All of the on-chip counters are "limited", in that they have
+ * no interrupts, and are therefore unable to do sampling without
+ * further work and timer assistance.
+ */
+ pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+
ret = perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
if (!ret)
perf_cpu_notifier(bfin_pmu_notifier);
diff --git a/arch/metag/kernel/perf/perf_event.c b/arch/metag/kernel/perf/perf_event.c
index 5cc4d4dcf3cf..02c08737f6aa 100644
--- a/arch/metag/kernel/perf/perf_event.c
+++ b/arch/metag/kernel/perf/perf_event.c
@@ -568,16 +568,6 @@ static int _hw_perf_event_init(struct perf_event *event)
return -EINVAL;
/*
- * Early cores have "limited" counters - they have no overflow
- * interrupts - and so are unable to do sampling without extra work
- * and timer assistance.
- */
- if (metag_pmu->max_period == 0) {
- if (hwc->sample_period)
- return -EINVAL;
- }
-
- /*
* Don't assign an index until the event is placed into the hardware.
* -1 signifies that we're still deciding where to put it. On SMP
* systems each core has its own set of counters, so we can't do any
@@ -866,6 +856,15 @@ static int __init init_hw_perf_events(void)
pr_info("enabled with %s PMU driver, %d counters available\n",
metag_pmu->name, metag_pmu->max_events);
+ /*
+ * Early cores have "limited" counters - they have no overflow
+ * interrupts - and so are unable to do sampling without extra work
+ * and timer assistance.
+ */
+ if (metag_pmu->max_period == 0) {
+ metag_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+ }
+
/* Initialise the active events and reservation mutex */
atomic_set(&metag_pmu->active_events, 0);
mutex_init(&metag_pmu->reserve_mutex);
diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig
index e71d712afb79..88e83368bbf5 100644
--- a/arch/openrisc/Kconfig
+++ b/arch/openrisc/Kconfig
@@ -22,6 +22,7 @@ config OPENRISC
select GENERIC_STRNLEN_USER
select MODULES_USE_ELF_RELA
select HAVE_DEBUG_STACKOVERFLOW
+ select OR1K_PIC
config MMU
def_bool y
diff --git a/arch/openrisc/include/asm/irq.h b/arch/openrisc/include/asm/irq.h
index eb612b1865d2..b84634cc95eb 100644
--- a/arch/openrisc/include/asm/irq.h
+++ b/arch/openrisc/include/asm/irq.h
@@ -24,4 +24,7 @@
#define NO_IRQ (-1)
+void handle_IRQ(unsigned int, struct pt_regs *);
+extern void set_handle_irq(void (*handle_irq)(struct pt_regs *));
+
#endif /* __ASM_OPENRISC_IRQ_H__ */
diff --git a/arch/openrisc/kernel/irq.c b/arch/openrisc/kernel/irq.c
index 8ec77bc9f1e7..967eb1430203 100644
--- a/arch/openrisc/kernel/irq.c
+++ b/arch/openrisc/kernel/irq.c
@@ -16,11 +16,10 @@
#include <linux/interrupt.h>
#include <linux/init.h>
-#include <linux/of.h>
#include <linux/ftrace.h>
#include <linux/irq.h>
+#include <linux/irqchip.h>
#include <linux/export.h>
-#include <linux/irqdomain.h>
#include <linux/irqflags.h>
/* read interrupt enabled status */
@@ -37,150 +36,31 @@ void arch_local_irq_restore(unsigned long flags)
}
EXPORT_SYMBOL(arch_local_irq_restore);
-
-/* OR1K PIC implementation */
-
-/* We're a couple of cycles faster than the generic implementations with
- * these 'fast' versions.
- */
-
-static void or1k_pic_mask(struct irq_data *data)
-{
- mtspr(SPR_PICMR, mfspr(SPR_PICMR) & ~(1UL << data->hwirq));
-}
-
-static void or1k_pic_unmask(struct irq_data *data)
-{
- mtspr(SPR_PICMR, mfspr(SPR_PICMR) | (1UL << data->hwirq));
-}
-
-static void or1k_pic_ack(struct irq_data *data)
-{
- /* EDGE-triggered interrupts need to be ack'ed in order to clear
- * the latch.
- * LEVEL-triggered interrupts do not need to be ack'ed; however,
- * ack'ing the interrupt has no ill-effect and is quicker than
- * trying to figure out what type it is...
- */
-
- /* The OpenRISC 1000 spec says to write a 1 to the bit to ack the
- * interrupt, but the OR1200 does this backwards and requires a 0
- * to be written...
- */
-
-#ifdef CONFIG_OR1K_1200
- /* There are two oddities with the OR1200 PIC implementation:
- * i) LEVEL-triggered interrupts are latched and need to be cleared
- * ii) the interrupt latch is cleared by writing a 0 to the bit,
- * as opposed to a 1 as mandated by the spec
- */
-
- mtspr(SPR_PICSR, mfspr(SPR_PICSR) & ~(1UL << data->hwirq));
-#else
- WARN(1, "Interrupt handling possibly broken\n");
- mtspr(SPR_PICSR, (1UL << data->hwirq));
-#endif
-}
-
-static void or1k_pic_mask_ack(struct irq_data *data)
-{
- /* Comments for pic_ack apply here, too */
-
-#ifdef CONFIG_OR1K_1200
- mtspr(SPR_PICMR, mfspr(SPR_PICMR) & ~(1UL << data->hwirq));
- mtspr(SPR_PICSR, mfspr(SPR_PICSR) & ~(1UL << data->hwirq));
-#else
- WARN(1, "Interrupt handling possibly broken\n");
- mtspr(SPR_PICMR, (1UL << data->hwirq));
- mtspr(SPR_PICSR, (1UL << data->hwirq));
-#endif
-}
-
-#if 0
-static int or1k_pic_set_type(struct irq_data *data, unsigned int flow_type)
-{
- /* There's nothing to do in the PIC configuration when changing
- * flow type. Level and edge-triggered interrupts are both
- * supported, but it's PIC-implementation specific which type
- * is handled. */
-
- return irq_setup_alt_chip(data, flow_type);
-}
-#endif
-
-static struct irq_chip or1k_dev = {
- .name = "or1k-PIC",
- .irq_unmask = or1k_pic_unmask,
- .irq_mask = or1k_pic_mask,
- .irq_ack = or1k_pic_ack,
- .irq_mask_ack = or1k_pic_mask_ack,
-};
-
-static struct irq_domain *root_domain;
-
-static inline int pic_get_irq(int first)
-{
- int hwirq;
-
- hwirq = ffs(mfspr(SPR_PICSR) >> first);
- if (!hwirq)
- return NO_IRQ;
- else
- hwirq = hwirq + first -1;
-
- return irq_find_mapping(root_domain, hwirq);
-}
-
-
-static int or1k_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw)
+void __init init_IRQ(void)
{
- irq_set_chip_and_handler_name(irq, &or1k_dev,
- handle_level_irq, "level");
- irq_set_status_flags(irq, IRQ_LEVEL | IRQ_NOPROBE);
-
- return 0;
+ irqchip_init();
}
-static const struct irq_domain_ops or1k_irq_domain_ops = {
- .xlate = irq_domain_xlate_onecell,
- .map = or1k_map,
-};
-
-/*
- * This sets up the IRQ domain for the PIC built in to the OpenRISC
- * 1000 CPU. This is the "root" domain as these are the interrupts
- * that directly trigger an exception in the CPU.
- */
-static void __init or1k_irq_init(void)
-{
- struct device_node *intc = NULL;
-
- /* The interrupt controller device node is mandatory */
- intc = of_find_compatible_node(NULL, NULL, "opencores,or1k-pic");
- BUG_ON(!intc);
-
- /* Disable all interrupts until explicitly requested */
- mtspr(SPR_PICMR, (0UL));
-
- root_domain = irq_domain_add_linear(intc, 32,
- &or1k_irq_domain_ops, NULL);
-}
+static void (*handle_arch_irq)(struct pt_regs *);
-void __init init_IRQ(void)
+void __init set_handle_irq(void (*handle_irq)(struct pt_regs *))
{
- or1k_irq_init();
+ handle_arch_irq = handle_irq;
}
-void __irq_entry do_IRQ(struct pt_regs *regs)
+void handle_IRQ(unsigned int irq, struct pt_regs *regs)
{
- int irq = -1;
struct pt_regs *old_regs = set_irq_regs(regs);
irq_enter();
- while ((irq = pic_get_irq(irq + 1)) != NO_IRQ)
- generic_handle_irq(irq);
+ generic_handle_irq(irq);
irq_exit();
set_irq_regs(old_regs);
}
+
+void __irq_entry do_IRQ(struct pt_regs *regs)
+{
+ handle_arch_irq(regs);
+}
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index e0766b82e165..66d0f179650f 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -387,8 +387,7 @@ static int h_24x7_event_init(struct perf_event *event)
event->attr.exclude_hv ||
event->attr.exclude_idle ||
event->attr.exclude_host ||
- event->attr.exclude_guest ||
- is_sampling_event(event)) /* no sampling */
+ event->attr.exclude_guest)
return -EINVAL;
/* no branch sampling */
@@ -513,6 +512,9 @@ static int hv_24x7_init(void)
if (!hv_page_cache)
return -ENOMEM;
+ /* sampling not supported */
+ h_24x7_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+
r = perf_pmu_register(&h_24x7_pmu, h_24x7_pmu.name, -1);
if (r)
return r;
diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c
index c9d399a2df82..15fc76c93022 100644
--- a/arch/powerpc/perf/hv-gpci.c
+++ b/arch/powerpc/perf/hv-gpci.c
@@ -210,8 +210,7 @@ static int h_gpci_event_init(struct perf_event *event)
event->attr.exclude_hv ||
event->attr.exclude_idle ||
event->attr.exclude_host ||
- event->attr.exclude_guest ||
- is_sampling_event(event)) /* no sampling */
+ event->attr.exclude_guest)
return -EINVAL;
/* no branch sampling */
@@ -284,6 +283,9 @@ static int hv_gpci_init(void)
return -ENODEV;
}
+ /* sampling not supported */
+ h_gpci_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+
r = perf_pmu_register(&h_gpci_pmu, h_gpci_pmu.name, -1);
if (r)
return r;
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index ea75d011a6fc..d3194de7ae1e 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -411,12 +411,6 @@ static int cpumf_pmu_event_init(struct perf_event *event)
case PERF_TYPE_HARDWARE:
case PERF_TYPE_HW_CACHE:
case PERF_TYPE_RAW:
- /* The CPU measurement counter facility does not have overflow
- * interrupts to do sampling. Sampling must be provided by
- * external means, for example, by timers.
- */
- if (is_sampling_event(event))
- return -ENOENT;
err = __hw_perf_event_init(event);
break;
default:
@@ -681,6 +675,12 @@ static int __init cpumf_pmu_init(void)
goto out;
}
+ /* The CPU measurement counter facility does not have overflow
+ * interrupts to do sampling. Sampling must be provided by
+ * external means, for example, by timers.
+ */
+ cpumf_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+
cpumf_pmu.attr_groups = cpumf_cf_event_group();
rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", PERF_TYPE_RAW);
if (rc) {
diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c
index 02331672b6db..7cfd7f153966 100644
--- a/arch/sh/kernel/perf_event.c
+++ b/arch/sh/kernel/perf_event.c
@@ -129,14 +129,6 @@ static int __hw_perf_event_init(struct perf_event *event)
return -ENODEV;
/*
- * All of the on-chip counters are "limited", in that they have
- * no interrupts, and are therefore unable to do sampling without
- * further work and timer assistance.
- */
- if (hwc->sample_period)
- return -EINVAL;
-
- /*
* See if we need to reserve the counter.
*
* If no events are currently in use, then we have to take a
@@ -392,6 +384,13 @@ int register_sh_pmu(struct sh_pmu *_pmu)
pr_info("Performance Events: %s support registered\n", _pmu->name);
+ /*
+ * All of the on-chip counters are "limited", in that they have
+ * no interrupts, and are therefore unable to do sampling without
+ * further work and timer assistance.
+ */
+ pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+
WARN_ON(_pmu->num_events > MAX_HWEVENTS);
perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ac5a788d2432..cb61ac7a2b7a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -432,6 +432,7 @@ config X86_INTEL_CE
bool "CE4100 TV platform"
depends on PCI
depends on PCI_GODIRECT
+ depends on X86_IO_APIC
depends on X86_32
depends on X86_EXTENDED_PLATFORM
select X86_REBOOTFIXUPS
@@ -838,6 +839,7 @@ config X86_IO_APIC
def_bool y
depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_IOAPIC || PCI_MSI
select GENERIC_IRQ_LEGACY_ALLOC_HWIRQ
+ select IRQ_DOMAIN
config X86_REROUTE_FOR_BROKEN_BOOT_IRQS
bool "Reroute for broken boot IRQs"
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 33f71b01fd22..c65fd9650467 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -15,12 +15,9 @@ endif
# that way we can complain to the user if the CPU is insufficient.
#
# The -m16 option is supported by GCC >= 4.9 and clang >= 3.5. For
-# older versions of GCC, we need to play evil and unreliable tricks to
-# attempt to ensure that our asm(".code16gcc") is first in the asm
-# output.
-CODE16GCC_CFLAGS := -m32 -include $(srctree)/arch/x86/boot/code16gcc.h \
- $(call cc-option, -fno-toplevel-reorder,\
- $(call cc-option, -fno-unit-at-a-time))
+# older versions of GCC, include an *assembly* header to make sure that
+# gcc doesn't play any games behind our back.
+CODE16GCC_CFLAGS := -m32 -Wa,$(srctree)/arch/x86/boot/code16gcc.h
M16_CFLAGS := $(call cc-option, -m16, $(CODE16GCC_CFLAGS))
REALMODE_CFLAGS := $(M16_CFLAGS) -g -Os -D__KERNEL__ \
diff --git a/arch/x86/boot/code16gcc.h b/arch/x86/boot/code16gcc.h
index d93e48010b61..5ff426535397 100644
--- a/arch/x86/boot/code16gcc.h
+++ b/arch/x86/boot/code16gcc.h
@@ -1,15 +1,11 @@
-/*
- * code16gcc.h
- *
- * This file is -include'd when compiling 16-bit C code.
- * Note: this asm() needs to be emitted before gcc emits any code.
- * Depending on gcc version, this requires -fno-unit-at-a-time or
- * -fno-toplevel-reorder.
- *
- * Hopefully gcc will eventually have a real -m16 option so we can
- * drop this hack long term.
- */
+#
+# code16gcc.h
+#
+# This file is added to the assembler via -Wa when compiling 16-bit C code.
+# This is done this way instead via asm() to make sure gcc does not reorder
+# things around us.
+#
+# gcc 4.9+ has a real -m16 option so we can drop this hack long term.
+#
-#ifndef __ASSEMBLY__
-asm(".code16gcc");
-#endif
+ .code16gcc
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 0a3f9c9f98d5..473bdbee378a 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -161,6 +161,20 @@ static inline int alternatives_text_reserved(void *start, void *end)
asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \
: : "i" (0), ## input)
+/*
+ * This is similar to alternative_input. But it has two features and
+ * respective instructions.
+ *
+ * If CPU has feature2, newinstr2 is used.
+ * Otherwise, if CPU has feature1, newinstr1 is used.
+ * Otherwise, oldinstr is used.
+ */
+#define alternative_input_2(oldinstr, newinstr1, feature1, newinstr2, \
+ feature2, input...) \
+ asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, \
+ newinstr2, feature2) \
+ : : "i" (0), ## input)
+
/* Like alternative_input, but with a single output argument */
#define alternative_io(oldinstr, newinstr, feature, output, input...) \
asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 19b0ebafcd3e..a8a637d56902 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -85,21 +85,13 @@ static inline bool apic_from_smp_config(void)
#include <asm/paravirt.h>
#endif
-#ifdef CONFIG_X86_64
-extern int is_vsmp_box(void);
-#else
-static inline int is_vsmp_box(void)
-{
- return 0;
-}
-#endif
extern int setup_profiling_timer(unsigned int);
static inline void native_apic_mem_write(u32 reg, u32 v)
{
volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg);
- alternative_io("movl %0, %1", "xchgl %0, %1", X86_FEATURE_11AP,
+ alternative_io("movl %0, %1", "xchgl %0, %1", X86_BUG_11AP,
ASM_OUTPUT2("=r" (v), "=m" (*addr)),
ASM_OUTPUT2("0" (v), "m" (*addr)));
}
@@ -502,8 +494,6 @@ static inline unsigned default_get_apic_id(unsigned long x)
#define DEFAULT_TRAMPOLINE_PHYS_HIGH 0x469
#ifdef CONFIG_X86_64
-extern int default_acpi_madt_oem_check(char *, char *);
-
extern void apic_send_IPI_self(int vector);
DECLARE_PER_CPU(int, x2apic_extra_bits);
@@ -552,6 +542,8 @@ static inline int default_apic_id_valid(int apicid)
return (apicid < 255);
}
+extern int default_acpi_madt_oem_check(char *, char *);
+
extern void default_setup_apic_routing(void);
extern struct apic apic_noop;
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 5c7198cca5ed..0f4460b5636d 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -99,7 +99,7 @@
#if defined(CONFIG_X86_PPRO_FENCE)
/*
- * For either of these options x86 doesn't have a strong TSO memory
+ * For this option x86 doesn't have a strong TSO memory
* model and we should fall back to full barriers.
*/
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index d47786acb016..99c105d78b7e 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -4,6 +4,8 @@
#include <linux/compiler.h>
#include <asm/alternative.h> /* Provides LOCK_PREFIX */
+#define __HAVE_ARCH_CMPXCHG 1
+
/*
* Non-existant functions to indicate usage errors at link time
* (or compile-time if the compiler implements __compiletime_error().
@@ -143,7 +145,6 @@ extern void __add_wrong_size(void)
# include <asm/cmpxchg_64.h>
#endif
-#ifdef __HAVE_ARCH_CMPXCHG
#define cmpxchg(ptr, old, new) \
__cmpxchg(ptr, old, new, sizeof(*(ptr)))
@@ -152,7 +153,6 @@ extern void __add_wrong_size(void)
#define cmpxchg_local(ptr, old, new) \
__cmpxchg_local(ptr, old, new, sizeof(*(ptr)))
-#endif
/*
* xadd() adds "inc" to "*ptr" and atomically returns the previous
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index f8bf2eecab86..f7e142926481 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -34,8 +34,6 @@ static inline void set_64bit(volatile u64 *ptr, u64 value)
: "memory");
}
-#define __HAVE_ARCH_CMPXCHG 1
-
#ifdef CONFIG_X86_CMPXCHG64
#define cmpxchg64(ptr, o, n) \
((__typeof__(*(ptr)))__cmpxchg64((ptr), (unsigned long long)(o), \
diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h
index 614be87f1a9b..1af94697aae5 100644
--- a/arch/x86/include/asm/cmpxchg_64.h
+++ b/arch/x86/include/asm/cmpxchg_64.h
@@ -6,8 +6,6 @@ static inline void set_64bit(volatile u64 *ptr, u64 val)
*ptr = val;
}
-#define __HAVE_ARCH_CMPXCHG 1
-
#define cmpxchg64(ptr, o, n) \
({ \
BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index e265ff95d16d..bb9b258d60e7 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -8,7 +8,7 @@
#include <asm/required-features.h>
#endif
-#define NCAPINTS 10 /* N 32-bit words worth of info */
+#define NCAPINTS 11 /* N 32-bit words worth of info */
#define NBUGINTS 1 /* N 32-bit bug flags */
/*
@@ -18,213 +18,218 @@
*/
/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
-#define X86_FEATURE_FPU (0*32+ 0) /* Onboard FPU */
-#define X86_FEATURE_VME (0*32+ 1) /* Virtual Mode Extensions */
-#define X86_FEATURE_DE (0*32+ 2) /* Debugging Extensions */
-#define X86_FEATURE_PSE (0*32+ 3) /* Page Size Extensions */
-#define X86_FEATURE_TSC (0*32+ 4) /* Time Stamp Counter */
-#define X86_FEATURE_MSR (0*32+ 5) /* Model-Specific Registers */
-#define X86_FEATURE_PAE (0*32+ 6) /* Physical Address Extensions */
-#define X86_FEATURE_MCE (0*32+ 7) /* Machine Check Exception */
-#define X86_FEATURE_CX8 (0*32+ 8) /* CMPXCHG8 instruction */
-#define X86_FEATURE_APIC (0*32+ 9) /* Onboard APIC */
-#define X86_FEATURE_SEP (0*32+11) /* SYSENTER/SYSEXIT */
-#define X86_FEATURE_MTRR (0*32+12) /* Memory Type Range Registers */
-#define X86_FEATURE_PGE (0*32+13) /* Page Global Enable */
-#define X86_FEATURE_MCA (0*32+14) /* Machine Check Architecture */
-#define X86_FEATURE_CMOV (0*32+15) /* CMOV instructions */
+#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */
+#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */
+#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */
+#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */
+#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */
+#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */
+#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */
+#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */
+#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */
+#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */
+#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */
+#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */
+#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */
+#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */
+#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions */
/* (plus FCMOVcc, FCOMI with FPU) */
-#define X86_FEATURE_PAT (0*32+16) /* Page Attribute Table */
-#define X86_FEATURE_PSE36 (0*32+17) /* 36-bit PSEs */
-#define X86_FEATURE_PN (0*32+18) /* Processor serial number */
-#define X86_FEATURE_CLFLUSH (0*32+19) /* CLFLUSH instruction */
-#define X86_FEATURE_DS (0*32+21) /* "dts" Debug Store */
-#define X86_FEATURE_ACPI (0*32+22) /* ACPI via MSR */
-#define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */
-#define X86_FEATURE_FXSR (0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
-#define X86_FEATURE_XMM (0*32+25) /* "sse" */
-#define X86_FEATURE_XMM2 (0*32+26) /* "sse2" */
-#define X86_FEATURE_SELFSNOOP (0*32+27) /* "ss" CPU self snoop */
-#define X86_FEATURE_HT (0*32+28) /* Hyper-Threading */
-#define X86_FEATURE_ACC (0*32+29) /* "tm" Automatic clock control */
-#define X86_FEATURE_IA64 (0*32+30) /* IA-64 processor */
-#define X86_FEATURE_PBE (0*32+31) /* Pending Break Enable */
+#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */
+#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */
+#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */
+#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */
+#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */
+#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */
+#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */
+#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
+#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */
+#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */
+#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */
+#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */
+#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */
+#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */
+#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */
/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
/* Don't duplicate feature flags which are redundant with Intel! */
-#define X86_FEATURE_SYSCALL (1*32+11) /* SYSCALL/SYSRET */
-#define X86_FEATURE_MP (1*32+19) /* MP Capable. */
-#define X86_FEATURE_NX (1*32+20) /* Execute Disable */
-#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */
-#define X86_FEATURE_FXSR_OPT (1*32+25) /* FXSAVE/FXRSTOR optimizations */
-#define X86_FEATURE_GBPAGES (1*32+26) /* "pdpe1gb" GB pages */
-#define X86_FEATURE_RDTSCP (1*32+27) /* RDTSCP */
-#define X86_FEATURE_LM (1*32+29) /* Long Mode (x86-64) */
-#define X86_FEATURE_3DNOWEXT (1*32+30) /* AMD 3DNow! extensions */
-#define X86_FEATURE_3DNOW (1*32+31) /* 3DNow! */
+#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */
+#define X86_FEATURE_MP ( 1*32+19) /* MP Capable. */
+#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */
+#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */
+#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
+#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */
+#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */
+#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64) */
+#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow! extensions */
+#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow! */
/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
-#define X86_FEATURE_RECOVERY (2*32+ 0) /* CPU in recovery mode */
-#define X86_FEATURE_LONGRUN (2*32+ 1) /* Longrun power control */
-#define X86_FEATURE_LRTI (2*32+ 3) /* LongRun table interface */
+#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */
+#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */
+#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */
/* Other features, Linux-defined mapping, word 3 */
/* This range is used for feature bits which conflict or are synthesized */
-#define X86_FEATURE_CXMMX (3*32+ 0) /* Cyrix MMX extensions */
-#define X86_FEATURE_K6_MTRR (3*32+ 1) /* AMD K6 nonstandard MTRRs */
-#define X86_FEATURE_CYRIX_ARR (3*32+ 2) /* Cyrix ARRs (= MTRRs) */
-#define X86_FEATURE_CENTAUR_MCR (3*32+ 3) /* Centaur MCRs (= MTRRs) */
+#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */
+#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
+#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
+#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
/* cpu types for specific tunings: */
-#define X86_FEATURE_K8 (3*32+ 4) /* "" Opteron, Athlon64 */
-#define X86_FEATURE_K7 (3*32+ 5) /* "" Athlon */
-#define X86_FEATURE_P3 (3*32+ 6) /* "" P3 */
-#define X86_FEATURE_P4 (3*32+ 7) /* "" P4 */
-#define X86_FEATURE_CONSTANT_TSC (3*32+ 8) /* TSC ticks at a constant rate */
-#define X86_FEATURE_UP (3*32+ 9) /* smp kernel running on up */
-#define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* "" FXSAVE leaks FOP/FIP/FOP */
-#define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */
-#define X86_FEATURE_PEBS (3*32+12) /* Precise-Event Based Sampling */
-#define X86_FEATURE_BTS (3*32+13) /* Branch Trace Store */
-#define X86_FEATURE_SYSCALL32 (3*32+14) /* "" syscall in ia32 userspace */
-#define X86_FEATURE_SYSENTER32 (3*32+15) /* "" sysenter in ia32 userspace */
-#define X86_FEATURE_REP_GOOD (3*32+16) /* rep microcode works well */
-#define X86_FEATURE_MFENCE_RDTSC (3*32+17) /* "" Mfence synchronizes RDTSC */
-#define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* "" Lfence synchronizes RDTSC */
-#define X86_FEATURE_11AP (3*32+19) /* "" Bad local APIC aka 11AP */
-#define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */
-#define X86_FEATURE_ALWAYS (3*32+21) /* "" Always-present feature */
-#define X86_FEATURE_XTOPOLOGY (3*32+22) /* cpu topology enum extensions */
-#define X86_FEATURE_TSC_RELIABLE (3*32+23) /* TSC is known to be reliable */
-#define X86_FEATURE_NONSTOP_TSC (3*32+24) /* TSC does not stop in C states */
-#define X86_FEATURE_CLFLUSH_MONITOR (3*32+25) /* "" clflush reqd with monitor */
-#define X86_FEATURE_EXTD_APICID (3*32+26) /* has extended APICID (8 bits) */
-#define X86_FEATURE_AMD_DCM (3*32+27) /* multi-node processor */
-#define X86_FEATURE_APERFMPERF (3*32+28) /* APERFMPERF */
-#define X86_FEATURE_EAGER_FPU (3*32+29) /* "eagerfpu" Non lazy FPU restore */
-#define X86_FEATURE_NONSTOP_TSC_S3 (3*32+30) /* TSC doesn't stop in S3 state */
+#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */
+#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */
+#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */
+#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */
+#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
+#define X86_FEATURE_UP ( 3*32+ 9) /* smp kernel running on up */
+/* free, was #define X86_FEATURE_FXSAVE_LEAK ( 3*32+10) * "" FXSAVE leaks FOP/FIP/FOP */
+#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */
+#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */
+#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */
+#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in ia32 userspace */
+#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in ia32 userspace */
+#define X86_FEATURE_REP_GOOD ( 3*32+16) /* rep microcode works well */
+#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */
+#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */
+/* free, was #define X86_FEATURE_11AP ( 3*32+19) * "" Bad local APIC aka 11AP */
+#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
+#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */
+#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */
+#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
+#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */
+/* free, was #define X86_FEATURE_CLFLUSH_MONITOR ( 3*32+25) * "" clflush reqd with monitor */
+#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */
+#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */
+#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */
+#define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */
+#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
-#define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */
-#define X86_FEATURE_PCLMULQDQ (4*32+ 1) /* PCLMULQDQ instruction */
-#define X86_FEATURE_DTES64 (4*32+ 2) /* 64-bit Debug Store */
-#define X86_FEATURE_MWAIT (4*32+ 3) /* "monitor" Monitor/Mwait support */
-#define X86_FEATURE_DSCPL (4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */
-#define X86_FEATURE_VMX (4*32+ 5) /* Hardware virtualization */
-#define X86_FEATURE_SMX (4*32+ 6) /* Safer mode */
-#define X86_FEATURE_EST (4*32+ 7) /* Enhanced SpeedStep */
-#define X86_FEATURE_TM2 (4*32+ 8) /* Thermal Monitor 2 */
-#define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental SSE-3 */
-#define X86_FEATURE_CID (4*32+10) /* Context ID */
-#define X86_FEATURE_FMA (4*32+12) /* Fused multiply-add */
-#define X86_FEATURE_CX16 (4*32+13) /* CMPXCHG16B */
-#define X86_FEATURE_XTPR (4*32+14) /* Send Task Priority Messages */
-#define X86_FEATURE_PDCM (4*32+15) /* Performance Capabilities */
-#define X86_FEATURE_PCID (4*32+17) /* Process Context Identifiers */
-#define X86_FEATURE_DCA (4*32+18) /* Direct Cache Access */
-#define X86_FEATURE_XMM4_1 (4*32+19) /* "sse4_1" SSE-4.1 */
-#define X86_FEATURE_XMM4_2 (4*32+20) /* "sse4_2" SSE-4.2 */
-#define X86_FEATURE_X2APIC (4*32+21) /* x2APIC */
-#define X86_FEATURE_MOVBE (4*32+22) /* MOVBE instruction */
-#define X86_FEATURE_POPCNT (4*32+23) /* POPCNT instruction */
-#define X86_FEATURE_TSC_DEADLINE_TIMER (4*32+24) /* Tsc deadline timer */
-#define X86_FEATURE_AES (4*32+25) /* AES instructions */
-#define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
-#define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */
-#define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */
-#define X86_FEATURE_F16C (4*32+29) /* 16-bit fp conversions */
-#define X86_FEATURE_RDRAND (4*32+30) /* The RDRAND instruction */
-#define X86_FEATURE_HYPERVISOR (4*32+31) /* Running on a hypervisor */
+#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */
+#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */
+#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */
+#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" Monitor/Mwait support */
+#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */
+#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */
+#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer mode */
+#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */
+#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */
+#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */
+#define X86_FEATURE_CID ( 4*32+10) /* Context ID */
+#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */
+#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B */
+#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */
+#define X86_FEATURE_PDCM ( 4*32+15) /* Performance Capabilities */
+#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */
+#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */
+#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */
+#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */
+#define X86_FEATURE_X2APIC ( 4*32+21) /* x2APIC */
+#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */
+#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */
+#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* Tsc deadline timer */
+#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */
+#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
+#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE enabled in the OS */
+#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */
+#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit fp conversions */
+#define X86_FEATURE_RDRAND ( 4*32+30) /* The RDRAND instruction */
+#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */
/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
-#define X86_FEATURE_XSTORE (5*32+ 2) /* "rng" RNG present (xstore) */
-#define X86_FEATURE_XSTORE_EN (5*32+ 3) /* "rng_en" RNG enabled */
-#define X86_FEATURE_XCRYPT (5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
-#define X86_FEATURE_XCRYPT_EN (5*32+ 7) /* "ace_en" on-CPU crypto enabled */
-#define X86_FEATURE_ACE2 (5*32+ 8) /* Advanced Cryptography Engine v2 */
-#define X86_FEATURE_ACE2_EN (5*32+ 9) /* ACE v2 enabled */
-#define X86_FEATURE_PHE (5*32+10) /* PadLock Hash Engine */
-#define X86_FEATURE_PHE_EN (5*32+11) /* PHE enabled */
-#define X86_FEATURE_PMM (5*32+12) /* PadLock Montgomery Multiplier */
-#define X86_FEATURE_PMM_EN (5*32+13) /* PMM enabled */
+#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */
+#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */
+#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
+#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
+#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */
+#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */
+#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */
+#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */
+#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */
+#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */
/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
-#define X86_FEATURE_LAHF_LM (6*32+ 0) /* LAHF/SAHF in long mode */
-#define X86_FEATURE_CMP_LEGACY (6*32+ 1) /* If yes HyperThreading not valid */
-#define X86_FEATURE_SVM (6*32+ 2) /* Secure virtual machine */
-#define X86_FEATURE_EXTAPIC (6*32+ 3) /* Extended APIC space */
-#define X86_FEATURE_CR8_LEGACY (6*32+ 4) /* CR8 in 32-bit mode */
-#define X86_FEATURE_ABM (6*32+ 5) /* Advanced bit manipulation */
-#define X86_FEATURE_SSE4A (6*32+ 6) /* SSE-4A */
-#define X86_FEATURE_MISALIGNSSE (6*32+ 7) /* Misaligned SSE mode */
-#define X86_FEATURE_3DNOWPREFETCH (6*32+ 8) /* 3DNow prefetch instructions */
-#define X86_FEATURE_OSVW (6*32+ 9) /* OS Visible Workaround */
-#define X86_FEATURE_IBS (6*32+10) /* Instruction Based Sampling */
-#define X86_FEATURE_XOP (6*32+11) /* extended AVX instructions */
-#define X86_FEATURE_SKINIT (6*32+12) /* SKINIT/STGI instructions */
-#define X86_FEATURE_WDT (6*32+13) /* Watchdog timer */
-#define X86_FEATURE_LWP (6*32+15) /* Light Weight Profiling */
-#define X86_FEATURE_FMA4 (6*32+16) /* 4 operands MAC instructions */
-#define X86_FEATURE_TCE (6*32+17) /* translation cache extension */
-#define X86_FEATURE_NODEID_MSR (6*32+19) /* NodeId MSR */
-#define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */
-#define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */
-#define X86_FEATURE_PERFCTR_CORE (6*32+23) /* core performance counter extensions */
-#define X86_FEATURE_PERFCTR_NB (6*32+24) /* NB performance counter extensions */
-#define X86_FEATURE_PERFCTR_L2 (6*32+28) /* L2 performance counter extensions */
+#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */
+#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */
+#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure virtual machine */
+#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */
+#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */
+#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */
+#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */
+#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */
+#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */
+#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */
+#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */
+#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */
+#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */
+#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */
+#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */
+#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */
+#define X86_FEATURE_TCE ( 6*32+17) /* translation cache extension */
+#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */
+#define X86_FEATURE_TBM ( 6*32+21) /* trailing bit manipulations */
+#define X86_FEATURE_TOPOEXT ( 6*32+22) /* topology extensions CPUID leafs */
+#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */
+#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */
+#define X86_FEATURE_PERFCTR_L2 ( 6*32+28) /* L2 performance counter extensions */
/*
* Auxiliary flags: Linux defined - For features scattered in various
* CPUID levels like 0x6, 0xA etc, word 7
*/
-#define X86_FEATURE_IDA (7*32+ 0) /* Intel Dynamic Acceleration */
-#define X86_FEATURE_ARAT (7*32+ 1) /* Always Running APIC Timer */
-#define X86_FEATURE_CPB (7*32+ 2) /* AMD Core Performance Boost */
-#define X86_FEATURE_EPB (7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
-#define X86_FEATURE_XSAVEOPT (7*32+ 4) /* Optimized Xsave */
-#define X86_FEATURE_PLN (7*32+ 5) /* Intel Power Limit Notification */
-#define X86_FEATURE_PTS (7*32+ 6) /* Intel Package Thermal Status */
-#define X86_FEATURE_DTHERM (7*32+ 7) /* Digital Thermal Sensor */
-#define X86_FEATURE_HW_PSTATE (7*32+ 8) /* AMD HW-PState */
-#define X86_FEATURE_PROC_FEEDBACK (7*32+ 9) /* AMD ProcFeedbackInterface */
+#define X86_FEATURE_IDA ( 7*32+ 0) /* Intel Dynamic Acceleration */
+#define X86_FEATURE_ARAT ( 7*32+ 1) /* Always Running APIC Timer */
+#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
+#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
+#define X86_FEATURE_PLN ( 7*32+ 5) /* Intel Power Limit Notification */
+#define X86_FEATURE_PTS ( 7*32+ 6) /* Intel Package Thermal Status */
+#define X86_FEATURE_DTHERM ( 7*32+ 7) /* Digital Thermal Sensor */
+#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
+#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
/* Virtualization flags: Linux defined, word 8 */
-#define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */
-#define X86_FEATURE_VNMI (8*32+ 1) /* Intel Virtual NMI */
-#define X86_FEATURE_FLEXPRIORITY (8*32+ 2) /* Intel FlexPriority */
-#define X86_FEATURE_EPT (8*32+ 3) /* Intel Extended Page Table */
-#define X86_FEATURE_VPID (8*32+ 4) /* Intel Virtual Processor ID */
-#define X86_FEATURE_NPT (8*32+ 5) /* AMD Nested Page Table support */
-#define X86_FEATURE_LBRV (8*32+ 6) /* AMD LBR Virtualization support */
-#define X86_FEATURE_SVML (8*32+ 7) /* "svm_lock" AMD SVM locking MSR */
-#define X86_FEATURE_NRIPS (8*32+ 8) /* "nrip_save" AMD SVM next_rip save */
-#define X86_FEATURE_TSCRATEMSR (8*32+ 9) /* "tsc_scale" AMD TSC scaling support */
-#define X86_FEATURE_VMCBCLEAN (8*32+10) /* "vmcb_clean" AMD VMCB clean bits support */
-#define X86_FEATURE_FLUSHBYASID (8*32+11) /* AMD flush-by-ASID support */
-#define X86_FEATURE_DECODEASSISTS (8*32+12) /* AMD Decode Assists support */
-#define X86_FEATURE_PAUSEFILTER (8*32+13) /* AMD filtered pause intercept */
-#define X86_FEATURE_PFTHRESHOLD (8*32+14) /* AMD pause filter threshold */
+#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
+#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
+#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
+#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */
+#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */
+#define X86_FEATURE_NPT ( 8*32+ 5) /* AMD Nested Page Table support */
+#define X86_FEATURE_LBRV ( 8*32+ 6) /* AMD LBR Virtualization support */
+#define X86_FEATURE_SVML ( 8*32+ 7) /* "svm_lock" AMD SVM locking MSR */
+#define X86_FEATURE_NRIPS ( 8*32+ 8) /* "nrip_save" AMD SVM next_rip save */
+#define X86_FEATURE_TSCRATEMSR ( 8*32+ 9) /* "tsc_scale" AMD TSC scaling support */
+#define X86_FEATURE_VMCBCLEAN ( 8*32+10) /* "vmcb_clean" AMD VMCB clean bits support */
+#define X86_FEATURE_FLUSHBYASID ( 8*32+11) /* AMD flush-by-ASID support */
+#define X86_FEATURE_DECODEASSISTS ( 8*32+12) /* AMD Decode Assists support */
+#define X86_FEATURE_PAUSEFILTER ( 8*32+13) /* AMD filtered pause intercept */
+#define X86_FEATURE_PFTHRESHOLD ( 8*32+14) /* AMD pause filter threshold */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
-#define X86_FEATURE_FSGSBASE (9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
-#define X86_FEATURE_TSC_ADJUST (9*32+ 1) /* TSC adjustment MSR 0x3b */
-#define X86_FEATURE_BMI1 (9*32+ 3) /* 1st group bit manipulation extensions */
-#define X86_FEATURE_HLE (9*32+ 4) /* Hardware Lock Elision */
-#define X86_FEATURE_AVX2 (9*32+ 5) /* AVX2 instructions */
-#define X86_FEATURE_SMEP (9*32+ 7) /* Supervisor Mode Execution Protection */
-#define X86_FEATURE_BMI2 (9*32+ 8) /* 2nd group bit manipulation extensions */
-#define X86_FEATURE_ERMS (9*32+ 9) /* Enhanced REP MOVSB/STOSB */
-#define X86_FEATURE_INVPCID (9*32+10) /* Invalidate Processor Context ID */
-#define X86_FEATURE_RTM (9*32+11) /* Restricted Transactional Memory */
-#define X86_FEATURE_MPX (9*32+14) /* Memory Protection Extension */
-#define X86_FEATURE_AVX512F (9*32+16) /* AVX-512 Foundation */
-#define X86_FEATURE_RDSEED (9*32+18) /* The RDSEED instruction */
-#define X86_FEATURE_ADX (9*32+19) /* The ADCX and ADOX instructions */
-#define X86_FEATURE_SMAP (9*32+20) /* Supervisor Mode Access Prevention */
-#define X86_FEATURE_CLFLUSHOPT (9*32+23) /* CLFLUSHOPT instruction */
-#define X86_FEATURE_AVX512PF (9*32+26) /* AVX-512 Prefetch */
-#define X86_FEATURE_AVX512ER (9*32+27) /* AVX-512 Exponential and Reciprocal */
-#define X86_FEATURE_AVX512CD (9*32+28) /* AVX-512 Conflict Detection */
+#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
+#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3b */
+#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */
+#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */
+#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */
+#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */
+#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */
+#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */
+#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */
+#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */
+#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */
+#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */
+#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */
+#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */
+#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
+#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
+#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
+#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
+#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */
+
+/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */
+#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */
+#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC */
+#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */
+#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */
/*
* BUG word(s)
@@ -234,8 +239,11 @@
#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */
#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */
#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */
-#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* AMD Erratum 383 */
-#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* AMD Erratum 400 */
+#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
+#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
+#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */
+#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
+#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
@@ -245,6 +253,12 @@
extern const char * const x86_cap_flags[NCAPINTS*32];
extern const char * const x86_power_flags[32];
+/*
+ * In order to save room, we index into this array by doing
+ * X86_BUG_<name> - NCAPINTS*32.
+ */
+extern const char * const x86_bug_flags[NBUGINTS*32];
+
#define test_cpu_cap(c, bit) \
test_bit(bit, (unsigned long *)((c)->x86_capability))
@@ -301,7 +315,6 @@ extern const char * const x86_power_flags[32];
#define cpu_has_avx boot_cpu_has(X86_FEATURE_AVX)
#define cpu_has_avx2 boot_cpu_has(X86_FEATURE_AVX2)
#define cpu_has_ht boot_cpu_has(X86_FEATURE_HT)
-#define cpu_has_mp boot_cpu_has(X86_FEATURE_MP)
#define cpu_has_nx boot_cpu_has(X86_FEATURE_NX)
#define cpu_has_k6_mtrr boot_cpu_has(X86_FEATURE_K6_MTRR)
#define cpu_has_cyrix_arr boot_cpu_has(X86_FEATURE_CYRIX_ARR)
@@ -328,6 +341,7 @@ extern const char * const x86_power_flags[32];
#define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC)
#define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE)
#define cpu_has_xsaveopt boot_cpu_has(X86_FEATURE_XSAVEOPT)
+#define cpu_has_xsaves boot_cpu_has(X86_FEATURE_XSAVES)
#define cpu_has_osxsave boot_cpu_has(X86_FEATURE_OSXSAVE)
#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR)
#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ)
@@ -347,9 +361,6 @@ extern const char * const x86_power_flags[32];
#undef cpu_has_pae
#define cpu_has_pae ___BUG___
-#undef cpu_has_mp
-#define cpu_has_mp 1
-
#undef cpu_has_k6_mtrr
#define cpu_has_k6_mtrr 0
@@ -539,20 +550,20 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
#define static_cpu_has_safe(bit) boot_cpu_has(bit)
#endif
-#define cpu_has_bug(c, bit) cpu_has(c, (bit))
-#define set_cpu_bug(c, bit) set_cpu_cap(c, (bit))
-#define clear_cpu_bug(c, bit) clear_cpu_cap(c, (bit));
+#define cpu_has_bug(c, bit) cpu_has(c, (bit))
+#define set_cpu_bug(c, bit) set_cpu_cap(c, (bit))
+#define clear_cpu_bug(c, bit) clear_cpu_cap(c, (bit))
-#define static_cpu_has_bug(bit) static_cpu_has((bit))
-#define boot_cpu_has_bug(bit) cpu_has_bug(&boot_cpu_data, (bit))
+#define static_cpu_has_bug(bit) static_cpu_has((bit))
+#define static_cpu_has_bug_safe(bit) static_cpu_has_safe((bit))
+#define boot_cpu_has_bug(bit) cpu_has_bug(&boot_cpu_data, (bit))
-#define MAX_CPU_FEATURES (NCAPINTS * 32)
-#define cpu_have_feature boot_cpu_has
+#define MAX_CPU_FEATURES (NCAPINTS * 32)
+#define cpu_have_feature boot_cpu_has
-#define CPU_FEATURE_TYPEFMT "x86,ven%04Xfam%04Xmod%04X"
-#define CPU_FEATURE_TYPEVAL boot_cpu_data.x86_vendor, boot_cpu_data.x86, \
- boot_cpu_data.x86_model
+#define CPU_FEATURE_TYPEFMT "x86,ven%04Xfam%04Xmod%04X"
+#define CPU_FEATURE_TYPEVAL boot_cpu_data.x86_vendor, boot_cpu_data.x86, \
+ boot_cpu_data.x86_model
#endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */
-
#endif /* _ASM_X86_CPUFEATURE_H */
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index 115e3689cd53..412ececa00b9 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -293,7 +293,7 @@ static inline int restore_fpu_checking(struct task_struct *tsk)
/* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
is pending. Clear the x87 state here by setting it to fixed
values. "m" is a random variable that should be in L1 */
- if (unlikely(static_cpu_has_safe(X86_FEATURE_FXSAVE_LEAK))) {
+ if (unlikely(static_cpu_has_bug_safe(X86_BUG_FXSAVE_LEAK))) {
asm volatile(
"fnclex\n\t"
"emms\n\t"
@@ -508,9 +508,12 @@ static inline void user_fpu_begin(void)
static inline void __save_fpu(struct task_struct *tsk)
{
- if (use_xsave())
- xsave_state(&tsk->thread.fpu.state->xsave, -1);
- else
+ if (use_xsave()) {
+ if (unlikely(system_state == SYSTEM_BOOTING))
+ xsave_state_booting(&tsk->thread.fpu.state->xsave, -1);
+ else
+ xsave_state(&tsk->thread.fpu.state->xsave, -1);
+ } else
fpu_fxsave(&tsk->thread.fpu);
}
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 230853da4ec0..0f5fb6b6567e 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -40,9 +40,6 @@ typedef struct {
DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
-/* We can have at most NR_VECTORS irqs routed to a cpu at a time */
-#define MAX_HARDIRQS_PER_CPU NR_VECTORS
-
#define __ARCH_IRQ_STAT
#define inc_irq_stat(member) this_cpu_inc(irq_stat.member)
diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h
index a20365953bf8..ccffa53750a8 100644
--- a/arch/x86/include/asm/i8259.h
+++ b/arch/x86/include/asm/i8259.h
@@ -67,4 +67,9 @@ struct legacy_pic {
extern struct legacy_pic *legacy_pic;
extern struct legacy_pic null_legacy_pic;
+static inline int nr_legacy_irqs(void)
+{
+ return legacy_pic->nr_legacy_irqs;
+}
+
#endif /* _ASM_X86_I8259_H */
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 90f97b4b9347..0aeed5ca356e 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -98,6 +98,8 @@ struct IR_IO_APIC_route_entry {
#define IOAPIC_AUTO -1
#define IOAPIC_EDGE 0
#define IOAPIC_LEVEL 1
+#define IOAPIC_MAP_ALLOC 0x1
+#define IOAPIC_MAP_CHECK 0x2
#ifdef CONFIG_X86_IO_APIC
@@ -118,9 +120,6 @@ extern int mp_irq_entries;
/* MP IRQ source entries */
extern struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
-/* non-0 if default (table-less) MP configuration */
-extern int mpc_default_type;
-
/* Older SiS APIC requires we rewrite the index register */
extern int sis_apic_bug;
@@ -133,9 +132,6 @@ extern int noioapicquirk;
/* -1 if "noapic" boot option passed */
extern int noioapicreroute;
-/* 1 if the timer IRQ uses the '8259A Virtual Wire' mode */
-extern int timer_through_8259;
-
/*
* If we use the IO-APIC for IRQ routing, disable automatic
* assignment of PCI IRQ's.
@@ -145,24 +141,17 @@ extern int timer_through_8259;
struct io_apic_irq_attr;
struct irq_cfg;
-extern int io_apic_set_pci_routing(struct device *dev, int irq,
- struct io_apic_irq_attr *irq_attr);
-void setup_IO_APIC_irq_extra(u32 gsi);
extern void ioapic_insert_resources(void);
extern int native_setup_ioapic_entry(int, struct IO_APIC_route_entry *,
unsigned int, int,
struct io_apic_irq_attr *);
-extern int native_setup_ioapic_entry(int, struct IO_APIC_route_entry *,
- unsigned int, int,
- struct io_apic_irq_attr *);
extern void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg);
extern void native_compose_msi_msg(struct pci_dev *pdev,
unsigned int irq, unsigned int dest,
struct msi_msg *msg, u8 hpet_id);
extern void native_eoi_ioapic_pin(int apic, int pin, int vector);
-int io_apic_setup_irq_pin_once(unsigned int irq, int node, struct io_apic_irq_attr *attr);
extern int save_ioapic_entries(void);
extern void mask_ioapic_entries(void);
@@ -171,15 +160,40 @@ extern int restore_ioapic_entries(void);
extern void setup_ioapic_ids_from_mpc(void);
extern void setup_ioapic_ids_from_mpc_nocheck(void);
+enum ioapic_domain_type {
+ IOAPIC_DOMAIN_INVALID,
+ IOAPIC_DOMAIN_LEGACY,
+ IOAPIC_DOMAIN_STRICT,
+ IOAPIC_DOMAIN_DYNAMIC,
+};
+
+struct device_node;
+struct irq_domain;
+struct irq_domain_ops;
+
+struct ioapic_domain_cfg {
+ enum ioapic_domain_type type;
+ const struct irq_domain_ops *ops;
+ struct device_node *dev;
+};
+
struct mp_ioapic_gsi{
u32 gsi_base;
u32 gsi_end;
};
-extern struct mp_ioapic_gsi mp_gsi_routing[];
extern u32 gsi_top;
-int mp_find_ioapic(u32 gsi);
-int mp_find_ioapic_pin(int ioapic, u32 gsi);
-void __init mp_register_ioapic(int id, u32 address, u32 gsi_base);
+
+extern int mp_find_ioapic(u32 gsi);
+extern int mp_find_ioapic_pin(int ioapic, u32 gsi);
+extern u32 mp_pin_to_gsi(int ioapic, int pin);
+extern int mp_map_gsi_to_irq(u32 gsi, unsigned int flags);
+extern void mp_unmap_irq(int irq);
+extern void __init mp_register_ioapic(int id, u32 address, u32 gsi_base,
+ struct ioapic_domain_cfg *cfg);
+extern int mp_irqdomain_map(struct irq_domain *domain, unsigned int virq,
+ irq_hw_number_t hwirq);
+extern void mp_irqdomain_unmap(struct irq_domain *domain, unsigned int virq);
+extern int mp_set_gsi_attr(u32 gsi, int trigger, int polarity, int node);
extern void __init pre_init_apic_IRQ0(void);
extern void mp_save_irq(struct mpc_intsrc *m);
@@ -217,14 +231,12 @@ extern void io_apic_eoi(unsigned int apic, unsigned int vector);
#define io_apic_assign_pci_irqs 0
#define setup_ioapic_ids_from_mpc x86_init_noop
-static const int timer_through_8259 = 0;
static inline void ioapic_insert_resources(void) { }
#define gsi_top (NR_IRQS_LEGACY)
static inline int mp_find_ioapic(u32 gsi) { return 0; }
-
-struct io_apic_irq_attr;
-static inline int io_apic_set_pci_routing(struct device *dev, int irq,
- struct io_apic_irq_attr *irq_attr) { return 0; }
+static inline u32 mp_pin_to_gsi(int ioapic, int pin) { return UINT_MAX; }
+static inline int mp_map_gsi_to_irq(u32 gsi, unsigned int flags) { return gsi; }
+static inline void mp_unmap_irq(int irq) { }
static inline int save_ioapic_entries(void)
{
diff --git a/arch/x86/include/asm/mc146818rtc.h b/arch/x86/include/asm/mc146818rtc.h
index a55c7efcc4ed..0f555cc31984 100644
--- a/arch/x86/include/asm/mc146818rtc.h
+++ b/arch/x86/include/asm/mc146818rtc.h
@@ -13,7 +13,7 @@
#define RTC_ALWAYS_BCD 1 /* RTC operates in binary mode */
#endif
-#if defined(CONFIG_X86_32) && defined(__HAVE_ARCH_CMPXCHG)
+#if defined(CONFIG_X86_32)
/*
* This lock provides nmi access to the CMOS/RTC registers. It has some
* special properties. It is owned by a CPU and stores the index register
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index f5a617956735..7bef40a01a1d 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -40,8 +40,6 @@ extern int mp_bus_id_to_type[MAX_MP_BUSSES];
extern DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
extern unsigned int boot_cpu_physical_apicid;
-extern unsigned int max_physical_apicid;
-extern int mpc_default_type;
extern unsigned long mp_lapic_addr;
#ifdef CONFIG_X86_LOCAL_APIC
@@ -88,15 +86,6 @@ static inline void early_reserve_e820_mpc_new(void) { }
#endif
int generic_processor_info(int apicid, int version);
-#ifdef CONFIG_ACPI
-extern void mp_register_ioapic(int id, u32 address, u32 gsi_base);
-extern void mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
- u32 gsi);
-extern void mp_config_acpi_legacy_irqs(void);
-struct device;
-extern int mp_register_gsi(struct device *dev, u32 gsi, int edge_level,
- int active_high_low);
-#endif /* CONFIG_ACPI */
#define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_LOCAL_APIC)
@@ -163,6 +152,4 @@ extern physid_mask_t phys_cpu_present_map;
extern int generic_mps_oem_check(struct mpc_table *, char *, char *);
-extern int default_acpi_madt_oem_check(char *, char *);
-
#endif /* _ASM_X86_MPSPEC_H */
diff --git a/arch/x86/include/asm/mutex_32.h b/arch/x86/include/asm/mutex_32.h
index 0208c3c2cbc6..85e6cda45a02 100644
--- a/arch/x86/include/asm/mutex_32.h
+++ b/arch/x86/include/asm/mutex_32.h
@@ -100,23 +100,11 @@ do { \
static inline int __mutex_fastpath_trylock(atomic_t *count,
int (*fail_fn)(atomic_t *))
{
- /*
- * We have two variants here. The cmpxchg based one is the best one
- * because it never induce a false contention state. It is included
- * here because architectures using the inc/dec algorithms over the
- * xchg ones are much more likely to support cmpxchg natively.
- *
- * If not we fall back to the spinlock based variant - that is
- * just as efficient (and simpler) as a 'destructive' probing of
- * the mutex state would be.
- */
-#ifdef __HAVE_ARCH_CMPXCHG
+ /* cmpxchg because it never induces a false contention state. */
if (likely(atomic_cmpxchg(count, 1, 0) == 1))
return 1;
+
return 0;
-#else
- return fail_fn(count);
-#endif
}
#endif /* _ASM_X86_MUTEX_32_H */
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index 1da25a5f96f9..a1410db38a1a 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -43,7 +43,7 @@ static inline void __mwait(unsigned long eax, unsigned long ecx)
static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
{
if (!current_set_polling_and_test()) {
- if (static_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) {
+ if (static_cpu_has_bug(X86_BUG_CLFLUSH_MONITOR)) {
mb();
clflush((void *)&current_thread_info()->flags);
mb();
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index a4ea02351f4d..2c8d3b8e7fcb 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -386,8 +386,8 @@ struct bndcsr_struct {
struct xsave_hdr_struct {
u64 xstate_bv;
- u64 reserved1[2];
- u64 reserved2[5];
+ u64 xcomp_bv;
+ u64 reserved[6];
} __attribute__((packed));
struct xsave_struct {
diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h
index fbeb06ed0eaa..1d081ac1cd69 100644
--- a/arch/x86/include/asm/prom.h
+++ b/arch/x86/include/asm/prom.h
@@ -26,12 +26,10 @@
extern int of_ioapic;
extern u64 initial_dtb;
extern void add_dtb(u64 data);
-extern void x86_add_irq_domains(void);
void x86_of_pci_init(void);
void x86_dtb_init(void);
#else
static inline void add_dtb(u64 data) { }
-static inline void x86_add_irq_domains(void) { }
static inline void x86_of_pci_init(void) { }
static inline void x86_dtb_init(void) { }
#define of_ioapic 0
diff --git a/arch/x86/include/asm/qrwlock.h b/arch/x86/include/asm/qrwlock.h
index 70f46f07f94e..ae0e241e228b 100644
--- a/arch/x86/include/asm/qrwlock.h
+++ b/arch/x86/include/asm/qrwlock.h
@@ -3,7 +3,7 @@
#include <asm-generic/qrwlock_types.h>
-#if !defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE)
+#ifndef CONFIG_X86_PPRO_FENCE
#define queue_write_unlock queue_write_unlock
static inline void queue_write_unlock(struct qrwlock *lock)
{
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 0b46ef261c77..2d60a7813dfe 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -73,6 +73,7 @@
#define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD (is_uv1_hub() ? \
UV1_INTD_SOFT_ACK_TIMEOUT_PERIOD : \
UV2_INTD_SOFT_ACK_TIMEOUT_PERIOD)
+/* assuming UV3 is the same */
#define BAU_MISC_CONTROL_MULT_MASK 3
@@ -93,6 +94,8 @@
#define SOFTACK_MSHIFT UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT
#define SOFTACK_PSHIFT UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT
#define SOFTACK_TIMEOUT_PERIOD UV_INTD_SOFT_ACK_TIMEOUT_PERIOD
+#define PREFETCH_HINT_SHFT UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_PREFETCH_HINT_SHFT
+#define SB_STATUS_SHFT UV3H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT
#define write_gmmr uv_write_global_mmr64
#define write_lmmr uv_write_local_mmr
#define read_lmmr uv_read_local_mmr
@@ -322,8 +325,9 @@ struct uv1_bau_msg_header {
/*
* UV2 Message header: 16 bytes (128 bits) (bytes 0x30-0x3f of descriptor)
* see figure 9-2 of harp_sys.pdf
+ * assuming UV3 is the same
*/
-struct uv2_bau_msg_header {
+struct uv2_3_bau_msg_header {
unsigned int base_dest_nasid:15; /* nasid of the first bit */
/* bits 14:0 */ /* in uvhub map */
unsigned int dest_subnodeid:5; /* must be 0x10, for the LB */
@@ -395,7 +399,7 @@ struct bau_desc {
*/
union bau_msg_header {
struct uv1_bau_msg_header uv1_hdr;
- struct uv2_bau_msg_header uv2_hdr;
+ struct uv2_3_bau_msg_header uv2_3_hdr;
} header;
struct bau_msg_payload payload;
@@ -631,11 +635,6 @@ struct bau_control {
struct hub_and_pnode *thp;
};
-static inline unsigned long read_mmr_uv2_status(void)
-{
- return read_lmmr(UV2H_LB_BAU_SB_ACTIVATION_STATUS_2);
-}
-
static inline void write_mmr_data_broadcast(int pnode, unsigned long mmr_image)
{
write_gmmr(pnode, UVH_BAU_DATA_BROADCAST, mmr_image);
@@ -760,7 +759,11 @@ static inline int atomic_read_short(const struct atomic_short *v)
*/
static inline int atom_asr(short i, struct atomic_short *v)
{
- return i + xadd(&v->counter, i);
+ short __i = i;
+ asm volatile(LOCK_PREFIX "xaddw %0, %1"
+ : "+r" (i), "+m" (v->counter)
+ : : "memory");
+ return i + __i;
}
/*
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index 30be253dd283..8021bd28c0f1 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -18,15 +18,15 @@ struct vdso_image {
unsigned long alt, alt_len;
- unsigned long sym_end_mapping; /* Total size of the mapping */
-
- unsigned long sym_vvar_page;
- unsigned long sym_hpet_page;
- unsigned long sym_VDSO32_NOTE_MASK;
- unsigned long sym___kernel_sigreturn;
- unsigned long sym___kernel_rt_sigreturn;
- unsigned long sym___kernel_vsyscall;
- unsigned long sym_VDSO32_SYSENTER_RETURN;
+ long sym_vvar_start; /* Negative offset to the vvar area */
+
+ long sym_vvar_page;
+ long sym_hpet_page;
+ long sym_VDSO32_NOTE_MASK;
+ long sym___kernel_sigreturn;
+ long sym___kernel_rt_sigreturn;
+ long sym___kernel_vsyscall;
+ long sym_VDSO32_SYSENTER_RETURN;
};
#ifdef CONFIG_X86_64
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h
index d949ef28c48b..7e7a79ada658 100644
--- a/arch/x86/include/asm/xsave.h
+++ b/arch/x86/include/asm/xsave.h
@@ -52,24 +52,170 @@ extern void xsave_init(void);
extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask);
extern int init_fpu(struct task_struct *child);
-static inline int fpu_xrstor_checking(struct xsave_struct *fx)
+/* These macros all use (%edi)/(%rdi) as the single memory argument. */
+#define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27"
+#define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37"
+#define XSAVES ".byte " REX_PREFIX "0x0f,0xc7,0x2f"
+#define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f"
+#define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f"
+
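+/*
+ * Exception fixup shared by the wrappers below: if the instruction at
+ * label 1: faults, the fixup at 3: sets err to -1 and resumes at label 2:.
+ */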
+#define xstate_fault ".section .fixup,\"ax\"\n" \
+ "3: movl $-1,%[err]\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ _ASM_EXTABLE(1b, 3b) \
+ : [err] "=r" (err)
+
+/*
+ * This function is called only during boot, when the x86 caps are not yet
+ * set up and alternatives cannot be used.
+ */
+static inline int xsave_state_booting(struct xsave_struct *fx, u64 mask)
{
- int err;
+ u32 lmask = mask;
+ u32 hmask = mask >> 32;
+ int err = 0;
+
+ WARN_ON(system_state != SYSTEM_BOOTING);
+
+ if (boot_cpu_has(X86_FEATURE_XSAVES))
+ asm volatile("1:"XSAVES"\n\t"
+ "2:\n\t"
+ : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
+ : "memory");
+ else
+ asm volatile("1:"XSAVE"\n\t"
+ "2:\n\t"
+ : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
+ : "memory");
+
+ asm volatile(xstate_fault
+ : "0" (0)
+ : "memory");
+
+ return err;
+}
+
+/*
+ * This function is called only during boot, when the x86 caps are not yet
+ * set up and alternatives cannot be used.
+ */
+static inline int xrstor_state_booting(struct xsave_struct *fx, u64 mask)
+{
+ u32 lmask = mask;
+ u32 hmask = mask >> 32;
+ int err = 0;
+
+ WARN_ON(system_state != SYSTEM_BOOTING);
- asm volatile("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n\t"
- "2:\n"
- ".section .fixup,\"ax\"\n"
- "3: movl $-1,%[err]\n"
- " jmp 2b\n"
- ".previous\n"
- _ASM_EXTABLE(1b, 3b)
- : [err] "=r" (err)
- : "D" (fx), "m" (*fx), "a" (-1), "d" (-1), "0" (0)
+ if (boot_cpu_has(X86_FEATURE_XSAVES))
+ asm volatile("1:"XRSTORS"\n\t"
+ "2:\n\t"
+ : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
+ : "memory");
+ else
+ asm volatile("1:"XRSTOR"\n\t"
+ "2:\n\t"
+ : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
+ : "memory");
+
+ asm volatile(xstate_fault
+ : "0" (0)
+ : "memory");
+
+ return err;
+}
+
+/*
+ * Save processor xstate to xsave area.
+ */
+static inline int xsave_state(struct xsave_struct *fx, u64 mask)
+{
+ u32 lmask = mask;
+ u32 hmask = mask >> 32;
+ int err = 0;
+
+ /*
+	 * If xsaves is enabled, xsaves replaces xsaveopt because it supports
+	 * the compacted format and supervisor states in addition to the
+	 * modified optimization provided by xsaveopt.
+	 *
+	 * Otherwise, if xsaveopt is enabled, xsaveopt replaces xsave because
+	 * xsaveopt supports the modified optimization, which xsave does not.
+	 *
+	 * If neither xsaves nor xsaveopt is enabled, use xsave.
+ */
+ alternative_input_2(
+ "1:"XSAVE,
+ "1:"XSAVEOPT,
+ X86_FEATURE_XSAVEOPT,
+ "1:"XSAVES,
+ X86_FEATURE_XSAVES,
+ [fx] "D" (fx), "a" (lmask), "d" (hmask) :
+ "memory");
+ asm volatile("2:\n\t"
+ xstate_fault
+ : "0" (0)
: "memory");
return err;
}
+/*
+ * Restore processor xstate from xsave area.
+ */
+static inline int xrstor_state(struct xsave_struct *fx, u64 mask)
+{
+ int err = 0;
+ u32 lmask = mask;
+ u32 hmask = mask >> 32;
+
+ /*
+	 * Use xrstors to restore context if it is enabled. xrstors supports
+	 * the compacted xsave area format, which xrstor does not.
+ */
+ alternative_input(
+ "1: " XRSTOR,
+ "1: " XRSTORS,
+ X86_FEATURE_XSAVES,
+ "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
+ : "memory");
+
+ asm volatile("2:\n"
+ xstate_fault
+ : "0" (0)
+ : "memory");
+
+ return err;
+}
+
+/*
+ * Save xstate context for old process during context switch.
+ */
+static inline void fpu_xsave(struct fpu *fpu)
+{
+ xsave_state(&fpu->state->xsave, -1);
+}
+
+/*
+ * Restore xstate context for new process during context switch.
+ */
+static inline int fpu_xrstor_checking(struct xsave_struct *fx)
+{
+ return xrstor_state(fx, -1);
+}
+
+/*
+ * Save xstate to user space xsave area.
+ *
+ * We don't use the modified optimization because xrstor/xrstors might track
+ * a different application.
+ *
+ * We don't use the compacted xsave area format, for backward compatibility
+ * with old applications that don't understand it.
+ */
static inline int xsave_user(struct xsave_struct __user *buf)
{
int err;
@@ -83,69 +229,34 @@ static inline int xsave_user(struct xsave_struct __user *buf)
return -EFAULT;
__asm__ __volatile__(ASM_STAC "\n"
- "1: .byte " REX_PREFIX "0x0f,0xae,0x27\n"
+ "1:"XSAVE"\n"
"2: " ASM_CLAC "\n"
- ".section .fixup,\"ax\"\n"
- "3: movl $-1,%[err]\n"
- " jmp 2b\n"
- ".previous\n"
- _ASM_EXTABLE(1b,3b)
- : [err] "=r" (err)
+ xstate_fault
: "D" (buf), "a" (-1), "d" (-1), "0" (0)
: "memory");
return err;
}
+/*
+ * Restore xstate from user space xsave area.
+ */
static inline int xrestore_user(struct xsave_struct __user *buf, u64 mask)
{
- int err;
+ int err = 0;
struct xsave_struct *xstate = ((__force struct xsave_struct *)buf);
u32 lmask = mask;
u32 hmask = mask >> 32;
__asm__ __volatile__(ASM_STAC "\n"
- "1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n"
+ "1:"XRSTOR"\n"
"2: " ASM_CLAC "\n"
- ".section .fixup,\"ax\"\n"
- "3: movl $-1,%[err]\n"
- " jmp 2b\n"
- ".previous\n"
- _ASM_EXTABLE(1b,3b)
- : [err] "=r" (err)
+ xstate_fault
: "D" (xstate), "a" (lmask), "d" (hmask), "0" (0)
: "memory"); /* memory required? */
return err;
}
-static inline void xrstor_state(struct xsave_struct *fx, u64 mask)
-{
- u32 lmask = mask;
- u32 hmask = mask >> 32;
-
- asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x2f\n\t"
- : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
- : "memory");
-}
-
-static inline void xsave_state(struct xsave_struct *fx, u64 mask)
-{
- u32 lmask = mask;
- u32 hmask = mask >> 32;
+void *get_xsave_addr(struct xsave_struct *xsave, int xstate);
+void setup_xstate_comp(void);
- asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x27\n\t"
- : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
- : "memory");
-}
-
-static inline void fpu_xsave(struct fpu *fpu)
-{
- /* This, however, we can work around by forcing the compiler to select
- an addressing mode that doesn't require extended registers. */
- alternative_input(
- ".byte " REX_PREFIX "0x0f,0xae,0x27",
- ".byte " REX_PREFIX "0x0f,0xae,0x37",
- X86_FEATURE_XSAVEOPT,
- [fx] "D" (&fpu->state->xsave), "a" (-1), "d" (-1) :
- "memory");
-}
#endif
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index fcf2b3ae1bf0..5cd1569518ef 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -297,6 +297,8 @@
#define MSR_IA32_TSC_ADJUST 0x0000003b
#define MSR_IA32_BNDCFGS 0x00000d90
+#define MSR_IA32_XSS 0x00000da0
+
#define FEATURE_CONTROL_LOCKED (1<<0)
#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1)
#define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1<<2)
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 86281ffb96d6..b436fc735aa4 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -31,6 +31,7 @@
#include <linux/module.h>
#include <linux/dmi.h>
#include <linux/irq.h>
+#include <linux/irqdomain.h>
#include <linux/slab.h>
#include <linux/bootmem.h>
#include <linux/ioport.h>
@@ -43,6 +44,7 @@
#include <asm/io.h>
#include <asm/mpspec.h>
#include <asm/smp.h>
+#include <asm/i8259.h>
#include "sleep.h" /* To include x86_acpi_suspend_lowlevel */
static int __initdata acpi_force = 0;
@@ -74,10 +76,6 @@ int acpi_fix_pin2_polarity __initdata;
static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
#endif
-#ifndef __HAVE_ARCH_CMPXCHG
-#warning ACPI uses CMPXCHG, i486 and later hardware
-#endif
-
/* --------------------------------------------------------------------------
Boot-time Configuration
-------------------------------------------------------------------------- */
@@ -97,44 +95,7 @@ static u32 isa_irq_to_gsi[NR_IRQS_LEGACY] __read_mostly = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
-static unsigned int gsi_to_irq(unsigned int gsi)
-{
- unsigned int irq = gsi + NR_IRQS_LEGACY;
- unsigned int i;
-
- for (i = 0; i < NR_IRQS_LEGACY; i++) {
- if (isa_irq_to_gsi[i] == gsi) {
- return i;
- }
- }
-
- /* Provide an identity mapping of gsi == irq
- * except on truly weird platforms that have
- * non isa irqs in the first 16 gsis.
- */
- if (gsi >= NR_IRQS_LEGACY)
- irq = gsi;
- else
- irq = gsi_top + gsi;
-
- return irq;
-}
-
-static u32 irq_to_gsi(int irq)
-{
- unsigned int gsi;
-
- if (irq < NR_IRQS_LEGACY)
- gsi = isa_irq_to_gsi[irq];
- else if (irq < gsi_top)
- gsi = irq;
- else if (irq < (gsi_top + NR_IRQS_LEGACY))
- gsi = irq - gsi_top;
- else
- gsi = 0xffffffff;
-
- return gsi;
-}
+#define ACPI_INVALID_GSI INT_MIN
/*
* This is just a simple wrapper around early_ioremap(),
@@ -345,11 +306,145 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e
#endif /*CONFIG_X86_LOCAL_APIC */
#ifdef CONFIG_X86_IO_APIC
+#define MP_ISA_BUS 0
+
+static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
+ u32 gsi)
+{
+ int ioapic;
+ int pin;
+ struct mpc_intsrc mp_irq;
+
+ /*
+ * Convert 'gsi' to 'ioapic.pin'.
+ */
+ ioapic = mp_find_ioapic(gsi);
+ if (ioapic < 0)
+ return;
+ pin = mp_find_ioapic_pin(ioapic, gsi);
+
+ /*
+ * TBD: This check is for faulty timer entries, where the override
+ * erroneously sets the trigger to level, resulting in a HUGE
+ * increase of timer interrupts!
+ */
+ if ((bus_irq == 0) && (trigger == 3))
+ trigger = 1;
+
+ mp_irq.type = MP_INTSRC;
+ mp_irq.irqtype = mp_INT;
+ mp_irq.irqflag = (trigger << 2) | polarity;
+ mp_irq.srcbus = MP_ISA_BUS;
+ mp_irq.srcbusirq = bus_irq; /* IRQ */
+ mp_irq.dstapic = mpc_ioapic_id(ioapic); /* APIC ID */
+ mp_irq.dstirq = pin; /* INTIN# */
+
+ mp_save_irq(&mp_irq);
+
+ /*
+	 * Reset the default identity mapping if gsi is also a legacy IRQ,
+	 * otherwise there will be more than one entry with the same GSI
+	 * and acpi_isa_irq_to_gsi() may give the wrong result.
+ */
+ if (gsi < nr_legacy_irqs() && isa_irq_to_gsi[gsi] == gsi)
+ isa_irq_to_gsi[gsi] = ACPI_INVALID_GSI;
+ isa_irq_to_gsi[bus_irq] = gsi;
+}
+
+static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger,
+ int polarity)
+{
+#ifdef CONFIG_X86_MPPARSE
+ struct mpc_intsrc mp_irq;
+ struct pci_dev *pdev;
+ unsigned char number;
+ unsigned int devfn;
+ int ioapic;
+ u8 pin;
+
+ if (!acpi_ioapic)
+ return 0;
+ if (!dev || !dev_is_pci(dev))
+ return 0;
+
+ pdev = to_pci_dev(dev);
+ number = pdev->bus->number;
+ devfn = pdev->devfn;
+ pin = pdev->pin;
+	/* Build an entry identical to what would appear in the mptable */
+ mp_irq.type = MP_INTSRC;
+ mp_irq.irqtype = mp_INT;
+ mp_irq.irqflag = (trigger == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) |
+ (polarity == ACPI_ACTIVE_HIGH ? 1 : 3);
+ mp_irq.srcbus = number;
+ mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3);
+ ioapic = mp_find_ioapic(gsi);
+ mp_irq.dstapic = mpc_ioapic_id(ioapic);
+ mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi);
+
+ mp_save_irq(&mp_irq);
+#endif
+ return 0;
+}
+
+static int mp_register_gsi(struct device *dev, u32 gsi, int trigger,
+ int polarity)
+{
+ int irq, node;
+
+ if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
+ return gsi;
+
+ /* Don't set up the ACPI SCI because it's already set up */
+ if (acpi_gbl_FADT.sci_interrupt == gsi)
+ return gsi;
+
+ trigger = trigger == ACPI_EDGE_SENSITIVE ? 0 : 1;
+ polarity = polarity == ACPI_ACTIVE_HIGH ? 0 : 1;
+ node = dev ? dev_to_node(dev) : NUMA_NO_NODE;
+ if (mp_set_gsi_attr(gsi, trigger, polarity, node)) {
+ pr_warn("Failed to set pin attr for GSI%d\n", gsi);
+ return -1;
+ }
+
+ irq = mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC);
+ if (irq < 0)
+ return irq;
+
+ if (enable_update_mptable)
+ mp_config_acpi_gsi(dev, gsi, trigger, polarity);
+
+ return irq;
+}
+
+static void mp_unregister_gsi(u32 gsi)
+{
+ int irq;
+
+ if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
+ return;
+
+ if (acpi_gbl_FADT.sci_interrupt == gsi)
+ return;
+
+ irq = mp_map_gsi_to_irq(gsi, 0);
+ if (irq > 0)
+ mp_unmap_irq(irq);
+}
+
+static struct irq_domain_ops acpi_irqdomain_ops = {
+ .map = mp_irqdomain_map,
+ .unmap = mp_irqdomain_unmap,
+};
static int __init
acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end)
{
struct acpi_madt_io_apic *ioapic = NULL;
+ struct ioapic_domain_cfg cfg = {
+ .type = IOAPIC_DOMAIN_DYNAMIC,
+ .ops = &acpi_irqdomain_ops,
+ };
ioapic = (struct acpi_madt_io_apic *)header;
@@ -358,8 +453,12 @@ acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end)
acpi_table_print_madt_entry(header);
- mp_register_ioapic(ioapic->id,
- ioapic->address, ioapic->global_irq_base);
+ /* Statically assign IRQ numbers for IOAPICs hosting legacy IRQs */
+ if (ioapic->global_irq_base < nr_legacy_irqs())
+ cfg.type = IOAPIC_DOMAIN_LEGACY;
+
+ mp_register_ioapic(ioapic->id, ioapic->address, ioapic->global_irq_base,
+ &cfg);
return 0;
}
@@ -382,11 +481,6 @@ static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger,
if (acpi_sci_flags & ACPI_MADT_POLARITY_MASK)
polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK;
- /*
- * mp_config_acpi_legacy_irqs() already setup IRQs < 16
- * If GSI is < 16, this will update its flags,
- * else it will create a new mp_irqs[] entry.
- */
mp_override_legacy_irq(bus_irq, polarity, trigger, gsi);
/*
@@ -508,25 +602,28 @@ void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger)
outb(new >> 8, 0x4d1);
}
-int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
+int acpi_gsi_to_irq(u32 gsi, unsigned int *irqp)
{
- *irq = gsi_to_irq(gsi);
+ int irq = mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC | IOAPIC_MAP_CHECK);
-#ifdef CONFIG_X86_IO_APIC
- if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC)
- setup_IO_APIC_irq_extra(gsi);
-#endif
+ if (irq >= 0) {
+ *irqp = irq;
+ return 0;
+ }
- return 0;
+ return -1;
}
EXPORT_SYMBOL_GPL(acpi_gsi_to_irq);
int acpi_isa_irq_to_gsi(unsigned isa_irq, u32 *gsi)
{
- if (isa_irq >= 16)
- return -1;
- *gsi = irq_to_gsi(isa_irq);
- return 0;
+ if (isa_irq < nr_legacy_irqs() &&
+ isa_irq_to_gsi[isa_irq] != ACPI_INVALID_GSI) {
+ *gsi = isa_irq_to_gsi[isa_irq];
+ return 0;
+ }
+
+ return -1;
}
static int acpi_register_gsi_pic(struct device *dev, u32 gsi,
@@ -546,15 +643,25 @@ static int acpi_register_gsi_pic(struct device *dev, u32 gsi,
static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
int trigger, int polarity)
{
+ int irq = gsi;
+
#ifdef CONFIG_X86_IO_APIC
- gsi = mp_register_gsi(dev, gsi, trigger, polarity);
+ irq = mp_register_gsi(dev, gsi, trigger, polarity);
#endif
- return gsi;
+ return irq;
+}
+
+static void acpi_unregister_gsi_ioapic(u32 gsi)
+{
+#ifdef CONFIG_X86_IO_APIC
+ mp_unregister_gsi(gsi);
+#endif
}
int (*__acpi_register_gsi)(struct device *dev, u32 gsi,
int trigger, int polarity) = acpi_register_gsi_pic;
+void (*__acpi_unregister_gsi)(u32 gsi) = NULL;
#ifdef CONFIG_ACPI_SLEEP
int (*acpi_suspend_lowlevel)(void) = x86_acpi_suspend_lowlevel;
@@ -568,32 +675,22 @@ int (*acpi_suspend_lowlevel)(void);
*/
int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
{
- unsigned int irq;
- unsigned int plat_gsi = gsi;
-
- plat_gsi = (*__acpi_register_gsi)(dev, gsi, trigger, polarity);
- irq = gsi_to_irq(plat_gsi);
-
- return irq;
+ return __acpi_register_gsi(dev, gsi, trigger, polarity);
}
EXPORT_SYMBOL_GPL(acpi_register_gsi);
void acpi_unregister_gsi(u32 gsi)
{
+ if (__acpi_unregister_gsi)
+ __acpi_unregister_gsi(gsi);
}
EXPORT_SYMBOL_GPL(acpi_unregister_gsi);
-void __init acpi_set_irq_model_pic(void)
-{
- acpi_irq_model = ACPI_IRQ_MODEL_PIC;
- __acpi_register_gsi = acpi_register_gsi_pic;
- acpi_ioapic = 0;
-}
-
-void __init acpi_set_irq_model_ioapic(void)
+static void __init acpi_set_irq_model_ioapic(void)
{
acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC;
__acpi_register_gsi = acpi_register_gsi_ioapic;
+ __acpi_unregister_gsi = acpi_unregister_gsi_ioapic;
acpi_ioapic = 1;
}
@@ -829,9 +926,8 @@ static int __init early_acpi_parse_madt_lapic_addr_ovr(void)
* and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value).
*/
- count =
- acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE,
- acpi_parse_lapic_addr_ovr, 0);
+ count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE,
+ acpi_parse_lapic_addr_ovr, 0);
if (count < 0) {
printk(KERN_ERR PREFIX
"Error parsing LAPIC address override entry\n");
@@ -856,9 +952,8 @@ static int __init acpi_parse_madt_lapic_entries(void)
* and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value).
*/
- count =
- acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE,
- acpi_parse_lapic_addr_ovr, 0);
+ count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE,
+ acpi_parse_lapic_addr_ovr, 0);
if (count < 0) {
printk(KERN_ERR PREFIX
"Error parsing LAPIC address override entry\n");
@@ -886,11 +981,10 @@ static int __init acpi_parse_madt_lapic_entries(void)
return count;
}
- x2count =
- acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC_NMI,
- acpi_parse_x2apic_nmi, 0);
- count =
- acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_NMI, acpi_parse_lapic_nmi, 0);
+ x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC_NMI,
+ acpi_parse_x2apic_nmi, 0);
+ count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_NMI,
+ acpi_parse_lapic_nmi, 0);
if (count < 0 || x2count < 0) {
printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
/* TBD: Cleanup to allow fallback to MPS */
@@ -901,44 +995,7 @@ static int __init acpi_parse_madt_lapic_entries(void)
#endif /* CONFIG_X86_LOCAL_APIC */
#ifdef CONFIG_X86_IO_APIC
-#define MP_ISA_BUS 0
-
-void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
-{
- int ioapic;
- int pin;
- struct mpc_intsrc mp_irq;
-
- /*
- * Convert 'gsi' to 'ioapic.pin'.
- */
- ioapic = mp_find_ioapic(gsi);
- if (ioapic < 0)
- return;
- pin = mp_find_ioapic_pin(ioapic, gsi);
-
- /*
- * TBD: This check is for faulty timer entries, where the override
- * erroneously sets the trigger to level, resulting in a HUGE
- * increase of timer interrupts!
- */
- if ((bus_irq == 0) && (trigger == 3))
- trigger = 1;
-
- mp_irq.type = MP_INTSRC;
- mp_irq.irqtype = mp_INT;
- mp_irq.irqflag = (trigger << 2) | polarity;
- mp_irq.srcbus = MP_ISA_BUS;
- mp_irq.srcbusirq = bus_irq; /* IRQ */
- mp_irq.dstapic = mpc_ioapic_id(ioapic); /* APIC ID */
- mp_irq.dstirq = pin; /* INTIN# */
-
- mp_save_irq(&mp_irq);
-
- isa_irq_to_gsi[bus_irq] = gsi;
-}
-
-void __init mp_config_acpi_legacy_irqs(void)
+static void __init mp_config_acpi_legacy_irqs(void)
{
int i;
struct mpc_intsrc mp_irq;
@@ -956,7 +1013,7 @@ void __init mp_config_acpi_legacy_irqs(void)
* Use the default configuration for the IRQs 0-15. Unless
* overridden by (MADT) interrupt source override entries.
*/
- for (i = 0; i < 16; i++) {
+ for (i = 0; i < nr_legacy_irqs(); i++) {
int ioapic, pin;
unsigned int dstapic;
int idx;
@@ -1004,84 +1061,6 @@ void __init mp_config_acpi_legacy_irqs(void)
}
}
-static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger,
- int polarity)
-{
-#ifdef CONFIG_X86_MPPARSE
- struct mpc_intsrc mp_irq;
- struct pci_dev *pdev;
- unsigned char number;
- unsigned int devfn;
- int ioapic;
- u8 pin;
-
- if (!acpi_ioapic)
- return 0;
- if (!dev || !dev_is_pci(dev))
- return 0;
-
- pdev = to_pci_dev(dev);
- number = pdev->bus->number;
- devfn = pdev->devfn;
- pin = pdev->pin;
- /* print the entry should happen on mptable identically */
- mp_irq.type = MP_INTSRC;
- mp_irq.irqtype = mp_INT;
- mp_irq.irqflag = (trigger == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) |
- (polarity == ACPI_ACTIVE_HIGH ? 1 : 3);
- mp_irq.srcbus = number;
- mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3);
- ioapic = mp_find_ioapic(gsi);
- mp_irq.dstapic = mpc_ioapic_id(ioapic);
- mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi);
-
- mp_save_irq(&mp_irq);
-#endif
- return 0;
-}
-
-int mp_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
-{
- int ioapic;
- int ioapic_pin;
- struct io_apic_irq_attr irq_attr;
- int ret;
-
- if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
- return gsi;
-
- /* Don't set up the ACPI SCI because it's already set up */
- if (acpi_gbl_FADT.sci_interrupt == gsi)
- return gsi;
-
- ioapic = mp_find_ioapic(gsi);
- if (ioapic < 0) {
- printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi);
- return gsi;
- }
-
- ioapic_pin = mp_find_ioapic_pin(ioapic, gsi);
-
- if (ioapic_pin > MP_MAX_IOAPIC_PIN) {
- printk(KERN_ERR "Invalid reference to IOAPIC pin "
- "%d-%d\n", mpc_ioapic_id(ioapic),
- ioapic_pin);
- return gsi;
- }
-
- if (enable_update_mptable)
- mp_config_acpi_gsi(dev, gsi, trigger, polarity);
-
- set_io_apic_irq_attr(&irq_attr, ioapic, ioapic_pin,
- trigger == ACPI_EDGE_SENSITIVE ? 0 : 1,
- polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
- ret = io_apic_set_pci_routing(dev, gsi_to_irq(gsi), &irq_attr);
- if (ret < 0)
- gsi = INT_MIN;
-
- return gsi;
-}
-
/*
* Parse IOAPIC related entries in MADT
* returns 0 on success, < 0 on error
@@ -1111,9 +1090,8 @@ static int __init acpi_parse_madt_ioapic_entries(void)
return -ENODEV;
}
- count =
- acpi_table_parse_madt(ACPI_MADT_TYPE_IO_APIC, acpi_parse_ioapic,
- MAX_IO_APICS);
+ count = acpi_table_parse_madt(ACPI_MADT_TYPE_IO_APIC, acpi_parse_ioapic,
+ MAX_IO_APICS);
if (!count) {
printk(KERN_ERR PREFIX "No IOAPIC entries present\n");
return -ENODEV;
@@ -1122,9 +1100,8 @@ static int __init acpi_parse_madt_ioapic_entries(void)
return count;
}
- count =
- acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_parse_int_src_ovr,
- nr_irqs);
+ count = acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE,
+ acpi_parse_int_src_ovr, nr_irqs);
if (count < 0) {
printk(KERN_ERR PREFIX
"Error parsing interrupt source overrides entry\n");
@@ -1143,9 +1120,8 @@ static int __init acpi_parse_madt_ioapic_entries(void)
/* Fill in identity legacy mappings where no override */
mp_config_acpi_legacy_irqs();
- count =
- acpi_table_parse_madt(ACPI_MADT_TYPE_NMI_SOURCE, acpi_parse_nmi_src,
- nr_irqs);
+ count = acpi_table_parse_madt(ACPI_MADT_TYPE_NMI_SOURCE,
+ acpi_parse_nmi_src, nr_irqs);
if (count < 0) {
printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
/* TBD: Cleanup to allow fallback to MPS */
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index ad28db7e6bde..6b35d308688c 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -67,7 +67,7 @@ EXPORT_SYMBOL_GPL(boot_cpu_physical_apicid);
/*
* The highest APIC ID seen during enumeration.
*/
-unsigned int max_physical_apicid;
+static unsigned int max_physical_apicid;
/*
* Bitmask of physically existing CPUs:
@@ -2451,51 +2451,6 @@ static void apic_pm_activate(void) { }
#ifdef CONFIG_X86_64
-static int apic_cluster_num(void)
-{
- int i, clusters, zeros;
- unsigned id;
- u16 *bios_cpu_apicid;
- DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS);
-
- bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
- bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
-
- for (i = 0; i < nr_cpu_ids; i++) {
- /* are we being called early in kernel startup? */
- if (bios_cpu_apicid) {
- id = bios_cpu_apicid[i];
- } else if (i < nr_cpu_ids) {
- if (cpu_present(i))
- id = per_cpu(x86_bios_cpu_apicid, i);
- else
- continue;
- } else
- break;
-
- if (id != BAD_APICID)
- __set_bit(APIC_CLUSTERID(id), clustermap);
- }
-
- /* Problem: Partially populated chassis may not have CPUs in some of
- * the APIC clusters they have been allocated. Only present CPUs have
- * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap.
- * Since clusters are allocated sequentially, count zeros only if
- * they are bounded by ones.
- */
- clusters = 0;
- zeros = 0;
- for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
- if (test_bit(i, clustermap)) {
- clusters += 1 + zeros;
- zeros = 0;
- } else
- ++zeros;
- }
-
- return clusters;
-}
-
static int multi_checked;
static int multi;
@@ -2540,20 +2495,7 @@ static void dmi_check_multi(void)
int apic_is_clustered_box(void)
{
dmi_check_multi();
- if (multi)
- return 1;
-
- if (!is_vsmp_box())
- return 0;
-
- /*
- * ScaleMP vSMPowered boxes have one cluster per board and TSCs are
- * not guaranteed to be synced between boards
- */
- if (apic_cluster_num() > 1)
- return 1;
-
- return 0;
+ return multi;
}
#endif
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 81e08eff05ee..a44dce8cc559 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -31,6 +31,7 @@
#include <linux/acpi.h>
#include <linux/module.h>
#include <linux/syscore_ops.h>
+#include <linux/irqdomain.h>
#include <linux/msi.h>
#include <linux/htirq.h>
#include <linux/freezer.h>
@@ -62,6 +63,16 @@
#define __apicdebuginit(type) static type __init
+#define for_each_ioapic(idx) \
+ for ((idx) = 0; (idx) < nr_ioapics; (idx)++)
+#define for_each_ioapic_reverse(idx) \
+ for ((idx) = nr_ioapics - 1; (idx) >= 0; (idx)--)
+#define for_each_pin(idx, pin) \
+ for ((pin) = 0; (pin) < ioapics[(idx)].nr_registers; (pin)++)
+#define for_each_ioapic_pin(idx, pin) \
+ for_each_ioapic((idx)) \
+ for_each_pin((idx), (pin))
+
#define for_each_irq_pin(entry, head) \
for (entry = head; entry; entry = entry->next)
@@ -73,6 +84,17 @@ int sis_apic_bug = -1;
static DEFINE_RAW_SPINLOCK(ioapic_lock);
static DEFINE_RAW_SPINLOCK(vector_lock);
+static DEFINE_MUTEX(ioapic_mutex);
+static unsigned int ioapic_dynirq_base;
+static int ioapic_initialized;
+
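+/* Per-pin state used to map IOAPIC pins to Linux IRQ numbers. */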
+struct mp_pin_info {
+ int trigger;
+ int polarity;
+ int node;
+ int set;
+ u32 count;
+};
static struct ioapic {
/*
@@ -87,7 +109,9 @@ static struct ioapic {
struct mpc_ioapic mp_config;
/* IO APIC gsi routing info */
struct mp_ioapic_gsi gsi_config;
- DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
+ struct ioapic_domain_cfg irqdomain_cfg;
+ struct irq_domain *irqdomain;
+ struct mp_pin_info *pin_info;
} ioapics[MAX_IO_APICS];
#define mpc_ioapic_ver(ioapic_idx) ioapics[ioapic_idx].mp_config.apicver
@@ -107,6 +131,41 @@ struct mp_ioapic_gsi *mp_ioapic_gsi_routing(int ioapic_idx)
return &ioapics[ioapic_idx].gsi_config;
}
+static inline int mp_ioapic_pin_count(int ioapic)
+{
+ struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(ioapic);
+
+ return gsi_cfg->gsi_end - gsi_cfg->gsi_base + 1;
+}
+
+u32 mp_pin_to_gsi(int ioapic, int pin)
+{
+ return mp_ioapic_gsi_routing(ioapic)->gsi_base + pin;
+}
+
+/*
+ * Initialize all legacy IRQs and all pins on the first IOAPIC
+ * if we have a legacy interrupt controller. The kernel boot option "pirq="
+ * may rely on non-legacy pins on the first IOAPIC.
+ */
+static inline int mp_init_irq_at_boot(int ioapic, int irq)
+{
+ if (!nr_legacy_irqs())
+ return 0;
+
+ return ioapic == 0 || (irq >= 0 && irq < nr_legacy_irqs());
+}
+
+static inline struct mp_pin_info *mp_pin_info(int ioapic_idx, int pin)
+{
+ return ioapics[ioapic_idx].pin_info + pin;
+}
+
+static inline struct irq_domain *mp_ioapic_irqdomain(int ioapic)
+{
+ return ioapics[ioapic].irqdomain;
+}
+
int nr_ioapics;
/* The one past the highest gsi number used */
@@ -118,9 +177,6 @@ struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
/* # of MP IRQ source entries */
int mp_irq_entries;
-/* GSI interrupts */
-static int nr_irqs_gsi = NR_IRQS_LEGACY;
-
#ifdef CONFIG_EISA
int mp_bus_id_to_type[MAX_MP_BUSSES];
#endif
@@ -149,8 +205,7 @@ static int __init parse_noapic(char *str)
}
early_param("noapic", parse_noapic);
-static int io_apic_setup_irq_pin(unsigned int irq, int node,
- struct io_apic_irq_attr *attr);
+static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node);
/* Will be called in mpparse/acpi/sfi codes for saving IRQ info */
void mp_save_irq(struct mpc_intsrc *m)
@@ -182,19 +237,15 @@ static struct irq_pin_list *alloc_irq_pin_list(int node)
return kzalloc_node(sizeof(struct irq_pin_list), GFP_KERNEL, node);
}
-
-/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
-static struct irq_cfg irq_cfgx[NR_IRQS_LEGACY];
-
int __init arch_early_irq_init(void)
{
struct irq_cfg *cfg;
- int count, node, i;
+ int i, node = cpu_to_node(0);
- if (!legacy_pic->nr_legacy_irqs)
+ if (!nr_legacy_irqs())
io_apic_irqs = ~0UL;
- for (i = 0; i < nr_ioapics; i++) {
+ for_each_ioapic(i) {
ioapics[i].saved_registers =
kzalloc(sizeof(struct IO_APIC_route_entry) *
ioapics[i].nr_registers, GFP_KERNEL);
@@ -202,28 +253,20 @@ int __init arch_early_irq_init(void)
pr_err("IOAPIC %d: suspend/resume impossible!\n", i);
}
- cfg = irq_cfgx;
- count = ARRAY_SIZE(irq_cfgx);
- node = cpu_to_node(0);
-
- for (i = 0; i < count; i++) {
- irq_set_chip_data(i, &cfg[i]);
- zalloc_cpumask_var_node(&cfg[i].domain, GFP_KERNEL, node);
- zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_KERNEL, node);
- /*
- * For legacy IRQ's, start with assigning irq0 to irq15 to
- * IRQ0_VECTOR to IRQ15_VECTOR for all cpu's.
- */
- if (i < legacy_pic->nr_legacy_irqs) {
- cfg[i].vector = IRQ0_VECTOR + i;
- cpumask_setall(cfg[i].domain);
- }
+ /*
+	 * For legacy IRQs, start by assigning irq0 to irq15 to
+	 * IRQ0_VECTOR to IRQ15_VECTOR on all CPUs.
+ */
+ for (i = 0; i < nr_legacy_irqs(); i++) {
+ cfg = alloc_irq_and_cfg_at(i, node);
+ cfg->vector = IRQ0_VECTOR + i;
+ cpumask_setall(cfg->domain);
}
return 0;
}
-static struct irq_cfg *irq_cfg(unsigned int irq)
+static inline struct irq_cfg *irq_cfg(unsigned int irq)
{
return irq_get_chip_data(irq);
}
@@ -265,7 +308,7 @@ static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node)
if (res < 0) {
if (res != -EEXIST)
return NULL;
- cfg = irq_get_chip_data(at);
+ cfg = irq_cfg(at);
if (cfg)
return cfg;
}
@@ -425,6 +468,21 @@ static int __add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pi
return 0;
}
+static void __remove_pin_from_irq(struct irq_cfg *cfg, int apic, int pin)
+{
+ struct irq_pin_list **last, *entry;
+
+ last = &cfg->irq_2_pin;
+ for_each_irq_pin(entry, cfg->irq_2_pin)
+ if (entry->apic == apic && entry->pin == pin) {
+ *last = entry->next;
+ kfree(entry);
+ return;
+ } else {
+ last = &entry->next;
+ }
+}
+
static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
{
if (__add_pin_to_irq_node(cfg, node, apic, pin))
@@ -627,9 +685,8 @@ static void clear_IO_APIC (void)
{
int apic, pin;
- for (apic = 0; apic < nr_ioapics; apic++)
- for (pin = 0; pin < ioapics[apic].nr_registers; pin++)
- clear_IO_APIC_pin(apic, pin);
+ for_each_ioapic_pin(apic, pin)
+ clear_IO_APIC_pin(apic, pin);
}
#ifdef CONFIG_X86_32
@@ -678,13 +735,13 @@ int save_ioapic_entries(void)
int apic, pin;
int err = 0;
- for (apic = 0; apic < nr_ioapics; apic++) {
+ for_each_ioapic(apic) {
if (!ioapics[apic].saved_registers) {
err = -ENOMEM;
continue;
}
- for (pin = 0; pin < ioapics[apic].nr_registers; pin++)
+ for_each_pin(apic, pin)
ioapics[apic].saved_registers[pin] =
ioapic_read_entry(apic, pin);
}
@@ -699,11 +756,11 @@ void mask_ioapic_entries(void)
{
int apic, pin;
- for (apic = 0; apic < nr_ioapics; apic++) {
+ for_each_ioapic(apic) {
if (!ioapics[apic].saved_registers)
continue;
- for (pin = 0; pin < ioapics[apic].nr_registers; pin++) {
+ for_each_pin(apic, pin) {
struct IO_APIC_route_entry entry;
entry = ioapics[apic].saved_registers[pin];
@@ -722,11 +779,11 @@ int restore_ioapic_entries(void)
{
int apic, pin;
- for (apic = 0; apic < nr_ioapics; apic++) {
+ for_each_ioapic(apic) {
if (!ioapics[apic].saved_registers)
continue;
- for (pin = 0; pin < ioapics[apic].nr_registers; pin++)
+ for_each_pin(apic, pin)
ioapic_write_entry(apic, pin,
ioapics[apic].saved_registers[pin]);
}
@@ -785,7 +842,7 @@ static int __init find_isa_irq_apic(int irq, int type)
if (i < mp_irq_entries) {
int ioapic_idx;
- for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
+ for_each_ioapic(ioapic_idx)
if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic)
return ioapic_idx;
}
@@ -799,7 +856,7 @@ static int __init find_isa_irq_apic(int irq, int type)
*/
static int EISA_ELCR(unsigned int irq)
{
- if (irq < legacy_pic->nr_legacy_irqs) {
+ if (irq < nr_legacy_irqs()) {
unsigned int port = 0x4d0 + (irq >> 3);
return (inb(port) >> (irq & 7)) & 1;
}
@@ -939,29 +996,101 @@ static int irq_trigger(int idx)
return trigger;
}
-static int pin_2_irq(int idx, int apic, int pin)
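+/*
+ * Allocate a Linux IRQ for an IOAPIC pin according to the irqdomain type:
+ * LEGACY maps GSIs above the ISA range 1:1 and allocates dynamically within
+ * it, STRICT always uses the GSI number, DYNAMIC lets the core pick an IRQ.
+ */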
+static int alloc_irq_from_domain(struct irq_domain *domain, u32 gsi, int pin)
+{
+ int irq = -1;
+ int ioapic = (int)(long)domain->host_data;
+ int type = ioapics[ioapic].irqdomain_cfg.type;
+
+ switch (type) {
+ case IOAPIC_DOMAIN_LEGACY:
+ /*
+		 * Dynamically allocate an IRQ number for non-ISA IRQs in the
+		 * first 16 GSIs on some weird platforms.
+ */
+ if (gsi < nr_legacy_irqs())
+ irq = irq_create_mapping(domain, pin);
+ else if (irq_create_strict_mappings(domain, gsi, pin, 1) == 0)
+ irq = gsi;
+ break;
+ case IOAPIC_DOMAIN_STRICT:
+ if (irq_create_strict_mappings(domain, gsi, pin, 1) == 0)
+ irq = gsi;
+ break;
+ case IOAPIC_DOMAIN_DYNAMIC:
+ irq = irq_create_mapping(domain, pin);
+ break;
+ default:
+ WARN(1, "ioapic: unknown irqdomain type %d\n", type);
+ break;
+ }
+
+ return irq > 0 ? irq : -1;
+}
+
+static int mp_map_pin_to_irq(u32 gsi, int idx, int ioapic, int pin,
+ unsigned int flags)
{
int irq;
- int bus = mp_irqs[idx].srcbus;
- struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(apic);
+ struct irq_domain *domain = mp_ioapic_irqdomain(ioapic);
+ struct mp_pin_info *info = mp_pin_info(ioapic, pin);
+
+ if (!domain)
+ return -1;
+
+ mutex_lock(&ioapic_mutex);
/*
- * Debugging check, we are in big trouble if this message pops up!
+ * Don't use irqdomain to manage ISA IRQs because there may be
+ * multiple IOAPIC pins sharing the same ISA IRQ number and
+ * irqdomain only supports 1:1 mapping between IOAPIC pin and
+ * IRQ number. A typical IOAPIC has 24 pins, pin 0-15 are used
+ * for legacy IRQs and pin 16-23 are used for PCI IRQs (PIRQ A-H).
+ * When ACPI is disabled, only legacy IRQ numbers (IRQ0-15) are
+ * available, and some BIOSes may use MP Interrupt Source records
+ * to override IRQ numbers for PIRQs instead of reprogramming
+ * the interrupt routing logic. Thus there may be multiple pins
+ * sharing the same legacy IRQ number when ACPI is disabled.
*/
- if (mp_irqs[idx].dstirq != pin)
- pr_err("broken BIOS or MPTABLE parser, ayiee!!\n");
-
- if (test_bit(bus, mp_bus_not_pci)) {
+ if (idx >= 0 && test_bit(mp_irqs[idx].srcbus, mp_bus_not_pci)) {
irq = mp_irqs[idx].srcbusirq;
+ if (flags & IOAPIC_MAP_ALLOC) {
+ if (info->count == 0 &&
+ mp_irqdomain_map(domain, irq, pin) != 0)
+ irq = -1;
+
+ /* special handling for timer IRQ0 */
+ if (irq == 0)
+ info->count++;
+ }
} else {
- u32 gsi = gsi_cfg->gsi_base + pin;
+ irq = irq_find_mapping(domain, pin);
+ if (irq <= 0 && (flags & IOAPIC_MAP_ALLOC))
+ irq = alloc_irq_from_domain(domain, gsi, pin);
+ }
- if (gsi >= NR_IRQS_LEGACY)
- irq = gsi;
- else
- irq = gsi_top + gsi;
+ if (flags & IOAPIC_MAP_ALLOC) {
+ if (irq > 0)
+ info->count++;
+ else if (info->count == 0)
+ info->set = 0;
}
+ mutex_unlock(&ioapic_mutex);
+
+ return irq > 0 ? irq : -1;
+}
+
+static int pin_2_irq(int idx, int ioapic, int pin, unsigned int flags)
+{
+ u32 gsi = mp_pin_to_gsi(ioapic, pin);
+
+ /*
+	 * Debugging check: we are in big trouble if this message pops up!
+ */
+ if (mp_irqs[idx].dstirq != pin)
+ pr_err("broken BIOS or MPTABLE parser, ayiee!!\n");
+
#ifdef CONFIG_X86_32
/*
* PCI IRQ command line redirection. Yes, limits are hardcoded.
@@ -972,16 +1101,58 @@ static int pin_2_irq(int idx, int apic, int pin)
apic_printk(APIC_VERBOSE, KERN_DEBUG
"disabling PIRQ%d\n", pin-16);
} else {
- irq = pirq_entries[pin-16];
+ int irq = pirq_entries[pin-16];
apic_printk(APIC_VERBOSE, KERN_DEBUG
"using PIRQ%d -> IRQ %d\n",
pin-16, irq);
+ return irq;
}
}
}
#endif
- return irq;
+ return mp_map_pin_to_irq(gsi, idx, ioapic, pin, flags);
+}
+
+int mp_map_gsi_to_irq(u32 gsi, unsigned int flags)
+{
+ int ioapic, pin, idx;
+
+ ioapic = mp_find_ioapic(gsi);
+ if (ioapic < 0)
+ return -1;
+
+ pin = mp_find_ioapic_pin(ioapic, gsi);
+ idx = find_irq_entry(ioapic, pin, mp_INT);
+ if ((flags & IOAPIC_MAP_CHECK) && idx < 0)
+ return -1;
+
+ return mp_map_pin_to_irq(gsi, idx, ioapic, pin, flags);
+}
+
+void mp_unmap_irq(int irq)
+{
+ struct irq_data *data = irq_get_irq_data(irq);
+ struct mp_pin_info *info;
+ int ioapic, pin;
+
+ if (!data || !data->domain)
+ return;
+
+ ioapic = (int)(long)data->domain->host_data;
+ pin = (int)data->hwirq;
+ info = mp_pin_info(ioapic, pin);
+
+ mutex_lock(&ioapic_mutex);
+ if (--info->count == 0) {
+ info->set = 0;
+ if (irq < nr_legacy_irqs() &&
+ ioapics[ioapic].irqdomain_cfg.type == IOAPIC_DOMAIN_LEGACY)
+ mp_irqdomain_unmap(data->domain, irq);
+ else
+ irq_dispose_mapping(irq);
+ }
+ mutex_unlock(&ioapic_mutex);
}
/*
@@ -991,7 +1162,7 @@ static int pin_2_irq(int idx, int apic, int pin)
int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin,
struct io_apic_irq_attr *irq_attr)
{
- int ioapic_idx, i, best_guess = -1;
+ int irq, i, best_ioapic = -1, best_idx = -1;
apic_printk(APIC_DEBUG,
"querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
@@ -1001,44 +1172,56 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin,
"PCI BIOS passed nonexistent PCI bus %d!\n", bus);
return -1;
}
+
for (i = 0; i < mp_irq_entries; i++) {
int lbus = mp_irqs[i].srcbus;
+ int ioapic_idx, found = 0;
- for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
+ if (bus != lbus || mp_irqs[i].irqtype != mp_INT ||
+ slot != ((mp_irqs[i].srcbusirq >> 2) & 0x1f))
+ continue;
+
+ for_each_ioapic(ioapic_idx)
if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic ||
- mp_irqs[i].dstapic == MP_APIC_ALL)
+ mp_irqs[i].dstapic == MP_APIC_ALL) {
+ found = 1;
break;
+ }
+ if (!found)
+ continue;
- if (!test_bit(lbus, mp_bus_not_pci) &&
- !mp_irqs[i].irqtype &&
- (bus == lbus) &&
- (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
- int irq = pin_2_irq(i, ioapic_idx, mp_irqs[i].dstirq);
+ /* Skip ISA IRQs */
+ irq = pin_2_irq(i, ioapic_idx, mp_irqs[i].dstirq, 0);
+ if (irq > 0 && !IO_APIC_IRQ(irq))
+ continue;
- if (!(ioapic_idx || IO_APIC_IRQ(irq)))
- continue;
+ if (pin == (mp_irqs[i].srcbusirq & 3)) {
+ best_idx = i;
+ best_ioapic = ioapic_idx;
+ goto out;
+ }
- if (pin == (mp_irqs[i].srcbusirq & 3)) {
- set_io_apic_irq_attr(irq_attr, ioapic_idx,
- mp_irqs[i].dstirq,
- irq_trigger(i),
- irq_polarity(i));
- return irq;
- }
- /*
- * Use the first all-but-pin matching entry as a
- * best-guess fuzzy result for broken mptables.
- */
- if (best_guess < 0) {
- set_io_apic_irq_attr(irq_attr, ioapic_idx,
- mp_irqs[i].dstirq,
- irq_trigger(i),
- irq_polarity(i));
- best_guess = irq;
- }
+ /*
+ * Use the first all-but-pin matching entry as a
+ * best-guess fuzzy result for broken mptables.
+ */
+ if (best_idx < 0) {
+ best_idx = i;
+ best_ioapic = ioapic_idx;
}
}
- return best_guess;
+ if (best_idx < 0)
+ return -1;
+
+out:
+ irq = pin_2_irq(best_idx, best_ioapic, mp_irqs[best_idx].dstirq,
+ IOAPIC_MAP_ALLOC);
+ if (irq > 0)
+ set_io_apic_irq_attr(irq_attr, best_ioapic,
+ mp_irqs[best_idx].dstirq,
+ irq_trigger(best_idx),
+ irq_polarity(best_idx));
+ return irq;
}
EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
@@ -1198,7 +1381,7 @@ void __setup_vector_irq(int cpu)
raw_spin_lock(&vector_lock);
/* Mark the inuse vectors */
for_each_active_irq(irq) {
- cfg = irq_get_chip_data(irq);
+ cfg = irq_cfg(irq);
if (!cfg)
continue;
@@ -1227,12 +1410,10 @@ static inline int IO_APIC_irq_trigger(int irq)
{
int apic, idx, pin;
- for (apic = 0; apic < nr_ioapics; apic++) {
- for (pin = 0; pin < ioapics[apic].nr_registers; pin++) {
- idx = find_irq_entry(apic, pin, mp_INT);
- if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
- return irq_trigger(idx);
- }
+ for_each_ioapic_pin(apic, pin) {
+ idx = find_irq_entry(apic, pin, mp_INT);
+ if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin, 0)))
+ return irq_trigger(idx);
}
/*
* nonexistent IRQs are edge default
@@ -1330,95 +1511,29 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg,
}
ioapic_register_intr(irq, cfg, attr->trigger);
- if (irq < legacy_pic->nr_legacy_irqs)
+ if (irq < nr_legacy_irqs())
legacy_pic->mask(irq);
ioapic_write_entry(attr->ioapic, attr->ioapic_pin, entry);
}
-static bool __init io_apic_pin_not_connected(int idx, int ioapic_idx, int pin)
-{
- if (idx != -1)
- return false;
-
- apic_printk(APIC_VERBOSE, KERN_DEBUG " apic %d pin %d not connected\n",
- mpc_ioapic_id(ioapic_idx), pin);
- return true;
-}
-
-static void __init __io_apic_setup_irqs(unsigned int ioapic_idx)
-{
- int idx, node = cpu_to_node(0);
- struct io_apic_irq_attr attr;
- unsigned int pin, irq;
-
- for (pin = 0; pin < ioapics[ioapic_idx].nr_registers; pin++) {
- idx = find_irq_entry(ioapic_idx, pin, mp_INT);
- if (io_apic_pin_not_connected(idx, ioapic_idx, pin))
- continue;
-
- irq = pin_2_irq(idx, ioapic_idx, pin);
-
- if ((ioapic_idx > 0) && (irq > 16))
- continue;
-
- /*
- * Skip the timer IRQ if there's a quirk handler
- * installed and if it returns 1:
- */
- if (apic->multi_timer_check &&
- apic->multi_timer_check(ioapic_idx, irq))
- continue;
-
- set_io_apic_irq_attr(&attr, ioapic_idx, pin, irq_trigger(idx),
- irq_polarity(idx));
-
- io_apic_setup_irq_pin(irq, node, &attr);
- }
-}
-
static void __init setup_IO_APIC_irqs(void)
{
- unsigned int ioapic_idx;
+ unsigned int ioapic, pin;
+ int idx;
apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
- for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
- __io_apic_setup_irqs(ioapic_idx);
-}
-
-/*
- * for the gsit that is not in first ioapic
- * but could not use acpi_register_gsi()
- * like some special sci in IBM x3330
- */
-void setup_IO_APIC_irq_extra(u32 gsi)
-{
- int ioapic_idx = 0, pin, idx, irq, node = cpu_to_node(0);
- struct io_apic_irq_attr attr;
-
- /*
- * Convert 'gsi' to 'ioapic.pin'.
- */
- ioapic_idx = mp_find_ioapic(gsi);
- if (ioapic_idx < 0)
- return;
-
- pin = mp_find_ioapic_pin(ioapic_idx, gsi);
- idx = find_irq_entry(ioapic_idx, pin, mp_INT);
- if (idx == -1)
- return;
-
- irq = pin_2_irq(idx, ioapic_idx, pin);
-
- /* Only handle the non legacy irqs on secondary ioapics */
- if (ioapic_idx == 0 || irq < NR_IRQS_LEGACY)
- return;
-
- set_io_apic_irq_attr(&attr, ioapic_idx, pin, irq_trigger(idx),
- irq_polarity(idx));
-
- io_apic_setup_irq_pin_once(irq, node, &attr);
+ for_each_ioapic_pin(ioapic, pin) {
+ idx = find_irq_entry(ioapic, pin, mp_INT);
+ if (idx < 0)
+ apic_printk(APIC_VERBOSE,
+ KERN_DEBUG " apic %d pin %d not connected\n",
+ mpc_ioapic_id(ioapic), pin);
+ else
+ pin_2_irq(idx, ioapic, pin,
+ ioapic ? 0 : IOAPIC_MAP_ALLOC);
+ }
}
/*
@@ -1586,7 +1701,7 @@ __apicdebuginit(void) print_IO_APICs(void)
struct irq_chip *chip;
printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
- for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
+ for_each_ioapic(ioapic_idx)
printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
mpc_ioapic_id(ioapic_idx),
ioapics[ioapic_idx].nr_registers);
@@ -1597,7 +1712,7 @@ __apicdebuginit(void) print_IO_APICs(void)
*/
printk(KERN_INFO "testing the IO APIC.......................\n");
- for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
+ for_each_ioapic(ioapic_idx)
print_IO_APIC(ioapic_idx);
printk(KERN_DEBUG "IRQ to pin mappings:\n");
@@ -1608,7 +1723,7 @@ __apicdebuginit(void) print_IO_APICs(void)
if (chip != &ioapic_chip)
continue;
- cfg = irq_get_chip_data(irq);
+ cfg = irq_cfg(irq);
if (!cfg)
continue;
entry = cfg->irq_2_pin;
@@ -1758,7 +1873,7 @@ __apicdebuginit(void) print_PIC(void)
unsigned int v;
unsigned long flags;
- if (!legacy_pic->nr_legacy_irqs)
+ if (!nr_legacy_irqs())
return;
printk(KERN_DEBUG "\nprinting PIC contents\n");
@@ -1828,26 +1943,22 @@ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
void __init enable_IO_APIC(void)
{
int i8259_apic, i8259_pin;
- int apic;
+ int apic, pin;
- if (!legacy_pic->nr_legacy_irqs)
+ if (!nr_legacy_irqs())
return;
- for(apic = 0; apic < nr_ioapics; apic++) {
- int pin;
+ for_each_ioapic_pin(apic, pin) {
/* See if any of the pins is in ExtINT mode */
- for (pin = 0; pin < ioapics[apic].nr_registers; pin++) {
- struct IO_APIC_route_entry entry;
- entry = ioapic_read_entry(apic, pin);
+ struct IO_APIC_route_entry entry = ioapic_read_entry(apic, pin);
- /* If the interrupt line is enabled and in ExtInt mode
- * I have found the pin where the i8259 is connected.
- */
- if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) {
- ioapic_i8259.apic = apic;
- ioapic_i8259.pin = pin;
- goto found_i8259;
- }
+ /* If the interrupt line is enabled and in ExtInt mode
+ * I have found the pin where the i8259 is connected.
+ */
+ if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) {
+ ioapic_i8259.apic = apic;
+ ioapic_i8259.pin = pin;
+ goto found_i8259;
}
}
found_i8259:
@@ -1919,7 +2030,7 @@ void disable_IO_APIC(void)
*/
clear_IO_APIC();
- if (!legacy_pic->nr_legacy_irqs)
+ if (!nr_legacy_irqs())
return;
x86_io_apic_ops.disable();
@@ -1950,7 +2061,7 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void)
/*
* Set the IOAPIC ID to the value stored in the MPC table.
*/
- for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) {
+ for_each_ioapic(ioapic_idx) {
/* Read the register 0 value */
raw_spin_lock_irqsave(&ioapic_lock, flags);
reg_00.raw = io_apic_read(ioapic_idx, 0);
@@ -2123,7 +2234,7 @@ static unsigned int startup_ioapic_irq(struct irq_data *data)
unsigned long flags;
raw_spin_lock_irqsave(&ioapic_lock, flags);
- if (irq < legacy_pic->nr_legacy_irqs) {
+ if (irq < nr_legacy_irqs()) {
legacy_pic->mask(irq);
if (legacy_pic->irq_pending(irq))
was_pending = 1;
@@ -2225,7 +2336,7 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
goto unlock;
}
- __this_cpu_write(vector_irq[vector], -1);
+ __this_cpu_write(vector_irq[vector], VECTOR_UNDEFINED);
unlock:
raw_spin_unlock(&desc->lock);
}
@@ -2253,7 +2364,7 @@ static void irq_complete_move(struct irq_cfg *cfg)
void irq_force_complete_move(int irq)
{
- struct irq_cfg *cfg = irq_get_chip_data(irq);
+ struct irq_cfg *cfg = irq_cfg(irq);
if (!cfg)
return;
@@ -2514,26 +2625,15 @@ static inline void init_IO_APIC_traps(void)
struct irq_cfg *cfg;
unsigned int irq;
- /*
- * NOTE! The local APIC isn't very good at handling
- * multiple interrupts at the same interrupt level.
- * As the interrupt level is determined by taking the
- * vector number and shifting that right by 4, we
- * want to spread these out a bit so that they don't
- * all fall in the same interrupt level.
- *
- * Also, we've got to be careful not to trash gate
- * 0x80, because int 0x80 is hm, kind of importantish. ;)
- */
for_each_active_irq(irq) {
- cfg = irq_get_chip_data(irq);
+ cfg = irq_cfg(irq);
if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
/*
* Hmm.. We don't have an entry for this,
* so default to an old-fashioned 8259
* interrupt if we can..
*/
- if (irq < legacy_pic->nr_legacy_irqs)
+ if (irq < nr_legacy_irqs())
legacy_pic->make_irq(irq);
else
/* Strange. Oh, well.. */
@@ -2649,8 +2749,6 @@ static int __init disable_timer_pin_setup(char *arg)
}
early_param("disable_timer_pin_1", disable_timer_pin_setup);
-int timer_through_8259 __initdata;
-
/*
* This code may look a bit paranoid, but it's supposed to cooperate with
* a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
@@ -2661,7 +2759,7 @@ int timer_through_8259 __initdata;
*/
static inline void __init check_timer(void)
{
- struct irq_cfg *cfg = irq_get_chip_data(0);
+ struct irq_cfg *cfg = irq_cfg(0);
int node = cpu_to_node(0);
int apic1, pin1, apic2, pin2;
unsigned long flags;
@@ -2755,7 +2853,6 @@ static inline void __init check_timer(void)
legacy_pic->unmask(0);
if (timer_irq_works()) {
apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
- timer_through_8259 = 1;
goto out;
}
/*
@@ -2827,15 +2924,54 @@ out:
*/
#define PIC_IRQS (1UL << PIC_CASCADE_IR)
+static int mp_irqdomain_create(int ioapic)
+{
+ size_t size;
+ int hwirqs = mp_ioapic_pin_count(ioapic);
+ struct ioapic *ip = &ioapics[ioapic];
+ struct ioapic_domain_cfg *cfg = &ip->irqdomain_cfg;
+ struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(ioapic);
+
+ size = sizeof(struct mp_pin_info) * mp_ioapic_pin_count(ioapic);
+ ip->pin_info = kzalloc(size, GFP_KERNEL);
+ if (!ip->pin_info)
+ return -ENOMEM;
+
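+	/* No irqdomain was requested for this IO-APIC; keep only pin_info. */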
+ if (cfg->type == IOAPIC_DOMAIN_INVALID)
+ return 0;
+
+ ip->irqdomain = irq_domain_add_linear(cfg->dev, hwirqs, cfg->ops,
+ (void *)(long)ioapic);
+	if (!ip->irqdomain) {
+ kfree(ip->pin_info);
+ ip->pin_info = NULL;
+ return -ENOMEM;
+ }
+
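+	/*
+	 * Legacy and strict domains keep a 1:1 mapping for their GSIs, so
+	 * dynamically allocated IRQ numbers must start above them.
+	 */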
+ if (cfg->type == IOAPIC_DOMAIN_LEGACY ||
+ cfg->type == IOAPIC_DOMAIN_STRICT)
+ ioapic_dynirq_base = max(ioapic_dynirq_base,
+ gsi_cfg->gsi_end + 1);
+
+ if (gsi_cfg->gsi_base == 0)
+ irq_set_default_host(ip->irqdomain);
+
+ return 0;
+}
+
void __init setup_IO_APIC(void)
{
+ int ioapic;
/*
* calling enable_IO_APIC() is moved to setup_local_APIC for BP
*/
- io_apic_irqs = legacy_pic->nr_legacy_irqs ? ~PIC_IRQS : ~0UL;
+ io_apic_irqs = nr_legacy_irqs() ? ~PIC_IRQS : ~0UL;
apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
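+	/* Create an irqdomain for each IO-APIC before any pins are programmed. */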
+ for_each_ioapic(ioapic)
+ BUG_ON(mp_irqdomain_create(ioapic));
+
/*
* Set up IO-APIC IRQ routing.
*/
@@ -2844,8 +2980,10 @@ void __init setup_IO_APIC(void)
sync_Arb_IDs();
setup_IO_APIC_irqs();
init_IO_APIC_traps();
- if (legacy_pic->nr_legacy_irqs)
+ if (nr_legacy_irqs())
check_timer();
+
+ ioapic_initialized = 1;
}
/*
@@ -2880,7 +3018,7 @@ static void ioapic_resume(void)
{
int ioapic_idx;
- for (ioapic_idx = nr_ioapics - 1; ioapic_idx >= 0; ioapic_idx--)
+ for_each_ioapic_reverse(ioapic_idx)
resume_ioapic_id(ioapic_idx);
restore_ioapic_entries();
@@ -2926,7 +3064,7 @@ int arch_setup_hwirq(unsigned int irq, int node)
void arch_teardown_hwirq(unsigned int irq)
{
- struct irq_cfg *cfg = irq_get_chip_data(irq);
+ struct irq_cfg *cfg = irq_cfg(irq);
unsigned long flags;
free_remapped_irq(irq);
@@ -3053,7 +3191,7 @@ int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
if (!irq_offset)
write_msi_msg(irq, &msg);
- setup_remapped_irq(irq, irq_get_chip_data(irq), chip);
+ setup_remapped_irq(irq, irq_cfg(irq), chip);
irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
@@ -3192,7 +3330,7 @@ int default_setup_hpet_msi(unsigned int irq, unsigned int id)
hpet_msi_write(irq_get_handler_data(irq), &msg);
irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
- setup_remapped_irq(irq, irq_get_chip_data(irq), chip);
+ setup_remapped_irq(irq, irq_cfg(irq), chip);
irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
return 0;
@@ -3303,27 +3441,6 @@ io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr)
return ret;
}
-int io_apic_setup_irq_pin_once(unsigned int irq, int node,
- struct io_apic_irq_attr *attr)
-{
- unsigned int ioapic_idx = attr->ioapic, pin = attr->ioapic_pin;
- int ret;
- struct IO_APIC_route_entry orig_entry;
-
- /* Avoid redundant programming */
- if (test_bit(pin, ioapics[ioapic_idx].pin_programmed)) {
- pr_debug("Pin %d-%d already programmed\n", mpc_ioapic_id(ioapic_idx), pin);
- orig_entry = ioapic_read_entry(attr->ioapic, pin);
- if (attr->trigger == orig_entry.trigger && attr->polarity == orig_entry.polarity)
- return 0;
- return -EBUSY;
- }
- ret = io_apic_setup_irq_pin(irq, node, attr);
- if (!ret)
- set_bit(pin, ioapics[ioapic_idx].pin_programmed);
- return ret;
-}
-
static int __init io_apic_get_redir_entries(int ioapic)
{
union IO_APIC_reg_01 reg_01;
@@ -3340,20 +3457,13 @@ static int __init io_apic_get_redir_entries(int ioapic)
return reg_01.bits.entries + 1;
}
-static void __init probe_nr_irqs_gsi(void)
-{
- int nr;
-
- nr = gsi_top + NR_IRQS_LEGACY;
- if (nr > nr_irqs_gsi)
- nr_irqs_gsi = nr;
-
- printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi);
-}
-
unsigned int arch_dynirq_lower_bound(unsigned int from)
{
- return from < nr_irqs_gsi ? nr_irqs_gsi : from;
+ /*
+ * dmar_alloc_hwirq() may be called before setup_IO_APIC(), so use
+ * gsi_top if ioapic_dynirq_base hasn't been initialized yet.
+ */
+ return ioapic_initialized ? ioapic_dynirq_base : gsi_top;
}
int __init arch_probe_nr_irqs(void)
@@ -3363,33 +3473,17 @@ int __init arch_probe_nr_irqs(void)
if (nr_irqs > (NR_VECTORS * nr_cpu_ids))
nr_irqs = NR_VECTORS * nr_cpu_ids;
- nr = nr_irqs_gsi + 8 * nr_cpu_ids;
+ nr = (gsi_top + nr_legacy_irqs()) + 8 * nr_cpu_ids;
#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ)
/*
* for MSI and HT dyn irq
*/
- nr += nr_irqs_gsi * 16;
+ nr += gsi_top * 16;
#endif
if (nr < nr_irqs)
nr_irqs = nr;
- return NR_IRQS_LEGACY;
-}
-
-int io_apic_set_pci_routing(struct device *dev, int irq,
- struct io_apic_irq_attr *irq_attr)
-{
- int node;
-
- if (!IO_APIC_IRQ(irq)) {
- apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
- irq_attr->ioapic);
- return -EINVAL;
- }
-
- node = dev ? dev_to_node(dev) : cpu_to_node(0);
-
- return io_apic_setup_irq_pin_once(irq, node, irq_attr);
+ return 0;
}
#ifdef CONFIG_X86_32
@@ -3483,9 +3577,8 @@ static u8 __init io_apic_unique_id(u8 id)
DECLARE_BITMAP(used, 256);
bitmap_zero(used, 256);
- for (i = 0; i < nr_ioapics; i++) {
+ for_each_ioapic(i)
__set_bit(mpc_ioapic_id(i), used);
- }
if (!test_bit(id, used))
return id;
return find_first_zero_bit(used, 256);
@@ -3543,14 +3636,13 @@ void __init setup_ioapic_dest(void)
if (skip_ioapic_setup == 1)
return;
- for (ioapic = 0; ioapic < nr_ioapics; ioapic++)
- for (pin = 0; pin < ioapics[ioapic].nr_registers; pin++) {
+ for_each_ioapic_pin(ioapic, pin) {
irq_entry = find_irq_entry(ioapic, pin, mp_INT);
if (irq_entry == -1)
continue;
- irq = pin_2_irq(irq_entry, ioapic, pin);
- if ((ioapic > 0) && (irq > 16))
+ irq = pin_2_irq(irq_entry, ioapic, pin, 0);
+ if (irq < 0 || !mp_init_irq_at_boot(ioapic, irq))
continue;
idata = irq_get_irq_data(irq);
@@ -3573,29 +3665,33 @@ void __init setup_ioapic_dest(void)
static struct resource *ioapic_resources;
-static struct resource * __init ioapic_setup_resources(int nr_ioapics)
+static struct resource * __init ioapic_setup_resources(void)
{
unsigned long n;
struct resource *res;
char *mem;
- int i;
+ int i, num = 0;
- if (nr_ioapics <= 0)
+ for_each_ioapic(i)
+ num++;
+ if (num == 0)
return NULL;
n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource);
- n *= nr_ioapics;
+ n *= num;
mem = alloc_bootmem(n);
res = (void *)mem;
- mem += sizeof(struct resource) * nr_ioapics;
+ mem += sizeof(struct resource) * num;
- for (i = 0; i < nr_ioapics; i++) {
- res[i].name = mem;
- res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ num = 0;
+ for_each_ioapic(i) {
+ res[num].name = mem;
+ res[num].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i);
mem += IOAPIC_RESOURCE_NAME_SIZE;
+ num++;
}
ioapic_resources = res;
@@ -3609,8 +3705,8 @@ void __init native_io_apic_init_mappings(void)
struct resource *ioapic_res;
int i;
- ioapic_res = ioapic_setup_resources(nr_ioapics);
- for (i = 0; i < nr_ioapics; i++) {
+ ioapic_res = ioapic_setup_resources();
+ for_each_ioapic(i) {
if (smp_found_config) {
ioapic_phys = mpc_ioapic_addr(i);
#ifdef CONFIG_X86_32
@@ -3641,8 +3737,6 @@ fake_ioapic_page:
ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1;
ioapic_res++;
}
-
- probe_nr_irqs_gsi();
}
void __init ioapic_insert_resources(void)
@@ -3657,7 +3751,7 @@ void __init ioapic_insert_resources(void)
return;
}
- for (i = 0; i < nr_ioapics; i++) {
+ for_each_ioapic(i) {
insert_resource(&iomem_resource, r);
r++;
}
@@ -3665,16 +3759,15 @@ void __init ioapic_insert_resources(void)
int mp_find_ioapic(u32 gsi)
{
- int i = 0;
+ int i;
if (nr_ioapics == 0)
return -1;
/* Find the IOAPIC that manages this GSI. */
- for (i = 0; i < nr_ioapics; i++) {
+ for_each_ioapic(i) {
struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(i);
- if ((gsi >= gsi_cfg->gsi_base)
- && (gsi <= gsi_cfg->gsi_end))
+ if (gsi >= gsi_cfg->gsi_base && gsi <= gsi_cfg->gsi_end)
return i;
}
@@ -3686,7 +3779,7 @@ int mp_find_ioapic_pin(int ioapic, u32 gsi)
{
struct mp_ioapic_gsi *gsi_cfg;
- if (WARN_ON(ioapic == -1))
+ if (WARN_ON(ioapic < 0))
return -1;
gsi_cfg = mp_ioapic_gsi_routing(ioapic);
@@ -3729,7 +3822,8 @@ static __init int bad_ioapic_register(int idx)
return 0;
}
-void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
+void __init mp_register_ioapic(int id, u32 address, u32 gsi_base,
+ struct ioapic_domain_cfg *cfg)
{
int idx = 0;
int entries;
@@ -3743,6 +3837,8 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
ioapics[idx].mp_config.type = MP_IOAPIC;
ioapics[idx].mp_config.flags = MPC_APIC_USABLE;
ioapics[idx].mp_config.apicaddr = address;
+ ioapics[idx].irqdomain = NULL;
+ ioapics[idx].irqdomain_cfg = *cfg;
set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
@@ -3779,6 +3875,93 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
nr_ioapics++;
}
+int mp_irqdomain_map(struct irq_domain *domain, unsigned int virq,
+ irq_hw_number_t hwirq)
+{
+ int ioapic = (int)(long)domain->host_data;
+ struct mp_pin_info *info = mp_pin_info(ioapic, hwirq);
+ struct io_apic_irq_attr attr;
+
+ /*
+ * Skip the timer IRQ if there's a quirk handler installed and if it
+ * returns 1:
+ */
+ if (apic->multi_timer_check &&
+ apic->multi_timer_check(ioapic, virq))
+ return 0;
+
+ /* Get default attribute if not set by caller yet */
+ if (!info->set) {
+ u32 gsi = mp_pin_to_gsi(ioapic, hwirq);
+
+ if (acpi_get_override_irq(gsi, &info->trigger,
+ &info->polarity) < 0) {
+ /*
+			 * PCI interrupts default to polarity one (active
+			 * low) and level triggered.
+ */
+ info->trigger = 1;
+ info->polarity = 1;
+ }
+ info->node = NUMA_NO_NODE;
+ info->set = 1;
+ }
+ set_io_apic_irq_attr(&attr, ioapic, hwirq, info->trigger,
+ info->polarity);
+
+ return io_apic_setup_irq_pin(virq, info->node, &attr);
+}
+
+void mp_irqdomain_unmap(struct irq_domain *domain, unsigned int virq)
+{
+ struct irq_data *data = irq_get_irq_data(virq);
+ struct irq_cfg *cfg = irq_cfg(virq);
+ int ioapic = (int)(long)domain->host_data;
+ int pin = (int)data->hwirq;
+
+ /*
+ * Skip the timer IRQ if there's a quirk handler installed and if it
+ * returns 1:
+ */
+ if (apic->multi_timer_check &&
+ apic->multi_timer_check(ioapic, virq))
+ return;
+
+ ioapic_mask_entry(ioapic, pin);
+ __remove_pin_from_irq(cfg, ioapic, pin);
+ WARN_ON(cfg->irq_2_pin != NULL);
+ arch_teardown_hwirq(virq);
+}
+
+int mp_set_gsi_attr(u32 gsi, int trigger, int polarity, int node)
+{
+ int ret = 0;
+ int ioapic, pin;
+ struct mp_pin_info *info;
+
+ ioapic = mp_find_ioapic(gsi);
+ if (ioapic < 0)
+ return -ENODEV;
+
+ pin = mp_find_ioapic_pin(ioapic, gsi);
+ info = mp_pin_info(ioapic, pin);
+ trigger = trigger ? 1 : 0;
+ polarity = polarity ? 1 : 0;
+
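+	/* The first caller fixes the attributes; later callers must match them. */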
+ mutex_lock(&ioapic_mutex);
+ if (!info->set) {
+ info->trigger = trigger;
+ info->polarity = polarity;
+ info->node = node;
+ info->set = 1;
+ } else if (info->trigger != trigger || info->polarity != polarity) {
+ ret = -EBUSY;
+ }
+ mutex_unlock(&ioapic_mutex);
+
+ return ret;
+}
+
/* Enable IOAPIC early just for system timer */
void __init pre_init_apic_IRQ0(void)
{
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index ce8b8ff0e0ef..bc360d3df60e 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -8,6 +8,7 @@
#include <asm/processor.h>
#include <asm/apic.h>
#include <asm/cpu.h>
+#include <asm/smp.h>
#include <asm/pci-direct.h>
#ifdef CONFIG_X86_64
@@ -50,7 +51,6 @@ static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val)
return wrmsr_safe_regs(gprs);
}
-#ifdef CONFIG_X86_32
/*
* B step AMD K6 before B 9730xxxx have hardware bugs that can cause
* misexecution of code under Linux. Owners of such processors should
@@ -70,6 +70,7 @@ __asm__(".globl vide\n\t.align 4\nvide: ret");
static void init_amd_k5(struct cpuinfo_x86 *c)
{
+#ifdef CONFIG_X86_32
/*
* General Systems BIOSen alias the cpu frequency registers
* of the Elan at 0x000df000. Unfortuantly, one of the Linux
@@ -83,11 +84,12 @@ static void init_amd_k5(struct cpuinfo_x86 *c)
if (inl(CBAR) & CBAR_ENB)
outl(0 | CBAR_KEY, CBAR);
}
+#endif
}
-
static void init_amd_k6(struct cpuinfo_x86 *c)
{
+#ifdef CONFIG_X86_32
u32 l, h;
int mbytes = get_num_physpages() >> (20-PAGE_SHIFT);
@@ -176,10 +178,44 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
/* placeholder for any needed mods */
return;
}
+#endif
}
-static void amd_k7_smp_check(struct cpuinfo_x86 *c)
+static void init_amd_k7(struct cpuinfo_x86 *c)
{
+#ifdef CONFIG_X86_32
+ u32 l, h;
+
+ /*
+ * Bit 15 of Athlon specific MSR 15, needs to be 0
+ * to enable SSE on Palomino/Morgan/Barton CPU's.
+ * If the BIOS didn't enable it already, enable it here.
+ */
+ if (c->x86_model >= 6 && c->x86_model <= 10) {
+ if (!cpu_has(c, X86_FEATURE_XMM)) {
+ printk(KERN_INFO "Enabling disabled K7/SSE Support.\n");
+ msr_clear_bit(MSR_K7_HWCR, 15);
+ set_cpu_cap(c, X86_FEATURE_XMM);
+ }
+ }
+
+ /*
+ * It's been determined by AMD that Athlons since model 8 stepping 1
+ * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx
+ * As per AMD technical note 27212 0.2
+ */
+ if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) {
+ rdmsr(MSR_K7_CLK_CTL, l, h);
+ if ((l & 0xfff00000) != 0x20000000) {
+ printk(KERN_INFO
+ "CPU: CLK_CTL MSR was %x. Reprogramming to %x\n",
+ l, ((l & 0x000fffff)|0x20000000));
+ wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h);
+ }
+ }
+
+ set_cpu_cap(c, X86_FEATURE_K7);
+
/* calling is from identify_secondary_cpu() ? */
if (!c->cpu_index)
return;
@@ -207,7 +243,7 @@ static void amd_k7_smp_check(struct cpuinfo_x86 *c)
if (((c->x86_model == 6) && (c->x86_mask >= 2)) ||
((c->x86_model == 7) && (c->x86_mask >= 1)) ||
(c->x86_model > 7))
- if (cpu_has_mp)
+ if (cpu_has(c, X86_FEATURE_MP))
return;
/* If we get here, not a certified SMP capable AMD system. */
@@ -219,45 +255,8 @@ static void amd_k7_smp_check(struct cpuinfo_x86 *c)
WARN_ONCE(1, "WARNING: This combination of AMD"
" processors is not suitable for SMP.\n");
add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_NOW_UNRELIABLE);
-}
-
-static void init_amd_k7(struct cpuinfo_x86 *c)
-{
- u32 l, h;
-
- /*
- * Bit 15 of Athlon specific MSR 15, needs to be 0
- * to enable SSE on Palomino/Morgan/Barton CPU's.
- * If the BIOS didn't enable it already, enable it here.
- */
- if (c->x86_model >= 6 && c->x86_model <= 10) {
- if (!cpu_has(c, X86_FEATURE_XMM)) {
- printk(KERN_INFO "Enabling disabled K7/SSE Support.\n");
- msr_clear_bit(MSR_K7_HWCR, 15);
- set_cpu_cap(c, X86_FEATURE_XMM);
- }
- }
-
- /*
- * It's been determined by AMD that Athlons since model 8 stepping 1
- * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx
- * As per AMD technical note 27212 0.2
- */
- if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) {
- rdmsr(MSR_K7_CLK_CTL, l, h);
- if ((l & 0xfff00000) != 0x20000000) {
- printk(KERN_INFO
- "CPU: CLK_CTL MSR was %x. Reprogramming to %x\n",
- l, ((l & 0x000fffff)|0x20000000));
- wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h);
- }
- }
-
- set_cpu_cap(c, X86_FEATURE_K7);
-
- amd_k7_smp_check(c);
-}
#endif
+}
#ifdef CONFIG_NUMA
/*
@@ -446,6 +445,26 @@ static void early_init_amd_mc(struct cpuinfo_x86 *c)
static void bsp_init_amd(struct cpuinfo_x86 *c)
{
+
+#ifdef CONFIG_X86_64
+ if (c->x86 >= 0xf) {
+ unsigned long long tseg;
+
+ /*
+ * Split up direct mapping around the TSEG SMM area.
+ * Don't do it for gbpages because there seems very little
+ * benefit in doing so.
+ */
+ if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
+ unsigned long pfn = tseg >> PAGE_SHIFT;
+
+ printk(KERN_DEBUG "tseg: %010llx\n", tseg);
+ if (pfn_range_is_mapped(pfn, pfn + 1))
+ set_memory_4k((unsigned long)__va(tseg), 1);
+ }
+ }
+#endif
+
if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) {
if (c->x86 > 0x10 ||
@@ -515,101 +534,74 @@ static const int amd_erratum_383[];
static const int amd_erratum_400[];
static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum);
-static void init_amd(struct cpuinfo_x86 *c)
+static void init_amd_k8(struct cpuinfo_x86 *c)
{
- u32 dummy;
- unsigned long long value;
-
-#ifdef CONFIG_SMP
- /*
- * Disable TLB flush filter by setting HWCR.FFDIS on K8
- * bit 6 of msr C001_0015
- *
- * Errata 63 for SH-B3 steppings
- * Errata 122 for all steppings (F+ have it disabled by default)
- */
- if (c->x86 == 0xf)
- msr_set_bit(MSR_K7_HWCR, 6);
-#endif
+ u32 level;
+ u64 value;
- early_init_amd(c);
+ /* On C+ stepping K8 rep microcode works well for copy/memset */
+ level = cpuid_eax(1);
+ if ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)
+ set_cpu_cap(c, X86_FEATURE_REP_GOOD);
/*
- * Bit 31 in normal CPUID used for nonstandard 3DNow ID;
- * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway
+ * Some BIOSes incorrectly force this feature, but only K8 revision D
+ * (model = 0x14) and later actually support it.
+ * (AMD Erratum #110, docId: 25759).
*/
- clear_cpu_cap(c, 0*32+31);
-
-#ifdef CONFIG_X86_64
- /* On C+ stepping K8 rep microcode works well for copy/memset */
- if (c->x86 == 0xf) {
- u32 level;
-
- level = cpuid_eax(1);
- if ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)
- set_cpu_cap(c, X86_FEATURE_REP_GOOD);
-
- /*
- * Some BIOSes incorrectly force this feature, but only K8
- * revision D (model = 0x14) and later actually support it.
- * (AMD Erratum #110, docId: 25759).
- */
- if (c->x86_model < 0x14 && cpu_has(c, X86_FEATURE_LAHF_LM)) {
- clear_cpu_cap(c, X86_FEATURE_LAHF_LM);
- if (!rdmsrl_amd_safe(0xc001100d, &value)) {
- value &= ~(1ULL << 32);
- wrmsrl_amd_safe(0xc001100d, value);
- }
+ if (c->x86_model < 0x14 && cpu_has(c, X86_FEATURE_LAHF_LM)) {
+ clear_cpu_cap(c, X86_FEATURE_LAHF_LM);
+ if (!rdmsrl_amd_safe(0xc001100d, &value)) {
+ value &= ~BIT_64(32);
+ wrmsrl_amd_safe(0xc001100d, value);
}
-
}
- if (c->x86 >= 0x10)
- set_cpu_cap(c, X86_FEATURE_REP_GOOD);
- /* get apicid instead of initial apic id from cpuid */
- c->apicid = hard_smp_processor_id();
-#else
+ if (!c->x86_model_id[0])
+ strcpy(c->x86_model_id, "Hammer");
+}
+
+static void init_amd_gh(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_X86_64
+ /* do this for boot cpu */
+ if (c == &boot_cpu_data)
+ check_enable_amd_mmconf_dmi();
+
+ fam10h_check_enable_mmcfg();
+#endif
/*
- * FIXME: We should handle the K5 here. Set up the write
- * range and also turn on MSR 83 bits 4 and 31 (write alloc,
- * no bus pipeline)
+ * Disable GART TLB Walk Errors on Fam10h. We do this here because this
+ * is always needed when GART is enabled, even in a kernel which has no
+ * MCE support built in. BIOS should disable GartTlbWlk Errors already.
+ * If it doesn't, we do it here as suggested by the BKDG.
+ *
+ * Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=33012
*/
+ msr_set_bit(MSR_AMD64_MCx_MASK(4), 10);
- switch (c->x86) {
- case 4:
- init_amd_k5(c);
- break;
- case 5:
- init_amd_k6(c);
- break;
- case 6: /* An Athlon/Duron */
- init_amd_k7(c);
- break;
- }
+ /*
+ * On family 10h BIOS may not have properly enabled WC+ support, causing
+ * it to be converted to CD memtype. This may result in performance
+ * degradation for certain nested-paging guests. Prevent this conversion
+ * by clearing bit 24 in MSR_AMD64_BU_CFG2.
+ *
+ * NOTE: we want to use the _safe accessors so as not to #GP kvm
+ * guests on older kvm hosts.
+ */
+ msr_clear_bit(MSR_AMD64_BU_CFG2, 24);
- /* K6s reports MCEs but don't actually have all the MSRs */
- if (c->x86 < 6)
- clear_cpu_cap(c, X86_FEATURE_MCE);
-#endif
+ if (cpu_has_amd_erratum(c, amd_erratum_383))
+ set_cpu_bug(c, X86_BUG_AMD_TLB_MMATCH);
+}
- /* Enable workaround for FXSAVE leak */
- if (c->x86 >= 6)
- set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK);
-
- if (!c->x86_model_id[0]) {
- switch (c->x86) {
- case 0xf:
- /* Should distinguish Models here, but this is only
- a fallback anyways. */
- strcpy(c->x86_model_id, "Hammer");
- break;
- }
- }
+static void init_amd_bd(struct cpuinfo_x86 *c)
+{
+ u64 value;
/* re-enable TopologyExtensions if switched off by BIOS */
- if ((c->x86 == 0x15) &&
- (c->x86_model >= 0x10) && (c->x86_model <= 0x1f) &&
+ if ((c->x86_model >= 0x10) && (c->x86_model <= 0x1f) &&
!cpu_has(c, X86_FEATURE_TOPOEXT)) {
if (msr_set_bit(0xc0011005, 54) > 0) {
@@ -625,14 +617,60 @@ static void init_amd(struct cpuinfo_x86 *c)
* The way access filter has a performance penalty on some workloads.
* Disable it on the affected CPUs.
*/
- if ((c->x86 == 0x15) &&
- (c->x86_model >= 0x02) && (c->x86_model < 0x20)) {
-
+ if ((c->x86_model >= 0x02) && (c->x86_model < 0x20)) {
if (!rdmsrl_safe(0xc0011021, &value) && !(value & 0x1E)) {
value |= 0x1E;
wrmsrl_safe(0xc0011021, value);
}
}
+}
+
+static void init_amd(struct cpuinfo_x86 *c)
+{
+ u32 dummy;
+
+#ifdef CONFIG_SMP
+ /*
+ * Disable TLB flush filter by setting HWCR.FFDIS on K8
+ * bit 6 of msr C001_0015
+ *
+ * Errata 63 for SH-B3 steppings
+ * Errata 122 for all steppings (F+ have it disabled by default)
+ */
+ if (c->x86 == 0xf)
+ msr_set_bit(MSR_K7_HWCR, 6);
+#endif
+
+ early_init_amd(c);
+
+ /*
+ * Bit 31 in normal CPUID used for nonstandard 3DNow ID;
+ * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway
+ */
+ clear_cpu_cap(c, 0*32+31);
+
+ if (c->x86 >= 0x10)
+ set_cpu_cap(c, X86_FEATURE_REP_GOOD);
+
+ /* get apicid instead of initial apic id from cpuid */
+ c->apicid = hard_smp_processor_id();
+
+ /* K6s reports MCEs but don't actually have all the MSRs */
+ if (c->x86 < 6)
+ clear_cpu_cap(c, X86_FEATURE_MCE);
+
+ switch (c->x86) {
+ case 4: init_amd_k5(c); break;
+ case 5: init_amd_k6(c); break;
+ case 6: init_amd_k7(c); break;
+ case 0xf: init_amd_k8(c); break;
+ case 0x10: init_amd_gh(c); break;
+ case 0x15: init_amd_bd(c); break;
+ }
+
+ /* Enable workaround for FXSAVE leak */
+ if (c->x86 >= 6)
+ set_cpu_bug(c, X86_BUG_FXSAVE_LEAK);
cpu_detect_cache_sizes(c);
@@ -656,33 +694,6 @@ static void init_amd(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
}
-#ifdef CONFIG_X86_64
- if (c->x86 == 0x10) {
- /* do this for boot cpu */
- if (c == &boot_cpu_data)
- check_enable_amd_mmconf_dmi();
-
- fam10h_check_enable_mmcfg();
- }
-
- if (c == &boot_cpu_data && c->x86 >= 0xf) {
- unsigned long long tseg;
-
- /*
- * Split up direct mapping around the TSEG SMM area.
- * Don't do it for gbpages because there seems very little
- * benefit in doing so.
- */
- if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
- unsigned long pfn = tseg >> PAGE_SHIFT;
-
- printk(KERN_DEBUG "tseg: %010llx\n", tseg);
- if (pfn_range_is_mapped(pfn, pfn + 1))
- set_memory_4k((unsigned long)__va(tseg), 1);
- }
- }
-#endif
-
/*
* Family 0x12 and above processors have APIC timer
* running in deep C states.
@@ -690,34 +701,6 @@ static void init_amd(struct cpuinfo_x86 *c)
if (c->x86 > 0x11)
set_cpu_cap(c, X86_FEATURE_ARAT);
- if (c->x86 == 0x10) {
- /*
- * Disable GART TLB Walk Errors on Fam10h. We do this here
- * because this is always needed when GART is enabled, even in a
- * kernel which has no MCE support built in.
- * BIOS should disable GartTlbWlk Errors already. If
- * it doesn't, do it here as suggested by the BKDG.
- *
- * Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=33012
- */
- msr_set_bit(MSR_AMD64_MCx_MASK(4), 10);
-
- /*
- * On family 10h BIOS may not have properly enabled WC+ support,
- * causing it to be converted to CD memtype. This may result in
- * performance degradation for certain nested-paging guests.
- * Prevent this conversion by clearing bit 24 in
- * MSR_AMD64_BU_CFG2.
- *
- * NOTE: we want to use the _safe accessors so as not to #GP kvm
- * guests on older kvm hosts.
- */
- msr_clear_bit(MSR_AMD64_BU_CFG2, 24);
-
- if (cpu_has_amd_erratum(c, amd_erratum_383))
- set_cpu_bug(c, X86_BUG_AMD_TLB_MMATCH);
- }
-
if (cpu_has_amd_erratum(c, amd_erratum_400))
set_cpu_bug(c, X86_BUG_AMD_APIC_C1E);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index ef1b93f18ed1..ce31eeada362 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -148,6 +148,7 @@ static int __init x86_xsave_setup(char *s)
{
setup_clear_cpu_cap(X86_FEATURE_XSAVE);
setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
+ setup_clear_cpu_cap(X86_FEATURE_XSAVES);
setup_clear_cpu_cap(X86_FEATURE_AVX);
setup_clear_cpu_cap(X86_FEATURE_AVX2);
return 1;
@@ -161,6 +162,13 @@ static int __init x86_xsaveopt_setup(char *s)
}
__setup("noxsaveopt", x86_xsaveopt_setup);
+static int __init x86_xsaves_setup(char *s)
+{
+ setup_clear_cpu_cap(X86_FEATURE_XSAVES);
+ return 1;
+}
+__setup("noxsaves", x86_xsaves_setup);
+
#ifdef CONFIG_X86_32
static int cachesize_override = -1;
static int disable_x86_serial_nr = 1;
@@ -634,6 +642,15 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
c->x86_capability[9] = ebx;
}
+ /* Extended state features: level 0x0000000d */
+ if (c->cpuid_level >= 0x0000000d) {
+ u32 eax, ebx, ecx, edx;
+
+ cpuid_count(0x0000000d, 1, &eax, &ebx, &ecx, &edx);
+
+ c->x86_capability[10] = eax;
+ }
+
/* AMD-defined flags: level 0x80000001 */
xlvl = cpuid_eax(0x80000000);
c->extended_cpuid_level = xlvl;
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index f9e4fdd3b877..9483ee5b3991 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -253,7 +253,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
*/
if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 &&
(c->x86_mask < 0x6 || c->x86_mask == 0xb))
- set_cpu_cap(c, X86_FEATURE_11AP);
+ set_cpu_bug(c, X86_BUG_11AP);
#ifdef CONFIG_X86_INTEL_USERCOPY
@@ -402,7 +402,7 @@ static void init_intel(struct cpuinfo_x86 *c)
if (c->x86 == 6 && cpu_has_clflush &&
(c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47))
- set_cpu_cap(c, X86_FEATURE_CLFLUSH_MONITOR);
+ set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR);
#ifdef CONFIG_X86_64
if (c->x86 == 15)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 9a79c8dbd8e8..4fc57975acc1 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -2385,6 +2385,10 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
threshold_cpu_callback(action, cpu);
mce_device_remove(cpu);
mce_intel_hcpu_update(cpu);
+
+ /* intentionally ignoring frozen here */
+ if (!(action & CPU_TASKS_FROZEN))
+ cmci_rediscover();
break;
case CPU_DOWN_PREPARE:
smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
@@ -2396,11 +2400,6 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
break;
}
- if (action == CPU_POST_DEAD) {
- /* intentionally ignoring frozen here */
- cmci_rediscover();
- }
-
return NOTIFY_OK;
}
diff --git a/arch/x86/kernel/cpu/mkcapflags.sh b/arch/x86/kernel/cpu/mkcapflags.sh
index 2bf616505499..e2b22df964cd 100644
--- a/arch/x86/kernel/cpu/mkcapflags.sh
+++ b/arch/x86/kernel/cpu/mkcapflags.sh
@@ -1,23 +1,25 @@
#!/bin/sh
#
-# Generate the x86_cap_flags[] array from include/asm/cpufeature.h
+# Generate the x86_cap/bug_flags[] arrays from include/asm/cpufeature.h
#
IN=$1
OUT=$2
-TABS="$(printf '\t\t\t\t\t')"
-trap 'rm "$OUT"' EXIT
+dump_array()
+{
+ ARRAY=$1
+ SIZE=$2
+ PFX=$3
+ POSTFIX=$4
-(
- echo "#ifndef _ASM_X86_CPUFEATURE_H"
- echo "#include <asm/cpufeature.h>"
- echo "#endif"
- echo ""
- echo "const char * const x86_cap_flags[NCAPINTS*32] = {"
+ PFX_SZ=$(echo $PFX | wc -c)
+ TABS="$(printf '\t\t\t\t\t')"
+
+ echo "const char * const $ARRAY[$SIZE] = {"
- # Iterate through any input lines starting with #define X86_FEATURE_
- sed -n -e 's/\t/ /g' -e 's/^ *# *define *X86_FEATURE_//p' $IN |
+ # Iterate through any input lines starting with #define $PFX
+ sed -n -e 's/\t/ /g' -e "s/^ *# *define *$PFX//p" $IN |
while read i
do
# Name is everything up to the first whitespace
@@ -31,11 +33,32 @@ trap 'rm "$OUT"' EXIT
# Name is uppercase, VALUE is all lowercase
VALUE="$(echo "$VALUE" | tr A-Z a-z)"
- TABCOUNT=$(( ( 5*8 - 14 - $(echo "$NAME" | wc -c) ) / 8 ))
- printf "\t[%s]%.*s = %s,\n" \
- "X86_FEATURE_$NAME" "$TABCOUNT" "$TABS" "$VALUE"
+ if [ -n "$POSTFIX" ]; then
+ T=$(( $PFX_SZ + $(echo $POSTFIX | wc -c) + 2 ))
+ TABS="$(printf '\t\t\t\t\t\t')"
+ TABCOUNT=$(( ( 6*8 - ($T + 1) - $(echo "$NAME" | wc -c) ) / 8 ))
+ printf "\t[%s - %s]%.*s = %s,\n" "$PFX$NAME" "$POSTFIX" "$TABCOUNT" "$TABS" "$VALUE"
+ else
+ TABCOUNT=$(( ( 5*8 - ($PFX_SZ + 1) - $(echo "$NAME" | wc -c) ) / 8 ))
+ printf "\t[%s]%.*s = %s,\n" "$PFX$NAME" "$TABCOUNT" "$TABS" "$VALUE"
+ fi
done
echo "};"
+}
+
+trap 'rm "$OUT"' EXIT
+
+(
+ echo "#ifndef _ASM_X86_CPUFEATURE_H"
+ echo "#include <asm/cpufeature.h>"
+ echo "#endif"
+ echo ""
+
+ dump_array "x86_cap_flags" "NCAPINTS*32" "X86_FEATURE_" ""
+ echo ""
+
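+	# Bug bits live above the NCAPINTS capability words, so their table
+	# entries are indexed relative to NCAPINTS*32.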
+ dump_array "x86_bug_flags" "NBUGINTS*32" "X86_BUG_" "NCAPINTS*32"
+
) > $OUT
trap - EXIT
diff --git a/arch/x86/kernel/cpu/perf_event_amd_uncore.c b/arch/x86/kernel/cpu/perf_event_amd_uncore.c
index 3bbdf4cd38b9..30790d798e6b 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_uncore.c
@@ -294,31 +294,41 @@ static struct amd_uncore *amd_uncore_alloc(unsigned int cpu)
cpu_to_node(cpu));
}
-static void amd_uncore_cpu_up_prepare(unsigned int cpu)
+static int amd_uncore_cpu_up_prepare(unsigned int cpu)
{
- struct amd_uncore *uncore;
+ struct amd_uncore *uncore_nb = NULL, *uncore_l2;
if (amd_uncore_nb) {
- uncore = amd_uncore_alloc(cpu);
- uncore->cpu = cpu;
- uncore->num_counters = NUM_COUNTERS_NB;
- uncore->rdpmc_base = RDPMC_BASE_NB;
- uncore->msr_base = MSR_F15H_NB_PERF_CTL;
- uncore->active_mask = &amd_nb_active_mask;
- uncore->pmu = &amd_nb_pmu;
- *per_cpu_ptr(amd_uncore_nb, cpu) = uncore;
+ uncore_nb = amd_uncore_alloc(cpu);
+ if (!uncore_nb)
+ goto fail;
+ uncore_nb->cpu = cpu;
+ uncore_nb->num_counters = NUM_COUNTERS_NB;
+ uncore_nb->rdpmc_base = RDPMC_BASE_NB;
+ uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL;
+ uncore_nb->active_mask = &amd_nb_active_mask;
+ uncore_nb->pmu = &amd_nb_pmu;
+ *per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb;
}
if (amd_uncore_l2) {
- uncore = amd_uncore_alloc(cpu);
- uncore->cpu = cpu;
- uncore->num_counters = NUM_COUNTERS_L2;
- uncore->rdpmc_base = RDPMC_BASE_L2;
- uncore->msr_base = MSR_F16H_L2I_PERF_CTL;
- uncore->active_mask = &amd_l2_active_mask;
- uncore->pmu = &amd_l2_pmu;
- *per_cpu_ptr(amd_uncore_l2, cpu) = uncore;
+ uncore_l2 = amd_uncore_alloc(cpu);
+ if (!uncore_l2)
+ goto fail;
+ uncore_l2->cpu = cpu;
+ uncore_l2->num_counters = NUM_COUNTERS_L2;
+ uncore_l2->rdpmc_base = RDPMC_BASE_L2;
+ uncore_l2->msr_base = MSR_F16H_L2I_PERF_CTL;
+ uncore_l2->active_mask = &amd_l2_active_mask;
+ uncore_l2->pmu = &amd_l2_pmu;
+ *per_cpu_ptr(amd_uncore_l2, cpu) = uncore_l2;
}
+
+ return 0;
+
+fail:
+ kfree(uncore_nb);
+ return -ENOMEM;
}
static struct amd_uncore *
@@ -441,7 +451,7 @@ static void uncore_dead(unsigned int cpu, struct amd_uncore * __percpu *uncores)
if (!--uncore->refcnt)
kfree(uncore);
- *per_cpu_ptr(amd_uncore_nb, cpu) = NULL;
+ *per_cpu_ptr(uncores, cpu) = NULL;
}
static void amd_uncore_cpu_dead(unsigned int cpu)
@@ -461,7 +471,8 @@ amd_uncore_cpu_notifier(struct notifier_block *self, unsigned long action,
switch (action & ~CPU_TASKS_FROZEN) {
case CPU_UP_PREPARE:
- amd_uncore_cpu_up_prepare(cpu);
+ if (amd_uncore_cpu_up_prepare(cpu))
+ return notifier_from_errno(-ENOMEM);
break;
case CPU_STARTING:
@@ -501,20 +512,33 @@ static void __init init_cpu_already_online(void *dummy)
amd_uncore_cpu_online(cpu);
}
+static void cleanup_cpu_online(void *dummy)
+{
+ unsigned int cpu = smp_processor_id();
+
+ amd_uncore_cpu_dead(cpu);
+}
+
static int __init amd_uncore_init(void)
{
- unsigned int cpu;
+ unsigned int cpu, cpu2;
int ret = -ENODEV;
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
- return -ENODEV;
+ goto fail_nodev;
if (!cpu_has_topoext)
- return -ENODEV;
+ goto fail_nodev;
if (cpu_has_perfctr_nb) {
amd_uncore_nb = alloc_percpu(struct amd_uncore *);
- perf_pmu_register(&amd_nb_pmu, amd_nb_pmu.name, -1);
+ if (!amd_uncore_nb) {
+ ret = -ENOMEM;
+ goto fail_nb;
+ }
+ ret = perf_pmu_register(&amd_nb_pmu, amd_nb_pmu.name, -1);
+ if (ret)
+ goto fail_nb;
printk(KERN_INFO "perf: AMD NB counters detected\n");
ret = 0;
@@ -522,20 +546,28 @@ static int __init amd_uncore_init(void)
if (cpu_has_perfctr_l2) {
amd_uncore_l2 = alloc_percpu(struct amd_uncore *);
- perf_pmu_register(&amd_l2_pmu, amd_l2_pmu.name, -1);
+ if (!amd_uncore_l2) {
+ ret = -ENOMEM;
+ goto fail_l2;
+ }
+ ret = perf_pmu_register(&amd_l2_pmu, amd_l2_pmu.name, -1);
+ if (ret)
+ goto fail_l2;
printk(KERN_INFO "perf: AMD L2I counters detected\n");
ret = 0;
}
if (ret)
- return -ENODEV;
+ goto fail_nodev;
cpu_notifier_register_begin();
/* init cpus already online before registering for hotplug notifier */
for_each_online_cpu(cpu) {
- amd_uncore_cpu_up_prepare(cpu);
+ ret = amd_uncore_cpu_up_prepare(cpu);
+ if (ret)
+ goto fail_online;
smp_call_function_single(cpu, init_cpu_already_online, NULL, 1);
}
@@ -543,5 +575,30 @@ static int __init amd_uncore_init(void)
cpu_notifier_register_done();
return 0;
+
+
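+	/*
+	 * Unwind: clean up the CPUs already prepared, then unregister the
+	 * PMUs and free the per-cpu storage that was allocated.
+	 */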
+fail_online:
+ for_each_online_cpu(cpu2) {
+ if (cpu2 == cpu)
+ break;
+		smp_call_function_single(cpu2, cleanup_cpu_online, NULL, 1);
+ }
+ cpu_notifier_register_done();
+
+ /* amd_uncore_nb/l2 should have been freed by cleanup_cpu_online */
+ amd_uncore_nb = amd_uncore_l2 = NULL;
+ if (cpu_has_perfctr_l2)
+ perf_pmu_unregister(&amd_l2_pmu);
+fail_l2:
+ if (cpu_has_perfctr_nb)
+ perf_pmu_unregister(&amd_nb_pmu);
+ if (amd_uncore_l2)
+ free_percpu(amd_uncore_l2);
+fail_nb:
+ if (amd_uncore_nb)
+ free_percpu(amd_uncore_nb);
+
+fail_nodev:
+ return ret;
}
device_initcall(amd_uncore_init);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index ae6552a0701f..cfc6f9dfcd90 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -2947,10 +2947,7 @@ again:
* extra registers. If we failed to take an extra
* register, try the alternative.
*/
- if (idx % 2)
- idx--;
- else
- idx++;
+ idx ^= 1;
if (idx != reg1->idx % 6) {
if (idx == 2)
config1 >>= 8;
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 06fe3ed8b851..5433658e598d 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -97,6 +97,14 @@ static int show_cpuinfo(struct seq_file *m, void *v)
if (cpu_has(c, i) && x86_cap_flags[i] != NULL)
seq_printf(m, " %s", x86_cap_flags[i]);
+ seq_printf(m, "\nbugs\t\t:");
+ for (i = 0; i < 32*NBUGINTS; i++) {
+ unsigned int bug_bit = 32*NCAPINTS + i;
+
+ if (cpu_has_bug(c, bug_bit) && x86_bug_flags[i])
+ seq_printf(m, " %s", x86_bug_flags[i]);
+ }
+
seq_printf(m, "\nbogomips\t: %lu.%02lu\n",
c->loops_per_jiffy/(500000/HZ),
(c->loops_per_jiffy/(5000/HZ)) % 100);
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index b6f794aa1693..4a8013d55947 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -38,7 +38,6 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
{ X86_FEATURE_PTS, CR_EAX, 6, 0x00000006, 0 },
{ X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 },
{ X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 },
- { X86_FEATURE_XSAVEOPT, CR_EAX, 0, 0x0000000d, 1 },
{ X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 },
{ X86_FEATURE_CPB, CR_EDX, 9, 0x80000007, 0 },
{ X86_FEATURE_PROC_FEEDBACK, CR_EDX,11, 0x80000007, 0 },
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 7db54b5d5f86..3d3503351242 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -21,6 +21,7 @@
#include <asm/apic.h>
#include <asm/pci_x86.h>
#include <asm/setup.h>
+#include <asm/i8259.h>
__initdata u64 initial_dtb;
char __initdata cmd_line[COMMAND_LINE_SIZE];
@@ -165,82 +166,6 @@ static void __init dtb_lapic_setup(void)
#ifdef CONFIG_X86_IO_APIC
static unsigned int ioapic_id;
-static void __init dtb_add_ioapic(struct device_node *dn)
-{
- struct resource r;
- int ret;
-
- ret = of_address_to_resource(dn, 0, &r);
- if (ret) {
- printk(KERN_ERR "Can't obtain address from node %s.\n",
- dn->full_name);
- return;
- }
- mp_register_ioapic(++ioapic_id, r.start, gsi_top);
-}
-
-static void __init dtb_ioapic_setup(void)
-{
- struct device_node *dn;
-
- for_each_compatible_node(dn, NULL, "intel,ce4100-ioapic")
- dtb_add_ioapic(dn);
-
- if (nr_ioapics) {
- of_ioapic = 1;
- return;
- }
- printk(KERN_ERR "Error: No information about IO-APIC in OF.\n");
-}
-#else
-static void __init dtb_ioapic_setup(void) {}
-#endif
-
-static void __init dtb_apic_setup(void)
-{
- dtb_lapic_setup();
- dtb_ioapic_setup();
-}
-
-#ifdef CONFIG_OF_FLATTREE
-static void __init x86_flattree_get_config(void)
-{
- u32 size, map_len;
- void *dt;
-
- if (!initial_dtb)
- return;
-
- map_len = max(PAGE_SIZE - (initial_dtb & ~PAGE_MASK), (u64)128);
-
- initial_boot_params = dt = early_memremap(initial_dtb, map_len);
- size = of_get_flat_dt_size();
- if (map_len < size) {
- early_iounmap(dt, map_len);
- initial_boot_params = dt = early_memremap(initial_dtb, size);
- map_len = size;
- }
-
- unflatten_and_copy_device_tree();
- early_iounmap(dt, map_len);
-}
-#else
-static inline void x86_flattree_get_config(void) { }
-#endif
-
-void __init x86_dtb_init(void)
-{
- x86_flattree_get_config();
-
- if (!of_have_populated_dt())
- return;
-
- dtb_setup_hpet();
- dtb_apic_setup();
-}
-
-#ifdef CONFIG_X86_IO_APIC
-
struct of_ioapic_type {
u32 out_type;
u32 trigger;
@@ -276,10 +201,8 @@ static int ioapic_xlate(struct irq_domain *domain,
const u32 *intspec, u32 intsize,
irq_hw_number_t *out_hwirq, u32 *out_type)
{
- struct io_apic_irq_attr attr;
struct of_ioapic_type *it;
- u32 line, idx;
- int rc;
+ u32 line, idx, gsi;
if (WARN_ON(intsize < 2))
return -EINVAL;
@@ -291,13 +214,10 @@ static int ioapic_xlate(struct irq_domain *domain,
it = &of_ioapic_type[intspec[1]];
- idx = (u32) domain->host_data;
- set_io_apic_irq_attr(&attr, idx, line, it->trigger, it->polarity);
-
- rc = io_apic_setup_irq_pin_once(irq_find_mapping(domain, line),
- cpu_to_node(0), &attr);
- if (rc)
- return rc;
+ idx = (u32)(long)domain->host_data;
+ gsi = mp_pin_to_gsi(idx, line);
+ if (mp_set_gsi_attr(gsi, it->trigger, it->polarity, cpu_to_node(0)))
+ return -EBUSY;
*out_hwirq = line;
*out_type = it->out_type;
@@ -305,81 +225,86 @@ static int ioapic_xlate(struct irq_domain *domain,
}
const struct irq_domain_ops ioapic_irq_domain_ops = {
+ .map = mp_irqdomain_map,
+ .unmap = mp_irqdomain_unmap,
.xlate = ioapic_xlate,
};
-static void dt_add_ioapic_domain(unsigned int ioapic_num,
- struct device_node *np)
+static void __init dtb_add_ioapic(struct device_node *dn)
{
- struct irq_domain *id;
- struct mp_ioapic_gsi *gsi_cfg;
+ struct resource r;
int ret;
- int num;
-
- gsi_cfg = mp_ioapic_gsi_routing(ioapic_num);
- num = gsi_cfg->gsi_end - gsi_cfg->gsi_base + 1;
-
- id = irq_domain_add_linear(np, num, &ioapic_irq_domain_ops,
- (void *)ioapic_num);
- BUG_ON(!id);
- if (gsi_cfg->gsi_base == 0) {
- /*
- * The first NR_IRQS_LEGACY irq descs are allocated in
- * early_irq_init() and need just a mapping. The
- * remaining irqs need both. All of them are preallocated
- * and assigned so we can keep the 1:1 mapping which the ioapic
- * is having.
- */
- irq_domain_associate_many(id, 0, 0, NR_IRQS_LEGACY);
-
- if (num > NR_IRQS_LEGACY) {
- ret = irq_create_strict_mappings(id, NR_IRQS_LEGACY,
- NR_IRQS_LEGACY, num - NR_IRQS_LEGACY);
- if (ret)
- pr_err("Error creating mapping for the "
- "remaining IRQs: %d\n", ret);
- }
- irq_set_default_host(id);
- } else {
- ret = irq_create_strict_mappings(id, gsi_cfg->gsi_base, 0, num);
- if (ret)
- pr_err("Error creating IRQ mapping: %d\n", ret);
+ struct ioapic_domain_cfg cfg = {
+ .type = IOAPIC_DOMAIN_DYNAMIC,
+ .ops = &ioapic_irq_domain_ops,
+ .dev = dn,
+ };
+
+ ret = of_address_to_resource(dn, 0, &r);
+ if (ret) {
+ printk(KERN_ERR "Can't obtain address from node %s.\n",
+ dn->full_name);
+ return;
}
+ mp_register_ioapic(++ioapic_id, r.start, gsi_top, &cfg);
}
-static void __init ioapic_add_ofnode(struct device_node *np)
+static void __init dtb_ioapic_setup(void)
{
- struct resource r;
- int i, ret;
+ struct device_node *dn;
- ret = of_address_to_resource(np, 0, &r);
- if (ret) {
- printk(KERN_ERR "Failed to obtain address for %s\n",
- np->full_name);
+ for_each_compatible_node(dn, NULL, "intel,ce4100-ioapic")
+ dtb_add_ioapic(dn);
+
+ if (nr_ioapics) {
+ of_ioapic = 1;
return;
}
+ printk(KERN_ERR "Error: No information about IO-APIC in OF.\n");
+}
+#else
+static void __init dtb_ioapic_setup(void) {}
+#endif
- for (i = 0; i < nr_ioapics; i++) {
- if (r.start == mpc_ioapic_addr(i)) {
- dt_add_ioapic_domain(i, np);
- return;
- }
- }
- printk(KERN_ERR "IOxAPIC at %s is not registered.\n", np->full_name);
+static void __init dtb_apic_setup(void)
+{
+ dtb_lapic_setup();
+ dtb_ioapic_setup();
}
-void __init x86_add_irq_domains(void)
+#ifdef CONFIG_OF_FLATTREE
+static void __init x86_flattree_get_config(void)
{
- struct device_node *dp;
+ u32 size, map_len;
+ void *dt;
- if (!of_have_populated_dt())
+ if (!initial_dtb)
return;
- for_each_node_with_property(dp, "interrupt-controller") {
- if (of_device_is_compatible(dp, "intel,ce4100-ioapic"))
- ioapic_add_ofnode(dp);
+ map_len = max(PAGE_SIZE - (initial_dtb & ~PAGE_MASK), (u64)128);
+
+ initial_boot_params = dt = early_memremap(initial_dtb, map_len);
+ size = of_get_flat_dt_size();
+ if (map_len < size) {
+ early_iounmap(dt, map_len);
+ initial_boot_params = dt = early_memremap(initial_dtb, size);
+ map_len = size;
}
+
+ unflatten_and_copy_device_tree();
+ early_iounmap(dt, map_len);
}
#else
-void __init x86_add_irq_domains(void) { }
+static inline void x86_flattree_get_config(void) { }
#endif
+
+void __init x86_dtb_init(void)
+{
+ x86_flattree_get_config();
+
+ if (!of_have_populated_dt())
+ return;
+
+ dtb_setup_hpet();
+ dtb_apic_setup();
+}
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 7e3f1d950b65..c5781f55f1a1 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -207,7 +207,6 @@ ENDPROC(native_usergs_sysret64)
*/
.macro XCPT_FRAME start=1 offset=0
INTR_FRAME \start, RIP+\offset-ORIG_RAX
- /*CFI_REL_OFFSET orig_rax, ORIG_RAX-ORIG_RAX*/
.endm
/*
@@ -287,21 +286,21 @@ ENDPROC(native_usergs_sysret64)
ENTRY(save_paranoid)
XCPT_FRAME 1 RDI+8
cld
- movq_cfi rdi, RDI+8
- movq_cfi rsi, RSI+8
+ movq %rdi, RDI+8(%rsp)
+ movq %rsi, RSI+8(%rsp)
movq_cfi rdx, RDX+8
movq_cfi rcx, RCX+8
movq_cfi rax, RAX+8
- movq_cfi r8, R8+8
- movq_cfi r9, R9+8
- movq_cfi r10, R10+8
- movq_cfi r11, R11+8
+ movq %r8, R8+8(%rsp)
+ movq %r9, R9+8(%rsp)
+ movq %r10, R10+8(%rsp)
+ movq %r11, R11+8(%rsp)
movq_cfi rbx, RBX+8
- movq_cfi rbp, RBP+8
- movq_cfi r12, R12+8
- movq_cfi r13, R13+8
- movq_cfi r14, R14+8
- movq_cfi r15, R15+8
+ movq %rbp, RBP+8(%rsp)
+ movq %r12, R12+8(%rsp)
+ movq %r13, R13+8(%rsp)
+ movq %r14, R14+8(%rsp)
+ movq %r15, R15+8(%rsp)
movl $1,%ebx
movl $MSR_GS_BASE,%ecx
rdmsr
@@ -1394,21 +1393,21 @@ ENTRY(error_entry)
CFI_ADJUST_CFA_OFFSET 15*8
/* oldrax contains error code */
cld
- movq_cfi rdi, RDI+8
- movq_cfi rsi, RSI+8
- movq_cfi rdx, RDX+8
- movq_cfi rcx, RCX+8
- movq_cfi rax, RAX+8
- movq_cfi r8, R8+8
- movq_cfi r9, R9+8
- movq_cfi r10, R10+8
- movq_cfi r11, R11+8
+ movq %rdi, RDI+8(%rsp)
+ movq %rsi, RSI+8(%rsp)
+ movq %rdx, RDX+8(%rsp)
+ movq %rcx, RCX+8(%rsp)
+ movq %rax, RAX+8(%rsp)
+ movq %r8, R8+8(%rsp)
+ movq %r9, R9+8(%rsp)
+ movq %r10, R10+8(%rsp)
+ movq %r11, R11+8(%rsp)
movq_cfi rbx, RBX+8
- movq_cfi rbp, RBP+8
- movq_cfi r12, R12+8
- movq_cfi r13, R13+8
- movq_cfi r14, R14+8
- movq_cfi r15, R15+8
+ movq %rbp, RBP+8(%rsp)
+ movq %r12, R12+8(%rsp)
+ movq %r13, R13+8(%rsp)
+ movq %r14, R14+8(%rsp)
+ movq %r15, R15+8(%rsp)
xorl %ebx,%ebx
testl $3,CS+8(%rsp)
je error_kernelspace
@@ -1426,6 +1425,7 @@ error_sti:
* compat mode. Check for these here too.
*/
error_kernelspace:
+ CFI_REL_OFFSET rcx, RCX+8
incl %ebx
leaq irq_return_iret(%rip),%rcx
cmpq %rcx,RIP+8(%rsp)
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index d5dd80814419..a9a4229f6161 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -375,7 +375,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
/*
* These bits must be zero.
*/
- xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0;
+ memset(xsave_hdr->reserved, 0, 48);
return ret;
}
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 7f50156542fb..1e6cff5814fa 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -78,7 +78,7 @@ void __init init_ISA_irqs(void)
#endif
legacy_pic->init(0);
- for (i = 0; i < legacy_pic->nr_legacy_irqs; i++)
+ for (i = 0; i < nr_legacy_irqs(); i++)
irq_set_chip_and_handler_name(i, chip, handle_level_irq, name);
}
@@ -87,12 +87,6 @@ void __init init_IRQ(void)
int i;
/*
- * We probably need a better place for this, but it works for
- * now ...
- */
- x86_add_irq_domains();
-
- /*
* On cpu 0, Assign IRQ0_VECTOR..IRQ15_VECTOR's to IRQ 0..15.
* If these IRQ's are handled by legacy interrupt-controllers like PIC,
* then this configuration will likely be static after the boot. If
@@ -100,7 +94,7 @@ void __init init_IRQ(void)
* then this vector space can be freed and re-used dynamically as the
* irq's migrate etc.
*/
- for (i = 0; i < legacy_pic->nr_legacy_irqs; i++)
+ for (i = 0; i < nr_legacy_irqs(); i++)
per_cpu(vector_irq, 0)[IRQ0_VECTOR + i] = i;
x86_init.irqs.intr_init();
@@ -121,7 +115,7 @@ void setup_vector_irq(int cpu)
* legacy PIC, for the new cpu that is coming online, setup the static
* legacy vector to irq mapping:
*/
- for (irq = 0; irq < legacy_pic->nr_legacy_irqs; irq++)
+ for (irq = 0; irq < nr_legacy_irqs(); irq++)
per_cpu(vector_irq, cpu)[IRQ0_VECTOR + irq] = irq;
#endif
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index d2b56489d70f..fde86d2b79f8 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -19,6 +19,7 @@
#include <linux/module.h>
#include <linux/smp.h>
#include <linux/pci.h>
+#include <linux/irqdomain.h>
#include <asm/mtrr.h>
#include <asm/mpspec.h>
@@ -67,7 +68,7 @@ static void __init MP_processor_info(struct mpc_cpu *m)
boot_cpu_physical_apicid = m->apicid;
}
- printk(KERN_INFO "Processor #%d%s\n", m->apicid, bootup_cpu);
+ pr_info("Processor #%d%s\n", m->apicid, bootup_cpu);
generic_processor_info(apicid, m->apicver);
}
@@ -87,9 +88,8 @@ static void __init MP_bus_info(struct mpc_bus *m)
#if MAX_MP_BUSSES < 256
if (m->busid >= MAX_MP_BUSSES) {
- printk(KERN_WARNING "MP table busid value (%d) for bustype %s "
- " is too large, max. supported is %d\n",
- m->busid, str, MAX_MP_BUSSES - 1);
+ pr_warn("MP table busid value (%d) for bustype %s is too large, max. supported is %d\n",
+ m->busid, str, MAX_MP_BUSSES - 1);
return;
}
#endif
@@ -110,19 +110,29 @@ static void __init MP_bus_info(struct mpc_bus *m)
mp_bus_id_to_type[m->busid] = MP_BUS_EISA;
#endif
} else
- printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str);
+ pr_warn("Unknown bustype %s - ignoring\n", str);
}
+static struct irq_domain_ops mp_ioapic_irqdomain_ops = {
+ .map = mp_irqdomain_map,
+ .unmap = mp_irqdomain_unmap,
+};
+
static void __init MP_ioapic_info(struct mpc_ioapic *m)
{
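+	/* IO-APICs enumerated from the MP table keep the legacy 1:1 mapping for ISA IRQs. */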
+ struct ioapic_domain_cfg cfg = {
+ .type = IOAPIC_DOMAIN_LEGACY,
+ .ops = &mp_ioapic_irqdomain_ops,
+ };
+
if (m->flags & MPC_APIC_USABLE)
- mp_register_ioapic(m->apicid, m->apicaddr, gsi_top);
+ mp_register_ioapic(m->apicid, m->apicaddr, gsi_top, &cfg);
}
static void __init print_mp_irq_info(struct mpc_intsrc *mp_irq)
{
- apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
- " IRQ %02x, APIC ID %x, APIC INT %02x\n",
+ apic_printk(APIC_VERBOSE,
+ "Int: type %d, pol %d, trig %d, bus %02x, IRQ %02x, APIC ID %x, APIC INT %02x\n",
mp_irq->irqtype, mp_irq->irqflag & 3,
(mp_irq->irqflag >> 2) & 3, mp_irq->srcbus,
mp_irq->srcbusirq, mp_irq->dstapic, mp_irq->dstirq);
@@ -135,8 +145,8 @@ static inline void __init MP_ioapic_info(struct mpc_ioapic *m) {}
static void __init MP_lintsrc_info(struct mpc_lintsrc *m)
{
- apic_printk(APIC_VERBOSE, "Lint: type %d, pol %d, trig %d, bus %02x,"
- " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
+ apic_printk(APIC_VERBOSE,
+ "Lint: type %d, pol %d, trig %d, bus %02x, IRQ %02x, APIC ID %x, APIC LINT %02x\n",
m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbusid,
m->srcbusirq, m->destapic, m->destapiclint);
}
@@ -148,34 +158,33 @@ static int __init smp_check_mpc(struct mpc_table *mpc, char *oem, char *str)
{
if (memcmp(mpc->signature, MPC_SIGNATURE, 4)) {
- printk(KERN_ERR "MPTABLE: bad signature [%c%c%c%c]!\n",
+ pr_err("MPTABLE: bad signature [%c%c%c%c]!\n",
mpc->signature[0], mpc->signature[1],
mpc->signature[2], mpc->signature[3]);
return 0;
}
if (mpf_checksum((unsigned char *)mpc, mpc->length)) {
- printk(KERN_ERR "MPTABLE: checksum error!\n");
+ pr_err("MPTABLE: checksum error!\n");
return 0;
}
if (mpc->spec != 0x01 && mpc->spec != 0x04) {
- printk(KERN_ERR "MPTABLE: bad table version (%d)!!\n",
- mpc->spec);
+ pr_err("MPTABLE: bad table version (%d)!!\n", mpc->spec);
return 0;
}
if (!mpc->lapic) {
- printk(KERN_ERR "MPTABLE: null local APIC address!\n");
+ pr_err("MPTABLE: null local APIC address!\n");
return 0;
}
memcpy(oem, mpc->oem, 8);
oem[8] = 0;
- printk(KERN_INFO "MPTABLE: OEM ID: %s\n", oem);
+ pr_info("MPTABLE: OEM ID: %s\n", oem);
memcpy(str, mpc->productid, 12);
str[12] = 0;
- printk(KERN_INFO "MPTABLE: Product ID: %s\n", str);
+ pr_info("MPTABLE: Product ID: %s\n", str);
- printk(KERN_INFO "MPTABLE: APIC at: 0x%X\n", mpc->lapic);
+ pr_info("MPTABLE: APIC at: 0x%X\n", mpc->lapic);
return 1;
}
@@ -188,8 +197,8 @@ static void skip_entry(unsigned char **ptr, int *count, int size)
static void __init smp_dump_mptable(struct mpc_table *mpc, unsigned char *mpt)
{
- printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n"
- "type %x\n", *mpt);
+ pr_err("Your mptable is wrong, contact your HW vendor!\n");
+ pr_cont("type %x\n", *mpt);
print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16,
1, mpc, mpc->length, 1);
}
@@ -259,7 +268,7 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
}
if (!num_processors)
- printk(KERN_ERR "MPTABLE: no processors registered!\n");
+ pr_err("MPTABLE: no processors registered!\n");
return num_processors;
}
@@ -295,16 +304,13 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type)
* If it does, we assume it's valid.
*/
if (mpc_default_type == 5) {
- printk(KERN_INFO "ISA/PCI bus type with no IRQ information... "
- "falling back to ELCR\n");
+ pr_info("ISA/PCI bus type with no IRQ information... falling back to ELCR\n");
if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) ||
ELCR_trigger(13))
- printk(KERN_ERR "ELCR contains invalid data... "
- "not using ELCR\n");
+ pr_err("ELCR contains invalid data... not using ELCR\n");
else {
- printk(KERN_INFO
- "Using ELCR to identify PCI interrupts\n");
+ pr_info("Using ELCR to identify PCI interrupts\n");
ELCR_fallback = 1;
}
}
@@ -353,7 +359,7 @@ static void __init construct_ioapic_table(int mpc_default_type)
bus.busid = 0;
switch (mpc_default_type) {
default:
- printk(KERN_ERR "???\nUnknown standard configuration %d\n",
+ pr_err("???\nUnknown standard configuration %d\n",
mpc_default_type);
/* fall through */
case 1:
@@ -462,8 +468,8 @@ static int __init check_physptr(struct mpf_intel *mpf, unsigned int early)
#ifdef CONFIG_X86_LOCAL_APIC
smp_found_config = 0;
#endif
- printk(KERN_ERR "BIOS bug, MP table errors detected!...\n"
- "... disabling SMP support. (tell your hw vendor)\n");
+ pr_err("BIOS bug, MP table errors detected!...\n");
+ pr_cont("... disabling SMP support. (tell your hw vendor)\n");
early_iounmap(mpc, size);
return -1;
}
@@ -481,8 +487,7 @@ static int __init check_physptr(struct mpf_intel *mpf, unsigned int early)
if (!mp_irq_entries) {
struct mpc_bus bus;
- printk(KERN_ERR "BIOS bug, no explicit IRQ entries, "
- "using default mptable. (tell your hw vendor)\n");
+ pr_err("BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n");
bus.type = MP_BUS;
bus.busid = 0;
@@ -516,14 +521,14 @@ void __init default_get_smp_config(unsigned int early)
if (acpi_lapic && acpi_ioapic)
return;
- printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n",
- mpf->specification);
+ pr_info("Intel MultiProcessor Specification v1.%d\n",
+ mpf->specification);
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32)
if (mpf->feature2 & (1 << 7)) {
- printk(KERN_INFO " IMCR and PIC compatibility mode.\n");
+ pr_info(" IMCR and PIC compatibility mode.\n");
pic_mode = 1;
} else {
- printk(KERN_INFO " Virtual Wire compatibility mode.\n");
+ pr_info(" Virtual Wire compatibility mode.\n");
pic_mode = 0;
}
#endif
@@ -539,8 +544,7 @@ void __init default_get_smp_config(unsigned int early)
return;
}
- printk(KERN_INFO "Default MP configuration #%d\n",
- mpf->feature1);
+ pr_info("Default MP configuration #%d\n", mpf->feature1);
construct_default_ISA_mptable(mpf->feature1);
} else if (mpf->physptr) {
@@ -550,7 +554,7 @@ void __init default_get_smp_config(unsigned int early)
BUG();
if (!early)
- printk(KERN_INFO "Processors: %d\n", num_processors);
+ pr_info("Processors: %d\n", num_processors);
/*
* Only use the first configuration found.
*/
@@ -583,10 +587,10 @@ static int __init smp_scan_config(unsigned long base, unsigned long length)
#endif
mpf_found = mpf;
- printk(KERN_INFO "found SMP MP-table at [mem %#010llx-%#010llx] mapped at [%p]\n",
- (unsigned long long) virt_to_phys(mpf),
- (unsigned long long) virt_to_phys(mpf) +
- sizeof(*mpf) - 1, mpf);
+ pr_info("found SMP MP-table at [mem %#010llx-%#010llx] mapped at [%p]\n",
+ (unsigned long long) virt_to_phys(mpf),
+ (unsigned long long) virt_to_phys(mpf) +
+ sizeof(*mpf) - 1, mpf);
mem = virt_to_phys(mpf);
memblock_reserve(mem, sizeof(*mpf));
@@ -735,7 +739,7 @@ static int __init replace_intsrc_all(struct mpc_table *mpc,
int nr_m_spare = 0;
unsigned char *mpt = ((unsigned char *)mpc) + count;
- printk(KERN_INFO "mpc_length %x\n", mpc->length);
+ pr_info("mpc_length %x\n", mpc->length);
while (count < mpc->length) {
switch (*mpt) {
case MP_PROCESSOR:
@@ -862,13 +866,13 @@ static int __init update_mp_table(void)
if (!smp_check_mpc(mpc, oem, str))
return 0;
- printk(KERN_INFO "mpf: %llx\n", (u64)virt_to_phys(mpf));
- printk(KERN_INFO "physptr: %x\n", mpf->physptr);
+ pr_info("mpf: %llx\n", (u64)virt_to_phys(mpf));
+ pr_info("physptr: %x\n", mpf->physptr);
if (mpc_new_phys && mpc->length > mpc_new_length) {
mpc_new_phys = 0;
- printk(KERN_INFO "mpc_new_length is %ld, please use alloc_mptable=8k\n",
- mpc_new_length);
+ pr_info("mpc_new_length is %ld, please use alloc_mptable=8k\n",
+ mpc_new_length);
}
if (!mpc_new_phys) {
@@ -879,10 +883,10 @@ static int __init update_mp_table(void)
mpc->checksum = 0xff;
new = mpf_checksum((unsigned char *)mpc, mpc->length);
if (old == new) {
- printk(KERN_INFO "mpc is readonly, please try alloc_mptable instead\n");
+ pr_info("mpc is readonly, please try alloc_mptable instead\n");
return 0;
}
- printk(KERN_INFO "use in-position replacing\n");
+ pr_info("use in-position replacing\n");
} else {
mpf->physptr = mpc_new_phys;
mpc_new = phys_to_virt(mpc_new_phys);
@@ -892,7 +896,7 @@ static int __init update_mp_table(void)
if (mpc_new_phys - mpf->physptr) {
struct mpf_intel *mpf_new;
/* steal 16 bytes from [0, 1k) */
- printk(KERN_INFO "mpf new: %x\n", 0x400 - 16);
+ pr_info("mpf new: %x\n", 0x400 - 16);
mpf_new = phys_to_virt(0x400 - 16);
memcpy(mpf_new, mpf, 16);
mpf = mpf_new;
@@ -900,7 +904,7 @@ static int __init update_mp_table(void)
}
mpf->checksum = 0;
mpf->checksum -= mpf_checksum((unsigned char *)mpf, 16);
- printk(KERN_INFO "physptr new: %x\n", mpf->physptr);
+ pr_info("physptr new: %x\n", mpf->physptr);
}
/*
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 4505e2a950d8..f804dc935d2a 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -93,6 +93,7 @@ void arch_task_cache_init(void)
kmem_cache_create("task_xstate", xstate_size,
__alignof__(union thread_xstate),
SLAB_PANIC | SLAB_NOTRACK, NULL);
+ setup_xstate_comp();
}
/*
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index ea030319b321..56b0c338061e 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -234,9 +234,6 @@ static inline unsigned long long cycles_2_ns(unsigned long long cyc)
return ns;
}
-/* XXX surely we already have this someplace in the kernel?! */
-#define DIV_ROUND(n, d) (((n) + ((d) / 2)) / (d))
-
static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
{
unsigned long long tsc_now, ns_now;
@@ -259,7 +256,9 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
* time function is continuous; see the comment near struct
* cyc2ns_data.
*/
- data->cyc2ns_mul = DIV_ROUND(NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR, cpu_khz);
+ data->cyc2ns_mul =
+ DIV_ROUND_CLOSEST(NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR,
+ cpu_khz);
data->cyc2ns_shift = CYC2NS_SCALE_FACTOR;
data->cyc2ns_offset = ns_now -
mul_u64_u32_shr(tsc_now, data->cyc2ns_mul, CYC2NS_SCALE_FACTOR);
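The tsc.c hunk drops the file-local DIV_ROUND() helper in favor of the generic DIV_ROUND_CLOSEST() from <linux/kernel.h>, which performs the same round-to-nearest division. A sketch of the substitution with illustrative numbers only (the 10-bit shift mirrors CYC2NS_SCALE_FACTOR in this file, but nothing here is part of the patch):

#include <linux/kernel.h>	/* DIV_ROUND_CLOSEST() */

static unsigned int cyc2ns_mul_sketch(unsigned long cpu_khz)
{
	/*
	 * 1000000 ns per ms, scaled by 2^10, divided by kHz and rounded to
	 * the nearest integer: for cpu_khz == 3000000 this yields 341.
	 */
	return DIV_ROUND_CLOSEST(1000000UL << 10, cpu_khz);
}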
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index a4b451c6addf..940b142cc11f 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -8,6 +8,7 @@
#include <linux/bootmem.h>
#include <linux/compat.h>
+#include <linux/cpu.h>
#include <asm/i387.h>
#include <asm/fpu-internal.h>
#include <asm/sigframe.h>
@@ -24,7 +25,9 @@ u64 pcntxt_mask;
struct xsave_struct *init_xstate_buf;
static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32;
-static unsigned int *xstate_offsets, *xstate_sizes, xstate_features;
+static unsigned int *xstate_offsets, *xstate_sizes;
+static unsigned int xstate_comp_offsets[sizeof(pcntxt_mask)*8];
+static unsigned int xstate_features;
/*
* If a processor implementation discerns that a processor state component is
@@ -283,7 +286,7 @@ sanitize_restored_xstate(struct task_struct *tsk,
if (use_xsave()) {
/* These bits must be zero. */
- xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0;
+ memset(xsave_hdr->reserved, 0, 48);
/*
* Init the state that is not present in the memory
@@ -479,6 +482,52 @@ static void __init setup_xstate_features(void)
}
/*
+ * This function sets up offsets and sizes of all extended states in
+ * the xsave area. This supports both the standard and the compacted
+ * format of the xsave area.
+ *
+ * Input: void
+ * Output: void
+ */
+void setup_xstate_comp(void)
+{
+ unsigned int xstate_comp_sizes[sizeof(pcntxt_mask)*8];
+ int i;
+
+ /*
+ * The FP xstates and SSE xstates are legacy states. They are always
+ * in the fixed offsets in the xsave area in either compacted form
+ * or standard form.
+ */
+ xstate_comp_offsets[0] = 0;
+ xstate_comp_offsets[1] = offsetof(struct i387_fxsave_struct, xmm_space);
+
+ if (!cpu_has_xsaves) {
+ for (i = 2; i < xstate_features; i++) {
+ if (test_bit(i, (unsigned long *)&pcntxt_mask)) {
+ xstate_comp_offsets[i] = xstate_offsets[i];
+ xstate_comp_sizes[i] = xstate_sizes[i];
+ }
+ }
+ return;
+ }
+
+ xstate_comp_offsets[2] = FXSAVE_SIZE + XSAVE_HDR_SIZE;
+
+ for (i = 2; i < xstate_features; i++) {
+ if (test_bit(i, (unsigned long *)&pcntxt_mask))
+ xstate_comp_sizes[i] = xstate_sizes[i];
+ else
+ xstate_comp_sizes[i] = 0;
+
+ if (i > 2)
+ xstate_comp_offsets[i] = xstate_comp_offsets[i-1]
+ + xstate_comp_sizes[i-1];
+
+ }
+}
+
+/*
* setup the xstate image representing the init state
*/
static void __init setup_init_fpu_buf(void)
@@ -496,15 +545,21 @@ static void __init setup_init_fpu_buf(void)
setup_xstate_features();
+ if (cpu_has_xsaves) {
+ init_xstate_buf->xsave_hdr.xcomp_bv =
+ (u64)1 << 63 | pcntxt_mask;
+ init_xstate_buf->xsave_hdr.xstate_bv = pcntxt_mask;
+ }
+
/*
* Init all the features state with header_bv being 0x0
*/
- xrstor_state(init_xstate_buf, -1);
+ xrstor_state_booting(init_xstate_buf, -1);
/*
* Dump the init state again. This is to identify the init state
* of any feature which is not represented by all zero's.
*/
- xsave_state(init_xstate_buf, -1);
+ xsave_state_booting(init_xstate_buf, -1);
}
static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO;
@@ -520,6 +575,30 @@ static int __init eager_fpu_setup(char *s)
}
__setup("eagerfpu=", eager_fpu_setup);
+
+/*
+ * Calculate total size of enabled xstates in XCR0/pcntxt_mask.
+ */
+static void __init init_xstate_size(void)
+{
+ unsigned int eax, ebx, ecx, edx;
+ int i;
+
+ if (!cpu_has_xsaves) {
+ cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
+ xstate_size = ebx;
+ return;
+ }
+
+ xstate_size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
+ for (i = 2; i < 64; i++) {
+ if (test_bit(i, (unsigned long *)&pcntxt_mask)) {
+ cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
+ xstate_size += eax;
+ }
+ }
+}
+
/*
* Enable and initialize the xsave feature.
*/
@@ -551,8 +630,7 @@ static void __init xstate_enable_boot_cpu(void)
/*
* Recompute the context size for enabled features
*/
- cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
- xstate_size = ebx;
+ init_xstate_size();
update_regset_xstate_info(xstate_size, pcntxt_mask);
prepare_fx_sw_frame();
@@ -572,8 +650,9 @@ static void __init xstate_enable_boot_cpu(void)
}
}
- pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n",
- pcntxt_mask, xstate_size);
+ pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x using %s\n",
+ pcntxt_mask, xstate_size,
+ cpu_has_xsaves ? "compacted form" : "standard form");
}
/*
@@ -635,3 +714,26 @@ void eager_fpu_init(void)
else
fxrstor_checking(&init_xstate_buf->i387);
}
+
+/*
+ * Given the xsave area and a state inside, this function returns the
+ * address of the state.
+ *
+ * This is the API that is called to get xstate address in either
+ * standard format or compacted format of xsave area.
+ *
+ * Inputs:
+ * xsave: base address of the xsave area;
+ * xstate: state which is defined in xsave.h (e.g. XSTATE_FP, XSTATE_SSE,
+ * etc.)
+ * Output:
+ * address of the state in the xsave area.
+ */
+void *get_xsave_addr(struct xsave_struct *xsave, int xstate)
+{
+ int feature = fls64(xstate) - 1;
+ if (!test_bit(feature, (unsigned long *)&pcntxt_mask))
+ return NULL;
+
+ return (void *)xsave + xstate_comp_offsets[feature];
+}
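setup_xstate_comp() records a per-feature offset table so that get_xsave_addr() works whether the buffer uses the standard layout (offsets straight from CPUID) or the XSAVES compacted layout (each enabled state packed immediately after the previous one). A hedged caller sketch; XSTATE_YMM and the tsk->thread.fpu.state->xsave path reflect the 3.16-era headers and are assumptions, not part of this patch:

#include <linux/sched.h>
#include <asm/xsave.h>		/* get_xsave_addr(), XSTATE_YMM */

/*
 * Sketch: locate a task's AVX (YMM) save area, if the feature is enabled.
 * get_xsave_addr() returns NULL when the state bit is not set in
 * pcntxt_mask, so callers must check the result.
 */
static void *task_ymm_state(struct task_struct *tsk)
{
	struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave;

	return get_xsave_addr(xsave, XSTATE_YMM);
}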
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 36642793e315..1dbade870f90 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -577,6 +577,8 @@ static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
static const char nx_warning[] = KERN_CRIT
"kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n";
+static const char smep_warning[] = KERN_CRIT
+"unable to execute userspace code (SMEP?) (uid: %d)\n";
static void
show_fault_oops(struct pt_regs *regs, unsigned long error_code,
@@ -597,6 +599,10 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
if (pte && pte_present(*pte) && !pte_exec(*pte))
printk(nx_warning, from_kuid(&init_user_ns, current_uid()));
+ if (pte && pte_present(*pte) && pte_exec(*pte) &&
+ (pgd_flags(*pgd) & _PAGE_USER) &&
+ (read_cr4() & X86_CR4_SMEP))
+ printk(smep_warning, from_kuid(&init_user_ns, current_uid()));
}
printk(KERN_ALERT "BUG: unable to handle kernel ");
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 5075371ab593..cfd1b132b8e3 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -448,7 +448,7 @@ static void probe_pci_root_info(struct pci_root_info *info,
return;
size = sizeof(*info->res) * info->res_num;
- info->res = kzalloc(size, GFP_KERNEL);
+ info->res = kzalloc_node(size, GFP_KERNEL, info->sd.node);
if (!info->res) {
info->res_num = 0;
return;
@@ -456,7 +456,7 @@ static void probe_pci_root_info(struct pci_root_info *info,
size = sizeof(*info->res_offset) * info->res_num;
info->res_num = 0;
- info->res_offset = kzalloc(size, GFP_KERNEL);
+ info->res_offset = kzalloc_node(size, GFP_KERNEL, info->sd.node);
if (!info->res_offset) {
kfree(info->res);
info->res = NULL;
@@ -499,7 +499,7 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
if (node != NUMA_NO_NODE && !node_online(node))
node = NUMA_NO_NODE;
- info = kzalloc(sizeof(*info), GFP_KERNEL);
+ info = kzalloc_node(sizeof(*info), GFP_KERNEL, node);
if (!info) {
printk(KERN_WARNING "pci_bus %04x:%02x: "
"ignored (out of memory)\n", domain, busnum);
diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c
index 84b9d672843d..09fece368592 100644
--- a/arch/x86/pci/intel_mid_pci.c
+++ b/arch/x86/pci/intel_mid_pci.c
@@ -208,27 +208,31 @@ static int pci_write(struct pci_bus *bus, unsigned int devfn, int where,
static int intel_mid_pci_irq_enable(struct pci_dev *dev)
{
- u8 pin;
- struct io_apic_irq_attr irq_attr;
+ int polarity;
- pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
+ if (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_TANGIER)
+ polarity = 0; /* active high */
+ else
+ polarity = 1; /* active low */
/*
* MRST only has an IOAPIC; the PCI irq lines are 1:1 mapped to
* IOAPIC RTE entries, so we just enable RTE for the device.
*/
- irq_attr.ioapic = mp_find_ioapic(dev->irq);
- irq_attr.ioapic_pin = dev->irq;
- irq_attr.trigger = 1; /* level */
- if (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_TANGIER)
- irq_attr.polarity = 0; /* active high */
- else
- irq_attr.polarity = 1; /* active low */
- io_apic_set_pci_routing(&dev->dev, dev->irq, &irq_attr);
+ if (mp_set_gsi_attr(dev->irq, 1, polarity, dev_to_node(&dev->dev)))
+ return -EBUSY;
+ if (mp_map_gsi_to_irq(dev->irq, IOAPIC_MAP_ALLOC) < 0)
+ return -EBUSY;
return 0;
}
+static void intel_mid_pci_irq_disable(struct pci_dev *dev)
+{
+ if (dev->irq > 0)
+ mp_unmap_irq(dev->irq);
+}
+
struct pci_ops intel_mid_pci_ops = {
.read = pci_read,
.write = pci_write,
@@ -245,6 +249,7 @@ int __init intel_mid_pci_init(void)
pr_info("Intel MID platform detected, using MID PCI ops\n");
pci_mmcfg_late_init();
pcibios_enable_irq = intel_mid_pci_irq_enable;
+ pcibios_disable_irq = intel_mid_pci_irq_disable;
pci_root_ops = intel_mid_pci_ops;
pci_soc_mode = 1;
/* Continue with standard init */
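intel_mid_pci_irq_enable() now goes through the two-step mp_set_gsi_attr() + mp_map_gsi_to_irq() interface used elsewhere in this series, and the new pcibios_disable_irq hook releases the mapping again. A hedged sketch of the pairing; the helper names are illustrative and error handling is trimmed:

#include <linux/errno.h>
#include <asm/io_apic.h>	/* mp_set_gsi_attr(), mp_map_gsi_to_irq(), mp_unmap_irq() */

/*
 * Sketch: program one GSI as level-triggered with the given polarity and
 * allocate the Linux IRQ for it, as the MID enable path above does.
 */
static int mid_map_gsi(unsigned int gsi, int polarity, int node)
{
	if (mp_set_gsi_attr(gsi, 1 /* level */, polarity, node))
		return -EBUSY;

	return mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC) < 0 ? -EBUSY : 0;
}

static void mid_unmap_gsi(int irq)
{
	if (irq > 0)
		mp_unmap_irq(irq);	/* mirrors intel_mid_pci_irq_disable() */
}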
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index 84112f55dd7a..748cfe8ab322 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -26,6 +26,7 @@ static int acer_tm360_irqrouting;
static struct irq_routing_table *pirq_table;
static int pirq_enable_irq(struct pci_dev *dev);
+static void pirq_disable_irq(struct pci_dev *dev);
/*
* Never use: 0, 1, 2 (timer, keyboard, and cascade)
@@ -53,7 +54,7 @@ struct irq_router_handler {
};
int (*pcibios_enable_irq)(struct pci_dev *dev) = pirq_enable_irq;
-void (*pcibios_disable_irq)(struct pci_dev *dev) = NULL;
+void (*pcibios_disable_irq)(struct pci_dev *dev) = pirq_disable_irq;
/*
* Check passed address for the PCI IRQ Routing Table signature
@@ -1186,7 +1187,7 @@ void pcibios_penalize_isa_irq(int irq, int active)
static int pirq_enable_irq(struct pci_dev *dev)
{
- u8 pin;
+ u8 pin = 0;
pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
if (pin && !pcibios_lookup_irq(dev, 1)) {
@@ -1227,8 +1228,6 @@ static int pirq_enable_irq(struct pci_dev *dev)
}
dev = temp_dev;
if (irq >= 0) {
- io_apic_set_pci_routing(&dev->dev, irq,
- &irq_attr);
dev->irq = irq;
dev_info(&dev->dev, "PCI->APIC IRQ transform: "
"INT %c -> IRQ %d\n", 'A' + pin - 1, irq);
@@ -1254,3 +1253,11 @@ static int pirq_enable_irq(struct pci_dev *dev)
}
return 0;
}
+
+static void pirq_disable_irq(struct pci_dev *dev)
+{
+ if (io_apic_assign_pci_irqs && dev->irq) {
+ mp_unmap_irq(dev->irq);
+ dev->irq = 0;
+ }
+}
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 905956f16465..093f5f4272d3 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -23,6 +23,7 @@
#include <xen/features.h>
#include <xen/events.h>
#include <asm/xen/pci.h>
+#include <asm/i8259.h>
static int xen_pcifront_enable_irq(struct pci_dev *dev)
{
@@ -40,7 +41,7 @@ static int xen_pcifront_enable_irq(struct pci_dev *dev)
/* In PV DomU the Xen PCI backend puts the PIRQ in the interrupt line.*/
pirq = gsi;
- if (gsi < NR_IRQS_LEGACY)
+ if (gsi < nr_legacy_irqs())
share = 0;
rc = xen_bind_pirq_gsi_to_irq(gsi, pirq, share, "pcifront");
@@ -511,7 +512,7 @@ int __init pci_xen_initial_domain(void)
xen_setup_acpi_sci();
__acpi_register_gsi = acpi_register_gsi_xen;
/* Pre-allocate legacy irqs */
- for (irq = 0; irq < NR_IRQS_LEGACY; irq++) {
+ for (irq = 0; irq < nr_legacy_irqs(); irq++) {
int trigger, polarity;
if (acpi_get_override_irq(irq, &trigger, &polarity) == -1)
@@ -522,7 +523,7 @@ int __init pci_xen_initial_domain(void)
true /* Map GSI to PIRQ */);
}
if (0 == nr_ioapics) {
- for (irq = 0; irq < NR_IRQS_LEGACY; irq++)
+ for (irq = 0; irq < nr_legacy_irqs(); irq++)
xen_bind_pirq_gsi_to_irq(irq, irq, 0, "xt-pic");
}
return 0;
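The xen.c hunks replace the fixed NR_IRQS_LEGACY bound with nr_legacy_irqs(), which takes the count from the active legacy PIC and so becomes 0 when the null PIC is installed. A small sketch of the idea, assuming nr_legacy_irqs() is the <asm/i8259.h> helper added earlier in this series:

#include <asm/i8259.h>		/* nr_legacy_irqs(), legacy_pic */

/*
 * Sketch: walk only the IRQs the active legacy PIC actually provides.
 * On platforms that install null_legacy_pic the loop body never runs,
 * which a compile-time NR_IRQS_LEGACY bound could not express.
 */
static void for_each_legacy_irq(void (*fn)(int irq))
{
	int irq;

	for (irq = 0; irq < nr_legacy_irqs(); irq++)
		fn(irq);
}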
diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c
index 8244f5ec2f4c..701fd5843c87 100644
--- a/arch/x86/platform/ce4100/ce4100.c
+++ b/arch/x86/platform/ce4100/ce4100.c
@@ -135,14 +135,10 @@ static void __init sdv_arch_setup(void)
sdv_serial_fixup();
}
-#ifdef CONFIG_X86_IO_APIC
static void sdv_pci_init(void)
{
x86_of_pci_init();
- /* We can't set this earlier, because we need to calibrate the timer */
- legacy_pic = &null_legacy_pic;
}
-#endif
/*
* CE4100 specific x86_init function overrides and early setup
@@ -155,7 +151,9 @@ void __init x86_ce4100_early_setup(void)
x86_init.resources.probe_roms = x86_init_noop;
x86_init.mpparse.get_smp_config = x86_init_uint_noop;
x86_init.mpparse.find_smp_config = x86_init_noop;
+ x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc_nocheck;
x86_init.pci.init = ce4100_pci_init;
+ x86_init.pci.init_irq = sdv_pci_init;
/*
* By default, the reboot method is ACPI which is supported by the
@@ -166,10 +164,5 @@ void __init x86_ce4100_early_setup(void)
*/
reboot_type = BOOT_KBD;
-#ifdef CONFIG_X86_IO_APIC
- x86_init.pci.init_irq = sdv_pci_init;
- x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc_nocheck;
-#endif
-
pm_power_off = ce4100_power_off;
}
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_wdt.c b/arch/x86/platform/intel-mid/device_libs/platform_wdt.c
index 973cf3bfa9fd..0b283d4d0ad7 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_wdt.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_wdt.c
@@ -26,28 +26,18 @@ static struct platform_device wdt_dev = {
static int tangier_probe(struct platform_device *pdev)
{
- int ioapic;
- int irq;
+ int gsi;
struct intel_mid_wdt_pdata *pdata = pdev->dev.platform_data;
- struct io_apic_irq_attr irq_attr = { 0 };
if (!pdata)
return -EINVAL;
- irq = pdata->irq;
- ioapic = mp_find_ioapic(irq);
- if (ioapic >= 0) {
- int ret;
- irq_attr.ioapic = ioapic;
- irq_attr.ioapic_pin = irq;
- irq_attr.trigger = 1;
- /* irq_attr.polarity = 0; -> Active high */
- ret = io_apic_set_pci_routing(NULL, irq, &irq_attr);
- if (ret)
- return ret;
- } else {
+ /* IOAPIC builds identity mapping between GSI and IRQ on MID */
+ gsi = pdata->irq;
+ if (mp_set_gsi_attr(gsi, 1, 0, cpu_to_node(0)) ||
+ mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC) <= 0) {
dev_warn(&pdev->dev, "cannot find interrupt %d in ioapic\n",
- irq);
+ gsi);
return -EINVAL;
}
diff --git a/arch/x86/platform/intel-mid/sfi.c b/arch/x86/platform/intel-mid/sfi.c
index 994c40bd7cb7..3c53a90fdb18 100644
--- a/arch/x86/platform/intel-mid/sfi.c
+++ b/arch/x86/platform/intel-mid/sfi.c
@@ -432,9 +432,8 @@ static int __init sfi_parse_devs(struct sfi_table_header *table)
struct sfi_table_simple *sb;
struct sfi_device_table_entry *pentry;
struct devs_id *dev = NULL;
- int num, i;
- int ioapic;
- struct io_apic_irq_attr irq_attr;
+ int num, i, ret;
+ int polarity;
sb = (struct sfi_table_simple *)table;
num = SFI_GET_NUM_ENTRIES(sb, struct sfi_device_table_entry);
@@ -448,35 +447,30 @@ static int __init sfi_parse_devs(struct sfi_table_header *table)
* devices, but they have separate RTE entry in IOAPIC
* so we have to enable them one by one here
*/
- ioapic = mp_find_ioapic(irq);
- if (ioapic >= 0) {
- irq_attr.ioapic = ioapic;
- irq_attr.ioapic_pin = irq;
- irq_attr.trigger = 1;
- if (intel_mid_identify_cpu() ==
- INTEL_MID_CPU_CHIP_TANGIER) {
- if (!strncmp(pentry->name,
- "r69001-ts-i2c", 13))
- /* active low */
- irq_attr.polarity = 1;
- else if (!strncmp(pentry->name,
- "synaptics_3202", 14))
- /* active low */
- irq_attr.polarity = 1;
- else if (irq == 41)
- /* fast_int_1 */
- irq_attr.polarity = 1;
- else
- /* active high */
- irq_attr.polarity = 0;
- } else {
- /* PNW and CLV go with active low */
- irq_attr.polarity = 1;
- }
- io_apic_set_pci_routing(NULL, irq, &irq_attr);
+ if (intel_mid_identify_cpu() ==
+ INTEL_MID_CPU_CHIP_TANGIER) {
+ if (!strncmp(pentry->name, "r69001-ts-i2c", 13))
+ /* active low */
+ polarity = 1;
+ else if (!strncmp(pentry->name,
+ "synaptics_3202", 14))
+ /* active low */
+ polarity = 1;
+ else if (irq == 41)
+ /* fast_int_1 */
+ polarity = 1;
+ else
+ /* active high */
+ polarity = 0;
+ } else {
+ /* PNW and CLV go with active low */
+ polarity = 1;
}
- } else {
- irq = 0; /* No irq */
+
+ ret = mp_set_gsi_attr(irq, 1, polarity, NUMA_NO_NODE);
+ if (ret == 0)
+ ret = mp_map_gsi_to_irq(irq, IOAPIC_MAP_ALLOC);
+ WARN_ON(ret < 0);
}
dev = get_device_id(pentry->type, pentry->name);
diff --git a/arch/x86/platform/sfi/sfi.c b/arch/x86/platform/sfi/sfi.c
index bcd1a703e3e6..2a8a74f3bd76 100644
--- a/arch/x86/platform/sfi/sfi.c
+++ b/arch/x86/platform/sfi/sfi.c
@@ -25,6 +25,7 @@
#include <linux/init.h>
#include <linux/sfi.h>
#include <linux/io.h>
+#include <linux/irqdomain.h>
#include <asm/io_apic.h>
#include <asm/mpspec.h>
@@ -70,19 +71,26 @@ static int __init sfi_parse_cpus(struct sfi_table_header *table)
#endif /* CONFIG_X86_LOCAL_APIC */
#ifdef CONFIG_X86_IO_APIC
+static struct irq_domain_ops sfi_ioapic_irqdomain_ops = {
+ .map = mp_irqdomain_map,
+};
static int __init sfi_parse_ioapic(struct sfi_table_header *table)
{
struct sfi_table_simple *sb;
struct sfi_apic_table_entry *pentry;
int i, num;
+ struct ioapic_domain_cfg cfg = {
+ .type = IOAPIC_DOMAIN_STRICT,
+ .ops = &sfi_ioapic_irqdomain_ops,
+ };
sb = (struct sfi_table_simple *)table;
num = SFI_GET_NUM_ENTRIES(sb, struct sfi_apic_table_entry);
pentry = (struct sfi_apic_table_entry *)sb->pentry;
for (i = 0; i < num; i++) {
- mp_register_ioapic(i, pentry->phys_addr, gsi_top);
+ mp_register_ioapic(i, pentry->phys_addr, gsi_top, &cfg);
pentry++;
}
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index dfe605ac1bcd..ed161c6e278b 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1,7 +1,7 @@
/*
* SGI UltraViolet TLB flush routines.
*
- * (c) 2008-2012 Cliff Wickman <cpw@sgi.com>, SGI.
+ * (c) 2008-2014 Cliff Wickman <cpw@sgi.com>, SGI.
*
* This code is released under the GNU General Public License version 2 or
* later.
@@ -451,7 +451,7 @@ static inline unsigned long long cycles_2_ns(unsigned long long cyc)
/*
* The reverse of the above; converts a duration in ns to a duration in cycles.
- */
+ */
static inline unsigned long long ns_2_cycles(unsigned long long ns)
{
struct cyc2ns_data *data = cyc2ns_read_begin();
@@ -563,7 +563,7 @@ static int uv1_wait_completion(struct bau_desc *bau_desc,
* UV2 could have an extra bit of status in the ACTIVATION_STATUS_2 register.
* But not currently used.
*/
-static unsigned long uv2_read_status(unsigned long offset, int rshft, int desc)
+static unsigned long uv2_3_read_status(unsigned long offset, int rshft, int desc)
{
unsigned long descriptor_status;
@@ -606,7 +606,7 @@ int handle_uv2_busy(struct bau_control *bcp)
return FLUSH_GIVEUP;
}
-static int uv2_wait_completion(struct bau_desc *bau_desc,
+static int uv2_3_wait_completion(struct bau_desc *bau_desc,
unsigned long mmr_offset, int right_shift,
struct bau_control *bcp, long try)
{
@@ -616,7 +616,7 @@ static int uv2_wait_completion(struct bau_desc *bau_desc,
long busy_reps = 0;
struct ptc_stats *stat = bcp->statp;
- descriptor_stat = uv2_read_status(mmr_offset, right_shift, desc);
+ descriptor_stat = uv2_3_read_status(mmr_offset, right_shift, desc);
/* spin on the status MMR, waiting for it to go idle */
while (descriptor_stat != UV2H_DESC_IDLE) {
@@ -658,8 +658,7 @@ static int uv2_wait_completion(struct bau_desc *bau_desc,
/* not to hammer on the clock */
busy_reps = 0;
ttm = get_cycles();
- if ((ttm - bcp->send_message) >
- bcp->timeout_interval)
+ if ((ttm - bcp->send_message) > bcp->timeout_interval)
return handle_uv2_busy(bcp);
}
/*
@@ -667,8 +666,7 @@ static int uv2_wait_completion(struct bau_desc *bau_desc,
*/
cpu_relax();
}
- descriptor_stat = uv2_read_status(mmr_offset, right_shift,
- desc);
+ descriptor_stat = uv2_3_read_status(mmr_offset, right_shift, desc);
}
bcp->conseccompletes++;
return FLUSH_COMPLETE;
@@ -679,8 +677,7 @@ static int uv2_wait_completion(struct bau_desc *bau_desc,
* which register to read and position in that register based on cpu in
* current hub.
*/
-static int wait_completion(struct bau_desc *bau_desc,
- struct bau_control *bcp, long try)
+static int wait_completion(struct bau_desc *bau_desc, struct bau_control *bcp, long try)
{
int right_shift;
unsigned long mmr_offset;
@@ -695,11 +692,9 @@ static int wait_completion(struct bau_desc *bau_desc,
}
if (bcp->uvhub_version == 1)
- return uv1_wait_completion(bau_desc, mmr_offset, right_shift,
- bcp, try);
+ return uv1_wait_completion(bau_desc, mmr_offset, right_shift, bcp, try);
else
- return uv2_wait_completion(bau_desc, mmr_offset, right_shift,
- bcp, try);
+ return uv2_3_wait_completion(bau_desc, mmr_offset, right_shift, bcp, try);
}
/*
@@ -888,7 +883,7 @@ int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp,
struct ptc_stats *stat = bcp->statp;
struct bau_control *hmaster = bcp->uvhub_master;
struct uv1_bau_msg_header *uv1_hdr = NULL;
- struct uv2_bau_msg_header *uv2_hdr = NULL;
+ struct uv2_3_bau_msg_header *uv2_3_hdr = NULL;
if (bcp->uvhub_version == 1) {
uv1 = 1;
@@ -902,27 +897,28 @@ int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp,
if (uv1)
uv1_hdr = &bau_desc->header.uv1_hdr;
else
- uv2_hdr = &bau_desc->header.uv2_hdr;
+ /* uv2 and uv3 */
+ uv2_3_hdr = &bau_desc->header.uv2_3_hdr;
do {
if (try == 0) {
if (uv1)
uv1_hdr->msg_type = MSG_REGULAR;
else
- uv2_hdr->msg_type = MSG_REGULAR;
+ uv2_3_hdr->msg_type = MSG_REGULAR;
seq_number = bcp->message_number++;
} else {
if (uv1)
uv1_hdr->msg_type = MSG_RETRY;
else
- uv2_hdr->msg_type = MSG_RETRY;
+ uv2_3_hdr->msg_type = MSG_RETRY;
stat->s_retry_messages++;
}
if (uv1)
uv1_hdr->sequence = seq_number;
else
- uv2_hdr->sequence = seq_number;
+ uv2_3_hdr->sequence = seq_number;
index = (1UL << AS_PUSH_SHIFT) | bcp->uvhub_cpu;
bcp->send_message = get_cycles();
@@ -1080,8 +1076,10 @@ static int set_distrib_bits(struct cpumask *flush_mask, struct bau_control *bcp,
* done. The returned pointer is valid till preemption is re-enabled.
*/
const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
- struct mm_struct *mm, unsigned long start,
- unsigned long end, unsigned int cpu)
+ struct mm_struct *mm,
+ unsigned long start,
+ unsigned long end,
+ unsigned int cpu)
{
int locals = 0;
int remotes = 0;
@@ -1268,6 +1266,7 @@ void uv_bau_message_interrupt(struct pt_regs *regs)
if (bcp->uvhub_version == 2)
process_uv2_message(&msgdesc, bcp);
else
+ /* no error workaround for uv1 or uv3 */
bau_process_message(&msgdesc, bcp, 1);
msg++;
@@ -1325,8 +1324,12 @@ static void __init enable_timeouts(void)
*/
mmr_image |= (1L << SOFTACK_MSHIFT);
if (is_uv2_hub()) {
+ /* do not touch the legacy mode bit */
/* hw bug workaround; do not use extended status */
mmr_image &= ~(1L << UV2_EXT_SHFT);
+ } else if (is_uv3_hub()) {
+ mmr_image &= ~(1L << PREFETCH_HINT_SHFT);
+ mmr_image |= (1L << SB_STATUS_SHFT);
}
write_mmr_misc_control(pnode, mmr_image);
}
@@ -1692,7 +1695,7 @@ static void activation_descriptor_init(int node, int pnode, int base_pnode)
struct bau_desc *bau_desc;
struct bau_desc *bd2;
struct uv1_bau_msg_header *uv1_hdr;
- struct uv2_bau_msg_header *uv2_hdr;
+ struct uv2_3_bau_msg_header *uv2_3_hdr;
struct bau_control *bcp;
/*
@@ -1739,15 +1742,15 @@ static void activation_descriptor_init(int node, int pnode, int base_pnode)
*/
} else {
/*
- * BIOS uses legacy mode, but UV2 hardware always
+ * BIOS uses legacy mode, but uv2 and uv3 hardware always
* uses native mode for selective broadcasts.
*/
- uv2_hdr = &bd2->header.uv2_hdr;
- uv2_hdr->swack_flag = 1;
- uv2_hdr->base_dest_nasid =
+ uv2_3_hdr = &bd2->header.uv2_3_hdr;
+ uv2_3_hdr->swack_flag = 1;
+ uv2_3_hdr->base_dest_nasid =
UV_PNODE_TO_NASID(base_pnode);
- uv2_hdr->dest_subnodeid = UV_LB_SUBNODEID;
- uv2_hdr->command = UV_NET_ENDPOINT_INTD;
+ uv2_3_hdr->dest_subnodeid = UV_LB_SUBNODEID;
+ uv2_3_hdr->command = UV_NET_ENDPOINT_INTD;
}
}
for_each_present_cpu(cpu) {
@@ -1858,6 +1861,7 @@ static int calculate_destination_timeout(void)
ts_ns *= (mult1 * mult2);
ret = ts_ns / 1000;
} else {
+ /* same destination timeout for uv2 and uv3 */
/* 4 bits 0/1 for 10/80us base, 3 bits of multiplier */
mmr_image = uv_read_local_mmr(UVH_LB_BAU_MISC_CONTROL);
mmr_image = (mmr_image & UV_SA_MASK) >> UV_SA_SHFT;
@@ -2012,8 +2016,10 @@ static int scan_sock(struct socket_desc *sdp, struct uvhub_desc *bdp,
bcp->uvhub_version = 1;
else if (is_uv2_hub())
bcp->uvhub_version = 2;
+ else if (is_uv3_hub())
+ bcp->uvhub_version = 3;
else {
- printk(KERN_EMERG "uvhub version not 1 or 2\n");
+ printk(KERN_EMERG "uvhub version not 1, 2 or 3\n");
return 1;
}
bcp->uvhub_master = *hmasterp;
@@ -2138,9 +2144,10 @@ static int __init uv_bau_init(void)
}
vector = UV_BAU_MESSAGE;
- for_each_possible_blade(uvhub)
+ for_each_possible_blade(uvhub) {
if (uv_blade_nr_possible_cpus(uvhub))
init_uvhub(uvhub, vector, uv_base_pnode);
+ }
alloc_intr_gate(vector, uv_bau_message_intr1);
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index 61b04fe36e66..5a4affe025e8 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -10,7 +10,7 @@ VDSO32-$(CONFIG_X86_32) := y
VDSO32-$(CONFIG_COMPAT) := y
# files to link into the vdso
-vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o vdso-fakesections.o
+vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o
# files to link into kernel
obj-y += vma.o
@@ -37,7 +37,8 @@ vdso_img_sodbg := $(vdso_img-y:%=vdso%.so.dbg)
obj-y += $(vdso_img_objs)
targets += $(vdso_img_cfiles)
targets += $(vdso_img_sodbg)
-.SECONDARY: $(vdso_img-y:%=$(obj)/vdso-image-%.c)
+.SECONDARY: $(vdso_img-y:%=$(obj)/vdso-image-%.c) \
+ $(vdso_img-y:%=$(obj)/vdso%.so)
export CPPFLAGS_vdso.lds += -P -C
@@ -54,10 +55,10 @@ hostprogs-y += vdso2c
quiet_cmd_vdso2c = VDSO2C $@
define cmd_vdso2c
- $(obj)/vdso2c $< $@
+ $(obj)/vdso2c $< $(<:%.dbg=%) $@
endef
-$(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso2c FORCE
+$(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE
$(call if_changed,vdso2c)
#
@@ -113,6 +114,10 @@ $(obj)/%-x32.o: $(obj)/%.o FORCE
targets += vdsox32.lds $(vobjx32s-y)
+$(obj)/%.so: OBJCOPYFLAGS := -S
+$(obj)/%.so: $(obj)/%.so.dbg
+ $(call if_changed,objcopy)
+
$(obj)/vdsox32.so.dbg: $(src)/vdsox32.lds $(vobjx32s) FORCE
$(call if_changed,vdso)
@@ -134,7 +139,7 @@ override obj-dirs = $(dir $(obj)) $(obj)/vdso32/
targets += vdso32/vdso32.lds
targets += vdso32/note.o vdso32/vclock_gettime.o $(vdso32.so-y:%=vdso32/%.o)
-targets += vdso32/vclock_gettime.o vdso32/vdso-fakesections.o
+targets += vdso32/vclock_gettime.o
$(obj)/vdso32.o: $(vdso32-images:%=$(obj)/%)
@@ -156,7 +161,6 @@ $(vdso32-images:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
$(vdso32-images:%=$(obj)/%.dbg): $(obj)/vdso32-%.so.dbg: FORCE \
$(obj)/vdso32/vdso32.lds \
$(obj)/vdso32/vclock_gettime.o \
- $(obj)/vdso32/vdso-fakesections.o \
$(obj)/vdso32/note.o \
$(obj)/vdso32/%.o
$(call if_changed,vdso)
diff --git a/arch/x86/vdso/vdso-fakesections.c b/arch/x86/vdso/vdso-fakesections.c
deleted file mode 100644
index aa5fbfab20a5..000000000000
--- a/arch/x86/vdso/vdso-fakesections.c
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Copyright 2014 Andy Lutomirski
- * Subject to the GNU Public License, v.2
- *
- * String table for loadable section headers. See vdso2c.h for why
- * this exists.
- */
-
-const char fake_shstrtab[] __attribute__((section(".fake_shstrtab"))) =
- ".hash\0"
- ".dynsym\0"
- ".dynstr\0"
- ".gnu.version\0"
- ".gnu.version_d\0"
- ".dynamic\0"
- ".rodata\0"
- ".fake_shstrtab\0" /* Yay, self-referential code. */
- ".note\0"
- ".eh_frame_hdr\0"
- ".eh_frame\0"
- ".text";
diff --git a/arch/x86/vdso/vdso-layout.lds.S b/arch/x86/vdso/vdso-layout.lds.S
index 9197544eea9a..de2c921025f5 100644
--- a/arch/x86/vdso/vdso-layout.lds.S
+++ b/arch/x86/vdso/vdso-layout.lds.S
@@ -18,6 +18,25 @@
SECTIONS
{
+ /*
+ * User/kernel shared data is before the vDSO. This may be a little
+ * uglier than putting it after the vDSO, but it avoids issues with
+ * non-allocatable things that dangle past the end of the PT_LOAD
+ * segment.
+ */
+
+ vvar_start = . - 2 * PAGE_SIZE;
+ vvar_page = vvar_start;
+
+ /* Place all vvars at the offsets in asm/vvar.h. */
+#define EMIT_VVAR(name, offset) vvar_ ## name = vvar_page + offset;
+#define __VVAR_KERNEL_LDS
+#include <asm/vvar.h>
+#undef __VVAR_KERNEL_LDS
+#undef EMIT_VVAR
+
+ hpet_page = vvar_start + PAGE_SIZE;
+
. = SIZEOF_HEADERS;
.hash : { *(.hash) } :text
@@ -74,31 +93,6 @@ SECTIONS
.altinstructions : { *(.altinstructions) } :text
.altinstr_replacement : { *(.altinstr_replacement) } :text
- /*
- * The remainder of the vDSO consists of special pages that are
- * shared between the kernel and userspace. It needs to be at the
- * end so that it doesn't overlap the mapping of the actual
- * vDSO image.
- */
-
- . = ALIGN(PAGE_SIZE);
- vvar_page = .;
-
- /* Place all vvars at the offsets in asm/vvar.h. */
-#define EMIT_VVAR(name, offset) vvar_ ## name = vvar_page + offset;
-#define __VVAR_KERNEL_LDS
-#include <asm/vvar.h>
-#undef __VVAR_KERNEL_LDS
-#undef EMIT_VVAR
-
- . = vvar_page + PAGE_SIZE;
-
- hpet_page = .;
- . = . + PAGE_SIZE;
-
- . = ALIGN(PAGE_SIZE);
- end_mapping = .;
-
/DISCARD/ : {
*(.discard)
*(.discard.*)
diff --git a/arch/x86/vdso/vdso2c.c b/arch/x86/vdso/vdso2c.c
index 238dbe82776e..8627db24a7f6 100644
--- a/arch/x86/vdso/vdso2c.c
+++ b/arch/x86/vdso/vdso2c.c
@@ -1,3 +1,53 @@
+/*
+ * vdso2c - A vdso image preparation tool
+ * Copyright (c) 2014 Andy Lutomirski and others
+ * Licensed under the GPL v2
+ *
+ * vdso2c requires stripped and unstripped input. It would be trivial
+ * to fully strip the input in here, but, for reasons described below,
+ * we need to write a section table. Doing this is more or less
+ * equivalent to dropping all non-allocatable sections, but it's
+ * easier to let objcopy handle that instead of doing it ourselves.
+ * If we ever need to do something fancier than what objcopy provides,
+ * it would be straightforward to add here.
+ *
+ * We're keeping a section table for a few reasons:
+ *
+ * The Go runtime had a couple of bugs: it would read the section
+ * table to try to figure out how many dynamic symbols there were (it
+ * shouldn't have looked at the section table at all) and, if there
+ * were no SHT_SYNDYM section table entry, it would use an
+ * uninitialized value for the number of symbols. An empty DYNSYM
+ * table would work, but I see no reason not to write a valid one (and
+ * keep full performance for old Go programs). This hack is only
+ * needed on x86_64.
+ *
+ * The bug was introduced on 2012-08-31 by:
+ * https://code.google.com/p/go/source/detail?r=56ea40aac72b
+ * and was fixed on 2014-06-13 by:
+ * https://code.google.com/p/go/source/detail?r=fc1cd5e12595
+ *
+ * Binutils has issues debugging the vDSO: it reads the section table to
+ * find SHT_NOTE; it won't look at PT_NOTE for the in-memory vDSO, which
+ * would break build-id if we removed the section table. Binutils
+ * also requires that shstrndx != 0. See:
+ * https://sourceware.org/bugzilla/show_bug.cgi?id=17064
+ *
+ * elfutils might not look for PT_NOTE if there is a section table at
+ * all. I don't know whether this matters for any practical purpose.
+ *
+ * For simplicity, rather than hacking up a partial section table, we
+ * just write a mostly complete one. We omit non-dynamic symbols,
+ * though, since they're rather large.
+ *
+ * Once binutils gets fixed, we might be able to drop this for all but
+ * the 64-bit vdso, since build-id only works in kernel RPMs, and
+ * systems that update to new enough kernel RPMs will likely update
+ * binutils in sync. build-id has never worked for home-built kernel
+ * RPMs without manual symlinking, and I suspect that no one ever does
+ * that.
+ */
+
#include <inttypes.h>
#include <stdint.h>
#include <unistd.h>
@@ -20,9 +70,9 @@ const char *outfilename;
/* Symbols that we need in vdso2c. */
enum {
+ sym_vvar_start,
sym_vvar_page,
sym_hpet_page,
- sym_end_mapping,
sym_VDSO_FAKE_SECTION_TABLE_START,
sym_VDSO_FAKE_SECTION_TABLE_END,
};
@@ -38,9 +88,9 @@ struct vdso_sym {
};
struct vdso_sym required_syms[] = {
+ [sym_vvar_start] = {"vvar_start", true},
[sym_vvar_page] = {"vvar_page", true},
[sym_hpet_page] = {"hpet_page", true},
- [sym_end_mapping] = {"end_mapping", true},
[sym_VDSO_FAKE_SECTION_TABLE_START] = {
"VDSO_FAKE_SECTION_TABLE_START", false
},
@@ -61,7 +111,8 @@ static void fail(const char *format, ...)
va_start(ap, format);
fprintf(stderr, "Error: ");
vfprintf(stderr, format, ap);
- unlink(outfilename);
+ if (outfilename)
+ unlink(outfilename);
exit(1);
va_end(ap);
}
@@ -96,9 +147,11 @@ extern void bad_put_le(void);
#define NSYMS (sizeof(required_syms) / sizeof(required_syms[0]))
-#define BITSFUNC3(name, bits) name##bits
-#define BITSFUNC2(name, bits) BITSFUNC3(name, bits)
-#define BITSFUNC(name) BITSFUNC2(name, ELF_BITS)
+#define BITSFUNC3(name, bits, suffix) name##bits##suffix
+#define BITSFUNC2(name, bits, suffix) BITSFUNC3(name, bits, suffix)
+#define BITSFUNC(name) BITSFUNC2(name, ELF_BITS, )
+
+#define INT_BITS BITSFUNC2(int, ELF_BITS, _t)
#define ELF_BITS_XFORM2(bits, x) Elf##bits##_##x
#define ELF_BITS_XFORM(bits, x) ELF_BITS_XFORM2(bits, x)
@@ -112,30 +165,53 @@ extern void bad_put_le(void);
#include "vdso2c.h"
#undef ELF_BITS
-static void go(void *addr, size_t len, FILE *outfile, const char *name)
+static void go(void *raw_addr, size_t raw_len,
+ void *stripped_addr, size_t stripped_len,
+ FILE *outfile, const char *name)
{
- Elf64_Ehdr *hdr = (Elf64_Ehdr *)addr;
+ Elf64_Ehdr *hdr = (Elf64_Ehdr *)raw_addr;
if (hdr->e_ident[EI_CLASS] == ELFCLASS64) {
- go64(addr, len, outfile, name);
+ go64(raw_addr, raw_len, stripped_addr, stripped_len,
+ outfile, name);
} else if (hdr->e_ident[EI_CLASS] == ELFCLASS32) {
- go32(addr, len, outfile, name);
+ go32(raw_addr, raw_len, stripped_addr, stripped_len,
+ outfile, name);
} else {
fail("unknown ELF class\n");
}
}
+static void map_input(const char *name, void **addr, size_t *len, int prot)
+{
+ off_t tmp_len;
+
+ int fd = open(name, O_RDONLY);
+ if (fd == -1)
+ err(1, "%s", name);
+
+ tmp_len = lseek(fd, 0, SEEK_END);
+ if (tmp_len == (off_t)-1)
+ err(1, "lseek");
+ *len = (size_t)tmp_len;
+
+ *addr = mmap(NULL, tmp_len, prot, MAP_PRIVATE, fd, 0);
+ if (*addr == MAP_FAILED)
+ err(1, "mmap");
+
+ close(fd);
+}
+
int main(int argc, char **argv)
{
- int fd;
- off_t len;
- void *addr;
+ size_t raw_len, stripped_len;
+ void *raw_addr, *stripped_addr;
FILE *outfile;
char *name, *tmp;
int namelen;
- if (argc != 3) {
- printf("Usage: vdso2c INPUT OUTPUT\n");
+ if (argc != 4) {
+ printf("Usage: vdso2c RAW_INPUT STRIPPED_INPUT OUTPUT\n");
return 1;
}
@@ -143,7 +219,7 @@ int main(int argc, char **argv)
* Figure out the struct name. If we're writing to a .so file,
* generate raw output instead.
*/
- name = strdup(argv[2]);
+ name = strdup(argv[3]);
namelen = strlen(name);
if (namelen >= 3 && !strcmp(name + namelen - 3, ".so")) {
name = NULL;
@@ -159,26 +235,18 @@ int main(int argc, char **argv)
*tmp = '_';
}
- fd = open(argv[1], O_RDONLY);
- if (fd == -1)
- err(1, "%s", argv[1]);
-
- len = lseek(fd, 0, SEEK_END);
- if (len == (off_t)-1)
- err(1, "lseek");
-
- addr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
- if (addr == MAP_FAILED)
- err(1, "mmap");
+ map_input(argv[1], &raw_addr, &raw_len, PROT_READ);
+ map_input(argv[2], &stripped_addr, &stripped_len, PROT_READ);
- outfilename = argv[2];
+ outfilename = argv[3];
outfile = fopen(outfilename, "w");
if (!outfile)
err(1, "%s", argv[2]);
- go(addr, (size_t)len, outfile, name);
+ go(raw_addr, raw_len, stripped_addr, stripped_len, outfile, name);
- munmap(addr, len);
+ munmap(raw_addr, raw_len);
+ munmap(stripped_addr, stripped_len);
fclose(outfile);
return 0;
diff --git a/arch/x86/vdso/vdso2c.h b/arch/x86/vdso/vdso2c.h
index 11b65d4f9414..fd57829b30d8 100644
--- a/arch/x86/vdso/vdso2c.h
+++ b/arch/x86/vdso/vdso2c.h
@@ -4,139 +4,23 @@
* are built for 32-bit userspace.
*/
-/*
- * We're writing a section table for a few reasons:
- *
- * The Go runtime had a couple of bugs: it would read the section
- * table to try to figure out how many dynamic symbols there were (it
- * shouldn't have looked at the section table at all) and, if there
- * were no SHT_SYNDYM section table entry, it would use an
- * uninitialized value for the number of symbols. An empty DYNSYM
- * table would work, but I see no reason not to write a valid one (and
- * keep full performance for old Go programs). This hack is only
- * needed on x86_64.
- *
- * The bug was introduced on 2012-08-31 by:
- * https://code.google.com/p/go/source/detail?r=56ea40aac72b
- * and was fixed on 2014-06-13 by:
- * https://code.google.com/p/go/source/detail?r=fc1cd5e12595
- *
- * Binutils has issues debugging the vDSO: it reads the section table to
- * find SHT_NOTE; it won't look at PT_NOTE for the in-memory vDSO, which
- * would break build-id if we removed the section table. Binutils
- * also requires that shstrndx != 0. See:
- * https://sourceware.org/bugzilla/show_bug.cgi?id=17064
- *
- * elfutils might not look for PT_NOTE if there is a section table at
- * all. I don't know whether this matters for any practical purpose.
- *
- * For simplicity, rather than hacking up a partial section table, we
- * just write a mostly complete one. We omit non-dynamic symbols,
- * though, since they're rather large.
- *
- * Once binutils gets fixed, we might be able to drop this for all but
- * the 64-bit vdso, since build-id only works in kernel RPMs, and
- * systems that update to new enough kernel RPMs will likely update
- * binutils in sync. build-id has never worked for home-built kernel
- * RPMs without manual symlinking, and I suspect that no one ever does
- * that.
- */
-struct BITSFUNC(fake_sections)
-{
- ELF(Shdr) *table;
- unsigned long table_offset;
- int count, max_count;
-
- int in_shstrndx;
- unsigned long shstr_offset;
- const char *shstrtab;
- size_t shstrtab_len;
-
- int out_shstrndx;
-};
-
-static unsigned int BITSFUNC(find_shname)(struct BITSFUNC(fake_sections) *out,
- const char *name)
-{
- const char *outname = out->shstrtab;
- while (outname - out->shstrtab < out->shstrtab_len) {
- if (!strcmp(name, outname))
- return (outname - out->shstrtab) + out->shstr_offset;
- outname += strlen(outname) + 1;
- }
-
- if (*name)
- printf("Warning: could not find output name \"%s\"\n", name);
- return out->shstr_offset + out->shstrtab_len - 1; /* Use a null. */
-}
-
-static void BITSFUNC(init_sections)(struct BITSFUNC(fake_sections) *out)
-{
- if (!out->in_shstrndx)
- fail("didn't find the fake shstrndx\n");
-
- memset(out->table, 0, out->max_count * sizeof(ELF(Shdr)));
-
- if (out->max_count < 1)
- fail("we need at least two fake output sections\n");
-
- PUT_LE(&out->table[0].sh_type, SHT_NULL);
- PUT_LE(&out->table[0].sh_name, BITSFUNC(find_shname)(out, ""));
-
- out->count = 1;
-}
-
-static void BITSFUNC(copy_section)(struct BITSFUNC(fake_sections) *out,
- int in_idx, const ELF(Shdr) *in,
- const char *name)
-{
- uint64_t flags = GET_LE(&in->sh_flags);
-
- bool copy = flags & SHF_ALLOC &&
- (GET_LE(&in->sh_size) ||
- (GET_LE(&in->sh_type) != SHT_RELA &&
- GET_LE(&in->sh_type) != SHT_REL)) &&
- strcmp(name, ".altinstructions") &&
- strcmp(name, ".altinstr_replacement");
-
- if (!copy)
- return;
-
- if (out->count >= out->max_count)
- fail("too many copied sections (max = %d)\n", out->max_count);
-
- if (in_idx == out->in_shstrndx)
- out->out_shstrndx = out->count;
-
- out->table[out->count] = *in;
- PUT_LE(&out->table[out->count].sh_name,
- BITSFUNC(find_shname)(out, name));
-
- /* elfutils requires that a strtab have the correct type. */
- if (!strcmp(name, ".fake_shstrtab"))
- PUT_LE(&out->table[out->count].sh_type, SHT_STRTAB);
-
- out->count++;
-}
-
-static void BITSFUNC(go)(void *addr, size_t len,
+static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
+ void *stripped_addr, size_t stripped_len,
FILE *outfile, const char *name)
{
int found_load = 0;
unsigned long load_size = -1; /* Work around bogus warning */
- unsigned long data_size;
- ELF(Ehdr) *hdr = (ELF(Ehdr) *)addr;
+ unsigned long mapping_size;
+ ELF(Ehdr) *hdr = (ELF(Ehdr) *)raw_addr;
int i;
unsigned long j;
ELF(Shdr) *symtab_hdr = NULL, *strtab_hdr, *secstrings_hdr,
*alt_sec = NULL;
ELF(Dyn) *dyn = 0, *dyn_end = 0;
const char *secstrings;
- uint64_t syms[NSYMS] = {};
-
- struct BITSFUNC(fake_sections) fake_sections = {};
+ INT_BITS syms[NSYMS] = {};
- ELF(Phdr) *pt = (ELF(Phdr) *)(addr + GET_LE(&hdr->e_phoff));
+ ELF(Phdr) *pt = (ELF(Phdr) *)(raw_addr + GET_LE(&hdr->e_phoff));
/* Walk the segment table. */
for (i = 0; i < GET_LE(&hdr->e_phnum); i++) {
@@ -154,14 +38,16 @@ static void BITSFUNC(go)(void *addr, size_t len,
load_size = GET_LE(&pt[i].p_memsz);
found_load = 1;
} else if (GET_LE(&pt[i].p_type) == PT_DYNAMIC) {
- dyn = addr + GET_LE(&pt[i].p_offset);
- dyn_end = addr + GET_LE(&pt[i].p_offset) +
+ dyn = raw_addr + GET_LE(&pt[i].p_offset);
+ dyn_end = raw_addr + GET_LE(&pt[i].p_offset) +
GET_LE(&pt[i].p_memsz);
}
}
if (!found_load)
fail("no PT_LOAD seg\n");
- data_size = (load_size + 4095) / 4096 * 4096;
+
+ if (stripped_len < load_size)
+ fail("stripped input is too short\n");
/* Walk the dynamic table */
for (i = 0; dyn + i < dyn_end &&
@@ -173,11 +59,11 @@ static void BITSFUNC(go)(void *addr, size_t len,
}
/* Walk the section table */
- secstrings_hdr = addr + GET_LE(&hdr->e_shoff) +
+ secstrings_hdr = raw_addr + GET_LE(&hdr->e_shoff) +
GET_LE(&hdr->e_shentsize)*GET_LE(&hdr->e_shstrndx);
- secstrings = addr + GET_LE(&secstrings_hdr->sh_offset);
+ secstrings = raw_addr + GET_LE(&secstrings_hdr->sh_offset);
for (i = 0; i < GET_LE(&hdr->e_shnum); i++) {
- ELF(Shdr) *sh = addr + GET_LE(&hdr->e_shoff) +
+ ELF(Shdr) *sh = raw_addr + GET_LE(&hdr->e_shoff) +
GET_LE(&hdr->e_shentsize) * i;
if (GET_LE(&sh->sh_type) == SHT_SYMTAB)
symtab_hdr = sh;
@@ -190,7 +76,7 @@ static void BITSFUNC(go)(void *addr, size_t len,
if (!symtab_hdr)
fail("no symbol table\n");
- strtab_hdr = addr + GET_LE(&hdr->e_shoff) +
+ strtab_hdr = raw_addr + GET_LE(&hdr->e_shoff) +
GET_LE(&hdr->e_shentsize) * GET_LE(&symtab_hdr->sh_link);
/* Walk the symbol table */
@@ -198,9 +84,9 @@ static void BITSFUNC(go)(void *addr, size_t len,
i < GET_LE(&symtab_hdr->sh_size) / GET_LE(&symtab_hdr->sh_entsize);
i++) {
int k;
- ELF(Sym) *sym = addr + GET_LE(&symtab_hdr->sh_offset) +
+ ELF(Sym) *sym = raw_addr + GET_LE(&symtab_hdr->sh_offset) +
GET_LE(&symtab_hdr->sh_entsize) * i;
- const char *name = addr + GET_LE(&strtab_hdr->sh_offset) +
+ const char *name = raw_addr + GET_LE(&strtab_hdr->sh_offset) +
GET_LE(&sym->st_name);
for (k = 0; k < NSYMS; k++) {
@@ -209,51 +95,17 @@ static void BITSFUNC(go)(void *addr, size_t len,
fail("duplicate symbol %s\n",
required_syms[k].name);
}
+
+ /*
+ * Careful: we use negative addresses, but
+ * st_value is unsigned, so we rely
+ * on syms[k] being a signed type of the
+ * correct width.
+ */
syms[k] = GET_LE(&sym->st_value);
}
}
-
- if (!strcmp(name, "fake_shstrtab")) {
- ELF(Shdr) *sh;
-
- fake_sections.in_shstrndx = GET_LE(&sym->st_shndx);
- fake_sections.shstrtab = addr + GET_LE(&sym->st_value);
- fake_sections.shstrtab_len = GET_LE(&sym->st_size);
- sh = addr + GET_LE(&hdr->e_shoff) +
- GET_LE(&hdr->e_shentsize) *
- fake_sections.in_shstrndx;
- fake_sections.shstr_offset = GET_LE(&sym->st_value) -
- GET_LE(&sh->sh_addr);
- }
- }
-
- /* Build the output section table. */
- if (!syms[sym_VDSO_FAKE_SECTION_TABLE_START] ||
- !syms[sym_VDSO_FAKE_SECTION_TABLE_END])
- fail("couldn't find fake section table\n");
- if ((syms[sym_VDSO_FAKE_SECTION_TABLE_END] -
- syms[sym_VDSO_FAKE_SECTION_TABLE_START]) % sizeof(ELF(Shdr)))
- fail("fake section table size isn't a multiple of sizeof(Shdr)\n");
- fake_sections.table = addr + syms[sym_VDSO_FAKE_SECTION_TABLE_START];
- fake_sections.table_offset = syms[sym_VDSO_FAKE_SECTION_TABLE_START];
- fake_sections.max_count = (syms[sym_VDSO_FAKE_SECTION_TABLE_END] -
- syms[sym_VDSO_FAKE_SECTION_TABLE_START]) /
- sizeof(ELF(Shdr));
-
- BITSFUNC(init_sections)(&fake_sections);
- for (i = 0; i < GET_LE(&hdr->e_shnum); i++) {
- ELF(Shdr) *sh = addr + GET_LE(&hdr->e_shoff) +
- GET_LE(&hdr->e_shentsize) * i;
- BITSFUNC(copy_section)(&fake_sections, i, sh,
- secstrings + GET_LE(&sh->sh_name));
}
- if (!fake_sections.out_shstrndx)
- fail("didn't generate shstrndx?!?\n");
-
- PUT_LE(&hdr->e_shoff, fake_sections.table_offset);
- PUT_LE(&hdr->e_shentsize, sizeof(ELF(Shdr)));
- PUT_LE(&hdr->e_shnum, fake_sections.count);
- PUT_LE(&hdr->e_shstrndx, fake_sections.out_shstrndx);
/* Validate mapping addresses. */
for (i = 0; i < sizeof(special_pages) / sizeof(special_pages[0]); i++) {
@@ -263,21 +115,23 @@ static void BITSFUNC(go)(void *addr, size_t len,
if (syms[i] % 4096)
fail("%s must be a multiple of 4096\n",
required_syms[i].name);
- if (syms[i] < data_size)
- fail("%s must be after the text mapping\n",
+ if (syms[sym_vvar_start] > syms[i] + 4096)
+ fail("%s underruns begin_vvar\n",
required_syms[i].name);
- if (syms[sym_end_mapping] < syms[i] + 4096)
- fail("%s overruns end_mapping\n",
+ if (syms[i] + 4096 > 0)
+ fail("%s is on the wrong side of the vdso text\n",
required_syms[i].name);
}
- if (syms[sym_end_mapping] % 4096)
- fail("end_mapping must be a multiple of 4096\n");
+ if (syms[sym_vvar_start] % 4096)
+ fail("vvar_begin must be a multiple of 4096\n");
if (!name) {
- fwrite(addr, load_size, 1, outfile);
+ fwrite(stripped_addr, stripped_len, 1, outfile);
return;
}
+ mapping_size = (stripped_len + 4095) / 4096 * 4096;
+
fprintf(outfile, "/* AUTOMATICALLY GENERATED -- DO NOT EDIT */\n\n");
fprintf(outfile, "#include <linux/linkage.h>\n");
fprintf(outfile, "#include <asm/page_types.h>\n");
@@ -285,20 +139,21 @@ static void BITSFUNC(go)(void *addr, size_t len,
fprintf(outfile, "\n");
fprintf(outfile,
"static unsigned char raw_data[%lu] __page_aligned_data = {",
- data_size);
- for (j = 0; j < load_size; j++) {
+ mapping_size);
+ for (j = 0; j < stripped_len; j++) {
if (j % 10 == 0)
fprintf(outfile, "\n\t");
- fprintf(outfile, "0x%02X, ", (int)((unsigned char *)addr)[j]);
+ fprintf(outfile, "0x%02X, ",
+ (int)((unsigned char *)stripped_addr)[j]);
}
fprintf(outfile, "\n};\n\n");
fprintf(outfile, "static struct page *pages[%lu];\n\n",
- data_size / 4096);
+ mapping_size / 4096);
fprintf(outfile, "const struct vdso_image %s = {\n", name);
fprintf(outfile, "\t.data = raw_data,\n");
- fprintf(outfile, "\t.size = %lu,\n", data_size);
+ fprintf(outfile, "\t.size = %lu,\n", mapping_size);
fprintf(outfile, "\t.text_mapping = {\n");
fprintf(outfile, "\t\t.name = \"[vdso]\",\n");
fprintf(outfile, "\t\t.pages = pages,\n");
@@ -311,8 +166,8 @@ static void BITSFUNC(go)(void *addr, size_t len,
}
for (i = 0; i < NSYMS; i++) {
if (required_syms[i].export && syms[i])
- fprintf(outfile, "\t.sym_%s = 0x%" PRIx64 ",\n",
- required_syms[i].name, syms[i]);
+ fprintf(outfile, "\t.sym_%s = %" PRIi64 ",\n",
+ required_syms[i].name, (int64_t)syms[i]);
}
fprintf(outfile, "};\n");
}
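With the vvar pages moved in front of the vDSO text, symbols such as vvar_start have small negative values relative to the text, which is why syms[] is now declared with the signed INT_BITS type: the unsigned st_value read from the ELF symbol must keep its sign through the range checks and the generated .sym_* initializers. A worked illustration with made-up numbers (plain userspace C, not part of the patch):

#include <inttypes.h>
#include <stdio.h>

int main(void)
{
	/*
	 * Two 4096-byte vvar pages in front of the text: vvar_start = -8192.
	 * The ELF symbol stores the two's-complement bit pattern; assigning
	 * it to a signed 64-bit variable recovers the negative offset.
	 */
	uint64_t st_value = (uint64_t)-8192;
	int64_t vvar_start = (int64_t)st_value;

	printf("vvar_start = %" PRIi64 "\n", vvar_start);		/* -8192 */
	printf("vvar_start %% 4096 = %" PRIi64 "\n", vvar_start % 4096); /* 0 */
	return 0;
}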
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index 5a5176de8d0a..dbef622bb5af 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -93,7 +93,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
- unsigned long addr;
+ unsigned long addr, text_start;
int ret = 0;
static struct page *no_pages[] = {NULL};
static struct vm_special_mapping vvar_mapping = {
@@ -103,26 +103,28 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
if (calculate_addr) {
addr = vdso_addr(current->mm->start_stack,
- image->sym_end_mapping);
+ image->size - image->sym_vvar_start);
} else {
addr = 0;
}
down_write(&mm->mmap_sem);
- addr = get_unmapped_area(NULL, addr, image->sym_end_mapping, 0, 0);
+ addr = get_unmapped_area(NULL, addr,
+ image->size - image->sym_vvar_start, 0, 0);
if (IS_ERR_VALUE(addr)) {
ret = addr;
goto up_fail;
}
- current->mm->context.vdso = (void __user *)addr;
+ text_start = addr - image->sym_vvar_start;
+ current->mm->context.vdso = (void __user *)text_start;
/*
* MAYWRITE to allow gdb to COW and set breakpoints
*/
vma = _install_special_mapping(mm,
- addr,
+ text_start,
image->size,
VM_READ|VM_EXEC|
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
@@ -134,8 +136,8 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
}
vma = _install_special_mapping(mm,
- addr + image->size,
- image->sym_end_mapping - image->size,
+ addr,
+ -image->sym_vvar_start,
VM_READ,
&vvar_mapping);
@@ -146,7 +148,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
if (image->sym_vvar_page)
ret = remap_pfn_range(vma,
- addr + image->sym_vvar_page,
+ text_start + image->sym_vvar_page,
__pa_symbol(&__vvar_page) >> PAGE_SHIFT,
PAGE_SIZE,
PAGE_READONLY);
@@ -157,7 +159,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
#ifdef CONFIG_HPET_TIMER
if (hpet_address && image->sym_hpet_page) {
ret = io_remap_pfn_range(vma,
- addr + image->sym_hpet_page,
+ text_start + image->sym_hpet_page,
hpet_address >> PAGE_SHIFT,
PAGE_SIZE,
pgprot_noncached(PAGE_READONLY));
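In map_vdso() a single get_unmapped_area() call now covers both the vvar area and the text: image->sym_vvar_start is negative, so image->size - image->sym_vvar_start is the total mapping length and text_start = addr - image->sym_vvar_start is where the vDSO text begins. A worked example of the arithmetic with made-up sizes (the address is illustrative):

#include <stdio.h>

int main(void)
{
	long image_size     = 8192;		/* image->size: two text pages */
	long sym_vvar_start = -8192;		/* image->sym_vvar_start */
	unsigned long addr  = 0x7ffff7ffa000UL;	/* get_unmapped_area() result */

	unsigned long map_len    = image_size - sym_vvar_start;	/* 16384 */
	unsigned long text_start = addr - sym_vvar_start;		/* addr + 8192 */

	printf("whole mapping: [%#lx, %#lx)\n", addr, addr + map_len);
	printf("vvar area:     [%#lx, %#lx)  len %ld\n",
	       addr, text_start, -sym_vvar_start);
	printf("vDSO text:     [%#lx, %#lx)\n", text_start, text_start + image_size);
	return 0;
}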