summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorStephen Rothwell <sfr@canb.auug.org.au>2011-03-01 16:00:39 +1100
committerStephen Rothwell <sfr@canb.auug.org.au>2011-03-01 16:00:39 +1100
commit97481cb6a44e877579b686e92e8f89fb8588b76e (patch)
tree76094d084d924ea191110926fac7bcc16b5af267 /arch
parent1bf277bdb1cd998bd02c9a5b457c3b211086d961 (diff)
parent6a254c0c7bdfa26245777bee5d63cdfa6bba1ff5 (diff)
Merge remote-tracking branch 'tip/auto-latest'
Conflicts: arch/x86/kernel/acpi/sleep.c
Diffstat (limited to 'arch')
-rw-r--r--arch/alpha/include/asm/rwsem.h36
-rw-r--r--arch/alpha/kernel/time.c8
-rw-r--r--arch/arm/kernel/time.c4
-rw-r--r--arch/arm/mach-clps711x/include/mach/time.h2
-rw-r--r--arch/blackfin/kernel/time.c6
-rw-r--r--arch/cris/arch-v10/kernel/time.c4
-rw-r--r--arch/cris/arch-v32/kernel/time.c6
-rw-r--r--arch/frv/kernel/time.c14
-rw-r--r--arch/h8300/kernel/time.c4
-rw-r--r--arch/h8300/kernel/timer/timer8.c2
-rw-r--r--arch/ia64/include/asm/rwsem.h37
-rw-r--r--arch/ia64/kernel/time.c19
-rw-r--r--arch/ia64/xen/time.c13
-rw-r--r--arch/m32r/kernel/time.c5
-rw-r--r--arch/m68k/bvme6000/config.c4
-rw-r--r--arch/m68k/kernel/time.c4
-rw-r--r--arch/m68k/mvme147/config.c4
-rw-r--r--arch/m68k/mvme16x/config.c4
-rw-r--r--arch/m68k/sun3/sun3ints.c2
-rw-r--r--arch/m68knommu/kernel/time.c8
-rw-r--r--arch/mips/kernel/vpe.c4
-rw-r--r--arch/mn10300/kernel/time.c6
-rw-r--r--arch/parisc/kernel/time.c7
-rw-r--r--arch/powerpc/include/asm/rwsem.h51
-rw-r--r--arch/s390/include/asm/rwsem.h63
-rw-r--r--arch/sh/include/asm/rwsem.h56
-rw-r--r--arch/sparc/include/asm/rwsem.h46
-rw-r--r--arch/sparc/kernel/pcic.c4
-rw-r--r--arch/sparc/kernel/time_32.c9
-rw-r--r--arch/sparc/lib/atomic32.c2
-rw-r--r--arch/um/Kconfig.common1
-rw-r--r--arch/um/drivers/ubd_kern.c2
-rw-r--r--arch/um/kernel/irq.c31
-rw-r--r--arch/x86/Kconfig30
-rw-r--r--arch/x86/ia32/ia32entry.S1
-rw-r--r--arch/x86/include/asm/acpi.h14
-rw-r--r--arch/x86/include/asm/amd_nb.h14
-rw-r--r--arch/x86/include/asm/apic.h38
-rw-r--r--arch/x86/include/asm/apicdef.h12
-rw-r--r--arch/x86/include/asm/bootparam.h1
-rw-r--r--arch/x86/include/asm/cpufeature.h2
-rw-r--r--arch/x86/include/asm/e820.h2
-rw-r--r--arch/x86/include/asm/entry_arch.h5
-rw-r--r--arch/x86/include/asm/hw_irq.h24
-rw-r--r--arch/x86/include/asm/init.h6
-rw-r--r--arch/x86/include/asm/io_apic.h44
-rw-r--r--arch/x86/include/asm/ipi.h8
-rw-r--r--arch/x86/include/asm/irq.h3
-rw-r--r--arch/x86/include/asm/irq_controller.h12
-rw-r--r--arch/x86/include/asm/irq_vectors.h45
-rw-r--r--arch/x86/include/asm/kdebug.h1
-rw-r--r--arch/x86/include/asm/mpspec.h3
-rw-r--r--arch/x86/include/asm/nmi.h1
-rw-r--r--arch/x86/include/asm/numa.h52
-rw-r--r--arch/x86/include/asm/numa_32.h7
-rw-r--r--arch/x86/include/asm/numa_64.h23
-rw-r--r--arch/x86/include/asm/olpc_ofw.h14
-rw-r--r--arch/x86/include/asm/page_types.h11
-rw-r--r--arch/x86/include/asm/processor.h4
-rw-r--r--arch/x86/include/asm/prom.h70
-rw-r--r--arch/x86/include/asm/reboot.h5
-rw-r--r--arch/x86/include/asm/rwsem.h80
-rw-r--r--arch/x86/include/asm/segment.h12
-rw-r--r--arch/x86/include/asm/smp.h10
-rw-r--r--arch/x86/include/asm/system.h2
-rw-r--r--arch/x86/include/asm/topology.h19
-rw-r--r--arch/x86/include/asm/trampoline.h33
-rw-r--r--arch/x86/include/asm/unistd_32.h3
-rw-r--r--arch/x86/include/asm/unistd_64.h2
-rw-r--r--arch/x86/include/asm/x86_init.h2
-rw-r--r--arch/x86/kernel/Makefile5
-rw-r--r--arch/x86/kernel/acpi/boot.c8
-rw-r--r--arch/x86/kernel/acpi/realmode/wakeup.S21
-rw-r--r--arch/x86/kernel/acpi/realmode/wakeup.h5
-rw-r--r--arch/x86/kernel/acpi/realmode/wakeup.lds.S28
-rw-r--r--arch/x86/kernel/acpi/sleep.c64
-rw-r--r--arch/x86/kernel/acpi/sleep.h3
-rw-r--r--arch/x86/kernel/acpi/wakeup_rm.S10
-rw-r--r--arch/x86/kernel/amd_nb.c84
-rw-r--r--arch/x86/kernel/apb_timer.c60
-rw-r--r--arch/x86/kernel/aperture_64.c33
-rw-r--r--arch/x86/kernel/apic/apic.c87
-rw-r--r--arch/x86/kernel/apic/apic_flat_64.c4
-rw-r--r--arch/x86/kernel/apic/apic_noop.c26
-rw-r--r--arch/x86/kernel/apic/bigsmp_32.c34
-rw-r--r--arch/x86/kernel/apic/es7000_32.c35
-rw-r--r--arch/x86/kernel/apic/hw_nmi.c1
-rw-r--r--arch/x86/kernel/apic/io_apic.c265
-rw-r--r--arch/x86/kernel/apic/ipi.c12
-rw-r--r--arch/x86/kernel/apic/numaq_32.c21
-rw-r--r--arch/x86/kernel/apic/probe_32.c10
-rw-r--r--arch/x86/kernel/apic/summit_32.c47
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c2
-rw-r--r--arch/x86/kernel/apic/x2apic_phys.c2
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c2
-rw-r--r--arch/x86/kernel/apm_32.c12
-rw-r--r--arch/x86/kernel/asm-offsets.c65
-rw-r--r--arch/x86/kernel/asm-offsets_32.c69
-rw-r--r--arch/x86/kernel/asm-offsets_64.c90
-rw-r--r--arch/x86/kernel/cpu/amd.c61
-rw-r--r--arch/x86/kernel/cpu/common.c2
-rw-r--r--arch/x86/kernel/cpu/intel.c5
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c80
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c7
-rw-r--r--arch/x86/kernel/cpu/perf_event.c86
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd.c175
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c4
-rw-r--r--arch/x86/kernel/cpu/perf_event_p4.c8
-rw-r--r--arch/x86/kernel/cpu/perf_event_p6.c4
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c4
-rw-r--r--arch/x86/kernel/devicetree.c441
-rw-r--r--arch/x86/kernel/dumpstack.c25
-rw-r--r--arch/x86/kernel/e820.c18
-rw-r--r--arch/x86/kernel/entry_32.S2
-rw-r--r--arch/x86/kernel/entry_64.S5
-rw-r--r--arch/x86/kernel/head32.c9
-rw-r--r--arch/x86/kernel/head_32.S10
-rw-r--r--arch/x86/kernel/head_64.S3
-rw-r--r--arch/x86/kernel/ioport.c20
-rw-r--r--arch/x86/kernel/irq.c11
-rw-r--r--arch/x86/kernel/irqinit.c88
-rw-r--r--arch/x86/kernel/kgdb.c9
-rw-r--r--arch/x86/kernel/microcode_amd.c188
-rw-r--r--arch/x86/kernel/microcode_core.c6
-rw-r--r--arch/x86/kernel/process.c9
-rw-r--r--arch/x86/kernel/reboot.c120
-rw-r--r--arch/x86/kernel/reboot_32.S135
-rw-r--r--arch/x86/kernel/rtc.c3
-rw-r--r--arch/x86/kernel/setup.c93
-rw-r--r--arch/x86/kernel/setup_percpu.c11
-rw-r--r--arch/x86/kernel/smpboot.c134
-rw-r--r--arch/x86/kernel/syscall_table_32.S1
-rw-r--r--arch/x86/kernel/trampoline.c42
-rw-r--r--arch/x86/kernel/trampoline_32.S15
-rw-r--r--arch/x86/kernel/trampoline_64.S28
-rw-r--r--arch/x86/kernel/vmlinux.lds.S15
-rw-r--r--arch/x86/kernel/x8664_ksyms_64.c1
-rw-r--r--arch/x86/kernel/x86_init.c1
-rw-r--r--arch/x86/lib/memmove_64.S197
-rw-r--r--arch/x86/lib/memmove_64.c192
-rw-r--r--arch/x86/mm/Makefile1
-rw-r--r--arch/x86/mm/amdtopology_64.c142
-rw-r--r--arch/x86/mm/init.c56
-rw-r--r--arch/x86/mm/init_32.c11
-rw-r--r--arch/x86/mm/init_64.c126
-rw-r--r--arch/x86/mm/numa.c212
-rw-r--r--arch/x86/mm/numa_32.c10
-rw-r--r--arch/x86/mm/numa_64.c967
-rw-r--r--arch/x86/mm/numa_emulation.c475
-rw-r--r--arch/x86/mm/numa_internal.h31
-rw-r--r--arch/x86/mm/srat_32.c6
-rw-r--r--arch/x86/mm/srat_64.c367
-rw-r--r--arch/x86/mm/tlb.c11
-rw-r--r--arch/x86/pci/amd_bus.c2
-rw-r--r--arch/x86/pci/ce4100.c2
-rw-r--r--arch/x86/platform/ce4100/ce4100.c24
-rw-r--r--arch/x86/platform/ce4100/falconfalls.dts428
-rw-r--r--arch/x86/platform/mrst/mrst.c2
-rw-r--r--arch/x86/platform/mrst/vrtc.c16
-rw-r--r--arch/x86/platform/olpc/Makefile4
-rw-r--r--arch/x86/xen/mmu.c2
-rw-r--r--arch/x86/xen/xen-head.S4
-rw-r--r--arch/xtensa/include/asm/rwsem.h37
-rw-r--r--arch/xtensa/kernel/time.c6
164 files changed, 4101 insertions, 3297 deletions
diff --git a/arch/alpha/include/asm/rwsem.h b/arch/alpha/include/asm/rwsem.h
index 1570c0b54336..a83bbea62c67 100644
--- a/arch/alpha/include/asm/rwsem.h
+++ b/arch/alpha/include/asm/rwsem.h
@@ -13,44 +13,13 @@
#ifdef __KERNEL__
#include <linux/compiler.h>
-#include <linux/list.h>
-#include <linux/spinlock.h>
-struct rwsem_waiter;
-
-extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *);
-extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
-
-/*
- * the semaphore definition
- */
-struct rw_semaphore {
- long count;
#define RWSEM_UNLOCKED_VALUE 0x0000000000000000L
#define RWSEM_ACTIVE_BIAS 0x0000000000000001L
#define RWSEM_ACTIVE_MASK 0x00000000ffffffffL
#define RWSEM_WAITING_BIAS (-0x0000000100000000L)
#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
- spinlock_t wait_lock;
- struct list_head wait_list;
-};
-
-#define __RWSEM_INITIALIZER(name) \
- { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \
- LIST_HEAD_INIT((name).wait_list) }
-
-#define DECLARE_RWSEM(name) \
- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-static inline void init_rwsem(struct rw_semaphore *sem)
-{
- sem->count = RWSEM_UNLOCKED_VALUE;
- spin_lock_init(&sem->wait_lock);
- INIT_LIST_HEAD(&sem->wait_list);
-}
static inline void __down_read(struct rw_semaphore *sem)
{
@@ -250,10 +219,5 @@ static inline long rwsem_atomic_update(long val, struct rw_semaphore *sem)
#endif
}
-static inline int rwsem_is_locked(struct rw_semaphore *sem)
-{
- return (sem->count != 0);
-}
-
#endif /* __KERNEL__ */
#endif /* _ALPHA_RWSEM_H */
diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c
index c1f3e7cb82a4..a58e84f1a63b 100644
--- a/arch/alpha/kernel/time.c
+++ b/arch/alpha/kernel/time.c
@@ -159,7 +159,7 @@ void read_persistent_clock(struct timespec *ts)
/*
* timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
+ * as well as call the "xtime_update()" routine every clocktick
*/
irqreturn_t timer_interrupt(int irq, void *dev)
{
@@ -172,8 +172,6 @@ irqreturn_t timer_interrupt(int irq, void *dev)
profile_tick(CPU_PROFILING);
#endif
- write_seqlock(&xtime_lock);
-
/*
* Calculate how many ticks have passed since the last update,
* including any previous partial leftover. Save any resulting
@@ -187,9 +185,7 @@ irqreturn_t timer_interrupt(int irq, void *dev)
nticks = delta >> FIX_SHIFT;
if (nticks)
- do_timer(nticks);
-
- write_sequnlock(&xtime_lock);
+ xtime_update(nticks);
if (test_irq_work_pending()) {
clear_irq_work_pending();
diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c
index 3d76bf233734..1ff46cabc7ef 100644
--- a/arch/arm/kernel/time.c
+++ b/arch/arm/kernel/time.c
@@ -107,9 +107,7 @@ void timer_tick(void)
{
profile_tick(CPU_PROFILING);
do_leds();
- write_seqlock(&xtime_lock);
- do_timer(1);
- write_sequnlock(&xtime_lock);
+ xtime_update(1);
#ifndef CONFIG_SMP
update_process_times(user_mode(get_irq_regs()));
#endif
diff --git a/arch/arm/mach-clps711x/include/mach/time.h b/arch/arm/mach-clps711x/include/mach/time.h
index 8fe283ccd1f3..61fef9129c6a 100644
--- a/arch/arm/mach-clps711x/include/mach/time.h
+++ b/arch/arm/mach-clps711x/include/mach/time.h
@@ -30,7 +30,7 @@ p720t_timer_interrupt(int irq, void *dev_id)
{
struct pt_regs *regs = get_irq_regs();
do_leds();
- do_timer(1);
+ xtime_update(1);
#ifndef CONFIG_SMP
update_process_times(user_mode(regs));
#endif
diff --git a/arch/blackfin/kernel/time.c b/arch/blackfin/kernel/time.c
index c9113619029f..8d73724c0092 100644
--- a/arch/blackfin/kernel/time.c
+++ b/arch/blackfin/kernel/time.c
@@ -114,16 +114,14 @@ u32 arch_gettimeoffset(void)
/*
* timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
+ * as well as call the "xtime_update()" routine every clocktick
*/
#ifdef CONFIG_CORE_TIMER_IRQ_L1
__attribute__((l1_text))
#endif
irqreturn_t timer_interrupt(int irq, void *dummy)
{
- write_seqlock(&xtime_lock);
- do_timer(1);
- write_sequnlock(&xtime_lock);
+ xtime_update(1);
#ifdef CONFIG_IPIPE
update_root_process_times(get_irq_regs());
diff --git a/arch/cris/arch-v10/kernel/time.c b/arch/cris/arch-v10/kernel/time.c
index 00eb36f8debf..20c85b5dc7d0 100644
--- a/arch/cris/arch-v10/kernel/time.c
+++ b/arch/cris/arch-v10/kernel/time.c
@@ -140,7 +140,7 @@ stop_watchdog(void)
/*
* timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
+ * as well as call the "xtime_update()" routine every clocktick
*/
//static unsigned short myjiff; /* used by our debug routine print_timestamp */
@@ -176,7 +176,7 @@ timer_interrupt(int irq, void *dev_id)
/* call the real timer interrupt handler */
- do_timer(1);
+ xtime_update(1);
cris_do_profile(regs); /* Save profiling information */
return IRQ_HANDLED;
diff --git a/arch/cris/arch-v32/kernel/time.c b/arch/cris/arch-v32/kernel/time.c
index a545211e999d..bb978ede8985 100644
--- a/arch/cris/arch-v32/kernel/time.c
+++ b/arch/cris/arch-v32/kernel/time.c
@@ -183,7 +183,7 @@ void handle_watchdog_bite(struct pt_regs *regs)
/*
* timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick.
+ * as well as call the "xtime_update()" routine every clocktick.
*/
extern void cris_do_profile(struct pt_regs *regs);
@@ -216,9 +216,7 @@ static inline irqreturn_t timer_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
/* Call the real timer interrupt handler */
- write_seqlock(&xtime_lock);
- do_timer(1);
- write_sequnlock(&xtime_lock);
+ xtime_update(1);
return IRQ_HANDLED;
}
diff --git a/arch/frv/kernel/time.c b/arch/frv/kernel/time.c
index 0ddbbae83cb2..b457de496b70 100644
--- a/arch/frv/kernel/time.c
+++ b/arch/frv/kernel/time.c
@@ -50,21 +50,13 @@ static struct irqaction timer_irq = {
/*
* timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
+ * as well as call the "xtime_update()" routine every clocktick
*/
static irqreturn_t timer_interrupt(int irq, void *dummy)
{
profile_tick(CPU_PROFILING);
- /*
- * Here we are in the timer irq handler. We just have irqs locally
- * disabled but we don't know if the timer_bh is running on the other
- * CPU. We need to avoid to SMP race with it. NOTE: we don't need
- * the irq version of write_lock because as just said we have irq
- * locally disabled. -arca
- */
- write_seqlock(&xtime_lock);
- do_timer(1);
+ xtime_update(1);
#ifdef CONFIG_HEARTBEAT
static unsigned short n;
@@ -72,8 +64,6 @@ static irqreturn_t timer_interrupt(int irq, void *dummy)
__set_LEDS(n);
#endif /* CONFIG_HEARTBEAT */
- write_sequnlock(&xtime_lock);
-
update_process_times(user_mode(get_irq_regs()));
return IRQ_HANDLED;
diff --git a/arch/h8300/kernel/time.c b/arch/h8300/kernel/time.c
index 165005aff9df..32263a138aa6 100644
--- a/arch/h8300/kernel/time.c
+++ b/arch/h8300/kernel/time.c
@@ -35,9 +35,7 @@ void h8300_timer_tick(void)
{
if (current->pid)
profile_tick(CPU_PROFILING);
- write_seqlock(&xtime_lock);
- do_timer(1);
- write_sequnlock(&xtime_lock);
+ xtime_update(1);
update_process_times(user_mode(get_irq_regs()));
}
diff --git a/arch/h8300/kernel/timer/timer8.c b/arch/h8300/kernel/timer/timer8.c
index 3946c0fa8374..7a1533fad47d 100644
--- a/arch/h8300/kernel/timer/timer8.c
+++ b/arch/h8300/kernel/timer/timer8.c
@@ -61,7 +61,7 @@
/*
* timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
+ * as well as call the "xtime_update()" routine every clocktick
*/
static irqreturn_t timer_interrupt(int irq, void *dev_id)
diff --git a/arch/ia64/include/asm/rwsem.h b/arch/ia64/include/asm/rwsem.h
index 215d5454c7d3..3027e7516d85 100644
--- a/arch/ia64/include/asm/rwsem.h
+++ b/arch/ia64/include/asm/rwsem.h
@@ -25,20 +25,8 @@
#error "Please don't include <asm/rwsem.h> directly, use <linux/rwsem.h> instead."
#endif
-#include <linux/list.h>
-#include <linux/spinlock.h>
-
#include <asm/intrinsics.h>
-/*
- * the semaphore definition
- */
-struct rw_semaphore {
- signed long count;
- spinlock_t wait_lock;
- struct list_head wait_list;
-};
-
#define RWSEM_UNLOCKED_VALUE __IA64_UL_CONST(0x0000000000000000)
#define RWSEM_ACTIVE_BIAS (1L)
#define RWSEM_ACTIVE_MASK (0xffffffffL)
@@ -46,26 +34,6 @@ struct rw_semaphore {
#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
-#define __RWSEM_INITIALIZER(name) \
- { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \
- LIST_HEAD_INIT((name).wait_list) }
-
-#define DECLARE_RWSEM(name) \
- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
-
-static inline void
-init_rwsem (struct rw_semaphore *sem)
-{
- sem->count = RWSEM_UNLOCKED_VALUE;
- spin_lock_init(&sem->wait_lock);
- INIT_LIST_HEAD(&sem->wait_list);
-}
-
/*
* lock for reading
*/
@@ -174,9 +142,4 @@ __downgrade_write (struct rw_semaphore *sem)
#define rwsem_atomic_add(delta, sem) atomic64_add(delta, (atomic64_t *)(&(sem)->count))
#define rwsem_atomic_update(delta, sem) atomic64_add_return(delta, (atomic64_t *)(&(sem)->count))
-static inline int rwsem_is_locked(struct rw_semaphore *sem)
-{
- return (sem->count != 0);
-}
-
#endif /* _ASM_IA64_RWSEM_H */
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 9702fa92489e..156ad803d5b7 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -190,19 +190,10 @@ timer_interrupt (int irq, void *dev_id)
new_itm += local_cpu_data->itm_delta;
- if (smp_processor_id() == time_keeper_id) {
- /*
- * Here we are in the timer irq handler. We have irqs locally
- * disabled, but we don't know if the timer_bh is running on
- * another CPU. We need to avoid to SMP race by acquiring the
- * xtime_lock.
- */
- write_seqlock(&xtime_lock);
- do_timer(1);
- local_cpu_data->itm_next = new_itm;
- write_sequnlock(&xtime_lock);
- } else
- local_cpu_data->itm_next = new_itm;
+ if (smp_processor_id() == time_keeper_id)
+ xtime_update(1);
+
+ local_cpu_data->itm_next = new_itm;
if (time_after(new_itm, ia64_get_itc()))
break;
@@ -222,7 +213,7 @@ skip_process_time_accounting:
* comfort, we increase the safety margin by
* intentionally dropping the next tick(s). We do NOT
* update itm.next because that would force us to call
- * do_timer() which in turn would let our clock run
+ * xtime_update() which in turn would let our clock run
* too fast (with the potentially devastating effect
* of losing monotony of time).
*/
diff --git a/arch/ia64/xen/time.c b/arch/ia64/xen/time.c
index c1c544513e8d..1f8244a78bee 100644
--- a/arch/ia64/xen/time.c
+++ b/arch/ia64/xen/time.c
@@ -139,14 +139,11 @@ consider_steal_time(unsigned long new_itm)
run_posix_cpu_timers(p);
delta_itm += local_cpu_data->itm_delta * (stolen + blocked);
- if (cpu == time_keeper_id) {
- write_seqlock(&xtime_lock);
- do_timer(stolen + blocked);
- local_cpu_data->itm_next = delta_itm + new_itm;
- write_sequnlock(&xtime_lock);
- } else {
- local_cpu_data->itm_next = delta_itm + new_itm;
- }
+ if (cpu == time_keeper_id)
+ xtime_update(stolen + blocked);
+
+ local_cpu_data->itm_next = delta_itm + new_itm;
+
per_cpu(xen_stolen_time, cpu) += NS_PER_TICK * stolen;
per_cpu(xen_blocked_time, cpu) += NS_PER_TICK * blocked;
}
diff --git a/arch/m32r/kernel/time.c b/arch/m32r/kernel/time.c
index bda86820bffd..84dd04048db9 100644
--- a/arch/m32r/kernel/time.c
+++ b/arch/m32r/kernel/time.c
@@ -107,15 +107,14 @@ u32 arch_gettimeoffset(void)
/*
* timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
+ * as well as call the "xtime_update()" routine every clocktick
*/
static irqreturn_t timer_interrupt(int irq, void *dev_id)
{
#ifndef CONFIG_SMP
profile_tick(CPU_PROFILING);
#endif
- /* XXX FIXME. Uh, the xtime_lock should be held here, no? */
- do_timer(1);
+ xtime_update(1);
#ifndef CONFIG_SMP
update_process_times(user_mode(get_irq_regs()));
diff --git a/arch/m68k/bvme6000/config.c b/arch/m68k/bvme6000/config.c
index 9fe6fefb5e14..1edd95095cb4 100644
--- a/arch/m68k/bvme6000/config.c
+++ b/arch/m68k/bvme6000/config.c
@@ -45,8 +45,8 @@ extern int bvme6000_set_clock_mmss (unsigned long);
extern void bvme6000_reset (void);
void bvme6000_set_vectors (void);
-/* Save tick handler routine pointer, will point to do_timer() in
- * kernel/sched.c, called via bvme6000_process_int() */
+/* Save tick handler routine pointer, will point to xtime_update() in
+ * kernel/timer/timekeeping.c, called via bvme6000_process_int() */
static irq_handler_t tick_handler;
diff --git a/arch/m68k/kernel/time.c b/arch/m68k/kernel/time.c
index 06438dac08ff..18b34ee5db3b 100644
--- a/arch/m68k/kernel/time.c
+++ b/arch/m68k/kernel/time.c
@@ -37,11 +37,11 @@ static inline int set_rtc_mmss(unsigned long nowtime)
/*
* timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
+ * as well as call the "xtime_update()" routine every clocktick
*/
static irqreturn_t timer_interrupt(int irq, void *dummy)
{
- do_timer(1);
+ xtime_update(1);
update_process_times(user_mode(get_irq_regs()));
profile_tick(CPU_PROFILING);
diff --git a/arch/m68k/mvme147/config.c b/arch/m68k/mvme147/config.c
index 100baaa692a1..6cb9c3a9b6c9 100644
--- a/arch/m68k/mvme147/config.c
+++ b/arch/m68k/mvme147/config.c
@@ -46,8 +46,8 @@ extern void mvme147_reset (void);
static int bcd2int (unsigned char b);
-/* Save tick handler routine pointer, will point to do_timer() in
- * kernel/sched.c, called via mvme147_process_int() */
+/* Save tick handler routine pointer, will point to xtime_update() in
+ * kernel/time/timekeeping.c, called via mvme147_process_int() */
irq_handler_t tick_handler;
diff --git a/arch/m68k/mvme16x/config.c b/arch/m68k/mvme16x/config.c
index 11edf61cc2c4..0b28e2621653 100644
--- a/arch/m68k/mvme16x/config.c
+++ b/arch/m68k/mvme16x/config.c
@@ -51,8 +51,8 @@ extern void mvme16x_reset (void);
int bcd2int (unsigned char b);
-/* Save tick handler routine pointer, will point to do_timer() in
- * kernel/sched.c, called via mvme16x_process_int() */
+/* Save tick handler routine pointer, will point to xtime_update() in
+ * kernel/time/timekeeping.c, called via mvme16x_process_int() */
static irq_handler_t tick_handler;
diff --git a/arch/m68k/sun3/sun3ints.c b/arch/m68k/sun3/sun3ints.c
index 2d9e21bd313a..6464ad3ae3e6 100644
--- a/arch/m68k/sun3/sun3ints.c
+++ b/arch/m68k/sun3/sun3ints.c
@@ -66,7 +66,7 @@ static irqreturn_t sun3_int5(int irq, void *dev_id)
#ifdef CONFIG_SUN3
intersil_clear();
#endif
- do_timer(1);
+ xtime_update(1);
update_process_times(user_mode(get_irq_regs()));
if (!(kstat_cpu(0).irqs[irq] % 20))
sun3_leds(led_pattern[(kstat_cpu(0).irqs[irq] % 160) / 20]);
diff --git a/arch/m68knommu/kernel/time.c b/arch/m68knommu/kernel/time.c
index d6ac2a43453c..6623909f70e6 100644
--- a/arch/m68knommu/kernel/time.c
+++ b/arch/m68knommu/kernel/time.c
@@ -36,7 +36,7 @@ static inline int set_rtc_mmss(unsigned long nowtime)
#ifndef CONFIG_GENERIC_CLOCKEVENTS
/*
* timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
+ * as well as call the "xtime_update()" routine every clocktick
*/
irqreturn_t arch_timer_interrupt(int irq, void *dummy)
{
@@ -44,11 +44,7 @@ irqreturn_t arch_timer_interrupt(int irq, void *dummy)
if (current->pid)
profile_tick(CPU_PROFILING);
- write_seqlock(&xtime_lock);
-
- do_timer(1);
-
- write_sequnlock(&xtime_lock);
+ xtime_update(1);
update_process_times(user_mode(get_irq_regs()));
diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c
index 6a1fdfef8fde..ab52b7cf3b6b 100644
--- a/arch/mips/kernel/vpe.c
+++ b/arch/mips/kernel/vpe.c
@@ -148,9 +148,9 @@ struct {
spinlock_t tc_list_lock;
struct list_head tc_list; /* Thread contexts */
} vpecontrol = {
- .vpe_list_lock = SPIN_LOCK_UNLOCKED,
+ .vpe_list_lock = __SPIN_LOCK_UNLOCKED(vpe_list_lock),
.vpe_list = LIST_HEAD_INIT(vpecontrol.vpe_list),
- .tc_list_lock = SPIN_LOCK_UNLOCKED,
+ .tc_list_lock = __SPIN_LOCK_UNLOCKED(tc_list_lock),
.tc_list = LIST_HEAD_INIT(vpecontrol.tc_list)
};
diff --git a/arch/mn10300/kernel/time.c b/arch/mn10300/kernel/time.c
index 75da468090b9..5b955000626d 100644
--- a/arch/mn10300/kernel/time.c
+++ b/arch/mn10300/kernel/time.c
@@ -104,8 +104,6 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id)
unsigned tsc, elapse;
irqreturn_t ret;
- write_seqlock(&xtime_lock);
-
while (tsc = get_cycles(),
elapse = tsc - mn10300_last_tsc, /* time elapsed since last
* tick */
@@ -114,11 +112,9 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id)
mn10300_last_tsc += MN10300_TSC_PER_HZ;
/* advance the kernel's time tracking system */
- do_timer(1);
+ xtime_update(1);
}
- write_sequnlock(&xtime_lock);
-
ret = local_timer_interrupt();
#ifdef CONFIG_SMP
send_IPI_allbutself(LOCAL_TIMER_IPI);
diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c
index 05511ccb61d2..45b7389d77aa 100644
--- a/arch/parisc/kernel/time.c
+++ b/arch/parisc/kernel/time.c
@@ -162,11 +162,8 @@ irqreturn_t __irq_entry timer_interrupt(int irq, void *dev_id)
update_process_times(user_mode(get_irq_regs()));
}
- if (cpu == 0) {
- write_seqlock(&xtime_lock);
- do_timer(ticks_elapsed);
- write_sequnlock(&xtime_lock);
- }
+ if (cpu == 0)
+ xtime_update(ticks_elapsed);
return IRQ_HANDLED;
}
diff --git a/arch/powerpc/include/asm/rwsem.h b/arch/powerpc/include/asm/rwsem.h
index 8447d89fbe72..bb1e2cdeb9bf 100644
--- a/arch/powerpc/include/asm/rwsem.h
+++ b/arch/powerpc/include/asm/rwsem.h
@@ -13,11 +13,6 @@
* by Paul Mackerras <paulus@samba.org>.
*/
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <asm/atomic.h>
-#include <asm/system.h>
-
/*
* the semaphore definition
*/
@@ -33,47 +28,6 @@
#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
-struct rw_semaphore {
- long count;
- spinlock_t wait_lock;
- struct list_head wait_list;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- struct lockdep_map dep_map;
-#endif
-};
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
-#else
-# define __RWSEM_DEP_MAP_INIT(lockname)
-#endif
-
-#define __RWSEM_INITIALIZER(name) \
-{ \
- RWSEM_UNLOCKED_VALUE, \
- __SPIN_LOCK_UNLOCKED((name).wait_lock), \
- LIST_HEAD_INIT((name).wait_list) \
- __RWSEM_DEP_MAP_INIT(name) \
-}
-
-#define DECLARE_RWSEM(name) \
- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
-
-extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
- struct lock_class_key *key);
-
-#define init_rwsem(sem) \
- do { \
- static struct lock_class_key __key; \
- \
- __init_rwsem((sem), #sem, &__key); \
- } while (0)
-
/*
* lock for reading
*/
@@ -174,10 +128,5 @@ static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
return atomic_long_add_return(delta, (atomic_long_t *)&sem->count);
}
-static inline int rwsem_is_locked(struct rw_semaphore *sem)
-{
- return sem->count != 0;
-}
-
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_RWSEM_H */
diff --git a/arch/s390/include/asm/rwsem.h b/arch/s390/include/asm/rwsem.h
index 423fdda2322d..d0eb4653cebd 100644
--- a/arch/s390/include/asm/rwsem.h
+++ b/arch/s390/include/asm/rwsem.h
@@ -43,29 +43,6 @@
#ifdef __KERNEL__
-#include <linux/list.h>
-#include <linux/spinlock.h>
-
-struct rwsem_waiter;
-
-extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *);
-extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *);
-extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *);
-extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *);
-extern struct rw_semaphore *rwsem_downgrade_write(struct rw_semaphore *);
-
-/*
- * the semaphore definition
- */
-struct rw_semaphore {
- signed long count;
- spinlock_t wait_lock;
- struct list_head wait_list;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- struct lockdep_map dep_map;
-#endif
-};
-
#ifndef __s390x__
#define RWSEM_UNLOCKED_VALUE 0x00000000
#define RWSEM_ACTIVE_BIAS 0x00000001
@@ -81,41 +58,6 @@ struct rw_semaphore {
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
/*
- * initialisation
- */
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
-#else
-# define __RWSEM_DEP_MAP_INIT(lockname)
-#endif
-
-#define __RWSEM_INITIALIZER(name) \
- { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait.lock), \
- LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) }
-
-#define DECLARE_RWSEM(name) \
- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-static inline void init_rwsem(struct rw_semaphore *sem)
-{
- sem->count = RWSEM_UNLOCKED_VALUE;
- spin_lock_init(&sem->wait_lock);
- INIT_LIST_HEAD(&sem->wait_list);
-}
-
-extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
- struct lock_class_key *key);
-
-#define init_rwsem(sem) \
-do { \
- static struct lock_class_key __key; \
- \
- __init_rwsem((sem), #sem, &__key); \
-} while (0)
-
-
-/*
* lock for reading
*/
static inline void __down_read(struct rw_semaphore *sem)
@@ -377,10 +319,5 @@ static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
return new;
}
-static inline int rwsem_is_locked(struct rw_semaphore *sem)
-{
- return (sem->count != 0);
-}
-
#endif /* __KERNEL__ */
#endif /* _S390_RWSEM_H */
diff --git a/arch/sh/include/asm/rwsem.h b/arch/sh/include/asm/rwsem.h
index 06e2251a5e48..edab57265293 100644
--- a/arch/sh/include/asm/rwsem.h
+++ b/arch/sh/include/asm/rwsem.h
@@ -11,64 +11,13 @@
#endif
#ifdef __KERNEL__
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <asm/atomic.h>
-#include <asm/system.h>
-/*
- * the semaphore definition
- */
-struct rw_semaphore {
- long count;
#define RWSEM_UNLOCKED_VALUE 0x00000000
#define RWSEM_ACTIVE_BIAS 0x00000001
#define RWSEM_ACTIVE_MASK 0x0000ffff
#define RWSEM_WAITING_BIAS (-0x00010000)
#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
- spinlock_t wait_lock;
- struct list_head wait_list;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- struct lockdep_map dep_map;
-#endif
-};
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
-#else
-# define __RWSEM_DEP_MAP_INIT(lockname)
-#endif
-
-#define __RWSEM_INITIALIZER(name) \
- { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \
- LIST_HEAD_INIT((name).wait_list) \
- __RWSEM_DEP_MAP_INIT(name) }
-
-#define DECLARE_RWSEM(name) \
- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
-
-extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
- struct lock_class_key *key);
-
-#define init_rwsem(sem) \
-do { \
- static struct lock_class_key __key; \
- \
- __init_rwsem((sem), #sem, &__key); \
-} while (0)
-
-static inline void init_rwsem(struct rw_semaphore *sem)
-{
- sem->count = RWSEM_UNLOCKED_VALUE;
- spin_lock_init(&sem->wait_lock);
- INIT_LIST_HEAD(&sem->wait_list);
-}
/*
* lock for reading
@@ -179,10 +128,5 @@ static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
return atomic_add_return(delta, (atomic_t *)(&sem->count));
}
-static inline int rwsem_is_locked(struct rw_semaphore *sem)
-{
- return (sem->count != 0);
-}
-
#endif /* __KERNEL__ */
#endif /* _ASM_SH_RWSEM_H */
diff --git a/arch/sparc/include/asm/rwsem.h b/arch/sparc/include/asm/rwsem.h
index a2b4302869bc..069bf4d663a1 100644
--- a/arch/sparc/include/asm/rwsem.h
+++ b/arch/sparc/include/asm/rwsem.h
@@ -13,53 +13,12 @@
#ifdef __KERNEL__
-#include <linux/list.h>
-#include <linux/spinlock.h>
-
-struct rwsem_waiter;
-
-struct rw_semaphore {
- signed long count;
#define RWSEM_UNLOCKED_VALUE 0x00000000L
#define RWSEM_ACTIVE_BIAS 0x00000001L
#define RWSEM_ACTIVE_MASK 0xffffffffL
#define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1)
#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
- spinlock_t wait_lock;
- struct list_head wait_list;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- struct lockdep_map dep_map;
-#endif
-};
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
-#else
-# define __RWSEM_DEP_MAP_INIT(lockname)
-#endif
-
-#define __RWSEM_INITIALIZER(name) \
-{ RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \
- LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) }
-
-#define DECLARE_RWSEM(name) \
- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
-
-extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
- struct lock_class_key *key);
-
-#define init_rwsem(sem) \
-do { \
- static struct lock_class_key __key; \
- \
- __init_rwsem((sem), #sem, &__key); \
-} while (0)
/*
* lock for reading
@@ -160,11 +119,6 @@ static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
return atomic64_add_return(delta, (atomic64_t *)(&sem->count));
}
-static inline int rwsem_is_locked(struct rw_semaphore *sem)
-{
- return (sem->count != 0);
-}
-
#endif /* __KERNEL__ */
#endif /* _SPARC64_RWSEM_H */
diff --git a/arch/sparc/kernel/pcic.c b/arch/sparc/kernel/pcic.c
index aeaa09a3c655..2cdc131b50ac 100644
--- a/arch/sparc/kernel/pcic.c
+++ b/arch/sparc/kernel/pcic.c
@@ -700,10 +700,8 @@ static void pcic_clear_clock_irq(void)
static irqreturn_t pcic_timer_handler (int irq, void *h)
{
- write_seqlock(&xtime_lock); /* Dummy, to show that we remember */
pcic_clear_clock_irq();
- do_timer(1);
- write_sequnlock(&xtime_lock);
+ xtime_update(1);
#ifndef CONFIG_SMP
update_process_times(user_mode(get_irq_regs()));
#endif
diff --git a/arch/sparc/kernel/time_32.c b/arch/sparc/kernel/time_32.c
index 55e014ce0635..8237dd4dfeb4 100644
--- a/arch/sparc/kernel/time_32.c
+++ b/arch/sparc/kernel/time_32.c
@@ -85,7 +85,7 @@ int update_persistent_clock(struct timespec now)
/*
* timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
+ * as well as call the "xtime_update()" routine every clocktick
*/
#define TICK_SIZE (tick_nsec / 1000)
@@ -96,14 +96,9 @@ static irqreturn_t timer_interrupt(int dummy, void *dev_id)
profile_tick(CPU_PROFILING);
#endif
- /* Protect counter clear so that do_gettimeoffset works */
- write_seqlock(&xtime_lock);
-
clear_clock_irq();
- do_timer(1);
-
- write_sequnlock(&xtime_lock);
+ xtime_update(1);
#ifndef CONFIG_SMP
update_process_times(user_mode(get_irq_regs()));
diff --git a/arch/sparc/lib/atomic32.c b/arch/sparc/lib/atomic32.c
index cbddeb38ffda..d3c7a12ad879 100644
--- a/arch/sparc/lib/atomic32.c
+++ b/arch/sparc/lib/atomic32.c
@@ -16,7 +16,7 @@
#define ATOMIC_HASH(a) (&__atomic_hash[(((unsigned long)a)>>8) & (ATOMIC_HASH_SIZE-1)])
spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] = {
- [0 ... (ATOMIC_HASH_SIZE-1)] = SPIN_LOCK_UNLOCKED
+ [0 ... (ATOMIC_HASH_SIZE-1)] = __SPIN_LOCK_UNLOCKED(__atomic_hash)
};
#else /* SMP */
diff --git a/arch/um/Kconfig.common b/arch/um/Kconfig.common
index e351e14b4339..1e78940218c0 100644
--- a/arch/um/Kconfig.common
+++ b/arch/um/Kconfig.common
@@ -7,6 +7,7 @@ config UML
bool
default y
select HAVE_GENERIC_HARDIRQS
+ select GENERIC_HARDIRQS_NO_DEPRECATED
config MMU
bool
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index ba4a98ba39c0..620f5b70957d 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -185,7 +185,7 @@ struct ubd {
.no_cow = 0, \
.shared = 0, \
.cow = DEFAULT_COW, \
- .lock = SPIN_LOCK_UNLOCKED, \
+ .lock = __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
.request = NULL, \
.start_sg = 0, \
.end_sg = 0, \
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 3f0ac9e0c966..64cfea80cfe2 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -35,8 +35,10 @@ int show_interrupts(struct seq_file *p, void *v)
}
if (i < NR_IRQS) {
- raw_spin_lock_irqsave(&irq_desc[i].lock, flags);
- action = irq_desc[i].action;
+ struct irq_desc *desc = irq_to_desc(i);
+
+ raw_spin_lock_irqsave(&desc->lock, flags);
+ action = desc->action;
if (!action)
goto skip;
seq_printf(p, "%3d: ",i);
@@ -46,7 +48,7 @@ int show_interrupts(struct seq_file *p, void *v)
for_each_online_cpu(j)
seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
#endif
- seq_printf(p, " %14s", irq_desc[i].chip->name);
+ seq_printf(p, " %14s", get_irq_desc_chip(desc)->name);
seq_printf(p, " %s", action->name);
for (action=action->next; action; action = action->next)
@@ -54,7 +56,7 @@ int show_interrupts(struct seq_file *p, void *v)
seq_putc(p, '\n');
skip:
- raw_spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
} else if (i == NR_IRQS)
seq_putc(p, '\n');
@@ -360,10 +362,10 @@ EXPORT_SYMBOL(um_request_irq);
EXPORT_SYMBOL(reactivate_fd);
/*
- * irq_chip must define (startup || enable) &&
- * (shutdown || disable) && end
+ * irq_chip must define at least enable/disable and ack when
+ * the edge handler is used.
*/
-static void dummy(unsigned int irq)
+static void dummy(struct irq_data *d)
{
}
@@ -371,20 +373,17 @@ static void dummy(unsigned int irq)
static struct irq_chip normal_irq_type = {
.name = "SIGIO",
.release = free_irq_by_irq_and_dev,
- .disable = dummy,
- .enable = dummy,
- .ack = dummy,
- .end = dummy
+ .irq_disable = dummy,
+ .irq_enable = dummy,
+ .irq_ack = dummy,
};
static struct irq_chip SIGVTALRM_irq_type = {
.name = "SIGVTALRM",
.release = free_irq_by_irq_and_dev,
- .shutdown = dummy, /* never called */
- .disable = dummy,
- .enable = dummy,
- .ack = dummy,
- .end = dummy
+ .irq_disable = dummy,
+ .irq_enable = dummy,
+ .irq_ack = dummy,
};
void __init init_IRQ(void)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 9137e5b39a25..b7de058a9881 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -217,10 +217,6 @@ config X86_HT
def_bool y
depends on SMP
-config X86_TRAMPOLINE
- def_bool y
- depends on SMP || (64BIT && ACPI_SLEEP)
-
config X86_32_LAZY_GS
def_bool y
depends on X86_32 && !CC_STACKPROTECTOR
@@ -382,6 +378,8 @@ config X86_INTEL_CE
depends on X86_32
depends on X86_EXTENDED_PLATFORM
select X86_REBOOTFIXUPS
+ select OF
+ select OF_EARLY_FLATTREE
---help---
Select for the Intel CE media processor (CE4100) SOC.
This option compiles in support for the CE4100 SOC for settop
@@ -811,7 +809,7 @@ config X86_LOCAL_APIC
config X86_IO_APIC
def_bool y
- depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC
+ depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_IOAPIC
config X86_VISWS_APIC
def_bool y
@@ -1705,7 +1703,7 @@ config HAVE_ARCH_EARLY_PFN_TO_NID
depends on NUMA
config USE_PERCPU_NUMA_NODE_ID
- def_bool X86_64
+ def_bool y
depends on NUMA
menu "Power management and ACPI options"
@@ -2066,9 +2064,10 @@ config SCx200HR_TIMER
config OLPC
bool "One Laptop Per Child support"
+ depends on !X86_PAE
select GPIOLIB
- select OLPC_OPENFIRMWARE
- depends on !X86_64 && !X86_PAE
+ select OF
+ select OF_PROMTREE if PROC_DEVICETREE
---help---
Add support for detecting the unique features of the OLPC
XO hardware.
@@ -2079,21 +2078,6 @@ config OLPC_XO1
---help---
Add support for non-essential features of the OLPC XO-1 laptop.
-config OLPC_OPENFIRMWARE
- bool "Support for OLPC's Open Firmware"
- depends on !X86_64 && !X86_PAE
- default n
- select OF
- help
- This option adds support for the implementation of Open Firmware
- that is used on the OLPC XO-1 Children's Machine.
- If unsure, say N here.
-
-config OLPC_OPENFIRMWARE_DT
- bool
- default y if OLPC_OPENFIRMWARE && PROC_DEVICETREE
- select OF_PROMTREE
-
endif # X86_32
config AMD_NB
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 518bb99c3394..0ed78961b60f 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -851,4 +851,5 @@ ia32_sys_call_table:
.quad sys_fanotify_init
.quad sys32_fanotify_mark
.quad sys_prlimit64 /* 340 */
+ .quad compat_sys_clock_adjtime
ia32_syscall_end:
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index ef14da1f4ec5..12e0e7dd869c 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -29,6 +29,7 @@
#include <asm/processor.h>
#include <asm/mmu.h>
#include <asm/mpspec.h>
+#include <asm/trampoline.h>
#define COMPILER_DEPENDENT_INT64 long long
#define COMPILER_DEPENDENT_UINT64 unsigned long long
@@ -116,7 +117,8 @@ static inline void acpi_disable_pci(void)
/* Low-level suspend routine. */
extern int acpi_suspend_lowlevel(void);
-extern unsigned long acpi_wakeup_address;
+extern const unsigned char acpi_wakeup_code[];
+#define acpi_wakeup_address (__pa(TRAMPOLINE_SYM(acpi_wakeup_code)))
/* early initialization routine */
extern void acpi_reserve_wakeup_memory(void);
@@ -185,15 +187,7 @@ struct bootnode;
#ifdef CONFIG_ACPI_NUMA
extern int acpi_numa;
-extern void acpi_get_nodes(struct bootnode *physnodes, unsigned long start,
- unsigned long end);
-extern int acpi_scan_nodes(unsigned long start, unsigned long end);
-#define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
-
-#ifdef CONFIG_NUMA_EMU
-extern void acpi_fake_nodes(const struct bootnode *fake_nodes,
- int num_nodes);
-#endif
+extern int x86_acpi_numa_init(void);
#endif /* CONFIG_ACPI_NUMA */
#define acpi_unlazy_tlb(x) leave_mm(x)
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 64dc82ee19f0..e264ae5a1443 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -9,23 +9,20 @@ struct amd_nb_bus_dev_range {
u8 dev_limit;
};
-extern struct pci_device_id amd_nb_misc_ids[];
+extern const struct pci_device_id amd_nb_misc_ids[];
extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[];
struct bootnode;
extern int early_is_amd_nb(u32 value);
extern int amd_cache_northbridges(void);
extern void amd_flush_garts(void);
-extern int amd_numa_init(unsigned long start_pfn, unsigned long end_pfn);
-extern int amd_scan_nodes(void);
-
-#ifdef CONFIG_NUMA_EMU
-extern void amd_fake_nodes(const struct bootnode *nodes, int nr_nodes);
-extern void amd_get_nodes(struct bootnode *nodes);
-#endif
+extern int amd_numa_init(void);
+extern int amd_get_subcaches(int);
+extern int amd_set_subcaches(int, int);
struct amd_northbridge {
struct pci_dev *misc;
+ struct pci_dev *link;
};
struct amd_northbridge_info {
@@ -37,6 +34,7 @@ extern struct amd_northbridge_info amd_northbridges;
#define AMD_NB_GART 0x1
#define AMD_NB_L3_INDEX_DISABLE 0x2
+#define AMD_NB_L3_PARTITIONING 0x4
#ifdef CONFIG_AMD_NB
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 3c896946f4cc..dbd558cbd2b1 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -240,7 +240,7 @@ extern void setup_boot_APIC_clock(void);
extern void setup_secondary_APIC_clock(void);
extern int APIC_init_uniprocessor(void);
extern void enable_NMI_through_LVT0(void);
-extern int apic_force_enable(void);
+extern int apic_force_enable(unsigned long addr);
/*
* On 32bit this is mach-xxx local
@@ -307,8 +307,6 @@ struct apic {
void (*setup_apic_routing)(void);
int (*multi_timer_check)(int apic, int irq);
- int (*apicid_to_node)(int logical_apicid);
- int (*cpu_to_logical_apicid)(int cpu);
int (*cpu_present_to_apicid)(int mps_cpu);
void (*apicid_to_cpu_present)(int phys_apicid, physid_mask_t *retmap);
void (*setup_portio_remap)(void);
@@ -356,6 +354,23 @@ struct apic {
void (*icr_write)(u32 low, u32 high);
void (*wait_icr_idle)(void);
u32 (*safe_wait_icr_idle)(void);
+
+#ifdef CONFIG_X86_32
+ /*
+ * Called very early during boot from get_smp_config(). It should
+ * return the logical apicid. x86_[bios]_cpu_to_apicid is
+ * initialized before this function is called.
+ *
+ * If logical apicid can't be determined that early, the function
+ * may return BAD_APICID. Logical apicid will be configured after
+ * init_apic_ldr() while bringing up CPUs. Note that NUMA affinity
+ * won't be applied properly during early boot in this case.
+ */
+ int (*x86_32_early_logical_apicid)(int cpu);
+
+ /* determine CPU -> NUMA node mapping */
+ int (*x86_32_numa_cpu_node)(int cpu);
+#endif
};
/*
@@ -503,6 +518,11 @@ extern struct apic apic_noop;
extern struct apic apic_default;
+static inline int noop_x86_32_early_logical_apicid(int cpu)
+{
+ return BAD_APICID;
+}
+
/*
* Set up the logical destination ID.
*
@@ -522,7 +542,7 @@ static inline int default_phys_pkg_id(int cpuid_apic, int index_msb)
return cpuid_apic >> index_msb;
}
-extern int default_apicid_to_node(int logical_apicid);
+extern int default_x86_32_numa_cpu_node(int cpu);
#endif
@@ -558,12 +578,6 @@ static inline void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_ma
*retmap = *phys_map;
}
-/* Mapping from cpu number to logical apicid */
-static inline int default_cpu_to_logical_apicid(int cpu)
-{
- return 1 << cpu;
-}
-
static inline int __default_cpu_present_to_apicid(int mps_cpu)
{
if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu))
@@ -596,8 +610,4 @@ extern int default_check_phys_apicid_present(int phys_apicid);
#endif /* CONFIG_X86_LOCAL_APIC */
-#ifdef CONFIG_X86_32
-extern u8 cpu_2_logical_apicid[NR_CPUS];
-#endif
-
#endif /* _ASM_X86_APIC_H */
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index 47a30ff8e517..d87988bacf3e 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -426,4 +426,16 @@ struct local_apic {
#else
#define BAD_APICID 0xFFFFu
#endif
+
+enum ioapic_irq_destination_types {
+ dest_Fixed = 0,
+ dest_LowestPrio = 1,
+ dest_SMI = 2,
+ dest__reserved_1 = 3,
+ dest_NMI = 4,
+ dest_INIT = 5,
+ dest__reserved_2 = 6,
+ dest_ExtINT = 7
+};
+
#endif /* _ASM_X86_APICDEF_H */
diff --git a/arch/x86/include/asm/bootparam.h b/arch/x86/include/asm/bootparam.h
index c8bfe63a06de..e020d88ec02d 100644
--- a/arch/x86/include/asm/bootparam.h
+++ b/arch/x86/include/asm/bootparam.h
@@ -12,6 +12,7 @@
/* setup data types */
#define SETUP_NONE 0
#define SETUP_E820_EXT 1
+#define SETUP_DTB 2
/* extensible setup data list node */
struct setup_data {
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 220e2ea08e80..91f3e087cf21 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -160,6 +160,7 @@
#define X86_FEATURE_NODEID_MSR (6*32+19) /* NodeId MSR */
#define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */
#define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */
+#define X86_FEATURE_PERFCTR_CORE (6*32+23) /* core performance counter extensions */
/*
* Auxiliary flags: Linux defined - For features scattered in various
@@ -279,6 +280,7 @@ extern const char * const x86_power_flags[32];
#define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE)
#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR)
#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ)
+#define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE)
#if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64)
# define cpu_has_invlpg 1
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index e99d55d74df5..908b96957d88 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -96,7 +96,7 @@ extern void e820_setup_gap(void);
extern int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize,
unsigned long start_addr, unsigned long long end_addr);
struct setup_data;
-extern void parse_e820_ext(struct setup_data *data, unsigned long pa_data);
+extern void parse_e820_ext(struct setup_data *data);
#if defined(CONFIG_X86_64) || \
(defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION))
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index 57650ab4a5f5..1cd6d26a0a8d 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -16,10 +16,13 @@ BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR)
-.irpc idx, "01234567"
+.irp idx,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \
+ 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
+.if NUM_INVALIDATE_TLB_VECTORS > \idx
BUILD_INTERRUPT3(invalidate_interrupt\idx,
(INVALIDATE_TLB_VECTOR_START)+\idx,
smp_invalidate_interrupt)
+.endif
.endr
#endif
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 0274ec5a7e62..bb9efe8706e2 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -45,6 +45,30 @@ extern void invalidate_interrupt4(void);
extern void invalidate_interrupt5(void);
extern void invalidate_interrupt6(void);
extern void invalidate_interrupt7(void);
+extern void invalidate_interrupt8(void);
+extern void invalidate_interrupt9(void);
+extern void invalidate_interrupt10(void);
+extern void invalidate_interrupt11(void);
+extern void invalidate_interrupt12(void);
+extern void invalidate_interrupt13(void);
+extern void invalidate_interrupt14(void);
+extern void invalidate_interrupt15(void);
+extern void invalidate_interrupt16(void);
+extern void invalidate_interrupt17(void);
+extern void invalidate_interrupt18(void);
+extern void invalidate_interrupt19(void);
+extern void invalidate_interrupt20(void);
+extern void invalidate_interrupt21(void);
+extern void invalidate_interrupt22(void);
+extern void invalidate_interrupt23(void);
+extern void invalidate_interrupt24(void);
+extern void invalidate_interrupt25(void);
+extern void invalidate_interrupt26(void);
+extern void invalidate_interrupt27(void);
+extern void invalidate_interrupt28(void);
+extern void invalidate_interrupt29(void);
+extern void invalidate_interrupt30(void);
+extern void invalidate_interrupt31(void);
extern void irq_move_cleanup_interrupt(void);
extern void reboot_interrupt(void);
diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
index 36fb1a6a5109..8dbe353e41e1 100644
--- a/arch/x86/include/asm/init.h
+++ b/arch/x86/include/asm/init.h
@@ -11,8 +11,8 @@ kernel_physical_mapping_init(unsigned long start,
unsigned long page_size_mask);
-extern unsigned long __initdata e820_table_start;
-extern unsigned long __meminitdata e820_table_end;
-extern unsigned long __meminitdata e820_table_top;
+extern unsigned long __initdata pgt_buf_start;
+extern unsigned long __meminitdata pgt_buf_end;
+extern unsigned long __meminitdata pgt_buf_top;
#endif /* _ASM_X86_INIT_32_H */
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index f327d386d6cc..c4bd267dfc50 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -63,17 +63,6 @@ union IO_APIC_reg_03 {
} __attribute__ ((packed)) bits;
};
-enum ioapic_irq_destination_types {
- dest_Fixed = 0,
- dest_LowestPrio = 1,
- dest_SMI = 2,
- dest__reserved_1 = 3,
- dest_NMI = 4,
- dest_INIT = 5,
- dest__reserved_2 = 6,
- dest_ExtINT = 7
-};
-
struct IO_APIC_route_entry {
__u32 vector : 8,
delivery_mode : 3, /* 000: FIXED
@@ -106,6 +95,10 @@ struct IR_IO_APIC_route_entry {
index : 15;
} __attribute__ ((packed));
+#define IOAPIC_AUTO -1
+#define IOAPIC_EDGE 0
+#define IOAPIC_LEVEL 1
+
#ifdef CONFIG_X86_IO_APIC
/*
@@ -150,11 +143,6 @@ extern int timer_through_8259;
#define io_apic_assign_pci_irqs \
(mp_irq_entries && !skip_ioapic_setup && io_apic_irqs)
-extern u8 io_apic_unique_id(u8 id);
-extern int io_apic_get_unique_id(int ioapic, int apic_id);
-extern int io_apic_get_version(int ioapic);
-extern int io_apic_get_redir_entries(int ioapic);
-
struct io_apic_irq_attr;
extern int io_apic_set_pci_routing(struct device *dev, int irq,
struct io_apic_irq_attr *irq_attr);
@@ -162,6 +150,8 @@ void setup_IO_APIC_irq_extra(u32 gsi);
extern void ioapic_and_gsi_init(void);
extern void ioapic_insert_resources(void);
+int io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr);
+
extern struct IO_APIC_route_entry **alloc_ioapic_entries(void);
extern void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries);
extern int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
@@ -186,6 +176,8 @@ extern void __init pre_init_apic_IRQ0(void);
extern void mp_save_irq(struct mpc_intsrc *m);
+extern void disable_ioapic_support(void);
+
#else /* !CONFIG_X86_IO_APIC */
#define io_apic_assign_pci_irqs 0
@@ -199,6 +191,26 @@ static inline int mp_find_ioapic(u32 gsi) { return 0; }
struct io_apic_irq_attr;
static inline int io_apic_set_pci_routing(struct device *dev, int irq,
struct io_apic_irq_attr *irq_attr) { return 0; }
+
+static inline struct IO_APIC_route_entry **alloc_ioapic_entries(void)
+{
+ return NULL;
+}
+
+static inline void free_ioapic_entries(struct IO_APIC_route_entry **ent) { }
+static inline int save_IO_APIC_setup(struct IO_APIC_route_entry **ent)
+{
+ return -ENOMEM;
+}
+
+static inline void mask_IO_APIC_setup(struct IO_APIC_route_entry **ent) { }
+static inline int restore_IO_APIC_setup(struct IO_APIC_route_entry **ent)
+{
+ return -ENOMEM;
+}
+
+static inline void mp_save_irq(struct mpc_intsrc *m) { };
+static inline void disable_ioapic_support(void) { }
#endif
#endif /* _ASM_X86_IO_APIC_H */
diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h
index 0b7228268a63..615fa9061b57 100644
--- a/arch/x86/include/asm/ipi.h
+++ b/arch/x86/include/asm/ipi.h
@@ -123,10 +123,6 @@ extern void default_send_IPI_mask_sequence_phys(const struct cpumask *mask,
int vector);
extern void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,
int vector);
-extern void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
- int vector);
-extern void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
- int vector);
/* Avoid include hell */
#define NMI_VECTOR 0x02
@@ -150,6 +146,10 @@ static inline void __default_local_send_IPI_all(int vector)
}
#ifdef CONFIG_X86_32
+extern void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
+ int vector);
+extern void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
+ int vector);
extern void default_send_IPI_mask_logical(const struct cpumask *mask,
int vector);
extern void default_send_IPI_allbutself(int vector);
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index c704b38c57a2..ba870bb6dd8e 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -10,9 +10,6 @@
#include <asm/apicdef.h>
#include <asm/irq_vectors.h>
-/* Even though we don't support this, supply it to appease OF */
-static inline void irq_dispose_mapping(unsigned int virq) { }
-
static inline int irq_canonicalize(int irq)
{
return ((irq == 2) ? 9 : irq);
diff --git a/arch/x86/include/asm/irq_controller.h b/arch/x86/include/asm/irq_controller.h
new file mode 100644
index 000000000000..423bbbddf36d
--- /dev/null
+++ b/arch/x86/include/asm/irq_controller.h
@@ -0,0 +1,12 @@
+#ifndef __IRQ_CONTROLLER__
+#define __IRQ_CONTROLLER__
+
+struct irq_domain {
+ int (*xlate)(struct irq_domain *h, const u32 *intspec, u32 intsize,
+ u32 *out_hwirq, u32 *out_type);
+ void *priv;
+ struct device_node *controller;
+ struct list_head l;
+};
+
+#endif
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 6af0894dafb4..4980f48bbbb7 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -1,6 +1,7 @@
#ifndef _ASM_X86_IRQ_VECTORS_H
#define _ASM_X86_IRQ_VECTORS_H
+#include <linux/threads.h>
/*
* Linux IRQ vector layout.
*
@@ -16,8 +17,8 @@
* Vectors 0 ... 31 : system traps and exceptions - hardcoded events
* Vectors 32 ... 127 : device interrupts
* Vector 128 : legacy int80 syscall interface
- * Vectors 129 ... 237 : device interrupts
- * Vectors 238 ... 255 : special interrupts
+ * Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 : device interrupts
+ * Vectors INVALIDATE_TLB_VECTOR_START ... 255 : special interrupts
*
* 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table.
*
@@ -96,37 +97,43 @@
#define THRESHOLD_APIC_VECTOR 0xf9
#define REBOOT_VECTOR 0xf8
-/* f0-f7 used for spreading out TLB flushes: */
-#define INVALIDATE_TLB_VECTOR_END 0xf7
-#define INVALIDATE_TLB_VECTOR_START 0xf0
-#define NUM_INVALIDATE_TLB_VECTORS 8
-
-/*
- * Local APIC timer IRQ vector is on a different priority level,
- * to work around the 'lost local interrupt if more than 2 IRQ
- * sources per level' errata.
- */
-#define LOCAL_TIMER_VECTOR 0xef
-
/*
* Generic system vector for platform specific use
*/
-#define X86_PLATFORM_IPI_VECTOR 0xed
+#define X86_PLATFORM_IPI_VECTOR 0xf7
/*
* IRQ work vector:
*/
-#define IRQ_WORK_VECTOR 0xec
+#define IRQ_WORK_VECTOR 0xf6
-#define UV_BAU_MESSAGE 0xea
+#define UV_BAU_MESSAGE 0xf5
/*
* Self IPI vector for machine checks
*/
-#define MCE_SELF_VECTOR 0xeb
+#define MCE_SELF_VECTOR 0xf4
/* Xen vector callback to receive events in a HVM domain */
-#define XEN_HVM_EVTCHN_CALLBACK 0xe9
+#define XEN_HVM_EVTCHN_CALLBACK 0xf3
+
+/*
+ * Local APIC timer IRQ vector is on a different priority level,
+ * to work around the 'lost local interrupt if more than 2 IRQ
+ * sources per level' errata.
+ */
+#define LOCAL_TIMER_VECTOR 0xef
+
+/* up to 32 vectors used for spreading out TLB flushes: */
+#if NR_CPUS <= 32
+# define NUM_INVALIDATE_TLB_VECTORS NR_CPUS
+#else
+# define NUM_INVALIDATE_TLB_VECTORS 32
+#endif
+
+#define INVALIDATE_TLB_VECTOR_END 0xee
+#define INVALIDATE_TLB_VECTOR_START \
+ (INVALIDATE_TLB_VECTOR_END - NUM_INVALIDATE_TLB_VECTORS + 1)
#define NR_VECTORS 256
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index ca242d35e873..518bbbb9ee59 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -13,7 +13,6 @@ enum die_val {
DIE_PANIC,
DIE_NMI,
DIE_DIE,
- DIE_NMIWATCHDOG,
DIE_KERNELDEBUG,
DIE_TRAP,
DIE_GPF,
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index 0c90dd9f0505..9c7d95f6174b 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -25,7 +25,6 @@ extern int pic_mode;
#define MAX_IRQ_SOURCES 256
extern unsigned int def_to_bigsmp;
-extern u8 apicid_2_node[];
#ifdef CONFIG_X86_NUMAQ
extern int mp_bus_id_to_node[MAX_MP_BUSSES];
@@ -33,8 +32,6 @@ extern int mp_bus_id_to_local[MAX_MP_BUSSES];
extern int quad_local_to_mp_bus_id [NR_CPUS/4][4];
#endif
-#define MAX_APICID 256
-
#else /* CONFIG_X86_64: */
#define MAX_MP_BUSSES 256
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index c76f5b92b840..07f46016d3ff 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -7,7 +7,6 @@
#ifdef CONFIG_X86_LOCAL_APIC
-extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
extern int reserve_perfctr_nmi(unsigned int);
extern void release_perfctr_nmi(unsigned int);
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index 27da400d3138..3d4dab43c994 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -1,5 +1,57 @@
+#ifndef _ASM_X86_NUMA_H
+#define _ASM_X86_NUMA_H
+
+#include <asm/topology.h>
+#include <asm/apicdef.h>
+
+#ifdef CONFIG_NUMA
+
+#define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
+
+/*
+ * __apicid_to_node[] stores the raw mapping between physical apicid and
+ * node and is used to initialize cpu_to_node mapping.
+ *
+ * The mapping may be overridden by apic->numa_cpu_node() on 32bit and thus
+ * should be accessed by the accessors - set_apicid_to_node() and
+ * numa_cpu_node().
+ */
+extern s16 __apicid_to_node[MAX_LOCAL_APIC];
+
+static inline void set_apicid_to_node(int apicid, s16 node)
+{
+ __apicid_to_node[apicid] = node;
+}
+#else /* CONFIG_NUMA */
+static inline void set_apicid_to_node(int apicid, s16 node)
+{
+}
+#endif /* CONFIG_NUMA */
+
#ifdef CONFIG_X86_32
# include "numa_32.h"
#else
# include "numa_64.h"
#endif
+
+#ifdef CONFIG_NUMA
+extern void __cpuinit numa_set_node(int cpu, int node);
+extern void __cpuinit numa_clear_node(int cpu);
+extern void __init numa_init_array(void);
+extern void __init init_cpu_to_node(void);
+extern void __cpuinit numa_add_cpu(int cpu);
+extern void __cpuinit numa_remove_cpu(int cpu);
+#else /* CONFIG_NUMA */
+static inline void numa_set_node(int cpu, int node) { }
+static inline void numa_clear_node(int cpu) { }
+static inline void numa_init_array(void) { }
+static inline void init_cpu_to_node(void) { }
+static inline void numa_add_cpu(int cpu) { }
+static inline void numa_remove_cpu(int cpu) { }
+#endif /* CONFIG_NUMA */
+
+#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable);
+#endif
+
+#endif /* _ASM_X86_NUMA_H */
diff --git a/arch/x86/include/asm/numa_32.h b/arch/x86/include/asm/numa_32.h
index b0ef2b449a9d..c6beed1ef103 100644
--- a/arch/x86/include/asm/numa_32.h
+++ b/arch/x86/include/asm/numa_32.h
@@ -4,7 +4,12 @@
extern int numa_off;
extern int pxm_to_nid(int pxm);
-extern void numa_remove_cpu(int cpu);
+
+#ifdef CONFIG_NUMA
+extern int __cpuinit numa_cpu_node(int cpu);
+#else /* CONFIG_NUMA */
+static inline int numa_cpu_node(int cpu) { return NUMA_NO_NODE; }
+#endif /* CONFIG_NUMA */
#ifdef CONFIG_HIGHMEM
extern void set_highmem_pages_init(void);
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index 0493be39607c..344eb1790b46 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -2,23 +2,16 @@
#define _ASM_X86_NUMA_64_H
#include <linux/nodemask.h>
-#include <asm/apicdef.h>
struct bootnode {
u64 start;
u64 end;
};
-extern int compute_hash_shift(struct bootnode *nodes, int numblks,
- int *nodeids);
-
#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
-extern void numa_init_array(void);
extern int numa_off;
-extern s16 apicid_to_node[MAX_LOCAL_APIC];
-
extern unsigned long numa_free_all_bootmem(void);
extern void setup_node_bootmem(int nodeid, unsigned long start,
unsigned long end);
@@ -31,11 +24,11 @@ extern void setup_node_bootmem(int nodeid, unsigned long start,
*/
#define NODE_MIN_SIZE (4*1024*1024)
-extern void __init init_cpu_to_node(void);
-extern void __cpuinit numa_set_node(int cpu, int node);
-extern void __cpuinit numa_clear_node(int cpu);
-extern void __cpuinit numa_add_cpu(int cpu);
-extern void __cpuinit numa_remove_cpu(int cpu);
+extern nodemask_t numa_nodes_parsed __initdata;
+
+extern int __cpuinit numa_cpu_node(int cpu);
+extern int __init numa_add_memblk(int nodeid, u64 start, u64 end);
+extern void __init numa_set_distance(int from, int to, int distance);
#ifdef CONFIG_NUMA_EMU
#define FAKE_NODE_MIN_SIZE ((u64)32 << 20)
@@ -43,11 +36,7 @@ extern void __cpuinit numa_remove_cpu(int cpu);
void numa_emu_cmdline(char *);
#endif /* CONFIG_NUMA_EMU */
#else
-static inline void init_cpu_to_node(void) { }
-static inline void numa_set_node(int cpu, int node) { }
-static inline void numa_clear_node(int cpu) { }
-static inline void numa_add_cpu(int cpu, int node) { }
-static inline void numa_remove_cpu(int cpu) { }
+static inline int numa_cpu_node(int cpu) { return NUMA_NO_NODE; }
#endif
#endif /* _ASM_X86_NUMA_64_H */
diff --git a/arch/x86/include/asm/olpc_ofw.h b/arch/x86/include/asm/olpc_ofw.h
index 641988efe063..c5d3a5abbb9f 100644
--- a/arch/x86/include/asm/olpc_ofw.h
+++ b/arch/x86/include/asm/olpc_ofw.h
@@ -6,7 +6,7 @@
#define OLPC_OFW_SIG 0x2057464F /* aka "OFW " */
-#ifdef CONFIG_OLPC_OPENFIRMWARE
+#ifdef CONFIG_OLPC
extern bool olpc_ofw_is_installed(void);
@@ -26,19 +26,15 @@ extern void setup_olpc_ofw_pgd(void);
/* check if OFW was detected during boot */
extern bool olpc_ofw_present(void);
-#else /* !CONFIG_OLPC_OPENFIRMWARE */
-
-static inline bool olpc_ofw_is_installed(void) { return false; }
+#else /* !CONFIG_OLPC */
static inline void olpc_ofw_detect(void) { }
static inline void setup_olpc_ofw_pgd(void) { }
-static inline bool olpc_ofw_present(void) { return false; }
-
-#endif /* !CONFIG_OLPC_OPENFIRMWARE */
+#endif /* !CONFIG_OLPC */
-#ifdef CONFIG_OLPC_OPENFIRMWARE_DT
+#ifdef CONFIG_OF_PROMTREE
extern void olpc_dt_build_devicetree(void);
#else
static inline void olpc_dt_build_devicetree(void) { }
-#endif /* CONFIG_OLPC_OPENFIRMWARE_DT */
+#endif
#endif /* _ASM_X86_OLPC_OFW_H */
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index 1df66211fd1b..97e6007e4edd 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -2,6 +2,7 @@
#define _ASM_X86_PAGE_DEFS_H
#include <linux/const.h>
+#include <linux/types.h>
/* PAGE_SHIFT determines the page size */
#define PAGE_SHIFT 12
@@ -45,11 +46,17 @@ extern int devmem_is_allowed(unsigned long pagenr);
extern unsigned long max_low_pfn_mapped;
extern unsigned long max_pfn_mapped;
+static inline phys_addr_t get_max_mapped(void)
+{
+ return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT;
+}
+
extern unsigned long init_memory_mapping(unsigned long start,
unsigned long end);
-extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn,
- int acpi, int k8);
+void init_memory_mapping_high(void);
+
+extern void initmem_init(void);
extern void free_initmem(void);
#endif /* !__ASSEMBLY__ */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 45636cefa186..4c25ab48257b 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -94,10 +94,6 @@ struct cpuinfo_x86 {
int x86_cache_alignment; /* In bytes */
int x86_power;
unsigned long loops_per_jiffy;
-#ifdef CONFIG_SMP
- /* cpus sharing the last level cache: */
- cpumask_var_t llc_shared_map;
-#endif
/* cpuid returned max cores value: */
u16 x86_max_cores;
u16 apicid;
diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h
index b4ec95f07518..971e0b46446e 100644
--- a/arch/x86/include/asm/prom.h
+++ b/arch/x86/include/asm/prom.h
@@ -1 +1,69 @@
-/* dummy prom.h; here to make linux/of.h's #includes happy */
+/*
+ * Definitions for Device tree / OpenFirmware handling on X86
+ *
+ * based on arch/powerpc/include/asm/prom.h which is
+ * Copyright (C) 1996-2005 Paul Mackerras.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _ASM_X86_PROM_H
+#define _ASM_X86_PROM_H
+#ifndef __ASSEMBLY__
+
+#include <linux/of.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+
+#include <asm/irq.h>
+#include <asm/atomic.h>
+#include <asm/setup.h>
+#include <asm/irq_controller.h>
+
+#ifdef CONFIG_OF
+extern int of_ioapic;
+extern u64 initial_dtb;
+extern void add_dtb(u64 data);
+extern void x86_add_irq_domains(void);
+void __cpuinit x86_of_pci_init(void);
+void x86_dtb_init(void);
+
+static inline struct device_node *pci_device_to_OF_node(struct pci_dev *pdev)
+{
+ return pdev ? pdev->dev.of_node : NULL;
+}
+
+static inline struct device_node *pci_bus_to_OF_node(struct pci_bus *bus)
+{
+ return pci_device_to_OF_node(bus->self);
+}
+
+#else
+static inline void add_dtb(u64 data) { }
+static inline void x86_add_irq_domains(void) { }
+static inline void x86_of_pci_init(void) { }
+static inline void x86_dtb_init(void) { }
+#define of_ioapic 0
+#endif
+
+extern char cmd_line[COMMAND_LINE_SIZE];
+
+#define pci_address_to_pio pci_address_to_pio
+unsigned long pci_address_to_pio(phys_addr_t addr);
+
+/**
+ * irq_dispose_mapping - Unmap an interrupt
+ * @virq: linux virq number of the interrupt to unmap
+ *
+ * FIXME: We really should implement proper virq handling like power,
+ * but that's going to be major surgery.
+ */
+static inline void irq_dispose_mapping(unsigned int virq) { }
+
+#define HAVE_ARCH_DEVTREE_FIXUPS
+
+#endif /* __ASSEMBLY__ */
+#endif
diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h
index 562d4fd31ba8..3250e3d605d9 100644
--- a/arch/x86/include/asm/reboot.h
+++ b/arch/x86/include/asm/reboot.h
@@ -18,7 +18,10 @@ extern struct machine_ops machine_ops;
void native_machine_crash_shutdown(struct pt_regs *regs);
void native_machine_shutdown(void);
-void machine_real_restart(const unsigned char *code, int length);
+void machine_real_restart(unsigned int type);
+/* These must match dispatch_table in reboot_32.S */
+#define MRR_BIOS 0
+#define MRR_APM 1
typedef void (*nmi_shootdown_cb)(int, struct die_args*);
void nmi_shootdown_cpus(nmi_shootdown_cb callback);
diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h
index d1e41b0f9b60..df4cd32b4cc6 100644
--- a/arch/x86/include/asm/rwsem.h
+++ b/arch/x86/include/asm/rwsem.h
@@ -37,26 +37,9 @@
#endif
#ifdef __KERNEL__
-
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <linux/lockdep.h>
#include <asm/asm.h>
-struct rwsem_waiter;
-
-extern asmregparm struct rw_semaphore *
- rwsem_down_read_failed(struct rw_semaphore *sem);
-extern asmregparm struct rw_semaphore *
- rwsem_down_write_failed(struct rw_semaphore *sem);
-extern asmregparm struct rw_semaphore *
- rwsem_wake(struct rw_semaphore *);
-extern asmregparm struct rw_semaphore *
- rwsem_downgrade_wake(struct rw_semaphore *sem);
-
/*
- * the semaphore definition
- *
* The bias values and the counter type limits the number of
* potential readers/writers to 32767 for 32 bits and 2147483647
* for 64 bits.
@@ -74,43 +57,6 @@ extern asmregparm struct rw_semaphore *
#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
-typedef signed long rwsem_count_t;
-
-struct rw_semaphore {
- rwsem_count_t count;
- spinlock_t wait_lock;
- struct list_head wait_list;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- struct lockdep_map dep_map;
-#endif
-};
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
-#else
-# define __RWSEM_DEP_MAP_INIT(lockname)
-#endif
-
-
-#define __RWSEM_INITIALIZER(name) \
-{ \
- RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \
- LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) \
-}
-
-#define DECLARE_RWSEM(name) \
- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
- struct lock_class_key *key);
-
-#define init_rwsem(sem) \
-do { \
- static struct lock_class_key __key; \
- \
- __init_rwsem((sem), #sem, &__key); \
-} while (0)
-
/*
* lock for reading
*/
@@ -133,7 +79,7 @@ static inline void __down_read(struct rw_semaphore *sem)
*/
static inline int __down_read_trylock(struct rw_semaphore *sem)
{
- rwsem_count_t result, tmp;
+ long result, tmp;
asm volatile("# beginning __down_read_trylock\n\t"
" mov %0,%1\n\t"
"1:\n\t"
@@ -155,7 +101,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem)
*/
static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
{
- rwsem_count_t tmp;
+ long tmp;
asm volatile("# beginning down_write\n\t"
LOCK_PREFIX " xadd %1,(%2)\n\t"
/* adds 0xffff0001, returns the old value */
@@ -180,9 +126,8 @@ static inline void __down_write(struct rw_semaphore *sem)
*/
static inline int __down_write_trylock(struct rw_semaphore *sem)
{
- rwsem_count_t ret = cmpxchg(&sem->count,
- RWSEM_UNLOCKED_VALUE,
- RWSEM_ACTIVE_WRITE_BIAS);
+ long ret = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE,
+ RWSEM_ACTIVE_WRITE_BIAS);
if (ret == RWSEM_UNLOCKED_VALUE)
return 1;
return 0;
@@ -193,7 +138,7 @@ static inline int __down_write_trylock(struct rw_semaphore *sem)
*/
static inline void __up_read(struct rw_semaphore *sem)
{
- rwsem_count_t tmp;
+ long tmp;
asm volatile("# beginning __up_read\n\t"
LOCK_PREFIX " xadd %1,(%2)\n\t"
/* subtracts 1, returns the old value */
@@ -211,7 +156,7 @@ static inline void __up_read(struct rw_semaphore *sem)
*/
static inline void __up_write(struct rw_semaphore *sem)
{
- rwsem_count_t tmp;
+ long tmp;
asm volatile("# beginning __up_write\n\t"
LOCK_PREFIX " xadd %1,(%2)\n\t"
/* subtracts 0xffff0001, returns the old value */
@@ -247,8 +192,7 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
/*
* implement atomic add functionality
*/
-static inline void rwsem_atomic_add(rwsem_count_t delta,
- struct rw_semaphore *sem)
+static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem)
{
asm volatile(LOCK_PREFIX _ASM_ADD "%1,%0"
: "+m" (sem->count)
@@ -258,10 +202,9 @@ static inline void rwsem_atomic_add(rwsem_count_t delta,
/*
* implement exchange and add functionality
*/
-static inline rwsem_count_t rwsem_atomic_update(rwsem_count_t delta,
- struct rw_semaphore *sem)
+static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
{
- rwsem_count_t tmp = delta;
+ long tmp = delta;
asm volatile(LOCK_PREFIX "xadd %0,%1"
: "+r" (tmp), "+m" (sem->count)
@@ -270,10 +213,5 @@ static inline rwsem_count_t rwsem_atomic_update(rwsem_count_t delta,
return tmp + delta;
}
-static inline int rwsem_is_locked(struct rw_semaphore *sem)
-{
- return (sem->count != 0);
-}
-
#endif /* __KERNEL__ */
#endif /* _ASM_X86_RWSEM_H */
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index 231f1c1d6607..cd84f7208f76 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -1,14 +1,16 @@
#ifndef _ASM_X86_SEGMENT_H
#define _ASM_X86_SEGMENT_H
+#include <linux/const.h>
+
/* Constructor for a conventional segment GDT (or LDT) entry */
/* This is a macro so it can be used in initializers */
#define GDT_ENTRY(flags, base, limit) \
- ((((base) & 0xff000000ULL) << (56-24)) | \
- (((flags) & 0x0000f0ffULL) << 40) | \
- (((limit) & 0x000f0000ULL) << (48-16)) | \
- (((base) & 0x00ffffffULL) << 16) | \
- (((limit) & 0x0000ffffULL)))
+ ((((base) & _AC(0xff000000,ULL)) << (56-24)) | \
+ (((flags) & _AC(0x0000f0ff,ULL)) << 40) | \
+ (((limit) & _AC(0x000f0000,ULL)) << (48-16)) | \
+ (((base) & _AC(0x00ffffff,ULL)) << 16) | \
+ (((limit) & _AC(0x0000ffff,ULL))))
/* Simple and small GDT entries for booting only */
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 1f4695136776..0747de6227dc 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -23,6 +23,8 @@ extern unsigned int num_processors;
DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_map);
DECLARE_PER_CPU(cpumask_var_t, cpu_core_map);
+/* cpus sharing the last level cache: */
+DECLARE_PER_CPU(cpumask_var_t, cpu_llc_shared_map);
DECLARE_PER_CPU(u16, cpu_llc_id);
DECLARE_PER_CPU(int, cpu_number);
@@ -36,8 +38,16 @@ static inline struct cpumask *cpu_core_mask(int cpu)
return per_cpu(cpu_core_map, cpu);
}
+static inline struct cpumask *cpu_llc_shared_mask(int cpu)
+{
+ return per_cpu(cpu_llc_shared_map, cpu);
+}
+
DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid);
DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid);
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32)
+DECLARE_EARLY_PER_CPU(int, x86_cpu_to_logical_apicid);
+#endif
/* Static state in head.S used to set up a CPU */
extern unsigned long stack_start; /* Initial stack pointer address */
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index 33ecc3ea8782..12569e691ce3 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -98,8 +98,6 @@ do { \
*/
#define HAVE_DISABLE_HLT
#else
-#define __SAVE(reg, offset) "movq %%" #reg ",(14-" #offset ")*8(%%rsp)\n\t"
-#define __RESTORE(reg, offset) "movq (14-" #offset ")*8(%%rsp),%%" #reg "\n\t"
/* frame pointer must be last for get_wchan */
#define SAVE_CONTEXT "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t"
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 21899cc31e52..910a7084f7f2 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -47,21 +47,6 @@
#include <asm/mpspec.h>
-#ifdef CONFIG_X86_32
-
-/* Mappings between logical cpu number and node number */
-extern int cpu_to_node_map[];
-
-/* Returns the number of the node containing CPU 'cpu' */
-static inline int __cpu_to_node(int cpu)
-{
- return cpu_to_node_map[cpu];
-}
-#define early_cpu_to_node __cpu_to_node
-#define cpu_to_node __cpu_to_node
-
-#else /* CONFIG_X86_64 */
-
/* Mappings between logical cpu number and node number */
DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map);
@@ -84,8 +69,6 @@ static inline int early_cpu_to_node(int cpu)
#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
-#endif /* CONFIG_X86_64 */
-
/* Mappings between node number and cpus on that node. */
extern cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
@@ -155,7 +138,7 @@ extern unsigned long node_remap_size[];
.balance_interval = 1, \
}
-#ifdef CONFIG_X86_64_ACPI_NUMA
+#ifdef CONFIG_X86_64
extern int __node_distance(int, int);
#define node_distance(a, b) __node_distance(a, b)
#endif
diff --git a/arch/x86/include/asm/trampoline.h b/arch/x86/include/asm/trampoline.h
index f4500fb3b485..feca3118a73b 100644
--- a/arch/x86/include/asm/trampoline.h
+++ b/arch/x86/include/asm/trampoline.h
@@ -3,25 +3,36 @@
#ifndef __ASSEMBLY__
-#ifdef CONFIG_X86_TRAMPOLINE
+#include <linux/types.h>
+#include <asm/io.h>
+
/*
- * Trampoline 80x86 program as an array.
+ * Trampoline 80x86 program as an array. These are in the init rodata
+ * segment, but that's okay, because we only care about the relative
+ * addresses of the symbols.
*/
-extern const unsigned char trampoline_data [];
-extern const unsigned char trampoline_end [];
-extern unsigned char *trampoline_base;
+extern const unsigned char x86_trampoline_start [];
+extern const unsigned char x86_trampoline_end [];
+extern unsigned char *x86_trampoline_base;
extern unsigned long init_rsp;
extern unsigned long initial_code;
extern unsigned long initial_gs;
-#define TRAMPOLINE_SIZE roundup(trampoline_end - trampoline_data, PAGE_SIZE)
+extern void __init setup_trampolines(void);
+
+extern const unsigned char trampoline_data[];
+extern const unsigned char trampoline_status[];
+
+#define TRAMPOLINE_SYM(x) \
+ ((void *)(x86_trampoline_base + \
+ ((const unsigned char *)(x) - x86_trampoline_start)))
-extern unsigned long setup_trampoline(void);
-extern void __init reserve_trampoline_memory(void);
-#else
-static inline void reserve_trampoline_memory(void) {}
-#endif /* CONFIG_X86_TRAMPOLINE */
+/* Address of the SMP trampoline */
+static inline unsigned long trampoline_address(void)
+{
+ return virt_to_phys(TRAMPOLINE_SYM(trampoline_data));
+}
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index b766a5e8ba0e..b6f73f1bc7ac 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -346,10 +346,11 @@
#define __NR_fanotify_init 338
#define __NR_fanotify_mark 339
#define __NR_prlimit64 340
+#define __NR_clock_adjtime 341
#ifdef __KERNEL__
-#define NR_syscalls 341
+#define NR_syscalls 342
#define __ARCH_WANT_IPC_PARSE_VERSION
#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index 363e9b8a715b..5ee30858f5d6 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -669,6 +669,8 @@ __SYSCALL(__NR_fanotify_init, sys_fanotify_init)
__SYSCALL(__NR_fanotify_mark, sys_fanotify_mark)
#define __NR_prlimit64 302
__SYSCALL(__NR_prlimit64, sys_prlimit64)
+#define __NR_clock_adjtime 303
+__SYSCALL(__NR_clock_adjtime, sys_clock_adjtime)
#ifndef __NO_STUBS
#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 64642ad019fb..643ebf2e2ad8 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -83,11 +83,13 @@ struct x86_init_paging {
* boot cpu
* @tsc_pre_init: platform function called before TSC init
* @timer_init: initialize the platform timer (default PIT/HPET)
+ * @wallclock_init: init the wallclock device
*/
struct x86_init_timers {
void (*setup_percpu_clockev)(void);
void (*tsc_pre_init)(void);
void (*timer_init)(void);
+ void (*wallclock_init)(void);
};
/**
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 34244b2cd880..4ad450948f36 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -47,7 +47,7 @@ obj-y += tsc.o io_delay.o rtc.o
obj-y += pci-iommu_table.o
obj-y += resource.o
-obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
+obj-y += trampoline.o trampoline_$(BITS).o
obj-y += process.o
obj-y += i387.o xsave.o
obj-y += ptrace.o
@@ -59,6 +59,7 @@ obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += cpu/
obj-y += acpi/
obj-y += reboot.o
+obj-$(CONFIG_X86_32) += reboot_32.o
obj-$(CONFIG_MCA) += mca_32.o
obj-$(CONFIG_X86_MSR) += msr.o
obj-$(CONFIG_X86_CPUID) += cpuid.o
@@ -69,7 +70,6 @@ obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_SMP) += smpboot.o tsc_sync.o
obj-$(CONFIG_SMP) += setup_percpu.o
obj-$(CONFIG_X86_64_SMP) += tsc_sync.o
-obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o
obj-$(CONFIG_X86_MPPARSE) += mpparse.o
obj-y += apic/
obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
@@ -109,6 +109,7 @@ obj-$(CONFIG_MICROCODE) += microcode.o
obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o
obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
+obj-$(CONFIG_OF) += devicetree.o
###
# 64 bit specific files
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 3e6e2d68f761..9a966c579af5 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -595,14 +595,8 @@ static void acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
nid = acpi_get_node(handle);
if (nid == -1 || !node_online(nid))
return;
-#ifdef CONFIG_X86_64
- apicid_to_node[physid] = nid;
+ set_apicid_to_node(physid, nid);
numa_set_node(cpu, nid);
-#else /* CONFIG_X86_32 */
- apicid_2_node[physid] = nid;
- cpu_to_node_map[cpu] = nid;
-#endif
-
#endif
}
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.S b/arch/x86/kernel/acpi/realmode/wakeup.S
index 28595d6df47c..ead21b663117 100644
--- a/arch/x86/kernel/acpi/realmode/wakeup.S
+++ b/arch/x86/kernel/acpi/realmode/wakeup.S
@@ -6,11 +6,17 @@
#include <asm/page_types.h>
#include <asm/pgtable_types.h>
#include <asm/processor-flags.h>
+#include "wakeup.h"
.code16
- .section ".header", "a"
+ .section ".jump", "ax"
+ .globl _start
+_start:
+ cli
+ jmp wakeup_code
/* This should match the structure in wakeup.h */
+ .section ".header", "a"
.globl wakeup_header
wakeup_header:
video_mode: .short 0 /* Video mode number */
@@ -30,14 +36,11 @@ wakeup_jmp: .byte 0xea /* ljmpw */
wakeup_jmp_off: .word 3f
wakeup_jmp_seg: .word 0
wakeup_gdt: .quad 0, 0, 0
-signature: .long 0x51ee1111
+signature: .long WAKEUP_HEADER_SIGNATURE
.text
- .globl _start
.code16
wakeup_code:
-_start:
- cli
cld
/* Apparently some dimwit BIOS programmers don't know how to
@@ -77,12 +80,12 @@ _start:
/* Check header signature... */
movl signature, %eax
- cmpl $0x51ee1111, %eax
+ cmpl $WAKEUP_HEADER_SIGNATURE, %eax
jne bogus_real_magic
/* Check we really have everything... */
movl end_signature, %eax
- cmpl $0x65a22c82, %eax
+ cmpl $WAKEUP_END_SIGNATURE, %eax
jne bogus_real_magic
/* Call the C code */
@@ -147,3 +150,7 @@ wakeup_heap:
wakeup_stack:
.space 2048
wakeup_stack_end:
+
+ .section ".signature","a"
+end_signature:
+ .long WAKEUP_END_SIGNATURE
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.h b/arch/x86/kernel/acpi/realmode/wakeup.h
index 69d38d0b2b64..e1828c07e79c 100644
--- a/arch/x86/kernel/acpi/realmode/wakeup.h
+++ b/arch/x86/kernel/acpi/realmode/wakeup.h
@@ -35,7 +35,8 @@ struct wakeup_header {
extern struct wakeup_header wakeup_header;
#endif
-#define HEADER_OFFSET 0x3f00
-#define WAKEUP_SIZE 0x4000
+#define WAKEUP_HEADER_OFFSET 8
+#define WAKEUP_HEADER_SIGNATURE 0x51ee1111
+#define WAKEUP_END_SIGNATURE 0x65a22c82
#endif /* ARCH_X86_KERNEL_ACPI_RM_WAKEUP_H */
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.lds.S b/arch/x86/kernel/acpi/realmode/wakeup.lds.S
index 060fff8f5c5b..d4f8010a5b1b 100644
--- a/arch/x86/kernel/acpi/realmode/wakeup.lds.S
+++ b/arch/x86/kernel/acpi/realmode/wakeup.lds.S
@@ -13,9 +13,19 @@ ENTRY(_start)
SECTIONS
{
. = 0;
+ .jump : {
+ *(.jump)
+ } = 0x90909090
+
+ . = WAKEUP_HEADER_OFFSET;
+ .header : {
+ *(.header)
+ }
+
+ . = ALIGN(16);
.text : {
*(.text*)
- }
+ } = 0x90909090
. = ALIGN(16);
.rodata : {
@@ -33,11 +43,6 @@ SECTIONS
*(.data*)
}
- .signature : {
- end_signature = .;
- LONG(0x65a22c82)
- }
-
. = ALIGN(16);
.bss : {
__bss_start = .;
@@ -45,20 +50,13 @@ SECTIONS
__bss_end = .;
}
- . = HEADER_OFFSET;
- .header : {
- *(.header)
+ .signature : {
+ *(.signature)
}
- . = ALIGN(16);
_end = .;
/DISCARD/ : {
*(.note*)
}
-
- /*
- * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility:
- */
- . = ASSERT(_end <= WAKEUP_SIZE, "Wakeup too big!");
}
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 5f1b747f6ef1..ff93bc1b09c3 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -18,12 +18,8 @@
#include "realmode/wakeup.h"
#include "sleep.h"
-unsigned long acpi_wakeup_address;
unsigned long acpi_realmode_flags;
-/* address in low memory of the wakeup routine. */
-static unsigned long acpi_realmode;
-
#if defined(CONFIG_SMP) && defined(CONFIG_64BIT)
static char temp_stack[4096];
#endif
@@ -33,22 +29,17 @@ static char temp_stack[4096];
*
* Create an identity mapped page table and copy the wakeup routine to
* low memory.
- *
- * Note that this is too late to change acpi_wakeup_address.
*/
int acpi_suspend_lowlevel(void)
{
struct wakeup_header *header;
+ /* address in low memory of the wakeup routine. */
+ char *acpi_realmode;
- if (!acpi_realmode) {
- printk(KERN_ERR "Could not allocate memory during boot, "
- "S3 disabled\n");
- return -ENOMEM;
- }
- memcpy((void *)acpi_realmode, &wakeup_code_start, WAKEUP_SIZE);
+ acpi_realmode = TRAMPOLINE_SYM(acpi_wakeup_code);
- header = (struct wakeup_header *)(acpi_realmode + HEADER_OFFSET);
- if (header->signature != 0x51ee1111) {
+ header = (struct wakeup_header *)(acpi_realmode + WAKEUP_HEADER_OFFSET);
+ if (header->signature != WAKEUP_HEADER_SIGNATURE) {
printk(KERN_ERR "wakeup header does not match\n");
return -EINVAL;
}
@@ -68,9 +59,7 @@ int acpi_suspend_lowlevel(void)
/* GDT[0]: GDT self-pointer */
header->wakeup_gdt[0] =
(u64)(sizeof(header->wakeup_gdt) - 1) +
- ((u64)(acpi_wakeup_address +
- ((char *)&header->wakeup_gdt - (char *)acpi_realmode))
- << 16);
+ ((u64)__pa(&header->wakeup_gdt) << 16);
/* GDT[1]: big real mode-like code segment */
header->wakeup_gdt[1] =
GDT_ENTRY(0x809b, acpi_wakeup_address, 0xfffff);
@@ -96,7 +85,7 @@ int acpi_suspend_lowlevel(void)
header->pmode_cr3 = (u32)__pa(&initial_page_table);
saved_magic = 0x12345678;
#else /* CONFIG_64BIT */
- header->trampoline_segment = setup_trampoline() >> 4;
+ header->trampoline_segment = trampoline_address() >> 4;
#ifdef CONFIG_SMP
stack_start = (unsigned long)temp_stack + sizeof(temp_stack);
early_gdt_descr.address =
@@ -111,45 +100,6 @@ int acpi_suspend_lowlevel(void)
return 0;
}
-/**
- * acpi_reserve_wakeup_memory - do _very_ early ACPI initialisation
- *
- * We allocate a page from the first 1MB of memory for the wakeup
- * routine for when we come back from a sleep state. The
- * runtime allocator allows specification of <16MB pages, but not
- * <1MB pages.
- */
-void __init acpi_reserve_wakeup_memory(void)
-{
- phys_addr_t mem;
-
- if ((&wakeup_code_end - &wakeup_code_start) > WAKEUP_SIZE) {
- printk(KERN_ERR
- "ACPI: Wakeup code way too big, S3 disabled.\n");
- return;
- }
-
- mem = memblock_find_in_range(0, 1<<20, WAKEUP_SIZE, PAGE_SIZE);
-
- if (mem == MEMBLOCK_ERROR) {
- printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n");
- return;
- }
- acpi_realmode = (unsigned long) phys_to_virt(mem);
- acpi_wakeup_address = mem;
- memblock_x86_reserve_range(mem, mem + WAKEUP_SIZE, "ACPI WAKEUP");
-}
-
-int __init acpi_configure_wakeup_memory(void)
-{
- if (acpi_realmode)
- set_memory_x(acpi_realmode, WAKEUP_SIZE >> PAGE_SHIFT);
-
- return 0;
-}
-arch_initcall(acpi_configure_wakeup_memory);
-
-
static int __init acpi_sleep_setup(char *str)
{
while ((str != NULL) && (*str != '\0')) {
diff --git a/arch/x86/kernel/acpi/sleep.h b/arch/x86/kernel/acpi/sleep.h
index 31ce13f20297..416d4be13fef 100644
--- a/arch/x86/kernel/acpi/sleep.h
+++ b/arch/x86/kernel/acpi/sleep.h
@@ -4,13 +4,10 @@
#include <asm/trampoline.h>
-extern char wakeup_code_start, wakeup_code_end;
-
extern unsigned long saved_video_mode;
extern long saved_magic;
extern int wakeup_pmode_return;
-extern char swsusp_pg_dir[PAGE_SIZE];
extern unsigned long acpi_copy_wakeup_routine(unsigned long);
extern void wakeup_long64(void);
diff --git a/arch/x86/kernel/acpi/wakeup_rm.S b/arch/x86/kernel/acpi/wakeup_rm.S
index 6ff3b5730575..6ce81ee9cb03 100644
--- a/arch/x86/kernel/acpi/wakeup_rm.S
+++ b/arch/x86/kernel/acpi/wakeup_rm.S
@@ -2,9 +2,11 @@
* Wrapper script for the realmode binary as a transport object
* before copying to low memory.
*/
- .section ".rodata","a"
- .globl wakeup_code_start, wakeup_code_end
-wakeup_code_start:
+#include <asm/page_types.h>
+
+ .section ".x86_trampoline","a"
+ .balign PAGE_SIZE
+ .globl acpi_wakeup_code
+acpi_wakeup_code:
.incbin "arch/x86/kernel/acpi/realmode/wakeup.bin"
-wakeup_code_end:
.size wakeup_code_start, .-wakeup_code_start
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 0a99f7198bc3..ed3c2e5b714a 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -12,7 +12,7 @@
static u32 *flush_words;
-struct pci_device_id amd_nb_misc_ids[] = {
+const struct pci_device_id amd_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_MISC) },
@@ -20,6 +20,11 @@ struct pci_device_id amd_nb_misc_ids[] = {
};
EXPORT_SYMBOL(amd_nb_misc_ids);
+static struct pci_device_id amd_nb_link_ids[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_LINK) },
+ {}
+};
+
const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[] __initconst = {
{ 0x00, 0x18, 0x20 },
{ 0xff, 0x00, 0x20 },
@@ -31,7 +36,7 @@ struct amd_northbridge_info amd_northbridges;
EXPORT_SYMBOL(amd_northbridges);
static struct pci_dev *next_northbridge(struct pci_dev *dev,
- struct pci_device_id *ids)
+ const struct pci_device_id *ids)
{
do {
dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
@@ -45,7 +50,7 @@ int amd_cache_northbridges(void)
{
int i = 0;
struct amd_northbridge *nb;
- struct pci_dev *misc;
+ struct pci_dev *misc, *link;
if (amd_nb_num())
return 0;
@@ -64,10 +69,12 @@ int amd_cache_northbridges(void)
amd_northbridges.nb = nb;
amd_northbridges.num = i;
- misc = NULL;
+ link = misc = NULL;
for (i = 0; i != amd_nb_num(); i++) {
node_to_amd_nb(i)->misc = misc =
next_northbridge(misc, amd_nb_misc_ids);
+ node_to_amd_nb(i)->link = link =
+ next_northbridge(link, amd_nb_link_ids);
}
/* some CPU families (e.g. family 0x11) do not support GART */
@@ -85,6 +92,13 @@ int amd_cache_northbridges(void)
boot_cpu_data.x86_mask >= 0x1))
amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE;
+ if (boot_cpu_data.x86 == 0x15)
+ amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE;
+
+ /* L3 cache partitioning is supported on family 0x15 */
+ if (boot_cpu_data.x86 == 0x15)
+ amd_northbridges.flags |= AMD_NB_L3_PARTITIONING;
+
return 0;
}
EXPORT_SYMBOL_GPL(amd_cache_northbridges);
@@ -93,8 +107,9 @@ EXPORT_SYMBOL_GPL(amd_cache_northbridges);
they're useless anyways */
int __init early_is_amd_nb(u32 device)
{
- struct pci_device_id *id;
+ const struct pci_device_id *id;
u32 vendor = device & 0xffff;
+
device >>= 16;
for (id = amd_nb_misc_ids; id->vendor; id++)
if (vendor == id->vendor && device == id->device)
@@ -102,6 +117,65 @@ int __init early_is_amd_nb(u32 device)
return 0;
}
+int amd_get_subcaches(int cpu)
+{
+ struct pci_dev *link = node_to_amd_nb(amd_get_nb_id(cpu))->link;
+ unsigned int mask;
+ int cuid = 0;
+
+ if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
+ return 0;
+
+ pci_read_config_dword(link, 0x1d4, &mask);
+
+#ifdef CONFIG_SMP
+ cuid = cpu_data(cpu).compute_unit_id;
+#endif
+ return (mask >> (4 * cuid)) & 0xf;
+}
+
+int amd_set_subcaches(int cpu, int mask)
+{
+ static unsigned int reset, ban;
+ struct amd_northbridge *nb = node_to_amd_nb(amd_get_nb_id(cpu));
+ unsigned int reg;
+ int cuid = 0;
+
+ if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING) || mask > 0xf)
+ return -EINVAL;
+
+ /* if necessary, collect reset state of L3 partitioning and BAN mode */
+ if (reset == 0) {
+ pci_read_config_dword(nb->link, 0x1d4, &reset);
+ pci_read_config_dword(nb->misc, 0x1b8, &ban);
+ ban &= 0x180000;
+ }
+
+ /* deactivate BAN mode if any subcaches are to be disabled */
+ if (mask != 0xf) {
+ pci_read_config_dword(nb->misc, 0x1b8, &reg);
+ pci_write_config_dword(nb->misc, 0x1b8, reg & ~0x180000);
+ }
+
+#ifdef CONFIG_SMP
+ cuid = cpu_data(cpu).compute_unit_id;
+#endif
+ mask <<= 4 * cuid;
+ mask |= (0xf ^ (1 << cuid)) << 26;
+
+ pci_write_config_dword(nb->link, 0x1d4, mask);
+
+ /* reset BAN mode if L3 partitioning returned to reset state */
+ pci_read_config_dword(nb->link, 0x1d4, &reg);
+ if (reg == reset) {
+ pci_read_config_dword(nb->misc, 0x1b8, &reg);
+ reg &= ~0x180000;
+ pci_write_config_dword(nb->misc, 0x1b8, reg | ban);
+ }
+
+ return 0;
+}
+
int amd_cache_gart(void)
{
int i;
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index 51d4e1663066..1293c709ee85 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -508,64 +508,12 @@ static int apbt_next_event(unsigned long delta,
return 0;
}
-/*
- * APB timer clock is not in sync with pclk on Langwell, which translates to
- * unreliable read value caused by sampling error. the error does not add up
- * overtime and only happens when sampling a 0 as a 1 by mistake. so the time
- * would go backwards. the following code is trying to prevent time traveling
- * backwards. little bit paranoid.
- */
static cycle_t apbt_read_clocksource(struct clocksource *cs)
{
- unsigned long t0, t1, t2;
- static unsigned long last_read;
-
-bad_count:
- t1 = apbt_readl(phy_cs_timer_id,
- APBTMR_N_CURRENT_VALUE);
- t2 = apbt_readl(phy_cs_timer_id,
- APBTMR_N_CURRENT_VALUE);
- if (unlikely(t1 < t2)) {
- pr_debug("APBT: read current count error %lx:%lx:%lx\n",
- t1, t2, t2 - t1);
- goto bad_count;
- }
- /*
- * check against cached last read, makes sure time does not go back.
- * it could be a normal rollover but we will do tripple check anyway
- */
- if (unlikely(t2 > last_read)) {
- /* check if we have a normal rollover */
- unsigned long raw_intr_status =
- apbt_readl_reg(APBTMRS_RAW_INT_STATUS);
- /*
- * cs timer interrupt is masked but raw intr bit is set if
- * rollover occurs. then we read EOI reg to clear it.
- */
- if (raw_intr_status & (1 << phy_cs_timer_id)) {
- apbt_readl(phy_cs_timer_id, APBTMR_N_EOI);
- goto out;
- }
- pr_debug("APB CS going back %lx:%lx:%lx ",
- t2, last_read, t2 - last_read);
-bad_count_x3:
- pr_debug("triple check enforced\n");
- t0 = apbt_readl(phy_cs_timer_id,
- APBTMR_N_CURRENT_VALUE);
- udelay(1);
- t1 = apbt_readl(phy_cs_timer_id,
- APBTMR_N_CURRENT_VALUE);
- udelay(1);
- t2 = apbt_readl(phy_cs_timer_id,
- APBTMR_N_CURRENT_VALUE);
- if ((t2 > t1) || (t1 > t0)) {
- printk(KERN_ERR "Error: APB CS tripple check failed\n");
- goto bad_count_x3;
- }
- }
-out:
- last_read = t2;
- return (cycle_t)~t2;
+ unsigned long current_count;
+
+ current_count = apbt_readl(phy_cs_timer_id, APBTMR_N_CURRENT_VALUE);
+ return (cycle_t)~current_count;
}
static int apbt_clocksource_register(void)
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 5955a7800a96..7b1e8e10b89c 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -13,7 +13,7 @@
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/init.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/mmzone.h>
#include <linux/pci_ids.h>
#include <linux/pci.h>
@@ -57,7 +57,7 @@ static void __init insert_aperture_resource(u32 aper_base, u32 aper_size)
static u32 __init allocate_aperture(void)
{
u32 aper_size;
- void *p;
+ unsigned long addr;
/* aper_size should <= 1G */
if (fallback_aper_order > 5)
@@ -83,27 +83,26 @@ static u32 __init allocate_aperture(void)
* so don't use 512M below as gart iommu, leave the space for kernel
* code for safe
*/
- p = __alloc_bootmem_nopanic(aper_size, aper_size, 512ULL<<20);
+ addr = memblock_find_in_range(0, 1ULL<<32, aper_size, 512ULL<<20);
+ if (addr == MEMBLOCK_ERROR || addr + aper_size > 0xffffffff) {
+ printk(KERN_ERR
+ "Cannot allocate aperture memory hole (%lx,%uK)\n",
+ addr, aper_size>>10);
+ return 0;
+ }
+ memblock_x86_reserve_range(addr, addr + aper_size, "aperture64");
/*
* Kmemleak should not scan this block as it may not be mapped via the
* kernel direct mapping.
*/
- kmemleak_ignore(p);
- if (!p || __pa(p)+aper_size > 0xffffffff) {
- printk(KERN_ERR
- "Cannot allocate aperture memory hole (%p,%uK)\n",
- p, aper_size>>10);
- if (p)
- free_bootmem(__pa(p), aper_size);
- return 0;
- }
+ kmemleak_ignore(phys_to_virt(addr));
printk(KERN_INFO "Mapping aperture over %d KB of RAM @ %lx\n",
- aper_size >> 10, __pa(p));
- insert_aperture_resource((u32)__pa(p), aper_size);
- register_nosave_region((u32)__pa(p) >> PAGE_SHIFT,
- (u32)__pa(p+aper_size) >> PAGE_SHIFT);
+ aper_size >> 10, addr);
+ insert_aperture_resource((u32)addr, aper_size);
+ register_nosave_region(addr >> PAGE_SHIFT,
+ (addr+aper_size) >> PAGE_SHIFT);
- return (u32)__pa(p);
+ return (u32)addr;
}
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 76b96d74978a..6c464a315d4b 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -43,6 +43,7 @@
#include <asm/i8259.h>
#include <asm/proto.h>
#include <asm/apic.h>
+#include <asm/io_apic.h>
#include <asm/desc.h>
#include <asm/hpet.h>
#include <asm/idle.h>
@@ -78,6 +79,15 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
#ifdef CONFIG_X86_32
+
+/*
+ * On x86_32, the mapping between cpu and logical apicid may vary
+ * depending on apic in use. The following early percpu variable is
+ * used for the mapping. This is where the behaviors of x86_64 and 32
+ * actually diverge. Let's keep it ugly for now.
+ */
+DEFINE_EARLY_PER_CPU(int, x86_cpu_to_logical_apicid, BAD_APICID);
+
/*
* Knob to control our willingness to enable the local APIC.
*
@@ -1209,7 +1219,7 @@ void __cpuinit setup_local_APIC(void)
rdtscll(tsc);
if (disable_apic) {
- arch_disable_smp_support();
+ disable_ioapic_support();
return;
}
@@ -1237,6 +1247,19 @@ void __cpuinit setup_local_APIC(void)
*/
apic->init_apic_ldr();
+#ifdef CONFIG_X86_32
+ /*
+ * APIC LDR is initialized. If logical_apicid mapping was
+ * initialized during get_smp_config(), make sure it matches the
+ * actual value.
+ */
+ i = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
+ WARN_ON(i != BAD_APICID && i != logical_smp_processor_id());
+ /* always use the value from LDR */
+ early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
+ logical_smp_processor_id();
+#endif
+
/*
* Set Task Priority to 'accept all'. We never change this
* later on.
@@ -1448,7 +1471,7 @@ int __init enable_IR(void)
void __init enable_IR_x2apic(void)
{
unsigned long flags;
- struct IO_APIC_route_entry **ioapic_entries = NULL;
+ struct IO_APIC_route_entry **ioapic_entries;
int ret, x2apic_enabled = 0;
int dmar_table_init_ret;
@@ -1562,7 +1585,7 @@ static int apic_verify(void)
return 0;
}
-int apic_force_enable(void)
+int apic_force_enable(unsigned long addr)
{
u32 h, l;
@@ -1578,7 +1601,7 @@ int apic_force_enable(void)
if (!(l & MSR_IA32_APICBASE_ENABLE)) {
pr_info("Local APIC disabled by BIOS -- reenabling.\n");
l &= ~MSR_IA32_APICBASE_BASE;
- l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
+ l |= MSR_IA32_APICBASE_ENABLE | addr;
wrmsr(MSR_IA32_APICBASE, l, h);
enabled_via_apicbase = 1;
}
@@ -1619,7 +1642,7 @@ static int __init detect_init_APIC(void)
"you can enable it with \"lapic\"\n");
return -1;
}
- if (apic_force_enable())
+ if (apic_force_enable(APIC_DEFAULT_PHYS_BASE))
return -1;
} else {
if (apic_verify())
@@ -1930,17 +1953,6 @@ void __cpuinit generic_processor_info(int apicid, int version)
{
int cpu;
- /*
- * Validate version
- */
- if (version == 0x0) {
- pr_warning("BIOS bug, APIC version is 0 for CPU#%d! "
- "fixing up to 0x10. (tell your hw vendor)\n",
- version);
- version = 0x10;
- }
- apic_version[apicid] = version;
-
if (num_processors >= nr_cpu_ids) {
int max = nr_cpu_ids;
int thiscpu = max + disabled_cpus;
@@ -1954,22 +1966,34 @@ void __cpuinit generic_processor_info(int apicid, int version)
}
num_processors++;
- cpu = cpumask_next_zero(-1, cpu_present_mask);
-
- if (version != apic_version[boot_cpu_physical_apicid])
- WARN_ONCE(1,
- "ACPI: apic version mismatch, bootcpu: %x cpu %d: %x\n",
- apic_version[boot_cpu_physical_apicid], cpu, version);
-
- physid_set(apicid, phys_cpu_present_map);
if (apicid == boot_cpu_physical_apicid) {
/*
* x86_bios_cpu_apicid is required to have processors listed
* in same order as logical cpu numbers. Hence the first
* entry is BSP, and so on.
+ * boot_cpu_init() already hold bit 0 in cpu_present_mask
+ * for BSP.
*/
cpu = 0;
+ } else
+ cpu = cpumask_next_zero(-1, cpu_present_mask);
+
+ /*
+ * Validate version
+ */
+ if (version == 0x0) {
+ pr_warning("BIOS bug: APIC version is 0 for CPU %d/0x%x, fixing up to 0x10\n",
+ cpu, apicid);
+ version = 0x10;
+ }
+ apic_version[apicid] = version;
+
+ if (version != apic_version[boot_cpu_physical_apicid]) {
+ pr_warning("BIOS bug: APIC version mismatch, boot CPU: %x, CPU %d: version %x\n",
+ apic_version[boot_cpu_physical_apicid], cpu, version);
}
+
+ physid_set(apicid, phys_cpu_present_map);
if (apicid > max_physical_apicid)
max_physical_apicid = apicid;
@@ -1977,7 +2001,10 @@ void __cpuinit generic_processor_info(int apicid, int version)
early_per_cpu(x86_cpu_to_apicid, cpu) = apicid;
early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
#endif
-
+#ifdef CONFIG_X86_32
+ early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
+ apic->x86_32_early_logical_apicid(cpu);
+#endif
set_cpu_possible(cpu, true);
set_cpu_present(cpu, true);
}
@@ -1998,10 +2025,14 @@ void default_init_apic_ldr(void)
}
#ifdef CONFIG_X86_32
-int default_apicid_to_node(int logical_apicid)
+int default_x86_32_numa_cpu_node(int cpu)
{
-#ifdef CONFIG_SMP
- return apicid_2_node[hard_smp_processor_id()];
+#ifdef CONFIG_NUMA
+ int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
+
+ if (apicid != BAD_APICID)
+ return __apicid_to_node[apicid];
+ return NUMA_NO_NODE;
#else
return 0;
#endif
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 09d3b17ce0c2..5652d31fe108 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -185,8 +185,6 @@ struct apic apic_flat = {
.ioapic_phys_id_map = NULL,
.setup_apic_routing = NULL,
.multi_timer_check = NULL,
- .apicid_to_node = NULL,
- .cpu_to_logical_apicid = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
.apicid_to_cpu_present = NULL,
.setup_portio_remap = NULL,
@@ -337,8 +335,6 @@ struct apic apic_physflat = {
.ioapic_phys_id_map = NULL,
.setup_apic_routing = NULL,
.multi_timer_check = NULL,
- .apicid_to_node = NULL,
- .cpu_to_logical_apicid = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
.apicid_to_cpu_present = NULL,
.setup_portio_remap = NULL,
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index e31b9ffe25f5..f1baa2dc087a 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -54,11 +54,6 @@ static u64 noop_apic_icr_read(void)
return 0;
}
-static int noop_cpu_to_logical_apicid(int cpu)
-{
- return 0;
-}
-
static int noop_phys_pkg_id(int cpuid_apic, int index_msb)
{
return 0;
@@ -113,12 +108,6 @@ static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask)
cpumask_set_cpu(cpu, retmask);
}
-int noop_apicid_to_node(int logical_apicid)
-{
- /* we're always on node 0 */
- return 0;
-}
-
static u32 noop_apic_read(u32 reg)
{
WARN_ON_ONCE((cpu_has_apic && !disable_apic));
@@ -130,6 +119,14 @@ static void noop_apic_write(u32 reg, u32 v)
WARN_ON_ONCE(cpu_has_apic && !disable_apic);
}
+#ifdef CONFIG_X86_32
+static int noop_x86_32_numa_cpu_node(int cpu)
+{
+ /* we're always on node 0 */
+ return 0;
+}
+#endif
+
struct apic apic_noop = {
.name = "noop",
.probe = noop_probe,
@@ -153,9 +150,7 @@ struct apic apic_noop = {
.ioapic_phys_id_map = default_ioapic_phys_id_map,
.setup_apic_routing = NULL,
.multi_timer_check = NULL,
- .apicid_to_node = noop_apicid_to_node,
- .cpu_to_logical_apicid = noop_cpu_to_logical_apicid,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
.apicid_to_cpu_present = physid_set_mask_of_physid,
@@ -197,4 +192,9 @@ struct apic apic_noop = {
.icr_write = noop_apic_icr_write,
.wait_icr_idle = noop_apic_wait_icr_idle,
.safe_wait_icr_idle = noop_safe_apic_wait_icr_idle,
+
+#ifdef CONFIG_X86_32
+ .x86_32_early_logical_apicid = noop_x86_32_early_logical_apicid,
+ .x86_32_numa_cpu_node = noop_x86_32_numa_cpu_node,
+#endif
};
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index cb804c5091b9..541a2e431659 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -45,6 +45,12 @@ static unsigned long bigsmp_check_apicid_present(int bit)
return 1;
}
+static int bigsmp_early_logical_apicid(int cpu)
+{
+ /* on bigsmp, logical apicid is the same as physical */
+ return early_per_cpu(x86_cpu_to_apicid, cpu);
+}
+
static inline unsigned long calculate_ldr(int cpu)
{
unsigned long val, id;
@@ -80,11 +86,6 @@ static void bigsmp_setup_apic_routing(void)
nr_ioapics);
}
-static int bigsmp_apicid_to_node(int logical_apicid)
-{
- return apicid_2_node[hard_smp_processor_id()];
-}
-
static int bigsmp_cpu_present_to_apicid(int mps_cpu)
{
if (mps_cpu < nr_cpu_ids)
@@ -93,14 +94,6 @@ static int bigsmp_cpu_present_to_apicid(int mps_cpu)
return BAD_APICID;
}
-/* Mapping from cpu number to logical apicid */
-static inline int bigsmp_cpu_to_logical_apicid(int cpu)
-{
- if (cpu >= nr_cpu_ids)
- return BAD_APICID;
- return cpu_physical_id(cpu);
-}
-
static void bigsmp_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
{
/* For clustered we don't have a good way to do this yet - hack */
@@ -115,7 +108,11 @@ static int bigsmp_check_phys_apicid_present(int phys_apicid)
/* As we are using single CPU as destination, pick only one CPU here */
static unsigned int bigsmp_cpu_mask_to_apicid(const struct cpumask *cpumask)
{
- return bigsmp_cpu_to_logical_apicid(cpumask_first(cpumask));
+ int cpu = cpumask_first(cpumask);
+
+ if (cpu < nr_cpu_ids)
+ return cpu_physical_id(cpu);
+ return BAD_APICID;
}
static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
@@ -129,9 +126,9 @@ static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
*/
for_each_cpu_and(cpu, cpumask, andmask) {
if (cpumask_test_cpu(cpu, cpu_online_mask))
- break;
+ return cpu_physical_id(cpu);
}
- return bigsmp_cpu_to_logical_apicid(cpu);
+ return BAD_APICID;
}
static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb)
@@ -219,8 +216,6 @@ struct apic apic_bigsmp = {
.ioapic_phys_id_map = bigsmp_ioapic_phys_id_map,
.setup_apic_routing = bigsmp_setup_apic_routing,
.multi_timer_check = NULL,
- .apicid_to_node = bigsmp_apicid_to_node,
- .cpu_to_logical_apicid = bigsmp_cpu_to_logical_apicid,
.cpu_present_to_apicid = bigsmp_cpu_present_to_apicid,
.apicid_to_cpu_present = physid_set_mask_of_physid,
.setup_portio_remap = NULL,
@@ -256,4 +251,7 @@ struct apic apic_bigsmp = {
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
.safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
+
+ .x86_32_early_logical_apicid = bigsmp_early_logical_apicid,
+ .x86_32_numa_cpu_node = default_x86_32_numa_cpu_node,
};
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 8593582d8022..3e9de4854c5b 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -460,6 +460,12 @@ static unsigned long es7000_check_apicid_present(int bit)
return physid_isset(bit, phys_cpu_present_map);
}
+static int es7000_early_logical_apicid(int cpu)
+{
+ /* on es7000, logical apicid is the same as physical */
+ return early_per_cpu(x86_bios_cpu_apicid, cpu);
+}
+
static unsigned long calculate_ldr(int cpu)
{
unsigned long id = per_cpu(x86_bios_cpu_apicid, cpu);
@@ -504,12 +510,11 @@ static void es7000_setup_apic_routing(void)
nr_ioapics, cpumask_bits(es7000_target_cpus())[0]);
}
-static int es7000_apicid_to_node(int logical_apicid)
+static int es7000_numa_cpu_node(int cpu)
{
return 0;
}
-
static int es7000_cpu_present_to_apicid(int mps_cpu)
{
if (!mps_cpu)
@@ -528,18 +533,6 @@ static void es7000_apicid_to_cpu_present(int phys_apicid, physid_mask_t *retmap)
++cpu_id;
}
-/* Mapping from cpu number to logical apicid */
-static int es7000_cpu_to_logical_apicid(int cpu)
-{
-#ifdef CONFIG_SMP
- if (cpu >= nr_cpu_ids)
- return BAD_APICID;
- return cpu_2_logical_apicid[cpu];
-#else
- return logical_smp_processor_id();
-#endif
-}
-
static void es7000_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
{
/* For clustered we don't have a good way to do this yet - hack */
@@ -561,7 +554,7 @@ static unsigned int es7000_cpu_mask_to_apicid(const struct cpumask *cpumask)
* The cpus in the mask must all be on the apic cluster.
*/
for_each_cpu(cpu, cpumask) {
- int new_apicid = es7000_cpu_to_logical_apicid(cpu);
+ int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
WARN(1, "Not a valid mask!");
@@ -578,7 +571,7 @@ static unsigned int
es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask,
const struct cpumask *andmask)
{
- int apicid = es7000_cpu_to_logical_apicid(0);
+ int apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);
cpumask_var_t cpumask;
if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
@@ -655,8 +648,6 @@ struct apic __refdata apic_es7000_cluster = {
.ioapic_phys_id_map = es7000_ioapic_phys_id_map,
.setup_apic_routing = es7000_setup_apic_routing,
.multi_timer_check = NULL,
- .apicid_to_node = es7000_apicid_to_node,
- .cpu_to_logical_apicid = es7000_cpu_to_logical_apicid,
.cpu_present_to_apicid = es7000_cpu_present_to_apicid,
.apicid_to_cpu_present = es7000_apicid_to_cpu_present,
.setup_portio_remap = NULL,
@@ -695,6 +686,9 @@ struct apic __refdata apic_es7000_cluster = {
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
.safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
+
+ .x86_32_early_logical_apicid = es7000_early_logical_apicid,
+ .x86_32_numa_cpu_node = es7000_numa_cpu_node,
};
struct apic __refdata apic_es7000 = {
@@ -720,8 +714,6 @@ struct apic __refdata apic_es7000 = {
.ioapic_phys_id_map = es7000_ioapic_phys_id_map,
.setup_apic_routing = es7000_setup_apic_routing,
.multi_timer_check = NULL,
- .apicid_to_node = es7000_apicid_to_node,
- .cpu_to_logical_apicid = es7000_cpu_to_logical_apicid,
.cpu_present_to_apicid = es7000_cpu_present_to_apicid,
.apicid_to_cpu_present = es7000_apicid_to_cpu_present,
.setup_portio_remap = NULL,
@@ -758,4 +750,7 @@ struct apic __refdata apic_es7000 = {
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
.safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
+
+ .x86_32_early_logical_apicid = es7000_early_logical_apicid,
+ .x86_32_numa_cpu_node = es7000_numa_cpu_node,
};
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index 79fd43ca6f96..c4e557a1ebb6 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -83,7 +83,6 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
arch_spin_lock(&lock);
printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
show_regs(regs);
- dump_stack();
arch_spin_unlock(&lock);
cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
return NOTIFY_STOP;
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 88659f07c8df..a96c87cfbabc 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -108,7 +108,10 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
int skip_ioapic_setup;
-void arch_disable_smp_support(void)
+/**
+ * disable_ioapic_support() - disables ioapic support at runtime
+ */
+void disable_ioapic_support(void)
{
#ifdef CONFIG_PCI
noioapicquirk = 1;
@@ -120,11 +123,14 @@ void arch_disable_smp_support(void)
static int __init parse_noapic(char *str)
{
/* disable IO-APIC */
- arch_disable_smp_support();
+ disable_ioapic_support();
return 0;
}
early_param("noapic", parse_noapic);
+static int io_apic_setup_irq_pin_once(unsigned int irq, int node,
+ struct io_apic_irq_attr *attr);
+
/* Will be called in mpparse/acpi/sfi codes for saving IRQ info */
void mp_save_irq(struct mpc_intsrc *m)
{
@@ -818,7 +824,7 @@ static int EISA_ELCR(unsigned int irq)
#define default_MCA_trigger(idx) (1)
#define default_MCA_polarity(idx) default_ISA_polarity(idx)
-static int MPBIOS_polarity(int idx)
+static int irq_polarity(int idx)
{
int bus = mp_irqs[idx].srcbus;
int polarity;
@@ -860,7 +866,7 @@ static int MPBIOS_polarity(int idx)
return polarity;
}
-static int MPBIOS_trigger(int idx)
+static int irq_trigger(int idx)
{
int bus = mp_irqs[idx].srcbus;
int trigger;
@@ -932,16 +938,6 @@ static int MPBIOS_trigger(int idx)
return trigger;
}
-static inline int irq_polarity(int idx)
-{
- return MPBIOS_polarity(idx);
-}
-
-static inline int irq_trigger(int idx)
-{
- return MPBIOS_trigger(idx);
-}
-
static int pin_2_irq(int idx, int apic, int pin)
{
int irq;
@@ -1220,10 +1216,6 @@ void __setup_vector_irq(int cpu)
static struct irq_chip ioapic_chip;
static struct irq_chip ir_ioapic_chip;
-#define IOAPIC_AUTO -1
-#define IOAPIC_EDGE 0
-#define IOAPIC_LEVEL 1
-
#ifdef CONFIG_X86_32
static inline int IO_APIC_irq_trigger(int irq)
{
@@ -1385,33 +1377,26 @@ static struct {
DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
} mp_ioapic_routing[MAX_IO_APICS];
-static void __init setup_IO_APIC_irqs(void)
+static bool __init io_apic_pin_not_connected(int idx, int apic_id, int pin)
{
- int apic_id, pin, idx, irq, notcon = 0;
- int node = cpu_to_node(0);
- struct irq_cfg *cfg;
+ if (idx != -1)
+ return false;
- apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
+ apic_printk(APIC_VERBOSE, KERN_DEBUG " apic %d pin %d not connected\n",
+ mp_ioapics[apic_id].apicid, pin);
+ return true;
+}
+
+static void __init __io_apic_setup_irqs(unsigned int apic_id)
+{
+ int idx, node = cpu_to_node(0);
+ struct io_apic_irq_attr attr;
+ unsigned int pin, irq;
- for (apic_id = 0; apic_id < nr_ioapics; apic_id++)
for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) {
idx = find_irq_entry(apic_id, pin, mp_INT);
- if (idx == -1) {
- if (!notcon) {
- notcon = 1;
- apic_printk(APIC_VERBOSE,
- KERN_DEBUG " %d-%d",
- mp_ioapics[apic_id].apicid, pin);
- } else
- apic_printk(APIC_VERBOSE, " %d-%d",
- mp_ioapics[apic_id].apicid, pin);
+ if (io_apic_pin_not_connected(idx, apic_id, pin))
continue;
- }
- if (notcon) {
- apic_printk(APIC_VERBOSE,
- " (apicid-pin) not connected\n");
- notcon = 0;
- }
irq = pin_2_irq(idx, apic_id, pin);
@@ -1423,25 +1408,24 @@ static void __init setup_IO_APIC_irqs(void)
* installed and if it returns 1:
*/
if (apic->multi_timer_check &&
- apic->multi_timer_check(apic_id, irq))
+ apic->multi_timer_check(apic_id, irq))
continue;
- cfg = alloc_irq_and_cfg_at(irq, node);
- if (!cfg)
- continue;
+ set_io_apic_irq_attr(&attr, apic_id, pin, irq_trigger(idx),
+ irq_polarity(idx));
- add_pin_to_irq_node(cfg, node, apic_id, pin);
- /*
- * don't mark it in pin_programmed, so later acpi could
- * set it correctly when irq < 16
- */
- setup_ioapic_irq(apic_id, pin, irq, cfg, irq_trigger(idx),
- irq_polarity(idx));
+ io_apic_setup_irq_pin(irq, node, &attr);
}
+}
- if (notcon)
- apic_printk(APIC_VERBOSE,
- " (apicid-pin) not connected\n");
+static void __init setup_IO_APIC_irqs(void)
+{
+ unsigned int apic_id;
+
+ apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
+
+ for (apic_id = 0; apic_id < nr_ioapics; apic_id++)
+ __io_apic_setup_irqs(apic_id);
}
/*
@@ -1452,7 +1436,7 @@ static void __init setup_IO_APIC_irqs(void)
void setup_IO_APIC_irq_extra(u32 gsi)
{
int apic_id = 0, pin, idx, irq, node = cpu_to_node(0);
- struct irq_cfg *cfg;
+ struct io_apic_irq_attr attr;
/*
* Convert 'gsi' to 'ioapic.pin'.
@@ -1472,21 +1456,10 @@ void setup_IO_APIC_irq_extra(u32 gsi)
if (apic_id == 0 || irq < NR_IRQS_LEGACY)
return;
- cfg = alloc_irq_and_cfg_at(irq, node);
- if (!cfg)
- return;
-
- add_pin_to_irq_node(cfg, node, apic_id, pin);
-
- if (test_bit(pin, mp_ioapic_routing[apic_id].pin_programmed)) {
- pr_debug("Pin %d-%d already programmed\n",
- mp_ioapics[apic_id].apicid, pin);
- return;
- }
- set_bit(pin, mp_ioapic_routing[apic_id].pin_programmed);
+ set_io_apic_irq_attr(&attr, apic_id, pin, irq_trigger(idx),
+ irq_polarity(idx));
- setup_ioapic_irq(apic_id, pin, irq, cfg,
- irq_trigger(idx), irq_polarity(idx));
+ io_apic_setup_irq_pin_once(irq, node, &attr);
}
/*
@@ -3605,7 +3578,40 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
}
#endif /* CONFIG_HT_IRQ */
-int __init io_apic_get_redir_entries (int ioapic)
+int
+io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr)
+{
+ struct irq_cfg *cfg = alloc_irq_and_cfg_at(irq, node);
+ int ret;
+
+ if (!cfg)
+ return -EINVAL;
+ ret = __add_pin_to_irq_node(cfg, node, attr->ioapic, attr->ioapic_pin);
+ if (!ret)
+ setup_ioapic_irq(attr->ioapic, attr->ioapic_pin, irq, cfg,
+ attr->trigger, attr->polarity);
+ return ret;
+}
+
+static int io_apic_setup_irq_pin_once(unsigned int irq, int node,
+ struct io_apic_irq_attr *attr)
+{
+ unsigned int id = attr->ioapic, pin = attr->ioapic_pin;
+ int ret;
+
+ /* Avoid redundant programming */
+ if (test_bit(pin, mp_ioapic_routing[id].pin_programmed)) {
+ pr_debug("Pin %d-%d already programmed\n",
+ mp_ioapics[id].apicid, pin);
+ return 0;
+ }
+ ret = io_apic_setup_irq_pin(irq, node, attr);
+ if (!ret)
+ set_bit(pin, mp_ioapic_routing[id].pin_programmed);
+ return ret;
+}
+
+static int __init io_apic_get_redir_entries(int ioapic)
{
union IO_APIC_reg_01 reg_01;
unsigned long flags;
@@ -3659,96 +3665,24 @@ int __init arch_probe_nr_irqs(void)
}
#endif
-static int __io_apic_set_pci_routing(struct device *dev, int irq,
- struct io_apic_irq_attr *irq_attr)
+int io_apic_set_pci_routing(struct device *dev, int irq,
+ struct io_apic_irq_attr *irq_attr)
{
- struct irq_cfg *cfg;
int node;
- int ioapic, pin;
- int trigger, polarity;
- ioapic = irq_attr->ioapic;
if (!IO_APIC_IRQ(irq)) {
apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
- ioapic);
+ irq_attr->ioapic);
return -EINVAL;
}
- if (dev)
- node = dev_to_node(dev);
- else
- node = cpu_to_node(0);
-
- cfg = alloc_irq_and_cfg_at(irq, node);
- if (!cfg)
- return 0;
-
- pin = irq_attr->ioapic_pin;
- trigger = irq_attr->trigger;
- polarity = irq_attr->polarity;
-
- /*
- * IRQs < 16 are already in the irq_2_pin[] map
- */
- if (irq >= legacy_pic->nr_legacy_irqs) {
- if (__add_pin_to_irq_node(cfg, node, ioapic, pin)) {
- printk(KERN_INFO "can not add pin %d for irq %d\n",
- pin, irq);
- return 0;
- }
- }
-
- setup_ioapic_irq(ioapic, pin, irq, cfg, trigger, polarity);
+ node = dev ? dev_to_node(dev) : cpu_to_node(0);
- return 0;
+ return io_apic_setup_irq_pin_once(irq, node, irq_attr);
}
-int io_apic_set_pci_routing(struct device *dev, int irq,
- struct io_apic_irq_attr *irq_attr)
-{
- int ioapic, pin;
- /*
- * Avoid pin reprogramming. PRTs typically include entries
- * with redundant pin->gsi mappings (but unique PCI devices);
- * we only program the IOAPIC on the first.
- */
- ioapic = irq_attr->ioapic;
- pin = irq_attr->ioapic_pin;
- if (test_bit(pin, mp_ioapic_routing[ioapic].pin_programmed)) {
- pr_debug("Pin %d-%d already programmed\n",
- mp_ioapics[ioapic].apicid, pin);
- return 0;
- }
- set_bit(pin, mp_ioapic_routing[ioapic].pin_programmed);
-
- return __io_apic_set_pci_routing(dev, irq, irq_attr);
-}
-
-u8 __init io_apic_unique_id(u8 id)
-{
#ifdef CONFIG_X86_32
- if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
- !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
- return io_apic_get_unique_id(nr_ioapics, id);
- else
- return id;
-#else
- int i;
- DECLARE_BITMAP(used, 256);
-
- bitmap_zero(used, 256);
- for (i = 0; i < nr_ioapics; i++) {
- struct mpc_ioapic *ia = &mp_ioapics[i];
- __set_bit(ia->apicid, used);
- }
- if (!test_bit(id, used))
- return id;
- return find_first_zero_bit(used, 256);
-#endif
-}
-
-#ifdef CONFIG_X86_32
-int __init io_apic_get_unique_id(int ioapic, int apic_id)
+static int __init io_apic_get_unique_id(int ioapic, int apic_id)
{
union IO_APIC_reg_00 reg_00;
static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
@@ -3821,9 +3755,33 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
return apic_id;
}
+
+static u8 __init io_apic_unique_id(u8 id)
+{
+ if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
+ !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+ return io_apic_get_unique_id(nr_ioapics, id);
+ else
+ return id;
+}
+#else
+static u8 __init io_apic_unique_id(u8 id)
+{
+ int i;
+ DECLARE_BITMAP(used, 256);
+
+ bitmap_zero(used, 256);
+ for (i = 0; i < nr_ioapics; i++) {
+ struct mpc_ioapic *ia = &mp_ioapics[i];
+ __set_bit(ia->apicid, used);
+ }
+ if (!test_bit(id, used))
+ return id;
+ return find_first_zero_bit(used, 256);
+}
#endif
-int __init io_apic_get_version(int ioapic)
+static int __init io_apic_get_version(int ioapic)
{
union IO_APIC_reg_01 reg_01;
unsigned long flags;
@@ -4026,7 +3984,7 @@ int mp_find_ioapic_pin(int ioapic, u32 gsi)
return gsi - mp_gsi_routing[ioapic].gsi_base;
}
-static int bad_ioapic(unsigned long address)
+static __init int bad_ioapic(unsigned long address)
{
if (nr_ioapics >= MAX_IO_APICS) {
printk(KERN_WARNING "WARNING: Max # of I/O APICs (%d) exceeded "
@@ -4086,20 +4044,15 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
/* Enable IOAPIC early just for system timer */
void __init pre_init_apic_IRQ0(void)
{
- struct irq_cfg *cfg;
+ struct io_apic_irq_attr attr = { 0, 0, 0, 0 };
printk(KERN_INFO "Early APIC setup for system timer0\n");
#ifndef CONFIG_SMP
physid_set_mask_of_physid(boot_cpu_physical_apicid,
&phys_cpu_present_map);
#endif
- /* Make sure the irq descriptor is set up */
- cfg = alloc_irq_and_cfg_at(0, 0);
-
setup_local_APIC();
- add_pin_to_irq_node(cfg, 0, 0, 0);
+ io_apic_setup_irq_pin(0, 0, &attr);
set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
-
- setup_ioapic_irq(0, 0, 0, cfg, 0, 0);
}
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index 08385e090a6f..cce91bf26676 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -56,6 +56,8 @@ void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,
local_irq_restore(flags);
}
+#ifdef CONFIG_X86_32
+
void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
int vector)
{
@@ -71,8 +73,8 @@ void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
local_irq_save(flags);
for_each_cpu(query_cpu, mask)
__default_send_IPI_dest_field(
- apic->cpu_to_logical_apicid(query_cpu), vector,
- apic->dest_logical);
+ early_per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+ vector, apic->dest_logical);
local_irq_restore(flags);
}
@@ -90,14 +92,12 @@ void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
if (query_cpu == this_cpu)
continue;
__default_send_IPI_dest_field(
- apic->cpu_to_logical_apicid(query_cpu), vector,
- apic->dest_logical);
+ early_per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+ vector, apic->dest_logical);
}
local_irq_restore(flags);
}
-#ifdef CONFIG_X86_32
-
/*
* This is only used on smaller machines.
*/
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index 960f26ab5c9f..6273eee5134b 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -373,13 +373,6 @@ static inline void numaq_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask
return physids_promote(0xFUL, retmap);
}
-static inline int numaq_cpu_to_logical_apicid(int cpu)
-{
- if (cpu >= nr_cpu_ids)
- return BAD_APICID;
- return cpu_2_logical_apicid[cpu];
-}
-
/*
* Supporting over 60 cpus on NUMA-Q requires a locality-dependent
* cpu to APIC ID relation to properly interact with the intelligent
@@ -398,6 +391,15 @@ static inline int numaq_apicid_to_node(int logical_apicid)
return logical_apicid >> 4;
}
+static int numaq_numa_cpu_node(int cpu)
+{
+ int logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
+
+ if (logical_apicid != BAD_APICID)
+ return numaq_apicid_to_node(logical_apicid);
+ return NUMA_NO_NODE;
+}
+
static void numaq_apicid_to_cpu_present(int logical_apicid, physid_mask_t *retmap)
{
int node = numaq_apicid_to_node(logical_apicid);
@@ -508,8 +510,6 @@ struct apic __refdata apic_numaq = {
.ioapic_phys_id_map = numaq_ioapic_phys_id_map,
.setup_apic_routing = numaq_setup_apic_routing,
.multi_timer_check = numaq_multi_timer_check,
- .apicid_to_node = numaq_apicid_to_node,
- .cpu_to_logical_apicid = numaq_cpu_to_logical_apicid,
.cpu_present_to_apicid = numaq_cpu_present_to_apicid,
.apicid_to_cpu_present = numaq_apicid_to_cpu_present,
.setup_portio_remap = numaq_setup_portio_remap,
@@ -547,4 +547,7 @@ struct apic __refdata apic_numaq = {
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
.safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
+
+ .x86_32_early_logical_apicid = noop_x86_32_early_logical_apicid,
+ .x86_32_numa_cpu_node = numaq_numa_cpu_node,
};
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 99d2fe016084..fc84c7b61108 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -77,6 +77,11 @@ void __init default_setup_apic_routing(void)
apic->setup_apic_routing();
}
+static int default_x86_32_early_logical_apicid(int cpu)
+{
+ return 1 << cpu;
+}
+
static void setup_apic_flat_routing(void)
{
#ifdef CONFIG_X86_IO_APIC
@@ -130,8 +135,6 @@ struct apic apic_default = {
.ioapic_phys_id_map = default_ioapic_phys_id_map,
.setup_apic_routing = setup_apic_flat_routing,
.multi_timer_check = NULL,
- .apicid_to_node = default_apicid_to_node,
- .cpu_to_logical_apicid = default_cpu_to_logical_apicid,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
.apicid_to_cpu_present = physid_set_mask_of_physid,
.setup_portio_remap = NULL,
@@ -167,6 +170,9 @@ struct apic apic_default = {
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
.safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
+
+ .x86_32_early_logical_apicid = default_x86_32_early_logical_apicid,
+ .x86_32_numa_cpu_node = default_x86_32_numa_cpu_node,
};
extern struct apic apic_numaq;
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 9b419263d90d..e4b8059b414a 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -194,11 +194,10 @@ static unsigned long summit_check_apicid_present(int bit)
return 1;
}
-static void summit_init_apic_ldr(void)
+static int summit_early_logical_apicid(int cpu)
{
- unsigned long val, id;
int count = 0;
- u8 my_id = (u8)hard_smp_processor_id();
+ u8 my_id = early_per_cpu(x86_cpu_to_apicid, cpu);
u8 my_cluster = APIC_CLUSTER(my_id);
#ifdef CONFIG_SMP
u8 lid;
@@ -206,7 +205,7 @@ static void summit_init_apic_ldr(void)
/* Create logical APIC IDs by counting CPUs already in cluster. */
for (count = 0, i = nr_cpu_ids; --i >= 0; ) {
- lid = cpu_2_logical_apicid[i];
+ lid = early_per_cpu(x86_cpu_to_logical_apicid, i);
if (lid != BAD_APICID && APIC_CLUSTER(lid) == my_cluster)
++count;
}
@@ -214,7 +213,15 @@ static void summit_init_apic_ldr(void)
/* We only have a 4 wide bitmap in cluster mode. If a deranged
* BIOS puts 5 CPUs in one APIC cluster, we're hosed. */
BUG_ON(count >= XAPIC_DEST_CPUS_SHIFT);
- id = my_cluster | (1UL << count);
+ return my_cluster | (1UL << count);
+}
+
+static void summit_init_apic_ldr(void)
+{
+ int cpu = smp_processor_id();
+ unsigned long id = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
+ unsigned long val;
+
apic_write(APIC_DFR, SUMMIT_APIC_DFR_VALUE);
val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
val |= SET_APIC_LOGICAL_ID(id);
@@ -232,27 +239,6 @@ static void summit_setup_apic_routing(void)
nr_ioapics);
}
-static int summit_apicid_to_node(int logical_apicid)
-{
-#ifdef CONFIG_SMP
- return apicid_2_node[hard_smp_processor_id()];
-#else
- return 0;
-#endif
-}
-
-/* Mapping from cpu number to logical apicid */
-static inline int summit_cpu_to_logical_apicid(int cpu)
-{
-#ifdef CONFIG_SMP
- if (cpu >= nr_cpu_ids)
- return BAD_APICID;
- return cpu_2_logical_apicid[cpu];
-#else
- return logical_smp_processor_id();
-#endif
-}
-
static int summit_cpu_present_to_apicid(int mps_cpu)
{
if (mps_cpu < nr_cpu_ids)
@@ -286,7 +272,7 @@ static unsigned int summit_cpu_mask_to_apicid(const struct cpumask *cpumask)
* The cpus in the mask must all be on the apic cluster.
*/
for_each_cpu(cpu, cpumask) {
- int new_apicid = summit_cpu_to_logical_apicid(cpu);
+ int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
printk("%s: Not a valid mask!\n", __func__);
@@ -301,7 +287,7 @@ static unsigned int summit_cpu_mask_to_apicid(const struct cpumask *cpumask)
static unsigned int summit_cpu_mask_to_apicid_and(const struct cpumask *inmask,
const struct cpumask *andmask)
{
- int apicid = summit_cpu_to_logical_apicid(0);
+ int apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);
cpumask_var_t cpumask;
if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
@@ -528,8 +514,6 @@ struct apic apic_summit = {
.ioapic_phys_id_map = summit_ioapic_phys_id_map,
.setup_apic_routing = summit_setup_apic_routing,
.multi_timer_check = NULL,
- .apicid_to_node = summit_apicid_to_node,
- .cpu_to_logical_apicid = summit_cpu_to_logical_apicid,
.cpu_present_to_apicid = summit_cpu_present_to_apicid,
.apicid_to_cpu_present = summit_apicid_to_cpu_present,
.setup_portio_remap = NULL,
@@ -565,4 +549,7 @@ struct apic apic_summit = {
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
.safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
+
+ .x86_32_early_logical_apicid = summit_early_logical_apicid,
+ .x86_32_numa_cpu_node = default_x86_32_numa_cpu_node,
};
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index cf69c59f4910..90949bbd566d 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -206,8 +206,6 @@ struct apic apic_x2apic_cluster = {
.ioapic_phys_id_map = NULL,
.setup_apic_routing = NULL,
.multi_timer_check = NULL,
- .apicid_to_node = NULL,
- .cpu_to_logical_apicid = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
.apicid_to_cpu_present = NULL,
.setup_portio_remap = NULL,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 8972f38c5ced..c7e6d6645bf4 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -195,8 +195,6 @@ struct apic apic_x2apic_phys = {
.ioapic_phys_id_map = NULL,
.setup_apic_routing = NULL,
.multi_timer_check = NULL,
- .apicid_to_node = NULL,
- .cpu_to_logical_apicid = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
.apicid_to_cpu_present = NULL,
.setup_portio_remap = NULL,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index bd16b58b8850..3c289281394c 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -338,8 +338,6 @@ struct apic __refdata apic_x2apic_uv_x = {
.ioapic_phys_id_map = NULL,
.setup_apic_routing = NULL,
.multi_timer_check = NULL,
- .apicid_to_node = NULL,
- .cpu_to_logical_apicid = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
.apicid_to_cpu_present = NULL,
.setup_portio_remap = NULL,
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 15f47f741983..9079926a5b18 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -976,20 +976,10 @@ recalc:
static void apm_power_off(void)
{
- unsigned char po_bios_call[] = {
- 0xb8, 0x00, 0x10, /* movw $0x1000,ax */
- 0x8e, 0xd0, /* movw ax,ss */
- 0xbc, 0x00, 0xf0, /* movw $0xf000,sp */
- 0xb8, 0x07, 0x53, /* movw $0x5307,ax */
- 0xbb, 0x01, 0x00, /* movw $0x0001,bx */
- 0xb9, 0x03, 0x00, /* movw $0x0003,cx */
- 0xcd, 0x15 /* int $0x15 */
- };
-
/* Some bioses don't like being called from CPU != 0 */
if (apm_info.realmode_power_off) {
set_cpus_allowed_ptr(current, cpumask_of(0));
- machine_real_restart(po_bios_call, sizeof(po_bios_call));
+ machine_real_restart(MRR_APM);
} else {
(void)set_system_power_state(APM_STATE_OFF);
}
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index cfa82c899f47..4f13fafc5264 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -1,5 +1,70 @@
+/*
+ * Generate definitions needed by assembly language modules.
+ * This code generates raw asm output which is post-processed to extract
+ * and format the required data.
+ */
+#define COMPILE_OFFSETS
+
+#include <linux/crypto.h>
+#include <linux/sched.h>
+#include <linux/stddef.h>
+#include <linux/hardirq.h>
+#include <linux/suspend.h>
+#include <linux/kbuild.h>
+#include <asm/processor.h>
+#include <asm/thread_info.h>
+#include <asm/sigframe.h>
+#include <asm/bootparam.h>
+#include <asm/suspend.h>
+
+#ifdef CONFIG_XEN
+#include <xen/interface/xen.h>
+#endif
+
#ifdef CONFIG_X86_32
# include "asm-offsets_32.c"
#else
# include "asm-offsets_64.c"
#endif
+
+void common(void) {
+ BLANK();
+ OFFSET(TI_flags, thread_info, flags);
+ OFFSET(TI_status, thread_info, status);
+ OFFSET(TI_addr_limit, thread_info, addr_limit);
+ OFFSET(TI_preempt_count, thread_info, preempt_count);
+
+ BLANK();
+ OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
+
+ BLANK();
+ OFFSET(pbe_address, pbe, address);
+ OFFSET(pbe_orig_address, pbe, orig_address);
+ OFFSET(pbe_next, pbe, next);
+
+#ifdef CONFIG_PARAVIRT
+ BLANK();
+ OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
+ OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops);
+ OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops);
+ OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
+ OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
+ OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
+ OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
+ OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
+ OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2);
+#endif
+
+#ifdef CONFIG_XEN
+ BLANK();
+ OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
+ OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
+#endif
+
+ BLANK();
+ OFFSET(BP_scratch, boot_params, scratch);
+ OFFSET(BP_loadflags, boot_params, hdr.loadflags);
+ OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
+ OFFSET(BP_version, boot_params, hdr.version);
+ OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
+}
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 1a4088dda37a..c29d631af6fc 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -1,26 +1,4 @@
-/*
- * Generate definitions needed by assembly language modules.
- * This code generates raw asm output which is post-processed
- * to extract and format the required data.
- */
-
-#include <linux/crypto.h>
-#include <linux/sched.h>
-#include <linux/signal.h>
-#include <linux/personality.h>
-#include <linux/suspend.h>
-#include <linux/kbuild.h>
#include <asm/ucontext.h>
-#include <asm/sigframe.h>
-#include <asm/pgtable.h>
-#include <asm/fixmap.h>
-#include <asm/processor.h>
-#include <asm/thread_info.h>
-#include <asm/bootparam.h>
-#include <asm/elf.h>
-#include <asm/suspend.h>
-
-#include <xen/interface/xen.h>
#include <linux/lguest.h>
#include "../../../drivers/lguest/lg.h"
@@ -51,21 +29,10 @@ void foo(void)
OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id);
BLANK();
- OFFSET(TI_task, thread_info, task);
- OFFSET(TI_exec_domain, thread_info, exec_domain);
- OFFSET(TI_flags, thread_info, flags);
- OFFSET(TI_status, thread_info, status);
- OFFSET(TI_preempt_count, thread_info, preempt_count);
- OFFSET(TI_addr_limit, thread_info, addr_limit);
- OFFSET(TI_restart_block, thread_info, restart_block);
OFFSET(TI_sysenter_return, thread_info, sysenter_return);
OFFSET(TI_cpu, thread_info, cpu);
BLANK();
- OFFSET(GDS_size, desc_ptr, size);
- OFFSET(GDS_address, desc_ptr, address);
- BLANK();
-
OFFSET(PT_EBX, pt_regs, bx);
OFFSET(PT_ECX, pt_regs, cx);
OFFSET(PT_EDX, pt_regs, dx);
@@ -85,42 +52,13 @@ void foo(void)
OFFSET(PT_OLDSS, pt_regs, ss);
BLANK();
- OFFSET(EXEC_DOMAIN_handler, exec_domain, handler);
OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext);
BLANK();
- OFFSET(pbe_address, pbe, address);
- OFFSET(pbe_orig_address, pbe, orig_address);
- OFFSET(pbe_next, pbe, next);
-
/* Offset from the sysenter stack to tss.sp0 */
DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) -
sizeof(struct tss_struct));
- DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
- DEFINE(PAGE_SHIFT_asm, PAGE_SHIFT);
- DEFINE(THREAD_SIZE_asm, THREAD_SIZE);
-
- OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
-
-#ifdef CONFIG_PARAVIRT
- BLANK();
- OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
- OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops);
- OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops);
- OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
- OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
- OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
- OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
- OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
-#endif
-
-#ifdef CONFIG_XEN
- BLANK();
- OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
- OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
-#endif
-
#if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
BLANK();
OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
@@ -139,11 +77,4 @@ void foo(void)
OFFSET(LGUEST_PAGES_regs_errcode, lguest_pages, regs.errcode);
OFFSET(LGUEST_PAGES_regs, lguest_pages, regs);
#endif
-
- BLANK();
- OFFSET(BP_scratch, boot_params, scratch);
- OFFSET(BP_loadflags, boot_params, hdr.loadflags);
- OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
- OFFSET(BP_version, boot_params, hdr.version);
- OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
}
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 4a6aeedcd965..e72a1194af22 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -1,27 +1,4 @@
-/*
- * Generate definitions needed by assembly language modules.
- * This code generates raw asm output which is post-processed to extract
- * and format the required data.
- */
-#define COMPILE_OFFSETS
-
-#include <linux/crypto.h>
-#include <linux/sched.h>
-#include <linux/stddef.h>
-#include <linux/errno.h>
-#include <linux/hardirq.h>
-#include <linux/suspend.h>
-#include <linux/kbuild.h>
-#include <asm/processor.h>
-#include <asm/segment.h>
-#include <asm/thread_info.h>
#include <asm/ia32.h>
-#include <asm/bootparam.h>
-#include <asm/suspend.h>
-
-#include <xen/interface/xen.h>
-
-#include <asm/sigframe.h>
#define __NO_STUBS 1
#undef __SYSCALL
@@ -33,41 +10,19 @@ static char syscalls[] = {
int main(void)
{
-#define ENTRY(entry) DEFINE(tsk_ ## entry, offsetof(struct task_struct, entry))
- ENTRY(state);
- ENTRY(flags);
- ENTRY(pid);
- BLANK();
-#undef ENTRY
-#define ENTRY(entry) DEFINE(TI_ ## entry, offsetof(struct thread_info, entry))
- ENTRY(flags);
- ENTRY(addr_limit);
- ENTRY(preempt_count);
- ENTRY(status);
-#ifdef CONFIG_IA32_EMULATION
- ENTRY(sysenter_return);
-#endif
- BLANK();
-#undef ENTRY
#ifdef CONFIG_PARAVIRT
- BLANK();
- OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
- OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops);
- OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops);
- OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
- OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
OFFSET(PV_IRQ_adjust_exception_frame, pv_irq_ops, adjust_exception_frame);
- OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
OFFSET(PV_CPU_usergs_sysret32, pv_cpu_ops, usergs_sysret32);
OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
- OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
- OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2);
+ BLANK();
#endif
-
#ifdef CONFIG_IA32_EMULATION
-#define ENTRY(entry) DEFINE(IA32_SIGCONTEXT_ ## entry, offsetof(struct sigcontext_ia32, entry))
+ OFFSET(TI_sysenter_return, thread_info, sysenter_return);
+ BLANK();
+
+#define ENTRY(entry) OFFSET(IA32_SIGCONTEXT_ ## entry, sigcontext_ia32, entry)
ENTRY(ax);
ENTRY(bx);
ENTRY(cx);
@@ -79,15 +34,12 @@ int main(void)
ENTRY(ip);
BLANK();
#undef ENTRY
- DEFINE(IA32_RT_SIGFRAME_sigcontext,
- offsetof (struct rt_sigframe_ia32, uc.uc_mcontext));
+
+ OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe_ia32, uc.uc_mcontext);
BLANK();
#endif
- DEFINE(pbe_address, offsetof(struct pbe, address));
- DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address));
- DEFINE(pbe_next, offsetof(struct pbe, next));
- BLANK();
-#define ENTRY(entry) DEFINE(pt_regs_ ## entry, offsetof(struct pt_regs, entry))
+
+#define ENTRY(entry) OFFSET(pt_regs_ ## entry, pt_regs, entry)
ENTRY(bx);
ENTRY(bx);
ENTRY(cx);
@@ -107,7 +59,8 @@ int main(void)
ENTRY(flags);
BLANK();
#undef ENTRY
-#define ENTRY(entry) DEFINE(saved_context_ ## entry, offsetof(struct saved_context, entry))
+
+#define ENTRY(entry) OFFSET(saved_context_ ## entry, saved_context, entry)
ENTRY(cr0);
ENTRY(cr2);
ENTRY(cr3);
@@ -115,26 +68,11 @@ int main(void)
ENTRY(cr8);
BLANK();
#undef ENTRY
- DEFINE(TSS_ist, offsetof(struct tss_struct, x86_tss.ist));
- BLANK();
- DEFINE(crypto_tfm_ctx_offset, offsetof(struct crypto_tfm, __crt_ctx));
- BLANK();
- DEFINE(__NR_syscall_max, sizeof(syscalls) - 1);
+ OFFSET(TSS_ist, tss_struct, x86_tss.ist);
BLANK();
- OFFSET(BP_scratch, boot_params, scratch);
- OFFSET(BP_loadflags, boot_params, hdr.loadflags);
- OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
- OFFSET(BP_version, boot_params, hdr.version);
- OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
- BLANK();
- DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
-#ifdef CONFIG_XEN
- BLANK();
- OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
- OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
-#undef ENTRY
-#endif
+ DEFINE(__NR_syscall_max, sizeof(syscalls) - 1);
+
return 0;
}
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 7c7bedb83c5a..f771ab6b49e9 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -233,18 +233,22 @@ static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
}
#endif
-#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
+#ifdef CONFIG_NUMA
+/*
+ * To workaround broken NUMA config. Read the comment in
+ * srat_detect_node().
+ */
static int __cpuinit nearby_node(int apicid)
{
int i, node;
for (i = apicid - 1; i >= 0; i--) {
- node = apicid_to_node[i];
+ node = __apicid_to_node[i];
if (node != NUMA_NO_NODE && node_online(node))
return node;
}
for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
- node = apicid_to_node[i];
+ node = __apicid_to_node[i];
if (node != NUMA_NO_NODE && node_online(node))
return node;
}
@@ -261,7 +265,7 @@ static int __cpuinit nearby_node(int apicid)
#ifdef CONFIG_X86_HT
static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c)
{
- u32 nodes;
+ u32 nodes, cores_per_cu = 1;
u8 node_id;
int cpu = smp_processor_id();
@@ -276,6 +280,7 @@ static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c)
/* get compute unit information */
smp_num_siblings = ((ebx >> 8) & 3) + 1;
c->compute_unit_id = ebx & 0xff;
+ cores_per_cu += ((ebx >> 8) & 3);
} else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
u64 value;
@@ -288,15 +293,18 @@ static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c)
/* fixup multi-node processor information */
if (nodes > 1) {
u32 cores_per_node;
+ u32 cus_per_node;
set_cpu_cap(c, X86_FEATURE_AMD_DCM);
cores_per_node = c->x86_max_cores / nodes;
+ cus_per_node = cores_per_node / cores_per_cu;
/* store NodeID, use llc_shared_map to store sibling info */
per_cpu(cpu_llc_id, cpu) = node_id;
- /* core id to be in range from 0 to (cores_per_node - 1) */
- c->cpu_core_id = c->cpu_core_id % cores_per_node;
+ /* core id has to be in the [0 .. cores_per_node - 1] range */
+ c->cpu_core_id %= cores_per_node;
+ c->compute_unit_id %= cus_per_node;
}
}
#endif
@@ -334,31 +342,40 @@ EXPORT_SYMBOL_GPL(amd_get_nb_id);
static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
{
-#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
+#ifdef CONFIG_NUMA
int cpu = smp_processor_id();
int node;
unsigned apicid = c->apicid;
- node = per_cpu(cpu_llc_id, cpu);
+ node = numa_cpu_node(cpu);
+ if (node == NUMA_NO_NODE)
+ node = per_cpu(cpu_llc_id, cpu);
- if (apicid_to_node[apicid] != NUMA_NO_NODE)
- node = apicid_to_node[apicid];
if (!node_online(node)) {
- /* Two possibilities here:
- - The CPU is missing memory and no node was created.
- In that case try picking one from a nearby CPU
- - The APIC IDs differ from the HyperTransport node IDs
- which the K8 northbridge parsing fills in.
- Assume they are all increased by a constant offset,
- but in the same order as the HT nodeids.
- If that doesn't result in a usable node fall back to the
- path for the previous case. */
-
+ /*
+ * Two possibilities here:
+ *
+ * - The CPU is missing memory and no node was created. In
+ * that case try picking one from a nearby CPU.
+ *
+ * - The APIC IDs differ from the HyperTransport node IDs
+ * which the K8 northbridge parsing fills in. Assume
+ * they are all increased by a constant offset, but in
+ * the same order as the HT nodeids. If that doesn't
+ * result in a usable node fall back to the path for the
+ * previous case.
+ *
+ * This workaround operates directly on the mapping between
+ * APIC ID and NUMA node, assuming certain relationship
+ * between APIC ID, HT node ID and NUMA topology. As going
+ * through CPU mapping may alter the outcome, directly
+ * access __apicid_to_node[].
+ */
int ht_nodeid = c->initial_apicid;
if (ht_nodeid >= 0 &&
- apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
- node = apicid_to_node[ht_nodeid];
+ __apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
+ node = __apicid_to_node[ht_nodeid];
/* Pick a nearby node */
if (!node_online(node))
node = nearby_node(apicid);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 1d59834396bd..a2559c3ed500 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -869,7 +869,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
select_idle_routine(c);
-#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
+#ifdef CONFIG_NUMA
numa_add_cpu(smp_processor_id());
#endif
}
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index d16c2c53d6bf..df86bc8c859d 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -276,14 +276,13 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
{
-#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
+#ifdef CONFIG_NUMA
unsigned node;
int cpu = smp_processor_id();
- int apicid = cpu_has_apic ? hard_smp_processor_id() : c->apicid;
/* Don't do the funky fallback heuristics the AMD version employs
for now. */
- node = apicid_to_node[apicid];
+ node = numa_cpu_node(cpu);
if (node == NUMA_NO_NODE || !node_online(node)) {
/* reuse the value from init_cpu_to_node() */
node = cpu_to_node(cpu);
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index ec2c19a7b8ef..1ce1af2899df 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -304,8 +304,9 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
struct _cache_attr {
struct attribute attr;
- ssize_t (*show)(struct _cpuid4_info *, char *);
- ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count);
+ ssize_t (*show)(struct _cpuid4_info *, char *, unsigned int);
+ ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count,
+ unsigned int);
};
#ifdef CONFIG_AMD_NB
@@ -400,7 +401,8 @@ static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
#define SHOW_CACHE_DISABLE(slot) \
static ssize_t \
-show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf) \
+show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf, \
+ unsigned int cpu) \
{ \
return show_cache_disable(this_leaf, buf, slot); \
}
@@ -512,7 +514,8 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
#define STORE_CACHE_DISABLE(slot) \
static ssize_t \
store_cache_disable_##slot(struct _cpuid4_info *this_leaf, \
- const char *buf, size_t count) \
+ const char *buf, size_t count, \
+ unsigned int cpu) \
{ \
return store_cache_disable(this_leaf, buf, count, slot); \
}
@@ -524,6 +527,39 @@ static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
show_cache_disable_1, store_cache_disable_1);
+static ssize_t
+show_subcaches(struct _cpuid4_info *this_leaf, char *buf, unsigned int cpu)
+{
+ if (!this_leaf->l3 || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
+ return -EINVAL;
+
+ return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
+}
+
+static ssize_t
+store_subcaches(struct _cpuid4_info *this_leaf, const char *buf, size_t count,
+ unsigned int cpu)
+{
+ unsigned long val;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (!this_leaf->l3 || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
+ return -EINVAL;
+
+ if (strict_strtoul(buf, 16, &val) < 0)
+ return -EINVAL;
+
+ if (amd_set_subcaches(cpu, val))
+ return -EINVAL;
+
+ return count;
+}
+
+static struct _cache_attr subcaches =
+ __ATTR(subcaches, 0644, show_subcaches, store_subcaches);
+
#else /* CONFIG_AMD_NB */
#define amd_init_l3_cache(x, y)
#endif /* CONFIG_AMD_NB */
@@ -532,9 +568,9 @@ static int
__cpuinit cpuid4_cache_lookup_regs(int index,
struct _cpuid4_info_regs *this_leaf)
{
- union _cpuid4_leaf_eax eax;
- union _cpuid4_leaf_ebx ebx;
- union _cpuid4_leaf_ecx ecx;
+ union _cpuid4_leaf_eax eax;
+ union _cpuid4_leaf_ebx ebx;
+ union _cpuid4_leaf_ecx ecx;
unsigned edx;
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
@@ -732,11 +768,11 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
struct cpuinfo_x86 *c = &cpu_data(cpu);
if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) {
- for_each_cpu(i, c->llc_shared_map) {
+ for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
if (!per_cpu(ici_cpuid4_info, i))
continue;
this_leaf = CPUID4_INFO_IDX(i, index);
- for_each_cpu(sibling, c->llc_shared_map) {
+ for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
if (!cpu_online(sibling))
continue;
set_bit(sibling, this_leaf->shared_cpu_map);
@@ -870,8 +906,8 @@ static DEFINE_PER_CPU(struct _index_kobject *, ici_index_kobject);
#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(ici_index_kobject, x))[y]))
#define show_one_plus(file_name, object, val) \
-static ssize_t show_##file_name \
- (struct _cpuid4_info *this_leaf, char *buf) \
+static ssize_t show_##file_name(struct _cpuid4_info *this_leaf, char *buf, \
+ unsigned int cpu) \
{ \
return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \
}
@@ -882,7 +918,8 @@ show_one_plus(physical_line_partition, ebx.split.physical_line_partition, 1);
show_one_plus(ways_of_associativity, ebx.split.ways_of_associativity, 1);
show_one_plus(number_of_sets, ecx.split.number_of_sets, 1);
-static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf)
+static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf,
+ unsigned int cpu)
{
return sprintf(buf, "%luK\n", this_leaf->size / 1024);
}
@@ -906,17 +943,20 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
return n;
}
-static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf)
+static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf,
+ unsigned int cpu)
{
return show_shared_cpu_map_func(leaf, 0, buf);
}
-static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf)
+static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf,
+ unsigned int cpu)
{
return show_shared_cpu_map_func(leaf, 1, buf);
}
-static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf)
+static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf,
+ unsigned int cpu)
{
switch (this_leaf->eax.split.type) {
case CACHE_TYPE_DATA:
@@ -974,6 +1014,9 @@ static struct attribute ** __cpuinit amd_l3_attrs(void)
if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
n += 2;
+ if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
+ n += 1;
+
attrs = kzalloc(n * sizeof (struct attribute *), GFP_KERNEL);
if (attrs == NULL)
return attrs = default_attrs;
@@ -986,6 +1029,9 @@ static struct attribute ** __cpuinit amd_l3_attrs(void)
attrs[n++] = &cache_disable_1.attr;
}
+ if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
+ attrs[n++] = &subcaches.attr;
+
return attrs;
}
#endif
@@ -998,7 +1044,7 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
ret = fattr->show ?
fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
- buf) :
+ buf, this_leaf->cpu) :
0;
return ret;
}
@@ -1012,7 +1058,7 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
ret = fattr->store ?
fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
- buf, count) :
+ buf, count, this_leaf->cpu) :
0;
return ret;
}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 5bf2fac52aca..167f97b5596e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -527,15 +527,12 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
int i, err = 0;
struct threshold_bank *b = NULL;
char name[32];
-#ifdef CONFIG_SMP
- struct cpuinfo_x86 *c = &cpu_data(cpu);
-#endif
sprintf(name, "threshold_bank%i", bank);
#ifdef CONFIG_SMP
if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */
- i = cpumask_first(c->llc_shared_map);
+ i = cpumask_first(cpu_llc_shared_mask(cpu));
/* first core not up yet */
if (cpu_data(i).cpu_core_id)
@@ -555,7 +552,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
if (err)
goto out;
- cpumask_copy(b->cpus, c->llc_shared_map);
+ cpumask_copy(b->cpus, cpu_llc_shared_mask(cpu));
per_cpu(threshold_banks, cpu)[bank] = b;
goto out;
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 9d977a2ea693..10bfe2472d16 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -298,7 +298,7 @@ x86_perf_event_update(struct perf_event *event)
*/
again:
prev_raw_count = local64_read(&hwc->prev_count);
- rdmsrl(hwc->event_base + idx, new_raw_count);
+ rdmsrl(hwc->event_base, new_raw_count);
if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
new_raw_count) != prev_raw_count)
@@ -321,6 +321,24 @@ again:
return new_raw_count;
}
+/* using X86_FEATURE_PERFCTR_CORE to later implement ALTERNATIVE() here */
+static inline int x86_pmu_addr_offset(int index)
+{
+ if (boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
+ return index << 1;
+ return index;
+}
+
+static inline unsigned int x86_pmu_config_addr(int index)
+{
+ return x86_pmu.eventsel + x86_pmu_addr_offset(index);
+}
+
+static inline unsigned int x86_pmu_event_addr(int index)
+{
+ return x86_pmu.perfctr + x86_pmu_addr_offset(index);
+}
+
static atomic_t active_events;
static DEFINE_MUTEX(pmc_reserve_mutex);
@@ -331,12 +349,12 @@ static bool reserve_pmc_hardware(void)
int i;
for (i = 0; i < x86_pmu.num_counters; i++) {
- if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
+ if (!reserve_perfctr_nmi(x86_pmu_event_addr(i)))
goto perfctr_fail;
}
for (i = 0; i < x86_pmu.num_counters; i++) {
- if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
+ if (!reserve_evntsel_nmi(x86_pmu_config_addr(i)))
goto eventsel_fail;
}
@@ -344,13 +362,13 @@ static bool reserve_pmc_hardware(void)
eventsel_fail:
for (i--; i >= 0; i--)
- release_evntsel_nmi(x86_pmu.eventsel + i);
+ release_evntsel_nmi(x86_pmu_config_addr(i));
i = x86_pmu.num_counters;
perfctr_fail:
for (i--; i >= 0; i--)
- release_perfctr_nmi(x86_pmu.perfctr + i);
+ release_perfctr_nmi(x86_pmu_event_addr(i));
return false;
}
@@ -360,8 +378,8 @@ static void release_pmc_hardware(void)
int i;
for (i = 0; i < x86_pmu.num_counters; i++) {
- release_perfctr_nmi(x86_pmu.perfctr + i);
- release_evntsel_nmi(x86_pmu.eventsel + i);
+ release_perfctr_nmi(x86_pmu_event_addr(i));
+ release_evntsel_nmi(x86_pmu_config_addr(i));
}
}
@@ -382,7 +400,7 @@ static bool check_hw_exists(void)
* complain and bail.
*/
for (i = 0; i < x86_pmu.num_counters; i++) {
- reg = x86_pmu.eventsel + i;
+ reg = x86_pmu_config_addr(i);
ret = rdmsrl_safe(reg, &val);
if (ret)
goto msr_fail;
@@ -407,8 +425,8 @@ static bool check_hw_exists(void)
* that don't trap on the MSR access and always return 0s.
*/
val = 0xabcdUL;
- ret = checking_wrmsrl(x86_pmu.perfctr, val);
- ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new);
+ ret = checking_wrmsrl(x86_pmu_event_addr(0), val);
+ ret |= rdmsrl_safe(x86_pmu_event_addr(0), &val_new);
if (ret || val != val_new)
goto msr_fail;
@@ -617,11 +635,11 @@ static void x86_pmu_disable_all(void)
if (!test_bit(idx, cpuc->active_mask))
continue;
- rdmsrl(x86_pmu.eventsel + idx, val);
+ rdmsrl(x86_pmu_config_addr(idx), val);
if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE))
continue;
val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
- wrmsrl(x86_pmu.eventsel + idx, val);
+ wrmsrl(x86_pmu_config_addr(idx), val);
}
}
@@ -642,21 +660,24 @@ static void x86_pmu_disable(struct pmu *pmu)
x86_pmu.disable_all();
}
+static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
+ u64 enable_mask)
+{
+ wrmsrl(hwc->config_base, hwc->config | enable_mask);
+}
+
static void x86_pmu_enable_all(int added)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
int idx;
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
- struct perf_event *event = cpuc->events[idx];
- u64 val;
+ struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
if (!test_bit(idx, cpuc->active_mask))
continue;
- val = event->hw.config;
- val |= ARCH_PERFMON_EVENTSEL_ENABLE;
- wrmsrl(x86_pmu.eventsel + idx, val);
+ __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
}
}
@@ -821,15 +842,10 @@ static inline void x86_assign_hw_event(struct perf_event *event,
hwc->event_base = 0;
} else if (hwc->idx >= X86_PMC_IDX_FIXED) {
hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
- /*
- * We set it so that event_base + idx in wrmsr/rdmsr maps to
- * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
- */
- hwc->event_base =
- MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
+ hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0;
} else {
- hwc->config_base = x86_pmu.eventsel;
- hwc->event_base = x86_pmu.perfctr;
+ hwc->config_base = x86_pmu_config_addr(hwc->idx);
+ hwc->event_base = x86_pmu_event_addr(hwc->idx);
}
}
@@ -915,17 +931,11 @@ static void x86_pmu_enable(struct pmu *pmu)
x86_pmu.enable_all(added);
}
-static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
- u64 enable_mask)
-{
- wrmsrl(hwc->config_base + hwc->idx, hwc->config | enable_mask);
-}
-
static inline void x86_pmu_disable_event(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
- wrmsrl(hwc->config_base + hwc->idx, hwc->config);
+ wrmsrl(hwc->config_base, hwc->config);
}
static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
@@ -978,7 +988,7 @@ x86_perf_event_set_period(struct perf_event *event)
*/
local64_set(&hwc->prev_count, (u64)-left);
- wrmsrl(hwc->event_base + idx, (u64)(-left) & x86_pmu.cntval_mask);
+ wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
/*
* Due to erratum on certan cpu we need
@@ -986,7 +996,7 @@ x86_perf_event_set_period(struct perf_event *event)
* is updated properly
*/
if (x86_pmu.perfctr_second_write) {
- wrmsrl(hwc->event_base + idx,
+ wrmsrl(hwc->event_base,
(u64)(-left) & x86_pmu.cntval_mask);
}
@@ -1113,8 +1123,8 @@ void perf_event_print_debug(void)
pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask);
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
- rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
- rdmsrl(x86_pmu.perfctr + idx, pmc_count);
+ rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl);
+ rdmsrl(x86_pmu_event_addr(idx), pmc_count);
prev_left = per_cpu(pmc_prev_left[idx], cpu);
@@ -1389,7 +1399,7 @@ static void __init pmu_check_apic(void)
pr_info("no hardware sampling interrupt available.\n");
}
-int __init init_hw_perf_events(void)
+static int __init init_hw_perf_events(void)
{
struct event_constraint *c;
int err;
@@ -1608,7 +1618,7 @@ out:
return ret;
}
-int x86_pmu_event_init(struct perf_event *event)
+static int x86_pmu_event_init(struct perf_event *event)
{
struct pmu *tmp;
int err;
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 67e2202a6039..461f62bbd774 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -127,6 +127,11 @@ static int amd_pmu_hw_config(struct perf_event *event)
/*
* AMD64 events are detected based on their event codes.
*/
+static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc)
+{
+ return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff);
+}
+
static inline int amd_is_nb_event(struct hw_perf_event *hwc)
{
return (hwc->config & 0xe0) == 0xe0;
@@ -385,13 +390,181 @@ static __initconst const struct x86_pmu amd_pmu = {
.cpu_dead = amd_pmu_cpu_dead,
};
+/* AMD Family 15h */
+
+#define AMD_EVENT_TYPE_MASK 0x000000F0ULL
+
+#define AMD_EVENT_FP 0x00000000ULL ... 0x00000010ULL
+#define AMD_EVENT_LS 0x00000020ULL ... 0x00000030ULL
+#define AMD_EVENT_DC 0x00000040ULL ... 0x00000050ULL
+#define AMD_EVENT_CU 0x00000060ULL ... 0x00000070ULL
+#define AMD_EVENT_IC_DE 0x00000080ULL ... 0x00000090ULL
+#define AMD_EVENT_EX_LS 0x000000C0ULL
+#define AMD_EVENT_DE 0x000000D0ULL
+#define AMD_EVENT_NB 0x000000E0ULL ... 0x000000F0ULL
+
+/*
+ * AMD family 15h event code/PMC mappings:
+ *
+ * type = event_code & 0x0F0:
+ *
+ * 0x000 FP PERF_CTL[5:3]
+ * 0x010 FP PERF_CTL[5:3]
+ * 0x020 LS PERF_CTL[5:0]
+ * 0x030 LS PERF_CTL[5:0]
+ * 0x040 DC PERF_CTL[5:0]
+ * 0x050 DC PERF_CTL[5:0]
+ * 0x060 CU PERF_CTL[2:0]
+ * 0x070 CU PERF_CTL[2:0]
+ * 0x080 IC/DE PERF_CTL[2:0]
+ * 0x090 IC/DE PERF_CTL[2:0]
+ * 0x0A0 ---
+ * 0x0B0 ---
+ * 0x0C0 EX/LS PERF_CTL[5:0]
+ * 0x0D0 DE PERF_CTL[2:0]
+ * 0x0E0 NB NB_PERF_CTL[3:0]
+ * 0x0F0 NB NB_PERF_CTL[3:0]
+ *
+ * Exceptions:
+ *
+ * 0x003 FP PERF_CTL[3]
+ * 0x00B FP PERF_CTL[3]
+ * 0x00D FP PERF_CTL[3]
+ * 0x023 DE PERF_CTL[2:0]
+ * 0x02D LS PERF_CTL[3]
+ * 0x02E LS PERF_CTL[3,0]
+ * 0x043 CU PERF_CTL[2:0]
+ * 0x045 CU PERF_CTL[2:0]
+ * 0x046 CU PERF_CTL[2:0]
+ * 0x054 CU PERF_CTL[2:0]
+ * 0x055 CU PERF_CTL[2:0]
+ * 0x08F IC PERF_CTL[0]
+ * 0x187 DE PERF_CTL[0]
+ * 0x188 DE PERF_CTL[0]
+ * 0x0DB EX PERF_CTL[5:0]
+ * 0x0DC LS PERF_CTL[5:0]
+ * 0x0DD LS PERF_CTL[5:0]
+ * 0x0DE LS PERF_CTL[5:0]
+ * 0x0DF LS PERF_CTL[5:0]
+ * 0x1D6 EX PERF_CTL[5:0]
+ * 0x1D8 EX PERF_CTL[5:0]
+ */
+
+static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0);
+static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0);
+static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0);
+static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT(0, 0x09, 0);
+static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
+static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
+
+static struct event_constraint *
+amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event)
+{
+ unsigned int event_code = amd_get_event_code(&event->hw);
+
+ switch (event_code & AMD_EVENT_TYPE_MASK) {
+ case AMD_EVENT_FP:
+ switch (event_code) {
+ case 0x003:
+ case 0x00B:
+ case 0x00D:
+ return &amd_f15_PMC3;
+ default:
+ return &amd_f15_PMC53;
+ }
+ case AMD_EVENT_LS:
+ case AMD_EVENT_DC:
+ case AMD_EVENT_EX_LS:
+ switch (event_code) {
+ case 0x023:
+ case 0x043:
+ case 0x045:
+ case 0x046:
+ case 0x054:
+ case 0x055:
+ return &amd_f15_PMC20;
+ case 0x02D:
+ return &amd_f15_PMC3;
+ case 0x02E:
+ return &amd_f15_PMC30;
+ default:
+ return &amd_f15_PMC50;
+ }
+ case AMD_EVENT_CU:
+ case AMD_EVENT_IC_DE:
+ case AMD_EVENT_DE:
+ switch (event_code) {
+ case 0x08F:
+ case 0x187:
+ case 0x188:
+ return &amd_f15_PMC0;
+ case 0x0DB ... 0x0DF:
+ case 0x1D6:
+ case 0x1D8:
+ return &amd_f15_PMC50;
+ default:
+ return &amd_f15_PMC20;
+ }
+ case AMD_EVENT_NB:
+ /* not yet implemented */
+ return &emptyconstraint;
+ default:
+ return &emptyconstraint;
+ }
+}
+
+static __initconst const struct x86_pmu amd_pmu_f15h = {
+ .name = "AMD Family 15h",
+ .handle_irq = x86_pmu_handle_irq,
+ .disable_all = x86_pmu_disable_all,
+ .enable_all = x86_pmu_enable_all,
+ .enable = x86_pmu_enable_event,
+ .disable = x86_pmu_disable_event,
+ .hw_config = amd_pmu_hw_config,
+ .schedule_events = x86_schedule_events,
+ .eventsel = MSR_F15H_PERF_CTL,
+ .perfctr = MSR_F15H_PERF_CTR,
+ .event_map = amd_pmu_event_map,
+ .max_events = ARRAY_SIZE(amd_perfmon_event_map),
+ .num_counters = 6,
+ .cntval_bits = 48,
+ .cntval_mask = (1ULL << 48) - 1,
+ .apic = 1,
+ /* use highest bit to detect overflow */
+ .max_period = (1ULL << 47) - 1,
+ .get_event_constraints = amd_get_event_constraints_f15h,
+ /* nortbridge counters not yet implemented: */
+#if 0
+ .put_event_constraints = amd_put_event_constraints,
+
+ .cpu_prepare = amd_pmu_cpu_prepare,
+ .cpu_starting = amd_pmu_cpu_starting,
+ .cpu_dead = amd_pmu_cpu_dead,
+#endif
+};
+
static __init int amd_pmu_init(void)
{
/* Performance-monitoring supported from K7 and later: */
if (boot_cpu_data.x86 < 6)
return -ENODEV;
- x86_pmu = amd_pmu;
+ /*
+ * If core performance counter extensions exists, it must be
+ * family 15h, otherwise fail. See x86_pmu_addr_offset().
+ */
+ switch (boot_cpu_data.x86) {
+ case 0x15:
+ if (!cpu_has_perfctr_core)
+ return -ENODEV;
+ x86_pmu = amd_pmu_f15h;
+ break;
+ default:
+ if (cpu_has_perfctr_core)
+ return -ENODEV;
+ x86_pmu = amd_pmu;
+ break;
+ }
/* Events are common for all AMDs */
memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 008835c1d79c..084b38362db7 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -691,8 +691,8 @@ static void intel_pmu_reset(void)
printk("clearing PMU state on CPU#%d\n", smp_processor_id());
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
- checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
- checking_wrmsrl(x86_pmu.perfctr + idx, 0ull);
+ checking_wrmsrl(x86_pmu_config_addr(idx), 0ull);
+ checking_wrmsrl(x86_pmu_event_addr(idx), 0ull);
}
for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++)
checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index ff751a9f182b..3769ac822f96 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -764,9 +764,9 @@ static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
u64 v;
/* an official way for overflow indication */
- rdmsrl(hwc->config_base + hwc->idx, v);
+ rdmsrl(hwc->config_base, v);
if (v & P4_CCCR_OVF) {
- wrmsrl(hwc->config_base + hwc->idx, v & ~P4_CCCR_OVF);
+ wrmsrl(hwc->config_base, v & ~P4_CCCR_OVF);
return 1;
}
@@ -815,7 +815,7 @@ static inline void p4_pmu_disable_event(struct perf_event *event)
* state we need to clear P4_CCCR_OVF, otherwise interrupt get
* asserted again and again
*/
- (void)checking_wrmsrl(hwc->config_base + hwc->idx,
+ (void)checking_wrmsrl(hwc->config_base,
(u64)(p4_config_unpack_cccr(hwc->config)) &
~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED);
}
@@ -885,7 +885,7 @@ static void p4_pmu_enable_event(struct perf_event *event)
p4_pmu_enable_pebs(hwc->config);
(void)checking_wrmsrl(escr_addr, escr_conf);
- (void)checking_wrmsrl(hwc->config_base + hwc->idx,
+ (void)checking_wrmsrl(hwc->config_base,
(cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE);
}
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index 34ba07be2cda..20c097e33860 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -68,7 +68,7 @@ p6_pmu_disable_event(struct perf_event *event)
if (cpuc->enabled)
val |= ARCH_PERFMON_EVENTSEL_ENABLE;
- (void)checking_wrmsrl(hwc->config_base + hwc->idx, val);
+ (void)checking_wrmsrl(hwc->config_base, val);
}
static void p6_pmu_enable_event(struct perf_event *event)
@@ -81,7 +81,7 @@ static void p6_pmu_enable_event(struct perf_event *event)
if (cpuc->enabled)
val |= ARCH_PERFMON_EVENTSEL_ENABLE;
- (void)checking_wrmsrl(hwc->config_base + hwc->idx, val);
+ (void)checking_wrmsrl(hwc->config_base, val);
}
static __initconst const struct x86_pmu p6_pmu = {
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index d5a236615501..966512b2cacf 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -46,6 +46,8 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
/* returns the bit offset of the performance counter register */
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_AMD:
+ if (msr >= MSR_F15H_PERF_CTR)
+ return (msr - MSR_F15H_PERF_CTR) >> 1;
return msr - MSR_K7_PERFCTR0;
case X86_VENDOR_INTEL:
if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
@@ -70,6 +72,8 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
/* returns the bit offset of the event selection register */
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_AMD:
+ if (msr >= MSR_F15H_PERF_CTL)
+ return (msr - MSR_F15H_PERF_CTL) >> 1;
return msr - MSR_K7_EVNTSEL0;
case X86_VENDOR_INTEL:
if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
new file mode 100644
index 000000000000..7a8cebc9ff29
--- /dev/null
+++ b/arch/x86/kernel/devicetree.c
@@ -0,0 +1,441 @@
+/*
+ * Architecture specific OF callbacks.
+ */
+#include <linux/bootmem.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/of_irq.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/of_pci.h>
+
+#include <asm/hpet.h>
+#include <asm/irq_controller.h>
+#include <asm/apic.h>
+#include <asm/pci_x86.h>
+
+__initdata u64 initial_dtb;
+char __initdata cmd_line[COMMAND_LINE_SIZE];
+static LIST_HEAD(irq_domains);
+static DEFINE_RAW_SPINLOCK(big_irq_lock);
+
+int __initdata of_ioapic;
+
+#ifdef CONFIG_X86_IO_APIC
+static void add_interrupt_host(struct irq_domain *ih)
+{
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&big_irq_lock, flags);
+ list_add(&ih->l, &irq_domains);
+ raw_spin_unlock_irqrestore(&big_irq_lock, flags);
+}
+#endif
+
+static struct irq_domain *get_ih_from_node(struct device_node *controller)
+{
+ struct irq_domain *ih, *found = NULL;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&big_irq_lock, flags);
+ list_for_each_entry(ih, &irq_domains, l) {
+ if (ih->controller == controller) {
+ found = ih;
+ break;
+ }
+ }
+ raw_spin_unlock_irqrestore(&big_irq_lock, flags);
+ return found;
+}
+
+unsigned int irq_create_of_mapping(struct device_node *controller,
+ const u32 *intspec, unsigned int intsize)
+{
+ struct irq_domain *ih;
+ u32 virq, type;
+ int ret;
+
+ ih = get_ih_from_node(controller);
+ if (!ih)
+ return 0;
+ ret = ih->xlate(ih, intspec, intsize, &virq, &type);
+ if (ret)
+ return ret;
+ if (type == IRQ_TYPE_NONE)
+ return virq;
+ /* set the mask if it is different from current */
+ if (type == (irq_to_desc(virq)->status & IRQF_TRIGGER_MASK))
+ set_irq_type(virq, type);
+ return virq;
+}
+EXPORT_SYMBOL_GPL(irq_create_of_mapping);
+
+unsigned long pci_address_to_pio(phys_addr_t address)
+{
+ /*
+ * The ioport address can be directly used by inX / outX
+ */
+ BUG_ON(address >= (1 << 16));
+ return (unsigned long)address;
+}
+EXPORT_SYMBOL_GPL(pci_address_to_pio);
+
+void __init early_init_dt_scan_chosen_arch(unsigned long node)
+{
+ BUG();
+}
+
+void __init early_init_dt_add_memory_arch(u64 base, u64 size)
+{
+ BUG();
+}
+
+void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
+{
+ return __alloc_bootmem(size, align, __pa(MAX_DMA_ADDRESS));
+}
+
+void __init add_dtb(u64 data)
+{
+ initial_dtb = data + offsetof(struct setup_data, data);
+}
+
+/*
+ * CE4100 ids. Will be moved to machine_device_initcall() once we have it.
+ */
+static struct of_device_id __initdata ce4100_ids[] = {
+ { .compatible = "intel,ce4100-cp", },
+ { .compatible = "isa", },
+ { .compatible = "pci", },
+ {},
+};
+
+static int __init add_bus_probe(void)
+{
+ if (!of_have_populated_dt())
+ return 0;
+
+ return of_platform_bus_probe(NULL, ce4100_ids, NULL);
+}
+module_init(add_bus_probe);
+
+#ifdef CONFIG_PCI
+static int x86_of_pci_irq_enable(struct pci_dev *dev)
+{
+ struct of_irq oirq;
+ u32 virq;
+ int ret;
+ u8 pin;
+
+ ret = pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
+ if (ret)
+ return ret;
+ if (!pin)
+ return 0;
+
+ ret = of_irq_map_pci(dev, &oirq);
+ if (ret)
+ return ret;
+
+ virq = irq_create_of_mapping(oirq.controller, oirq.specifier,
+ oirq.size);
+ if (virq == 0)
+ return -EINVAL;
+ dev->irq = virq;
+ return 0;
+}
+
+static void x86_of_pci_irq_disable(struct pci_dev *dev)
+{
+}
+
+void __cpuinit x86_of_pci_init(void)
+{
+ struct device_node *np;
+
+ pcibios_enable_irq = x86_of_pci_irq_enable;
+ pcibios_disable_irq = x86_of_pci_irq_disable;
+
+ for_each_node_by_type(np, "pci") {
+ const void *prop;
+ struct pci_bus *bus;
+ unsigned int bus_min;
+ struct device_node *child;
+
+ prop = of_get_property(np, "bus-range", NULL);
+ if (!prop)
+ continue;
+ bus_min = be32_to_cpup(prop);
+
+ bus = pci_find_bus(0, bus_min);
+ if (!bus) {
+ printk(KERN_ERR "Can't find a node for bus %s.\n",
+ np->full_name);
+ continue;
+ }
+
+ if (bus->self)
+ bus->self->dev.of_node = np;
+ else
+ bus->dev.of_node = np;
+
+ for_each_child_of_node(np, child) {
+ struct pci_dev *dev;
+ u32 devfn;
+
+ prop = of_get_property(child, "reg", NULL);
+ if (!prop)
+ continue;
+
+ devfn = (be32_to_cpup(prop) >> 8) & 0xff;
+ dev = pci_get_slot(bus, devfn);
+ if (!dev)
+ continue;
+ dev->dev.of_node = child;
+ pci_dev_put(dev);
+ }
+ }
+}
+#endif
+
+static void __init dtb_setup_hpet(void)
+{
+#ifdef CONFIG_HPET_TIMER
+ struct device_node *dn;
+ struct resource r;
+ int ret;
+
+ dn = of_find_compatible_node(NULL, NULL, "intel,ce4100-hpet");
+ if (!dn)
+ return;
+ ret = of_address_to_resource(dn, 0, &r);
+ if (ret) {
+ WARN_ON(1);
+ return;
+ }
+ hpet_address = r.start;
+#endif
+}
+
+static void __init dtb_lapic_setup(void)
+{
+#ifdef CONFIG_X86_LOCAL_APIC
+ struct device_node *dn;
+ struct resource r;
+ int ret;
+
+ dn = of_find_compatible_node(NULL, NULL, "intel,ce4100-lapic");
+ if (!dn)
+ return;
+
+ ret = of_address_to_resource(dn, 0, &r);
+ if (WARN_ON(ret))
+ return;
+
+ /* Did the boot loader setup the local APIC ? */
+ if (!cpu_has_apic) {
+ if (apic_force_enable(r.start))
+ return;
+ }
+ smp_found_config = 1;
+ pic_mode = 1;
+ register_lapic_address(r.start);
+ generic_processor_info(boot_cpu_physical_apicid,
+ GET_APIC_VERSION(apic_read(APIC_LVR)));
+#endif
+}
+
+#ifdef CONFIG_X86_IO_APIC
+static unsigned int ioapic_id;
+
+static void __init dtb_add_ioapic(struct device_node *dn)
+{
+ struct resource r;
+ int ret;
+
+ ret = of_address_to_resource(dn, 0, &r);
+ if (ret) {
+ printk(KERN_ERR "Can't obtain address from node %s.\n",
+ dn->full_name);
+ return;
+ }
+ mp_register_ioapic(++ioapic_id, r.start, gsi_top);
+}
+
+static void __init dtb_ioapic_setup(void)
+{
+ struct device_node *dn;
+
+ for_each_compatible_node(dn, NULL, "intel,ce4100-ioapic")
+ dtb_add_ioapic(dn);
+
+ if (nr_ioapics) {
+ of_ioapic = 1;
+ return;
+ }
+ printk(KERN_ERR "Error: No information about IO-APIC in OF.\n");
+}
+#else
+static void __init dtb_ioapic_setup(void) {}
+#endif
+
+static void __init dtb_apic_setup(void)
+{
+ dtb_lapic_setup();
+ dtb_ioapic_setup();
+}
+
+#ifdef CONFIG_OF_FLATTREE
+static void __init x86_flattree_get_config(void)
+{
+ u32 size, map_len;
+ void *new_dtb;
+
+ if (!initial_dtb)
+ return;
+
+ map_len = max(PAGE_SIZE - (initial_dtb & ~PAGE_MASK),
+ (u64)sizeof(struct boot_param_header));
+
+ initial_boot_params = early_memremap(initial_dtb, map_len);
+ size = be32_to_cpu(initial_boot_params->totalsize);
+ if (map_len < size) {
+ early_iounmap(initial_boot_params, map_len);
+ initial_boot_params = early_memremap(initial_dtb, size);
+ map_len = size;
+ }
+
+ new_dtb = alloc_bootmem(size);
+ memcpy(new_dtb, initial_boot_params, size);
+ early_iounmap(initial_boot_params, map_len);
+
+ initial_boot_params = new_dtb;
+
+ /* root level address cells */
+ of_scan_flat_dt(early_init_dt_scan_root, NULL);
+
+ unflatten_device_tree();
+}
+#else
+static inline void x86_flattree_get_config(void) { }
+#endif
+
+void __init x86_dtb_init(void)
+{
+ x86_flattree_get_config();
+
+ if (!of_have_populated_dt())
+ return;
+
+ dtb_setup_hpet();
+ dtb_apic_setup();
+}
+
+#ifdef CONFIG_X86_IO_APIC
+
+struct of_ioapic_type {
+ u32 out_type;
+ u32 trigger;
+ u32 polarity;
+};
+
+static struct of_ioapic_type of_ioapic_type[] =
+{
+ {
+ .out_type = IRQ_TYPE_EDGE_RISING,
+ .trigger = IOAPIC_EDGE,
+ .polarity = 1,
+ },
+ {
+ .out_type = IRQ_TYPE_LEVEL_LOW,
+ .trigger = IOAPIC_LEVEL,
+ .polarity = 0,
+ },
+ {
+ .out_type = IRQ_TYPE_LEVEL_HIGH,
+ .trigger = IOAPIC_LEVEL,
+ .polarity = 1,
+ },
+ {
+ .out_type = IRQ_TYPE_EDGE_FALLING,
+ .trigger = IOAPIC_EDGE,
+ .polarity = 0,
+ },
+};
+
+static int ioapic_xlate(struct irq_domain *id, const u32 *intspec, u32 intsize,
+ u32 *out_hwirq, u32 *out_type)
+{
+ struct io_apic_irq_attr attr;
+ struct of_ioapic_type *it;
+ u32 line, idx, type;
+
+ if (intsize < 2)
+ return -EINVAL;
+
+ line = *intspec;
+ idx = (u32) id->priv;
+ *out_hwirq = line + mp_gsi_routing[idx].gsi_base;
+
+ intspec++;
+ type = *intspec;
+
+ if (type >= ARRAY_SIZE(of_ioapic_type))
+ return -EINVAL;
+
+ it = of_ioapic_type + type;
+ *out_type = it->out_type;
+
+ set_io_apic_irq_attr(&attr, idx, line, it->trigger, it->polarity);
+
+ return io_apic_setup_irq_pin(*out_hwirq, cpu_to_node(0), &attr);
+}
+
+static void __init ioapic_add_ofnode(struct device_node *np)
+{
+ struct resource r;
+ int i, ret;
+
+ ret = of_address_to_resource(np, 0, &r);
+ if (ret) {
+ printk(KERN_ERR "Failed to obtain address for %s\n",
+ np->full_name);
+ return;
+ }
+
+ for (i = 0; i < nr_ioapics; i++) {
+ if (r.start == mp_ioapics[i].apicaddr) {
+ struct irq_domain *id;
+
+ id = kzalloc(sizeof(*id), GFP_KERNEL);
+ BUG_ON(!id);
+ id->controller = np;
+ id->xlate = ioapic_xlate;
+ id->priv = (void *)i;
+ add_interrupt_host(id);
+ return;
+ }
+ }
+ printk(KERN_ERR "IOxAPIC at %s is not registered.\n", np->full_name);
+}
+
+void __init x86_add_irq_domains(void)
+{
+ struct device_node *dp;
+
+ if (!of_have_populated_dt())
+ return;
+
+ for_each_node_with_property(dp, "interrupt-controller") {
+ if (of_device_is_compatible(dp, "intel,ce4100-ioapic"))
+ ioapic_add_ofnode(dp);
+ }
+}
+#else
+void __init x86_add_irq_domains(void) { }
+#endif
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index df20723a6a1b..220a1c11cfde 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -320,31 +320,6 @@ void die(const char *str, struct pt_regs *regs, long err)
oops_end(flags, regs, sig);
}
-void notrace __kprobes
-die_nmi(char *str, struct pt_regs *regs, int do_panic)
-{
- unsigned long flags;
-
- if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
- return;
-
- /*
- * We are in trouble anyway, lets at least try
- * to get a message out.
- */
- flags = oops_begin();
- printk(KERN_EMERG "%s", str);
- printk(" on CPU%d, ip %08lx, registers:\n",
- smp_processor_id(), regs->ip);
- show_registers(regs);
- oops_end(flags, regs, 0);
- if (do_panic || panic_on_oops)
- panic("Non maskable interrupt");
- nmi_exit();
- local_irq_enable();
- do_exit(SIGBUS);
-}
-
static int __init oops_setup(char *s)
{
if (!s)
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 294f26da0c0c..cdf5bfd9d4d5 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -667,21 +667,15 @@ __init void e820_setup_gap(void)
* boot_params.e820_map, others are passed via SETUP_E820_EXT node of
* linked list of struct setup_data, which is parsed here.
*/
-void __init parse_e820_ext(struct setup_data *sdata, unsigned long pa_data)
+void __init parse_e820_ext(struct setup_data *sdata)
{
- u32 map_len;
int entries;
struct e820entry *extmap;
entries = sdata->len / sizeof(struct e820entry);
- map_len = sdata->len + sizeof(struct setup_data);
- if (map_len > PAGE_SIZE)
- sdata = early_ioremap(pa_data, map_len);
extmap = (struct e820entry *)(sdata->data);
__append_e820_map(extmap, entries);
sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
- if (map_len > PAGE_SIZE)
- early_iounmap(sdata, map_len);
printk(KERN_INFO "extended physical RAM map:\n");
e820_print_map("extended");
}
@@ -847,15 +841,21 @@ static int __init parse_memopt(char *p)
if (!p)
return -EINVAL;
-#ifdef CONFIG_X86_32
if (!strcmp(p, "nopentium")) {
+#ifdef CONFIG_X86_32
setup_clear_cpu_cap(X86_FEATURE_PSE);
return 0;
- }
+#else
+ printk(KERN_WARNING "mem=nopentium ignored! (only supported on x86_32)\n");
+ return -EINVAL;
#endif
+ }
userdef = 1;
mem_size = memparse(p, &p);
+ /* don't remove all of memory when handling "mem={invalid}" param */
+ if (mem_size == 0)
+ return -EINVAL;
e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
return 0;
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index c8b4efad7ebb..49bdedd2dbcf 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -395,7 +395,7 @@ sysenter_past_esp:
* A tiny bit of offset fixup is necessary - 4*4 means the 4 words
* pushed above; +8 corresponds to copy_thread's esp0 setting.
*/
- pushl_cfi ((TI_sysenter_return)-THREAD_SIZE_asm+8+4*4)(%esp)
+ pushl_cfi ((TI_sysenter_return)-THREAD_SIZE+8+4*4)(%esp)
CFI_REL_OFFSET eip, 0
pushl_cfi %eax
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index aed1ffbeb0c9..891268c645ea 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -975,9 +975,12 @@ apicinterrupt X86_PLATFORM_IPI_VECTOR \
x86_platform_ipi smp_x86_platform_ipi
#ifdef CONFIG_SMP
-.irpc idx, "01234567"
+.irp idx,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \
+ 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
+.if NUM_INVALIDATE_TLB_VECTORS > \idx
apicinterrupt (INVALIDATE_TLB_VECTOR_START)+\idx \
invalidate_interrupt\idx smp_invalidate_interrupt
+.endif
.endr
#endif
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 7f138b3c3c52..d6d6bb361931 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -34,15 +34,6 @@ void __init i386_start_kernel(void)
{
memblock_init();
-#ifdef CONFIG_X86_TRAMPOLINE
- /*
- * But first pinch a few for the stack/trampoline stuff
- * FIXME: Don't need the extra page at 4K, but need to fix
- * trampoline before removing it. (see the GDT stuff)
- */
- memblock_x86_reserve_range(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE");
-#endif
-
memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
#ifdef CONFIG_BLK_DEV_INITRD
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 767d6c43de37..ce0be7cd085e 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -73,7 +73,7 @@ MAPPING_BEYOND_END = PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT
*/
KERNEL_PAGES = LOWMEM_PAGES
-INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE_asm
+INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE
RESERVE_BRK(pagetables, INIT_MAP_SIZE)
/*
@@ -137,7 +137,7 @@ ENTRY(startup_32)
movsl
1:
-#ifdef CONFIG_OLPC_OPENFIRMWARE
+#ifdef CONFIG_OLPC
/* save OFW's pgdir table for later use when calling into OFW */
movl %cr3, %eax
movl %eax, pa(olpc_ofw_pgd)
@@ -623,7 +623,7 @@ ENTRY(initial_code)
* BSS section
*/
__PAGE_ALIGNED_BSS
- .align PAGE_SIZE_asm
+ .align PAGE_SIZE
#ifdef CONFIG_X86_PAE
initial_pg_pmd:
.fill 1024*KPMDS,4,0
@@ -644,7 +644,7 @@ ENTRY(swapper_pg_dir)
#ifdef CONFIG_X86_PAE
__PAGE_ALIGNED_DATA
/* Page-aligned for the benefit of paravirt? */
- .align PAGE_SIZE_asm
+ .align PAGE_SIZE
ENTRY(initial_page_table)
.long pa(initial_pg_pmd+PGD_IDENT_ATTR),0 /* low identity map */
# if KPMDS == 3
@@ -662,7 +662,7 @@ ENTRY(initial_page_table)
# else
# error "Kernel PMDs should be 1, 2 or 3"
# endif
- .align PAGE_SIZE_asm /* needs to be page-sized too */
+ .align PAGE_SIZE /* needs to be page-sized too */
#endif
.data
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 239046bd447f..e11e39478a49 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -136,10 +136,9 @@ ident_complete:
/* Fixup phys_base */
addq %rbp, phys_base(%rip)
-#ifdef CONFIG_X86_TRAMPOLINE
+ /* Fixup trampoline */
addq %rbp, trampoline_level4_pgt + 0(%rip)
addq %rbp, trampoline_level4_pgt + (511*8)(%rip)
-#endif
/* Due to ENTRY(), sometimes the empty space gets filled with
* zeros. Better take a jmp than relying on empty space being
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 8eec0ec59af2..8c968974253d 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -14,22 +14,9 @@
#include <linux/slab.h>
#include <linux/thread_info.h>
#include <linux/syscalls.h>
+#include <linux/bitmap.h>
#include <asm/syscalls.h>
-/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
-static void set_bitmap(unsigned long *bitmap, unsigned int base,
- unsigned int extent, int new_value)
-{
- unsigned int i;
-
- for (i = base; i < base + extent; i++) {
- if (new_value)
- __set_bit(i, bitmap);
- else
- __clear_bit(i, bitmap);
- }
-}
-
/*
* this changes the io permissions bitmap in the current task.
*/
@@ -69,7 +56,10 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
*/
tss = &per_cpu(init_tss, get_cpu());
- set_bitmap(t->io_bitmap_ptr, from, num, !turn_on);
+ if (turn_on)
+ bitmap_clear(t->io_bitmap_ptr, from, num);
+ else
+ bitmap_set(t->io_bitmap_ptr, from, num);
/*
* Search for a (possibly new) maximum. This is simple and stupid,
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 387b6a0c9e81..5478df47d6dd 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -276,15 +276,6 @@ void smp_x86_platform_ipi(struct pt_regs *regs)
EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
-#ifdef CONFIG_OF
-unsigned int irq_create_of_mapping(struct device_node *controller,
- const u32 *intspec, unsigned int intsize)
-{
- return intspec[0];
-}
-EXPORT_SYMBOL_GPL(irq_create_of_mapping);
-#endif
-
#ifdef CONFIG_HOTPLUG_CPU
/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */
void fixup_irqs(void)
@@ -310,7 +301,7 @@ void fixup_irqs(void)
data = &desc->irq_data;
affinity = data->affinity;
if (!irq_has_action(irq) ||
- cpumask_equal(affinity, cpu_online_mask)) {
+ cpumask_subset(affinity, cpu_online_mask)) {
raw_spin_unlock(&desc->lock);
continue;
}
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index c752e973958d..34f765a2d22e 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -25,6 +25,7 @@
#include <asm/setup.h>
#include <asm/i8259.h>
#include <asm/traps.h>
+#include <asm/prom.h>
/*
* ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
@@ -118,6 +119,12 @@ void __init init_IRQ(void)
int i;
/*
+ * We probably need a better place for this, but it works for
+ * now ...
+ */
+ x86_add_irq_domains();
+
+ /*
* On cpu 0, Assign IRQ0_VECTOR..IRQ15_VECTOR's to IRQ 0..15.
* If these IRQ's are handled by legacy interrupt-controllers like PIC,
* then this configuration will likely be static after the boot. If
@@ -164,14 +171,77 @@ static void __init smp_intr_init(void)
alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
/* IPIs for invalidation */
- alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0);
- alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1);
- alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2);
- alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3);
- alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4);
- alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5);
- alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6);
- alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7);
+#define ALLOC_INVTLB_VEC(NR) \
+ alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+NR, \
+ invalidate_interrupt##NR)
+
+ switch (NUM_INVALIDATE_TLB_VECTORS) {
+ default:
+ ALLOC_INVTLB_VEC(31);
+ case 31:
+ ALLOC_INVTLB_VEC(30);
+ case 30:
+ ALLOC_INVTLB_VEC(29);
+ case 29:
+ ALLOC_INVTLB_VEC(28);
+ case 28:
+ ALLOC_INVTLB_VEC(27);
+ case 27:
+ ALLOC_INVTLB_VEC(26);
+ case 26:
+ ALLOC_INVTLB_VEC(25);
+ case 25:
+ ALLOC_INVTLB_VEC(24);
+ case 24:
+ ALLOC_INVTLB_VEC(23);
+ case 23:
+ ALLOC_INVTLB_VEC(22);
+ case 22:
+ ALLOC_INVTLB_VEC(21);
+ case 21:
+ ALLOC_INVTLB_VEC(20);
+ case 20:
+ ALLOC_INVTLB_VEC(19);
+ case 19:
+ ALLOC_INVTLB_VEC(18);
+ case 18:
+ ALLOC_INVTLB_VEC(17);
+ case 17:
+ ALLOC_INVTLB_VEC(16);
+ case 16:
+ ALLOC_INVTLB_VEC(15);
+ case 15:
+ ALLOC_INVTLB_VEC(14);
+ case 14:
+ ALLOC_INVTLB_VEC(13);
+ case 13:
+ ALLOC_INVTLB_VEC(12);
+ case 12:
+ ALLOC_INVTLB_VEC(11);
+ case 11:
+ ALLOC_INVTLB_VEC(10);
+ case 10:
+ ALLOC_INVTLB_VEC(9);
+ case 9:
+ ALLOC_INVTLB_VEC(8);
+ case 8:
+ ALLOC_INVTLB_VEC(7);
+ case 7:
+ ALLOC_INVTLB_VEC(6);
+ case 6:
+ ALLOC_INVTLB_VEC(5);
+ case 5:
+ ALLOC_INVTLB_VEC(4);
+ case 4:
+ ALLOC_INVTLB_VEC(3);
+ case 3:
+ ALLOC_INVTLB_VEC(2);
+ case 2:
+ ALLOC_INVTLB_VEC(1);
+ case 1:
+ ALLOC_INVTLB_VEC(0);
+ break;
+ }
/* IPI for generic function call */
alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
@@ -243,7 +313,7 @@ void __init native_init_IRQ(void)
set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
}
- if (!acpi_ioapic)
+ if (!acpi_ioapic && !of_ioapic)
setup_irq(2, &irq2);
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 3c2fb0f25abd..d4a28afe5db5 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -533,15 +533,6 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd)
}
return NOTIFY_DONE;
- case DIE_NMIWATCHDOG:
- if (atomic_read(&kgdb_active) != -1) {
- /* KGDB CPU roundup: */
- kgdb_nmicallback(raw_smp_processor_id(), regs);
- return NOTIFY_STOP;
- }
- /* Enter debugger: */
- break;
-
case DIE_DEBUG:
if (atomic_read(&kgdb_cpu_doing_single_step) != -1) {
if (user_mode(regs))
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 0fe6d1a66c38..c5610384ab16 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -66,7 +66,6 @@ struct microcode_amd {
unsigned int mpb[0];
};
-#define UCODE_MAX_SIZE 2048
#define UCODE_CONTAINER_SECTION_HDR 8
#define UCODE_CONTAINER_HEADER_SIZE 12
@@ -77,20 +76,20 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
struct cpuinfo_x86 *c = &cpu_data(cpu);
u32 dummy;
- memset(csig, 0, sizeof(*csig));
if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) {
- pr_warning("microcode: CPU%d: AMD CPU family 0x%x not "
- "supported\n", cpu, c->x86);
+ pr_warning("CPU%d: family %d not supported\n", cpu, c->x86);
return -1;
}
+
rdmsr(MSR_AMD64_PATCH_LEVEL, csig->rev, dummy);
- pr_info("CPU%d: patch_level=0x%x\n", cpu, csig->rev);
+ pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev);
+
return 0;
}
-static int get_matching_microcode(int cpu, void *mc, int rev)
+static int get_matching_microcode(int cpu, struct microcode_header_amd *mc_hdr,
+ int rev)
{
- struct microcode_header_amd *mc_header = mc;
unsigned int current_cpu_id;
u16 equiv_cpu_id = 0;
unsigned int i = 0;
@@ -109,17 +108,17 @@ static int get_matching_microcode(int cpu, void *mc, int rev)
if (!equiv_cpu_id)
return 0;
- if (mc_header->processor_rev_id != equiv_cpu_id)
+ if (mc_hdr->processor_rev_id != equiv_cpu_id)
return 0;
/* ucode might be chipset specific -- currently we don't support this */
- if (mc_header->nb_dev_id || mc_header->sb_dev_id) {
- pr_err("CPU%d: loading of chipset specific code not yet supported\n",
+ if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) {
+ pr_err("CPU%d: chipset specific code not yet supported\n",
cpu);
return 0;
}
- if (mc_header->patch_id <= rev)
+ if (mc_hdr->patch_id <= rev)
return 0;
return 1;
@@ -144,71 +143,93 @@ static int apply_microcode_amd(int cpu)
/* check current patch id and patch's id for match */
if (rev != mc_amd->hdr.patch_id) {
- pr_err("CPU%d: update failed (for patch_level=0x%x)\n",
+ pr_err("CPU%d: update failed for patch_level=0x%08x\n",
cpu, mc_amd->hdr.patch_id);
return -1;
}
- pr_info("CPU%d: updated (new patch_level=0x%x)\n", cpu, rev);
+ pr_info("CPU%d: new patch_level=0x%08x\n", cpu, rev);
uci->cpu_sig.rev = rev;
return 0;
}
-static void *
-get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size)
+static unsigned int verify_ucode_size(int cpu, const u8 *buf, unsigned int size)
{
- unsigned int total_size;
- u8 section_hdr[UCODE_CONTAINER_SECTION_HDR];
- void *mc;
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
+ unsigned int max_size, actual_size;
+
+#define F1XH_MPB_MAX_SIZE 2048
+#define F14H_MPB_MAX_SIZE 1824
+#define F15H_MPB_MAX_SIZE 4096
+
+ switch (c->x86) {
+ case 0x14:
+ max_size = F14H_MPB_MAX_SIZE;
+ break;
+ case 0x15:
+ max_size = F15H_MPB_MAX_SIZE;
+ break;
+ default:
+ max_size = F1XH_MPB_MAX_SIZE;
+ break;
+ }
- get_ucode_data(section_hdr, buf, UCODE_CONTAINER_SECTION_HDR);
+ actual_size = buf[4] + (buf[5] << 8);
- if (section_hdr[0] != UCODE_UCODE_TYPE) {
- pr_err("error: invalid type field in container file section header\n");
- return NULL;
+ if (actual_size > size || actual_size > max_size) {
+ pr_err("section size mismatch\n");
+ return 0;
}
- total_size = (unsigned long) (section_hdr[4] + (section_hdr[5] << 8));
+ return actual_size;
+}
- if (total_size > size || total_size > UCODE_MAX_SIZE) {
- pr_err("error: size mismatch\n");
- return NULL;
+static struct microcode_header_amd *
+get_next_ucode(int cpu, const u8 *buf, unsigned int size, unsigned int *mc_size)
+{
+ struct microcode_header_amd *mc = NULL;
+ unsigned int actual_size = 0;
+
+ if (buf[0] != UCODE_UCODE_TYPE) {
+ pr_err("invalid type field in container file section header\n");
+ goto out;
}
- mc = vzalloc(UCODE_MAX_SIZE);
+ actual_size = verify_ucode_size(cpu, buf, size);
+ if (!actual_size)
+ goto out;
+
+ mc = vzalloc(actual_size);
if (!mc)
- return NULL;
+ goto out;
- get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, total_size);
- *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR;
+ get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, actual_size);
+ *mc_size = actual_size + UCODE_CONTAINER_SECTION_HDR;
+out:
return mc;
}
static int install_equiv_cpu_table(const u8 *buf)
{
- u8 *container_hdr[UCODE_CONTAINER_HEADER_SIZE];
- unsigned int *buf_pos = (unsigned int *)container_hdr;
- unsigned long size;
-
- get_ucode_data(&container_hdr, buf, UCODE_CONTAINER_HEADER_SIZE);
-
- size = buf_pos[2];
-
- if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE || !size) {
- pr_err("error: invalid type field in container file section header\n");
- return 0;
+ unsigned int *ibuf = (unsigned int *)buf;
+ unsigned int type = ibuf[1];
+ unsigned int size = ibuf[2];
+
+ if (type != UCODE_EQUIV_CPU_TABLE_TYPE || !size) {
+ pr_err("empty section/"
+ "invalid type field in container file section header\n");
+ return -EINVAL;
}
equiv_cpu_table = vmalloc(size);
if (!equiv_cpu_table) {
pr_err("failed to allocate equivalent CPU table\n");
- return 0;
+ return -ENOMEM;
}
- buf += UCODE_CONTAINER_HEADER_SIZE;
- get_ucode_data(equiv_cpu_table, buf, size);
+ get_ucode_data(equiv_cpu_table, buf + UCODE_CONTAINER_HEADER_SIZE, size);
return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */
}
@@ -223,16 +244,16 @@ static enum ucode_state
generic_load_microcode(int cpu, const u8 *data, size_t size)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+ struct microcode_header_amd *mc_hdr = NULL;
+ unsigned int mc_size, leftover;
+ int offset;
const u8 *ucode_ptr = data;
void *new_mc = NULL;
- void *mc;
- int new_rev = uci->cpu_sig.rev;
- unsigned int leftover;
- unsigned long offset;
+ unsigned int new_rev = uci->cpu_sig.rev;
enum ucode_state state = UCODE_OK;
offset = install_equiv_cpu_table(ucode_ptr);
- if (!offset) {
+ if (offset < 0) {
pr_err("failed to create equivalent cpu table\n");
return UCODE_ERROR;
}
@@ -241,64 +262,65 @@ generic_load_microcode(int cpu, const u8 *data, size_t size)
leftover = size - offset;
while (leftover) {
- unsigned int uninitialized_var(mc_size);
- struct microcode_header_amd *mc_header;
-
- mc = get_next_ucode(ucode_ptr, leftover, &mc_size);
- if (!mc)
+ mc_hdr = get_next_ucode(cpu, ucode_ptr, leftover, &mc_size);
+ if (!mc_hdr)
break;
- mc_header = (struct microcode_header_amd *)mc;
- if (get_matching_microcode(cpu, mc, new_rev)) {
+ if (get_matching_microcode(cpu, mc_hdr, new_rev)) {
vfree(new_mc);
- new_rev = mc_header->patch_id;
- new_mc = mc;
+ new_rev = mc_hdr->patch_id;
+ new_mc = mc_hdr;
} else
- vfree(mc);
+ vfree(mc_hdr);
ucode_ptr += mc_size;
leftover -= mc_size;
}
- if (new_mc) {
- if (!leftover) {
- vfree(uci->mc);
- uci->mc = new_mc;
- pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n",
- cpu, new_rev, uci->cpu_sig.rev);
- } else {
- vfree(new_mc);
- state = UCODE_ERROR;
- }
- } else
+ if (!new_mc) {
state = UCODE_NFOUND;
+ goto free_table;
+ }
+ if (!leftover) {
+ vfree(uci->mc);
+ uci->mc = new_mc;
+ pr_debug("CPU%d update ucode (0x%08x -> 0x%08x)\n",
+ cpu, uci->cpu_sig.rev, new_rev);
+ } else {
+ vfree(new_mc);
+ state = UCODE_ERROR;
+ }
+
+free_table:
free_equiv_cpu_table();
return state;
}
-static enum ucode_state request_microcode_fw(int cpu, struct device *device)
+static enum ucode_state request_microcode_amd(int cpu, struct device *device)
{
const char *fw_name = "amd-ucode/microcode_amd.bin";
- const struct firmware *firmware;
- enum ucode_state ret;
+ const struct firmware *fw;
+ enum ucode_state ret = UCODE_NFOUND;
- if (request_firmware(&firmware, fw_name, device)) {
- printk(KERN_ERR "microcode: failed to load file %s\n", fw_name);
- return UCODE_NFOUND;
+ if (request_firmware(&fw, fw_name, device)) {
+ pr_err("failed to load file %s\n", fw_name);
+ goto out;
}
- if (*(u32 *)firmware->data != UCODE_MAGIC) {
- pr_err("invalid UCODE_MAGIC (0x%08x)\n",
- *(u32 *)firmware->data);
- return UCODE_ERROR;
+ ret = UCODE_ERROR;
+ if (*(u32 *)fw->data != UCODE_MAGIC) {
+ pr_err("invalid magic value (0x%08x)\n", *(u32 *)fw->data);
+ goto fw_release;
}
- ret = generic_load_microcode(cpu, firmware->data, firmware->size);
+ ret = generic_load_microcode(cpu, fw->data, fw->size);
- release_firmware(firmware);
+fw_release:
+ release_firmware(fw);
+out:
return ret;
}
@@ -319,7 +341,7 @@ static void microcode_fini_cpu_amd(int cpu)
static struct microcode_ops microcode_amd_ops = {
.request_microcode_user = request_microcode_user,
- .request_microcode_fw = request_microcode_fw,
+ .request_microcode_fw = request_microcode_amd,
.collect_cpu_info = collect_cpu_info_amd,
.apply_microcode = apply_microcode_amd,
.microcode_fini_cpu = microcode_fini_cpu_amd,
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 1cca374a2bac..87af68e0e1e1 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -417,8 +417,10 @@ static int mc_sysdev_add(struct sys_device *sys_dev)
if (err)
return err;
- if (microcode_init_cpu(cpu) == UCODE_ERROR)
- err = -EINVAL;
+ if (microcode_init_cpu(cpu) == UCODE_ERROR) {
+ sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
+ return -EINVAL;
+ }
return err;
}
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index ff4554198981..99fa3adf0141 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -110,12 +110,9 @@ void show_regs_common(void)
init_utsname()->release,
(int)strcspn(init_utsname()->version, " "),
init_utsname()->version);
- printk(KERN_CONT " ");
- printk(KERN_CONT "%s %s", vendor, product);
- if (board) {
- printk(KERN_CONT "/");
- printk(KERN_CONT "%s", board);
- }
+ printk(KERN_CONT " %s %s", vendor, product);
+ if (board)
+ printk(KERN_CONT "/%s", board);
printk(KERN_CONT "\n");
}
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 715037caeb43..d3ce37edb54d 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -303,68 +303,16 @@ static int __init reboot_init(void)
}
core_initcall(reboot_init);
-/* The following code and data reboots the machine by switching to real
- mode and jumping to the BIOS reset entry point, as if the CPU has
- really been reset. The previous version asked the keyboard
- controller to pulse the CPU reset line, which is more thorough, but
- doesn't work with at least one type of 486 motherboard. It is easy
- to stop this code working; hence the copious comments. */
-static const unsigned long long
-real_mode_gdt_entries [3] =
-{
- 0x0000000000000000ULL, /* Null descriptor */
- 0x00009b000000ffffULL, /* 16-bit real-mode 64k code at 0x00000000 */
- 0x000093000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */
-};
+extern const unsigned char machine_real_restart_asm[];
+extern const u64 machine_real_restart_gdt[3];
-static const struct desc_ptr
-real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, (long)real_mode_gdt_entries },
-real_mode_idt = { 0x3ff, 0 };
-
-/* This is 16-bit protected mode code to disable paging and the cache,
- switch to real mode and jump to the BIOS reset code.
-
- The instruction that switches to real mode by writing to CR0 must be
- followed immediately by a far jump instruction, which set CS to a
- valid value for real mode, and flushes the prefetch queue to avoid
- running instructions that have already been decoded in protected
- mode.
-
- Clears all the flags except ET, especially PG (paging), PE
- (protected-mode enable) and TS (task switch for coprocessor state
- save). Flushes the TLB after paging has been disabled. Sets CD and
- NW, to disable the cache on a 486, and invalidates the cache. This
- is more like the state of a 486 after reset. I don't know if
- something else should be done for other chips.
-
- More could be done here to set up the registers as if a CPU reset had
- occurred; hopefully real BIOSs don't assume much. */
-static const unsigned char real_mode_switch [] =
-{
- 0x66, 0x0f, 0x20, 0xc0, /* movl %cr0,%eax */
- 0x66, 0x83, 0xe0, 0x11, /* andl $0x00000011,%eax */
- 0x66, 0x0d, 0x00, 0x00, 0x00, 0x60, /* orl $0x60000000,%eax */
- 0x66, 0x0f, 0x22, 0xc0, /* movl %eax,%cr0 */
- 0x66, 0x0f, 0x22, 0xd8, /* movl %eax,%cr3 */
- 0x66, 0x0f, 0x20, 0xc3, /* movl %cr0,%ebx */
- 0x66, 0x81, 0xe3, 0x00, 0x00, 0x00, 0x60, /* andl $0x60000000,%ebx */
- 0x74, 0x02, /* jz f */
- 0x0f, 0x09, /* wbinvd */
- 0x24, 0x10, /* f: andb $0x10,al */
- 0x66, 0x0f, 0x22, 0xc0 /* movl %eax,%cr0 */
-};
-static const unsigned char jump_to_bios [] =
+void machine_real_restart(unsigned int type)
{
- 0xea, 0x00, 0x00, 0xff, 0xff /* ljmp $0xffff,$0x0000 */
-};
+ void *restart_va;
+ unsigned long restart_pa;
+ void (*restart_lowmem)(unsigned int);
+ u64 *lowmem_gdt;
-/*
- * Switch to real mode and then execute the code
- * specified by the code and length parameters.
- * We assume that length will aways be less that 100!
- */
-void machine_real_restart(const unsigned char *code, int length)
-{
local_irq_disable();
/* Write zero to CMOS register number 0x0f, which the BIOS POST
@@ -392,41 +340,23 @@ void machine_real_restart(const unsigned char *code, int length)
too. */
*((unsigned short *)0x472) = reboot_mode;
- /* For the switch to real mode, copy some code to low memory. It has
- to be in the first 64k because it is running in 16-bit mode, and it
- has to have the same physical and virtual address, because it turns
- off paging. Copy it near the end of the first page, out of the way
- of BIOS variables. */
- memcpy((void *)(0x1000 - sizeof(real_mode_switch) - 100),
- real_mode_switch, sizeof (real_mode_switch));
- memcpy((void *)(0x1000 - 100), code, length);
-
- /* Set up the IDT for real mode. */
- load_idt(&real_mode_idt);
-
- /* Set up a GDT from which we can load segment descriptors for real
- mode. The GDT is not used in real mode; it is just needed here to
- prepare the descriptors. */
- load_gdt(&real_mode_gdt);
-
- /* Load the data segment registers, and thus the descriptors ready for
- real mode. The base address of each segment is 0x100, 16 times the
- selector value being loaded here. This is so that the segment
- registers don't have to be reloaded after switching to real mode:
- the values are consistent for real mode operation already. */
- __asm__ __volatile__ ("movl $0x0010,%%eax\n"
- "\tmovl %%eax,%%ds\n"
- "\tmovl %%eax,%%es\n"
- "\tmovl %%eax,%%fs\n"
- "\tmovl %%eax,%%gs\n"
- "\tmovl %%eax,%%ss" : : : "eax");
-
- /* Jump to the 16-bit code that we copied earlier. It disables paging
- and the cache, switches to real mode, and jumps to the BIOS reset
- entry point. */
- __asm__ __volatile__ ("ljmp $0x0008,%0"
- :
- : "i" ((void *)(0x1000 - sizeof (real_mode_switch) - 100)));
+ /* Patch the GDT in the low memory trampoline */
+ lowmem_gdt = TRAMPOLINE_SYM(machine_real_restart_gdt);
+
+ restart_va = TRAMPOLINE_SYM(machine_real_restart_asm);
+ restart_pa = virt_to_phys(restart_va);
+ restart_lowmem = (void (*)(unsigned int))restart_pa;
+
+ /* GDT[0]: GDT self-pointer */
+ lowmem_gdt[0] =
+ (u64)(sizeof(machine_real_restart_gdt) - 1) +
+ ((u64)virt_to_phys(lowmem_gdt) << 16);
+ /* GDT[1]: 64K real mode code segment */
+ lowmem_gdt[1] =
+ GDT_ENTRY(0x009b, restart_pa, 0xffff);
+
+ /* Jump to the identity-mapped low memory code */
+ restart_lowmem(type);
}
#ifdef CONFIG_APM_MODULE
EXPORT_SYMBOL(machine_real_restart);
@@ -581,7 +511,7 @@ static void native_machine_emergency_restart(void)
#ifdef CONFIG_X86_32
case BOOT_BIOS:
- machine_real_restart(jump_to_bios, sizeof(jump_to_bios));
+ machine_real_restart(MRR_BIOS);
reboot_type = BOOT_KBD;
break;
diff --git a/arch/x86/kernel/reboot_32.S b/arch/x86/kernel/reboot_32.S
new file mode 100644
index 000000000000..29092b38d816
--- /dev/null
+++ b/arch/x86/kernel/reboot_32.S
@@ -0,0 +1,135 @@
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/segment.h>
+#include <asm/page_types.h>
+
+/*
+ * The following code and data reboots the machine by switching to real
+ * mode and jumping to the BIOS reset entry point, as if the CPU has
+ * really been reset. The previous version asked the keyboard
+ * controller to pulse the CPU reset line, which is more thorough, but
+ * doesn't work with at least one type of 486 motherboard. It is easy
+ * to stop this code working; hence the copious comments.
+ *
+ * This code is called with the restart type (0 = BIOS, 1 = APM) in %eax.
+ */
+ .section ".x86_trampoline","a"
+ .balign 16
+ .code32
+ENTRY(machine_real_restart_asm)
+r_base = .
+ /* Get our own relocated address */
+ call 1f
+1: popl %ebx
+ subl $1b, %ebx
+
+ /* Compute the equivalent real-mode segment */
+ movl %ebx, %ecx
+ shrl $4, %ecx
+
+ /* Patch post-real-mode segment jump */
+ movw dispatch_table(%ebx,%eax,2),%ax
+ movw %ax, 101f(%ebx)
+ movw %cx, 102f(%ebx)
+
+ /* Set up the IDT for real mode. */
+ lidtl machine_real_restart_idt(%ebx)
+
+ /*
+ * Set up a GDT from which we can load segment descriptors for real
+ * mode. The GDT is not used in real mode; it is just needed here to
+ * prepare the descriptors.
+ */
+ lgdtl machine_real_restart_gdt(%ebx)
+
+ /*
+ * Load the data segment registers with 16-bit compatible values
+ */
+ movl $16, %ecx
+ movl %ecx, %ds
+ movl %ecx, %es
+ movl %ecx, %fs
+ movl %ecx, %gs
+ movl %ecx, %ss
+ ljmpl $8, $1f - r_base
+
+/*
+ * This is 16-bit protected mode code to disable paging and the cache,
+ * switch to real mode and jump to the BIOS reset code.
+ *
+ * The instruction that switches to real mode by writing to CR0 must be
+ * followed immediately by a far jump instruction, which set CS to a
+ * valid value for real mode, and flushes the prefetch queue to avoid
+ * running instructions that have already been decoded in protected
+ * mode.
+ *
+ * Clears all the flags except ET, especially PG (paging), PE
+ * (protected-mode enable) and TS (task switch for coprocessor state
+ * save). Flushes the TLB after paging has been disabled. Sets CD and
+ * NW, to disable the cache on a 486, and invalidates the cache. This
+ * is more like the state of a 486 after reset. I don't know if
+ * something else should be done for other chips.
+ *
+ * More could be done here to set up the registers as if a CPU reset had
+ * occurred; hopefully real BIOSs don't assume much. This is not the
+ * actual BIOS entry point, anyway (that is at 0xfffffff0).
+ *
+ * Most of this work is probably excessive, but it is what is tested.
+ */
+ .code16
+1:
+ xorl %ecx, %ecx
+ movl %cr0, %eax
+ andl $0x00000011, %eax
+ orl $0x60000000, %eax
+ movl %eax, %cr0
+ movl %ecx, %cr3
+ movl %cr0, %edx
+ andl $0x60000000, %edx /* If no cache bits -> no wbinvd */
+ jz 2f
+ wbinvd
+2:
+ andb $0x10, %al
+ movl %eax, %cr0
+ .byte 0xea /* ljmpw */
+101: .word 0 /* Offset */
+102: .word 0 /* Segment */
+
+bios:
+ ljmpw $0xf000, $0xfff0
+
+apm:
+ movw $0x1000, %ax
+ movw %ax, %ss
+ movw $0xf000, %sp
+ movw $0x5307, %ax
+ movw $0x0001, %bx
+ movw $0x0003, %cx
+ int $0x15
+
+END(machine_real_restart_asm)
+
+ .balign 16
+ /* These must match <asm/reboot.h */
+dispatch_table:
+ .word bios - r_base
+ .word apm - r_base
+END(dispatch_table)
+
+ .balign 16
+machine_real_restart_idt:
+ .word 0xffff /* Length - real mode default value */
+ .long 0 /* Base - real mode default value */
+END(machine_real_restart_idt)
+
+ .balign 16
+ENTRY(machine_real_restart_gdt)
+ .quad 0 /* Self-pointer, filled in by PM code */
+ .quad 0 /* 16-bit code segment, filled in by PM code */
+ /*
+ * 16-bit data segment with the selector value 16 = 0x10 and
+ * base value 0x100; since this is consistent with real mode
+ * semantics we don't have to reload the segments once CR0.PE = 0.
+ */
+ .quad GDT_ENTRY(0x0093, 0x100, 0xffff)
+END(machine_real_restart_gdt)
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 6f39cab052d5..3f2ad2640d85 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -6,6 +6,7 @@
#include <linux/acpi.h>
#include <linux/bcd.h>
#include <linux/pnp.h>
+#include <linux/of.h>
#include <asm/vsyscall.h>
#include <asm/x86_init.h>
@@ -236,6 +237,8 @@ static __init int add_rtc_cmos(void)
}
}
#endif
+ if (of_have_populated_dt())
+ return 0;
platform_device_register(&rtc_device);
dev_info(&rtc_device.dev,
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index d3cfe26c0252..a47fe0099c27 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -113,6 +113,7 @@
#endif
#include <asm/mce.h>
#include <asm/alternative.h>
+#include <asm/prom.h>
/*
* end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
@@ -293,10 +294,32 @@ static void __init init_gbpages(void)
else
direct_gbpages = 0;
}
+
+static void __init cleanup_highmap_brk_end(void)
+{
+ pud_t *pud;
+ pmd_t *pmd;
+
+ mmu_cr4_features = read_cr4();
+
+ /*
+ * _brk_end cannot change anymore, but it and _end may be
+ * located on different 2M pages. cleanup_highmap(), however,
+ * can only consider _end when it runs, so destroy any
+ * mappings beyond _brk_end here.
+ */
+ pud = pud_offset(pgd_offset_k(_brk_end), _brk_end);
+ pmd = pmd_offset(pud, _brk_end - 1);
+ while (++pmd <= pmd_offset(pud, (unsigned long)_end - 1))
+ pmd_clear(pmd);
+}
#else
static inline void init_gbpages(void)
{
}
+static inline void cleanup_highmap_brk_end(void)
+{
+}
#endif
static void __init reserve_brk(void)
@@ -307,6 +330,8 @@ static void __init reserve_brk(void)
/* Mark brk area as locked down and no longer taking any
new allocations */
_brk_start = 0;
+
+ cleanup_highmap_brk_end();
}
#ifdef CONFIG_BLK_DEV_INITRD
@@ -429,16 +454,30 @@ static void __init parse_setup_data(void)
return;
pa_data = boot_params.hdr.setup_data;
while (pa_data) {
- data = early_memremap(pa_data, PAGE_SIZE);
+ u32 data_len, map_len;
+
+ map_len = max(PAGE_SIZE - (pa_data & ~PAGE_MASK),
+ (u64)sizeof(struct setup_data));
+ data = early_memremap(pa_data, map_len);
+ data_len = data->len + sizeof(struct setup_data);
+ if (data_len > map_len) {
+ early_iounmap(data, map_len);
+ data = early_memremap(pa_data, data_len);
+ map_len = data_len;
+ }
+
switch (data->type) {
case SETUP_E820_EXT:
- parse_e820_ext(data, pa_data);
+ parse_e820_ext(data);
+ break;
+ case SETUP_DTB:
+ add_dtb(pa_data);
break;
default:
break;
}
pa_data = data->next;
- early_iounmap(data, PAGE_SIZE);
+ early_iounmap(data, map_len);
}
}
@@ -680,15 +719,6 @@ static int __init parse_reservelow(char *p)
early_param("reservelow", parse_reservelow);
-static u64 __init get_max_mapped(void)
-{
- u64 end = max_pfn_mapped;
-
- end <<= PAGE_SHIFT;
-
- return end;
-}
-
/*
* Determine if we were loaded by an EFI loader. If so, then we have also been
* passed the efi memmap, systab, etc., so we should use these data structures
@@ -704,8 +734,6 @@ static u64 __init get_max_mapped(void)
void __init setup_arch(char **cmdline_p)
{
- int acpi = 0;
- int amd = 0;
unsigned long flags;
#ifdef CONFIG_X86_32
@@ -935,29 +963,14 @@ void __init setup_arch(char **cmdline_p)
printk(KERN_DEBUG "initial memory mapped : 0 - %08lx\n",
max_pfn_mapped<<PAGE_SHIFT);
- reserve_trampoline_memory();
+ setup_trampolines();
-#ifdef CONFIG_ACPI_SLEEP
- /*
- * Reserve low memory region for sleep support.
- * even before init_memory_mapping
- */
- acpi_reserve_wakeup_memory();
-#endif
init_gbpages();
/* max_pfn_mapped is updated here */
max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
max_pfn_mapped = max_low_pfn_mapped;
-#ifdef CONFIG_X86_64
- if (max_pfn > max_low_pfn) {
- max_pfn_mapped = init_memory_mapping(1UL<<32,
- max_pfn<<PAGE_SHIFT);
- /* can we preseve max_low_pfn ?*/
- max_low_pfn = max_pfn;
- }
-#endif
memblock.current_limit = get_max_mapped();
/*
@@ -984,19 +997,7 @@ void __init setup_arch(char **cmdline_p)
early_acpi_boot_init();
-#ifdef CONFIG_ACPI_NUMA
- /*
- * Parse SRAT to discover nodes.
- */
- acpi = acpi_numa_init();
-#endif
-
-#ifdef CONFIG_AMD_NUMA
- if (!acpi)
- amd = !amd_numa_init(0, max_pfn);
-#endif
-
- initmem_init(0, max_pfn, acpi, amd);
+ initmem_init();
memblock_find_dma_reserve();
dma32_reserve_bootmem();
@@ -1029,8 +1030,8 @@ void __init setup_arch(char **cmdline_p)
* Read APIC and some other early information from ACPI tables.
*/
acpi_boot_init();
-
sfi_init();
+ x86_dtb_init();
/*
* get boot-time SMP configuration:
@@ -1040,9 +1041,7 @@ void __init setup_arch(char **cmdline_p)
prefill_possible_map();
-#ifdef CONFIG_X86_64
init_cpu_to_node();
-#endif
init_apic_mappings();
ioapic_and_gsi_init();
@@ -1066,6 +1065,8 @@ void __init setup_arch(char **cmdline_p)
#endif
x86_init.oem.banner();
+ x86_init.timers.wallclock_init();
+
mcheck_init();
local_irq_save(flags);
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 002b79685f73..71f4727da373 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -225,10 +225,15 @@ void __init setup_per_cpu_areas(void)
per_cpu(x86_bios_cpu_apicid, cpu) =
early_per_cpu_map(x86_bios_cpu_apicid, cpu);
#endif
+#ifdef CONFIG_X86_32
+ per_cpu(x86_cpu_to_logical_apicid, cpu) =
+ early_per_cpu_map(x86_cpu_to_logical_apicid, cpu);
+#endif
#ifdef CONFIG_X86_64
per_cpu(irq_stack_ptr, cpu) =
per_cpu(irq_stack_union.irq_stack, cpu) +
IRQ_STACK_SIZE - 64;
+#endif
#ifdef CONFIG_NUMA
per_cpu(x86_cpu_to_node_map, cpu) =
early_per_cpu_map(x86_cpu_to_node_map, cpu);
@@ -242,7 +247,6 @@ void __init setup_per_cpu_areas(void)
*/
set_cpu_numa_node(cpu, early_cpu_to_node(cpu));
#endif
-#endif
/*
* Up to this point, the boot CPU has been using .init.data
* area. Reload any changed state for the boot CPU.
@@ -256,7 +260,10 @@ void __init setup_per_cpu_areas(void)
early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
#endif
-#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA)
+#ifdef CONFIG_X86_32
+ early_per_cpu_ptr(x86_cpu_to_logical_apicid) = NULL;
+#endif
+#ifdef CONFIG_NUMA
early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
#endif
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 08776a953487..c2871d3c71b6 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -64,6 +64,7 @@
#include <asm/mtrr.h>
#include <asm/mwait.h>
#include <asm/apic.h>
+#include <asm/io_apic.h>
#include <asm/setup.h>
#include <asm/uv/uv.h>
#include <linux/mc146818rtc.h>
@@ -71,10 +72,6 @@
#include <asm/smpboot_hooks.h>
#include <asm/i8259.h>
-#ifdef CONFIG_X86_32
-u8 apicid_2_node[MAX_APICID];
-#endif
-
/* State of each CPU */
DEFINE_PER_CPU(int, cpu_state) = { 0 };
@@ -130,68 +127,14 @@ EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);
EXPORT_PER_CPU_SYMBOL(cpu_core_map);
+DEFINE_PER_CPU(cpumask_var_t, cpu_llc_shared_map);
+
/* Per CPU bogomips and other parameters */
DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
EXPORT_PER_CPU_SYMBOL(cpu_info);
atomic_t init_deasserted;
-#if defined(CONFIG_NUMA) && defined(CONFIG_X86_32)
-/* which node each logical CPU is on */
-int cpu_to_node_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 };
-EXPORT_SYMBOL(cpu_to_node_map);
-
-/* set up a mapping between cpu and node. */
-static void map_cpu_to_node(int cpu, int node)
-{
- printk(KERN_INFO "Mapping cpu %d to node %d\n", cpu, node);
- cpumask_set_cpu(cpu, node_to_cpumask_map[node]);
- cpu_to_node_map[cpu] = node;
-}
-
-/* undo a mapping between cpu and node. */
-static void unmap_cpu_to_node(int cpu)
-{
- int node;
-
- printk(KERN_INFO "Unmapping cpu %d from all nodes\n", cpu);
- for (node = 0; node < MAX_NUMNODES; node++)
- cpumask_clear_cpu(cpu, node_to_cpumask_map[node]);
- cpu_to_node_map[cpu] = 0;
-}
-#else /* !(CONFIG_NUMA && CONFIG_X86_32) */
-#define map_cpu_to_node(cpu, node) ({})
-#define unmap_cpu_to_node(cpu) ({})
-#endif
-
-#ifdef CONFIG_X86_32
-static int boot_cpu_logical_apicid;
-
-u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly =
- { [0 ... NR_CPUS-1] = BAD_APICID };
-
-static void map_cpu_to_logical_apicid(void)
-{
- int cpu = smp_processor_id();
- int apicid = logical_smp_processor_id();
- int node = apic->apicid_to_node(apicid);
-
- if (!node_online(node))
- node = first_online_node;
-
- cpu_2_logical_apicid[cpu] = apicid;
- map_cpu_to_node(cpu, node);
-}
-
-void numa_remove_cpu(int cpu)
-{
- cpu_2_logical_apicid[cpu] = BAD_APICID;
- unmap_cpu_to_node(cpu);
-}
-#else
-#define map_cpu_to_logical_apicid() do {} while (0)
-#endif
-
/*
* Report back to the Boot Processor.
* Running on AP.
@@ -259,7 +202,6 @@ static void __cpuinit smp_callin(void)
apic->smp_callin_clear_local_apic();
setup_local_APIC();
end_local_APIC_setup();
- map_cpu_to_logical_apicid();
/*
* Need to setup vector mappings before we enable interrupts.
@@ -355,23 +297,6 @@ notrace static void __cpuinit start_secondary(void *unused)
cpu_idle();
}
-#ifdef CONFIG_CPUMASK_OFFSTACK
-/* In this case, llc_shared_map is a pointer to a cpumask. */
-static inline void copy_cpuinfo_x86(struct cpuinfo_x86 *dst,
- const struct cpuinfo_x86 *src)
-{
- struct cpumask *llc = dst->llc_shared_map;
- *dst = *src;
- dst->llc_shared_map = llc;
-}
-#else
-static inline void copy_cpuinfo_x86(struct cpuinfo_x86 *dst,
- const struct cpuinfo_x86 *src)
-{
- *dst = *src;
-}
-#endif /* CONFIG_CPUMASK_OFFSTACK */
-
/*
* The bootstrap kernel entry code has set these up. Save them for
* a given CPU
@@ -381,7 +306,7 @@ void __cpuinit smp_store_cpu_info(int id)
{
struct cpuinfo_x86 *c = &cpu_data(id);
- copy_cpuinfo_x86(c, &boot_cpu_data);
+ *c = boot_cpu_data;
c->cpu_index = id;
if (id != 0)
identify_secondary_cpu(c);
@@ -389,15 +314,12 @@ void __cpuinit smp_store_cpu_info(int id)
static void __cpuinit link_thread_siblings(int cpu1, int cpu2)
{
- struct cpuinfo_x86 *c1 = &cpu_data(cpu1);
- struct cpuinfo_x86 *c2 = &cpu_data(cpu2);
-
cpumask_set_cpu(cpu1, cpu_sibling_mask(cpu2));
cpumask_set_cpu(cpu2, cpu_sibling_mask(cpu1));
cpumask_set_cpu(cpu1, cpu_core_mask(cpu2));
cpumask_set_cpu(cpu2, cpu_core_mask(cpu1));
- cpumask_set_cpu(cpu1, c2->llc_shared_map);
- cpumask_set_cpu(cpu2, c1->llc_shared_map);
+ cpumask_set_cpu(cpu1, cpu_llc_shared_mask(cpu2));
+ cpumask_set_cpu(cpu2, cpu_llc_shared_mask(cpu1));
}
@@ -414,6 +336,7 @@ void __cpuinit set_cpu_sibling_map(int cpu)
if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
if (c->phys_proc_id == o->phys_proc_id &&
+ per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i) &&
c->compute_unit_id == o->compute_unit_id)
link_thread_siblings(cpu, i);
} else if (c->phys_proc_id == o->phys_proc_id &&
@@ -425,7 +348,7 @@ void __cpuinit set_cpu_sibling_map(int cpu)
cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
}
- cpumask_set_cpu(cpu, c->llc_shared_map);
+ cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu));
if (__this_cpu_read(cpu_info.x86_max_cores) == 1) {
cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu));
@@ -436,8 +359,8 @@ void __cpuinit set_cpu_sibling_map(int cpu)
for_each_cpu(i, cpu_sibling_setup_mask) {
if (per_cpu(cpu_llc_id, cpu) != BAD_APICID &&
per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) {
- cpumask_set_cpu(i, c->llc_shared_map);
- cpumask_set_cpu(cpu, cpu_data(i).llc_shared_map);
+ cpumask_set_cpu(i, cpu_llc_shared_mask(cpu));
+ cpumask_set_cpu(cpu, cpu_llc_shared_mask(i));
}
if (c->phys_proc_id == cpu_data(i).phys_proc_id) {
cpumask_set_cpu(i, cpu_core_mask(cpu));
@@ -476,7 +399,7 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
!(cpu_has(c, X86_FEATURE_AMD_DCM)))
return cpu_core_mask(cpu);
else
- return c->llc_shared_map;
+ return cpu_llc_shared_mask(cpu);
}
static void impress_friends(void)
@@ -788,7 +711,7 @@ do_rest:
stack_start = c_idle.idle->thread.sp;
/* start_ip had better be page-aligned! */
- start_ip = setup_trampoline();
+ start_ip = trampoline_address();
/* So we see what's up */
announce_cpu(cpu, apicid);
@@ -798,6 +721,8 @@ do_rest:
* the targeted processor.
*/
+ printk(KERN_DEBUG "smpboot cpu %d: start_ip = %lx\n", cpu, start_ip);
+
atomic_set(&init_deasserted, 0);
if (get_uv_system_type() != UV_NON_UNIQUE_APIC) {
@@ -851,8 +776,8 @@ do_rest:
pr_debug("CPU%d: has booted.\n", cpu);
else {
boot_error = 1;
- if (*((volatile unsigned char *)trampoline_base)
- == 0xA5)
+ if (*(volatile u32 *)TRAMPOLINE_SYM(trampoline_status)
+ == 0xA5A5A5A5)
/* trampoline started but...? */
pr_err("CPU%d: Stuck ??\n", cpu);
else
@@ -878,7 +803,7 @@ do_rest:
}
/* mark "stuck" area as not stuck */
- *((volatile unsigned long *)trampoline_base) = 0;
+ *(volatile u32 *)TRAMPOLINE_SYM(trampoline_status) = 0;
if (get_uv_system_type() != UV_NON_UNIQUE_APIC) {
/*
@@ -945,6 +870,14 @@ int __cpuinit native_cpu_up(unsigned int cpu)
return 0;
}
+/**
+ * arch_disable_smp_support() - disables SMP support for x86 at runtime
+ */
+void arch_disable_smp_support(void)
+{
+ disable_ioapic_support();
+}
+
/*
* Fall back to non SMP mode after errors.
*
@@ -960,7 +893,6 @@ static __init void disable_smp(void)
physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
else
physid_set_mask_of_physid(0, &phys_cpu_present_map);
- map_cpu_to_logical_apicid();
cpumask_set_cpu(0, cpu_sibling_mask(0));
cpumask_set_cpu(0, cpu_core_mask(0));
}
@@ -1045,7 +977,7 @@ static int __init smp_sanity_check(unsigned max_cpus)
"(tell your hw vendor)\n");
}
smpboot_clear_io_apic();
- arch_disable_smp_support();
+ disable_ioapic_support();
return -1;
}
@@ -1089,21 +1021,19 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
preempt_disable();
smp_cpu_index_default();
- memcpy(__this_cpu_ptr(&cpu_info), &boot_cpu_data, sizeof(cpu_info));
- cpumask_copy(cpu_callin_mask, cpumask_of(0));
- mb();
+
/*
* Setup boot CPU information
*/
smp_store_cpu_info(0); /* Final full version of the data */
-#ifdef CONFIG_X86_32
- boot_cpu_logical_apicid = logical_smp_processor_id();
-#endif
+ cpumask_copy(cpu_callin_mask, cpumask_of(0));
+ mb();
+
current_thread_info()->cpu = 0; /* needed? */
for_each_possible_cpu(i) {
zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
- zalloc_cpumask_var(&cpu_data(i).llc_shared_map, GFP_KERNEL);
+ zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
}
set_cpu_sibling_map(0);
@@ -1139,8 +1069,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
bsp_end_local_APIC_setup();
- map_cpu_to_logical_apicid();
-
if (apic->setup_portio_remap)
apic->setup_portio_remap();
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index b35786dc9b8f..68c7b9aa5001 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -340,3 +340,4 @@ ENTRY(sys_call_table)
.long sys_fanotify_init
.long sys_fanotify_mark
.long sys_prlimit64 /* 340 */
+ .long sys_clock_adjtime
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c
index a375616d77f7..a91ae7709b49 100644
--- a/arch/x86/kernel/trampoline.c
+++ b/arch/x86/kernel/trampoline.c
@@ -2,39 +2,41 @@
#include <linux/memblock.h>
#include <asm/trampoline.h>
+#include <asm/cacheflush.h>
#include <asm/pgtable.h>
-#if defined(CONFIG_X86_64) && defined(CONFIG_ACPI_SLEEP)
-#define __trampinit
-#define __trampinitdata
-#else
-#define __trampinit __cpuinit
-#define __trampinitdata __cpuinitdata
-#endif
+unsigned char *x86_trampoline_base;
-/* ready for x86_64 and x86 */
-unsigned char *__trampinitdata trampoline_base;
-
-void __init reserve_trampoline_memory(void)
+void __init setup_trampolines(void)
{
phys_addr_t mem;
+ size_t size = PAGE_ALIGN(x86_trampoline_end - x86_trampoline_start);
/* Has to be in very low memory so we can execute real-mode AP code. */
- mem = memblock_find_in_range(0, 1<<20, TRAMPOLINE_SIZE, PAGE_SIZE);
+ mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE);
if (mem == MEMBLOCK_ERROR)
panic("Cannot allocate trampoline\n");
- trampoline_base = __va(mem);
- memblock_x86_reserve_range(mem, mem + TRAMPOLINE_SIZE, "TRAMPOLINE");
+ x86_trampoline_base = __va(mem);
+ memblock_x86_reserve_range(mem, mem + size, "TRAMPOLINE");
+
+ printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n",
+ x86_trampoline_base, (unsigned long long)mem, size);
+
+ memcpy(x86_trampoline_base, x86_trampoline_start, size);
}
/*
- * Currently trivial. Write the real->protected mode
- * bootstrap into the page concerned. The caller
- * has made sure it's suitably aligned.
+ * setup_trampolines() gets called very early, to guarantee the
+ * availability of low memory. This is before the proper kernel page
+ * tables are set up, so we cannot set page permissions in that
+ * function. Thus, we use an arch_initcall instead.
*/
-unsigned long __trampinit setup_trampoline(void)
+static int __init configure_trampolines(void)
{
- memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE);
- return virt_to_phys(trampoline_base);
+ size_t size = PAGE_ALIGN(x86_trampoline_end - x86_trampoline_start);
+
+ set_memory_x((unsigned long)x86_trampoline_base, size >> PAGE_SHIFT);
+ return 0;
}
+arch_initcall(configure_trampolines);
diff --git a/arch/x86/kernel/trampoline_32.S b/arch/x86/kernel/trampoline_32.S
index 8508237e8e43..451c0a7ef7fd 100644
--- a/arch/x86/kernel/trampoline_32.S
+++ b/arch/x86/kernel/trampoline_32.S
@@ -32,9 +32,11 @@
#include <asm/segment.h>
#include <asm/page_types.h>
-/* We can free up trampoline after bootup if cpu hotplug is not supported. */
-__CPUINITRODATA
-.code16
+#ifdef CONFIG_SMP
+
+ .section ".x86_trampoline","a"
+ .balign PAGE_SIZE
+ .code16
ENTRY(trampoline_data)
r_base = .
@@ -44,7 +46,7 @@ r_base = .
cli # We should be safe anyway
- movl $0xA5A5A5A5, trampoline_data - r_base
+ movl $0xA5A5A5A5, trampoline_status - r_base
# write marker for master knows we're running
/* GDT tables in non default location kernel can be beyond 16MB and
@@ -72,5 +74,10 @@ boot_idt_descr:
.word 0 # idt limit = 0
.long 0 # idt base = 0L
+ENTRY(trampoline_status)
+ .long 0
+
.globl trampoline_end
trampoline_end:
+
+#endif /* CONFIG_SMP */
diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S
index 075d130efcf9..09ff51799e96 100644
--- a/arch/x86/kernel/trampoline_64.S
+++ b/arch/x86/kernel/trampoline_64.S
@@ -32,13 +32,9 @@
#include <asm/segment.h>
#include <asm/processor-flags.h>
-#ifdef CONFIG_ACPI_SLEEP
-.section .rodata, "a", @progbits
-#else
-/* We can free up the trampoline after bootup if cpu hotplug is not supported. */
-__CPUINITRODATA
-#endif
-.code16
+ .section ".x86_trampoline","a"
+ .balign PAGE_SIZE
+ .code16
ENTRY(trampoline_data)
r_base = .
@@ -50,7 +46,7 @@ r_base = .
mov %ax, %ss
- movl $0xA5A5A5A5, trampoline_data - r_base
+ movl $0xA5A5A5A5, trampoline_status - r_base
# write marker for master knows we're running
# Setup stack
@@ -64,10 +60,13 @@ r_base = .
movzx %ax, %esi # Find the 32bit trampoline location
shll $4, %esi
- # Fixup the vectors
- addl %esi, startup_32_vector - r_base
- addl %esi, startup_64_vector - r_base
- addl %esi, tgdt + 2 - r_base # Fixup the gdt pointer
+ # Fixup the absolute vectors
+ leal (startup_32 - r_base)(%esi), %eax
+ movl %eax, startup_32_vector - r_base
+ leal (startup_64 - r_base)(%esi), %eax
+ movl %eax, startup_64_vector - r_base
+ leal (tgdt - r_base)(%esi), %eax
+ movl %eax, (tgdt + 2 - r_base)
/*
* GDT tables in non default location kernel can be beyond 16MB and
@@ -129,6 +128,7 @@ no_longmode:
jmp no_longmode
#include "verify_cpu.S"
+ .balign 4
# Careful these need to be in the same 64K segment as the above;
tidt:
.word 0 # idt limit = 0
@@ -156,6 +156,10 @@ startup_64_vector:
.long startup_64 - r_base
.word __KERNEL_CS, 0
+ .balign 4
+ENTRY(trampoline_status)
+ .long 0
+
trampoline_stack:
.org 0x1000
trampoline_stack_end:
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index bf4700755184..e70cc3d599a7 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -240,6 +240,18 @@ SECTIONS
INIT_DATA_SECTION(16)
+ /*
+ * Code and data for a variety of lowlevel trampolines, to be
+ * copied into base memory (< 1 MiB) during initialization.
+ * Since it is copied early, the main copy can be discarded
+ * afterwards.
+ */
+ .x86_trampoline : AT(ADDR(.x86_trampoline) - LOAD_OFFSET) {
+ x86_trampoline_start = .;
+ *(.x86_trampoline)
+ x86_trampoline_end = .;
+ }
+
.x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
__x86_cpu_dev_start = .;
*(.x86_cpu_dev.init)
@@ -291,6 +303,7 @@ SECTIONS
*(.iommu_table)
__iommu_table_end = .;
}
+
. = ALIGN(8);
/*
* .exit.text is discard at runtime, not link time, to deal with
@@ -305,7 +318,7 @@ SECTIONS
}
#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP)
- PERCPU(THREAD_SIZE)
+ PERCPU(PAGE_SIZE)
#endif
. = ALIGN(PAGE_SIZE);
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 1b950d151e58..9796c2f3d074 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -52,6 +52,7 @@ extern void *__memcpy(void *, const void *, __kernel_size_t);
EXPORT_SYMBOL(memset);
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(__memcpy);
+EXPORT_SYMBOL(memmove);
EXPORT_SYMBOL(empty_zero_page);
#ifndef CONFIG_PARAVIRT
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index ceb2911aa439..c11514e9128b 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -70,6 +70,7 @@ struct x86_init_ops x86_init __initdata = {
.setup_percpu_clockev = setup_boot_APIC_clock,
.tsc_pre_init = x86_init_noop,
.timer_init = hpet_time_init,
+ .wallclock_init = x86_init_noop,
},
.iommu = {
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
new file mode 100644
index 000000000000..0ecb8433e5a8
--- /dev/null
+++ b/arch/x86/lib/memmove_64.S
@@ -0,0 +1,197 @@
+/*
+ * Normally compiler builtins are used, but sometimes the compiler calls out
+ * of line code. Based on asm-i386/string.h.
+ *
+ * This assembly file is re-written from memmove_64.c file.
+ * - Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
+ */
+#define _STRING_C
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+
+#undef memmove
+
+/*
+ * Implement memmove(). This can handle overlap between src and dst.
+ *
+ * Input:
+ * rdi: dest
+ * rsi: src
+ * rdx: count
+ *
+ * Output:
+ * rax: dest
+ */
+ENTRY(memmove)
+ CFI_STARTPROC
+ /* Handle more 32bytes in loop */
+ mov %rdi, %rax
+ cmp $0x20, %rdx
+ jb 1f
+
+ /* Decide forward/backward copy mode */
+ cmp %rdi, %rsi
+ jb 2f
+
+ /*
+ * movsq instruction have many startup latency
+ * so we handle small size by general register.
+ */
+ cmp $680, %rdx
+ jb 3f
+ /*
+ * movsq instruction is only good for aligned case.
+ */
+
+ cmpb %dil, %sil
+ je 4f
+3:
+ sub $0x20, %rdx
+ /*
+ * We gobble 32byts forward in each loop.
+ */
+5:
+ sub $0x20, %rdx
+ movq 0*8(%rsi), %r11
+ movq 1*8(%rsi), %r10
+ movq 2*8(%rsi), %r9
+ movq 3*8(%rsi), %r8
+ leaq 4*8(%rsi), %rsi
+
+ movq %r11, 0*8(%rdi)
+ movq %r10, 1*8(%rdi)
+ movq %r9, 2*8(%rdi)
+ movq %r8, 3*8(%rdi)
+ leaq 4*8(%rdi), %rdi
+ jae 5b
+ addq $0x20, %rdx
+ jmp 1f
+ /*
+ * Handle data forward by movsq.
+ */
+ .p2align 4
+4:
+ movq %rdx, %rcx
+ movq -8(%rsi, %rdx), %r11
+ lea -8(%rdi, %rdx), %r10
+ shrq $3, %rcx
+ rep movsq
+ movq %r11, (%r10)
+ jmp 13f
+ /*
+ * Handle data backward by movsq.
+ */
+ .p2align 4
+7:
+ movq %rdx, %rcx
+ movq (%rsi), %r11
+ movq %rdi, %r10
+ leaq -8(%rsi, %rdx), %rsi
+ leaq -8(%rdi, %rdx), %rdi
+ shrq $3, %rcx
+ std
+ rep movsq
+ cld
+ movq %r11, (%r10)
+ jmp 13f
+
+ /*
+ * Start to prepare for backward copy.
+ */
+ .p2align 4
+2:
+ cmp $680, %rdx
+ jb 6f
+ cmp %dil, %sil
+ je 7b
+6:
+ /*
+ * Calculate copy position to tail.
+ */
+ addq %rdx, %rsi
+ addq %rdx, %rdi
+ subq $0x20, %rdx
+ /*
+ * We gobble 32byts backward in each loop.
+ */
+8:
+ subq $0x20, %rdx
+ movq -1*8(%rsi), %r11
+ movq -2*8(%rsi), %r10
+ movq -3*8(%rsi), %r9
+ movq -4*8(%rsi), %r8
+ leaq -4*8(%rsi), %rsi
+
+ movq %r11, -1*8(%rdi)
+ movq %r10, -2*8(%rdi)
+ movq %r9, -3*8(%rdi)
+ movq %r8, -4*8(%rdi)
+ leaq -4*8(%rdi), %rdi
+ jae 8b
+ /*
+ * Calculate copy position to head.
+ */
+ addq $0x20, %rdx
+ subq %rdx, %rsi
+ subq %rdx, %rdi
+1:
+ cmpq $16, %rdx
+ jb 9f
+ /*
+ * Move data from 16 bytes to 31 bytes.
+ */
+ movq 0*8(%rsi), %r11
+ movq 1*8(%rsi), %r10
+ movq -2*8(%rsi, %rdx), %r9
+ movq -1*8(%rsi, %rdx), %r8
+ movq %r11, 0*8(%rdi)
+ movq %r10, 1*8(%rdi)
+ movq %r9, -2*8(%rdi, %rdx)
+ movq %r8, -1*8(%rdi, %rdx)
+ jmp 13f
+ .p2align 4
+9:
+ cmpq $8, %rdx
+ jb 10f
+ /*
+ * Move data from 8 bytes to 15 bytes.
+ */
+ movq 0*8(%rsi), %r11
+ movq -1*8(%rsi, %rdx), %r10
+ movq %r11, 0*8(%rdi)
+ movq %r10, -1*8(%rdi, %rdx)
+ jmp 13f
+10:
+ cmpq $4, %rdx
+ jb 11f
+ /*
+ * Move data from 4 bytes to 7 bytes.
+ */
+ movl (%rsi), %r11d
+ movl -4(%rsi, %rdx), %r10d
+ movl %r11d, (%rdi)
+ movl %r10d, -4(%rdi, %rdx)
+ jmp 13f
+11:
+ cmp $2, %rdx
+ jb 12f
+ /*
+ * Move data from 2 bytes to 3 bytes.
+ */
+ movw (%rsi), %r11w
+ movw -2(%rsi, %rdx), %r10w
+ movw %r11w, (%rdi)
+ movw %r10w, -2(%rdi, %rdx)
+ jmp 13f
+12:
+ cmp $1, %rdx
+ jb 13f
+ /*
+ * Move data for 1 byte.
+ */
+ movb (%rsi), %r11b
+ movb %r11b, (%rdi)
+13:
+ retq
+ CFI_ENDPROC
+ENDPROC(memmove)
diff --git a/arch/x86/lib/memmove_64.c b/arch/x86/lib/memmove_64.c
deleted file mode 100644
index 6d0f0ec41b34..000000000000
--- a/arch/x86/lib/memmove_64.c
+++ /dev/null
@@ -1,192 +0,0 @@
-/* Normally compiler builtins are used, but sometimes the compiler calls out
- of line code. Based on asm-i386/string.h.
- */
-#define _STRING_C
-#include <linux/string.h>
-#include <linux/module.h>
-
-#undef memmove
-void *memmove(void *dest, const void *src, size_t count)
-{
- unsigned long d0,d1,d2,d3,d4,d5,d6,d7;
- char *ret;
-
- __asm__ __volatile__(
- /* Handle more 32bytes in loop */
- "mov %2, %3\n\t"
- "cmp $0x20, %0\n\t"
- "jb 1f\n\t"
-
- /* Decide forward/backward copy mode */
- "cmp %2, %1\n\t"
- "jb 2f\n\t"
-
- /*
- * movsq instruction have many startup latency
- * so we handle small size by general register.
- */
- "cmp $680, %0\n\t"
- "jb 3f\n\t"
- /*
- * movsq instruction is only good for aligned case.
- */
- "cmpb %%dil, %%sil\n\t"
- "je 4f\n\t"
- "3:\n\t"
- "sub $0x20, %0\n\t"
- /*
- * We gobble 32byts forward in each loop.
- */
- "5:\n\t"
- "sub $0x20, %0\n\t"
- "movq 0*8(%1), %4\n\t"
- "movq 1*8(%1), %5\n\t"
- "movq 2*8(%1), %6\n\t"
- "movq 3*8(%1), %7\n\t"
- "leaq 4*8(%1), %1\n\t"
-
- "movq %4, 0*8(%2)\n\t"
- "movq %5, 1*8(%2)\n\t"
- "movq %6, 2*8(%2)\n\t"
- "movq %7, 3*8(%2)\n\t"
- "leaq 4*8(%2), %2\n\t"
- "jae 5b\n\t"
- "addq $0x20, %0\n\t"
- "jmp 1f\n\t"
- /*
- * Handle data forward by movsq.
- */
- ".p2align 4\n\t"
- "4:\n\t"
- "movq %0, %8\n\t"
- "movq -8(%1, %0), %4\n\t"
- "lea -8(%2, %0), %5\n\t"
- "shrq $3, %8\n\t"
- "rep movsq\n\t"
- "movq %4, (%5)\n\t"
- "jmp 13f\n\t"
- /*
- * Handle data backward by movsq.
- */
- ".p2align 4\n\t"
- "7:\n\t"
- "movq %0, %8\n\t"
- "movq (%1), %4\n\t"
- "movq %2, %5\n\t"
- "leaq -8(%1, %0), %1\n\t"
- "leaq -8(%2, %0), %2\n\t"
- "shrq $3, %8\n\t"
- "std\n\t"
- "rep movsq\n\t"
- "cld\n\t"
- "movq %4, (%5)\n\t"
- "jmp 13f\n\t"
-
- /*
- * Start to prepare for backward copy.
- */
- ".p2align 4\n\t"
- "2:\n\t"
- "cmp $680, %0\n\t"
- "jb 6f \n\t"
- "cmp %%dil, %%sil\n\t"
- "je 7b \n\t"
- "6:\n\t"
- /*
- * Calculate copy position to tail.
- */
- "addq %0, %1\n\t"
- "addq %0, %2\n\t"
- "subq $0x20, %0\n\t"
- /*
- * We gobble 32byts backward in each loop.
- */
- "8:\n\t"
- "subq $0x20, %0\n\t"
- "movq -1*8(%1), %4\n\t"
- "movq -2*8(%1), %5\n\t"
- "movq -3*8(%1), %6\n\t"
- "movq -4*8(%1), %7\n\t"
- "leaq -4*8(%1), %1\n\t"
-
- "movq %4, -1*8(%2)\n\t"
- "movq %5, -2*8(%2)\n\t"
- "movq %6, -3*8(%2)\n\t"
- "movq %7, -4*8(%2)\n\t"
- "leaq -4*8(%2), %2\n\t"
- "jae 8b\n\t"
- /*
- * Calculate copy position to head.
- */
- "addq $0x20, %0\n\t"
- "subq %0, %1\n\t"
- "subq %0, %2\n\t"
- "1:\n\t"
- "cmpq $16, %0\n\t"
- "jb 9f\n\t"
- /*
- * Move data from 16 bytes to 31 bytes.
- */
- "movq 0*8(%1), %4\n\t"
- "movq 1*8(%1), %5\n\t"
- "movq -2*8(%1, %0), %6\n\t"
- "movq -1*8(%1, %0), %7\n\t"
- "movq %4, 0*8(%2)\n\t"
- "movq %5, 1*8(%2)\n\t"
- "movq %6, -2*8(%2, %0)\n\t"
- "movq %7, -1*8(%2, %0)\n\t"
- "jmp 13f\n\t"
- ".p2align 4\n\t"
- "9:\n\t"
- "cmpq $8, %0\n\t"
- "jb 10f\n\t"
- /*
- * Move data from 8 bytes to 15 bytes.
- */
- "movq 0*8(%1), %4\n\t"
- "movq -1*8(%1, %0), %5\n\t"
- "movq %4, 0*8(%2)\n\t"
- "movq %5, -1*8(%2, %0)\n\t"
- "jmp 13f\n\t"
- "10:\n\t"
- "cmpq $4, %0\n\t"
- "jb 11f\n\t"
- /*
- * Move data from 4 bytes to 7 bytes.
- */
- "movl (%1), %4d\n\t"
- "movl -4(%1, %0), %5d\n\t"
- "movl %4d, (%2)\n\t"
- "movl %5d, -4(%2, %0)\n\t"
- "jmp 13f\n\t"
- "11:\n\t"
- "cmp $2, %0\n\t"
- "jb 12f\n\t"
- /*
- * Move data from 2 bytes to 3 bytes.
- */
- "movw (%1), %4w\n\t"
- "movw -2(%1, %0), %5w\n\t"
- "movw %4w, (%2)\n\t"
- "movw %5w, -2(%2, %0)\n\t"
- "jmp 13f\n\t"
- "12:\n\t"
- "cmp $1, %0\n\t"
- "jb 13f\n\t"
- /*
- * Move data for 1 byte.
- */
- "movb (%1), %4b\n\t"
- "movb %4b, (%2)\n\t"
- "13:\n\t"
- : "=&d" (d0), "=&S" (d1), "=&D" (d2), "=&a" (ret) ,
- "=r"(d3), "=r"(d4), "=r"(d5), "=r"(d6), "=&c" (d7)
- :"0" (count),
- "1" (src),
- "2" (dest)
- :"memory");
-
- return ret;
-
-}
-EXPORT_SYMBOL(memmove);
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 09df2f9a3d69..3e608edf9958 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o
obj-$(CONFIG_NUMA) += numa.o numa_$(BITS).o
obj-$(CONFIG_AMD_NUMA) += amdtopology_64.o
obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o
+obj-$(CONFIG_NUMA_EMU) += numa_emulation.o
obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o
diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c
index f21962c435ed..0919c26820d4 100644
--- a/arch/x86/mm/amdtopology_64.c
+++ b/arch/x86/mm/amdtopology_64.c
@@ -26,9 +26,7 @@
#include <asm/apic.h>
#include <asm/amd_nb.h>
-static struct bootnode __initdata nodes[8];
static unsigned char __initdata nodeids[8];
-static nodemask_t __initdata nodes_parsed = NODE_MASK_NONE;
static __init int find_northbridge(void)
{
@@ -51,7 +49,7 @@ static __init int find_northbridge(void)
return num;
}
- return -1;
+ return -ENOENT;
}
static __init void early_get_boot_cpu_id(void)
@@ -69,17 +67,18 @@ static __init void early_get_boot_cpu_id(void)
#endif
}
-int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
+int __init amd_numa_init(void)
{
- unsigned long start = PFN_PHYS(start_pfn);
- unsigned long end = PFN_PHYS(end_pfn);
+ unsigned long start = PFN_PHYS(0);
+ unsigned long end = PFN_PHYS(max_pfn);
unsigned numnodes;
unsigned long prevbase;
- int i, nb, found = 0;
+ int i, j, nb;
u32 nodeid, reg;
+ unsigned int bits, cores, apicid_base;
if (!early_pci_allowed())
- return -1;
+ return -EINVAL;
nb = find_northbridge();
if (nb < 0)
@@ -90,7 +89,7 @@ int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
reg = read_pci_config(0, nb, 0, 0x60);
numnodes = ((reg >> 4) & 0xF) + 1;
if (numnodes <= 1)
- return -1;
+ return -ENOENT;
pr_info("Number of physical nodes %d\n", numnodes);
@@ -121,9 +120,9 @@ int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
if ((base >> 8) & 3 || (limit >> 8) & 3) {
pr_err("Node %d using interleaving mode %lx/%lx\n",
nodeid, (base >> 8) & 3, (limit >> 8) & 3);
- return -1;
+ return -EINVAL;
}
- if (node_isset(nodeid, nodes_parsed)) {
+ if (node_isset(nodeid, numa_nodes_parsed)) {
pr_info("Node %d already present, skipping\n",
nodeid);
continue;
@@ -160,117 +159,28 @@ int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
if (prevbase > base) {
pr_err("Node map not sorted %lx,%lx\n",
prevbase, base);
- return -1;
+ return -EINVAL;
}
pr_info("Node %d MemBase %016lx Limit %016lx\n",
nodeid, base, limit);
- found++;
-
- nodes[nodeid].start = base;
- nodes[nodeid].end = limit;
-
prevbase = base;
-
- node_set(nodeid, nodes_parsed);
- }
-
- if (!found)
- return -1;
- return 0;
-}
-
-#ifdef CONFIG_NUMA_EMU
-static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = {
- [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
-};
-
-void __init amd_get_nodes(struct bootnode *physnodes)
-{
- int i;
-
- for_each_node_mask(i, nodes_parsed) {
- physnodes[i].start = nodes[i].start;
- physnodes[i].end = nodes[i].end;
+ numa_add_memblk(nodeid, base, limit);
+ node_set(nodeid, numa_nodes_parsed);
}
-}
-
-static int __init find_node_by_addr(unsigned long addr)
-{
- int ret = NUMA_NO_NODE;
- int i;
-
- for (i = 0; i < 8; i++)
- if (addr >= nodes[i].start && addr < nodes[i].end) {
- ret = i;
- break;
- }
- return ret;
-}
-/*
- * For NUMA emulation, fake proximity domain (_PXM) to node id mappings must be
- * setup to represent the physical topology but reflect the emulated
- * environment. For each emulated node, the real node which it appears on is
- * found and a fake pxm to nid mapping is created which mirrors the actual
- * locality. node_distance() then represents the correct distances between
- * emulated nodes by using the fake acpi mappings to pxms.
- */
-void __init amd_fake_nodes(const struct bootnode *nodes, int nr_nodes)
-{
- unsigned int bits;
- unsigned int cores;
- unsigned int apicid_base = 0;
- int i;
+ if (!nodes_weight(numa_nodes_parsed))
+ return -ENOENT;
+ /*
+ * We seem to have valid NUMA configuration. Map apicids to nodes
+ * using the coreid bits from early_identify_cpu.
+ */
bits = boot_cpu_data.x86_coreid_bits;
cores = 1 << bits;
- early_get_boot_cpu_id();
- if (boot_cpu_physical_apicid > 0)
- apicid_base = boot_cpu_physical_apicid;
-
- for (i = 0; i < nr_nodes; i++) {
- int index;
- int nid;
- int j;
-
- nid = find_node_by_addr(nodes[i].start);
- if (nid == NUMA_NO_NODE)
- continue;
-
- index = nodeids[nid] << bits;
- if (fake_apicid_to_node[index + apicid_base] == NUMA_NO_NODE)
- for (j = apicid_base; j < cores + apicid_base; j++)
- fake_apicid_to_node[index + j] = i;
-#ifdef CONFIG_ACPI_NUMA
- __acpi_map_pxm_to_node(nid, i);
-#endif
- }
- memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
-}
-#endif /* CONFIG_NUMA_EMU */
-
-int __init amd_scan_nodes(void)
-{
- unsigned int bits;
- unsigned int cores;
- unsigned int apicid_base;
- int i;
-
- BUG_ON(nodes_empty(nodes_parsed));
- node_possible_map = nodes_parsed;
- memnode_shift = compute_hash_shift(nodes, 8, NULL);
- if (memnode_shift < 0) {
- pr_err("No NUMA node hash function found. Contact maintainer\n");
- return -1;
- }
- pr_info("Using node hash shift of %d\n", memnode_shift);
-
- /* use the coreid bits from early_identify_cpu */
- bits = boot_cpu_data.x86_coreid_bits;
- cores = (1<<bits);
apicid_base = 0;
+
/* get the APIC ID of the BSP early for systems with apicid lifting */
early_get_boot_cpu_id();
if (boot_cpu_physical_apicid > 0) {
@@ -278,17 +188,9 @@ int __init amd_scan_nodes(void)
apicid_base = boot_cpu_physical_apicid;
}
- for_each_node_mask(i, node_possible_map) {
- int j;
-
- memblock_x86_register_active_regions(i,
- nodes[i].start >> PAGE_SHIFT,
- nodes[i].end >> PAGE_SHIFT);
+ for_each_node_mask(i, numa_nodes_parsed)
for (j = apicid_base; j < cores + apicid_base; j++)
- apicid_to_node[(i << bits) + j] = i;
- setup_node_bootmem(i, nodes[i].start, nodes[i].end);
- }
+ set_apicid_to_node((i << bits) + j, i);
- numa_init_array();
return 0;
}
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 947f42abe820..286d289b039b 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -18,9 +18,9 @@
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-unsigned long __initdata e820_table_start;
-unsigned long __meminitdata e820_table_end;
-unsigned long __meminitdata e820_table_top;
+unsigned long __initdata pgt_buf_start;
+unsigned long __meminitdata pgt_buf_end;
+unsigned long __meminitdata pgt_buf_top;
int after_bootmem;
@@ -33,7 +33,7 @@ int direct_gbpages
static void __init find_early_table_space(unsigned long end, int use_pse,
int use_gbpages)
{
- unsigned long puds, pmds, ptes, tables, start;
+ unsigned long puds, pmds, ptes, tables, start = 0, good_end = end;
phys_addr_t base;
puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
@@ -65,29 +65,20 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
#ifdef CONFIG_X86_32
/* for fixmap */
tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE);
-#endif
- /*
- * RED-PEN putting page tables only on node 0 could
- * cause a hotspot and fill up ZONE_DMA. The page tables
- * need roughly 0.5KB per GB.
- */
-#ifdef CONFIG_X86_32
- start = 0x7000;
-#else
- start = 0x8000;
+ good_end = max_pfn_mapped << PAGE_SHIFT;
#endif
- base = memblock_find_in_range(start, max_pfn_mapped<<PAGE_SHIFT,
- tables, PAGE_SIZE);
+
+ base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE);
if (base == MEMBLOCK_ERROR)
panic("Cannot find space for the kernel page tables");
- e820_table_start = base >> PAGE_SHIFT;
- e820_table_end = e820_table_start;
- e820_table_top = e820_table_start + (tables >> PAGE_SHIFT);
+ pgt_buf_start = base >> PAGE_SHIFT;
+ pgt_buf_end = pgt_buf_start;
+ pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT);
printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n",
- end, e820_table_start << PAGE_SHIFT, e820_table_top << PAGE_SHIFT);
+ end, pgt_buf_start << PAGE_SHIFT, pgt_buf_top << PAGE_SHIFT);
}
struct map_range {
@@ -279,30 +270,11 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
load_cr3(swapper_pg_dir);
#endif
-#ifdef CONFIG_X86_64
- if (!after_bootmem && !start) {
- pud_t *pud;
- pmd_t *pmd;
-
- mmu_cr4_features = read_cr4();
-
- /*
- * _brk_end cannot change anymore, but it and _end may be
- * located on different 2M pages. cleanup_highmap(), however,
- * can only consider _end when it runs, so destroy any
- * mappings beyond _brk_end here.
- */
- pud = pud_offset(pgd_offset_k(_brk_end), _brk_end);
- pmd = pmd_offset(pud, _brk_end - 1);
- while (++pmd <= pmd_offset(pud, (unsigned long)_end - 1))
- pmd_clear(pmd);
- }
-#endif
__flush_tlb_all();
- if (!after_bootmem && e820_table_end > e820_table_start)
- memblock_x86_reserve_range(e820_table_start << PAGE_SHIFT,
- e820_table_end << PAGE_SHIFT, "PGTABLE");
+ if (!after_bootmem && pgt_buf_end > pgt_buf_start)
+ memblock_x86_reserve_range(pgt_buf_start << PAGE_SHIFT,
+ pgt_buf_end << PAGE_SHIFT, "PGTABLE");
if (!after_bootmem)
early_memtest(start, end);
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index c821074b7f0b..73ad7ebd6e9c 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -62,10 +62,10 @@ bool __read_mostly __vmalloc_start_set = false;
static __init void *alloc_low_page(void)
{
- unsigned long pfn = e820_table_end++;
+ unsigned long pfn = pgt_buf_end++;
void *adr;
- if (pfn >= e820_table_top)
+ if (pfn >= pgt_buf_top)
panic("alloc_low_page: ran out of memory");
adr = __va(pfn * PAGE_SIZE);
@@ -163,8 +163,8 @@ static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd,
if (pmd_idx_kmap_begin != pmd_idx_kmap_end
&& (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin
&& (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end
- && ((__pa(pte) >> PAGE_SHIFT) < e820_table_start
- || (__pa(pte) >> PAGE_SHIFT) >= e820_table_end)) {
+ && ((__pa(pte) >> PAGE_SHIFT) < pgt_buf_start
+ || (__pa(pte) >> PAGE_SHIFT) >= pgt_buf_end)) {
pte_t *newpte;
int i;
@@ -644,8 +644,7 @@ void __init find_low_pfn_range(void)
}
#ifndef CONFIG_NEED_MULTIPLE_NODES
-void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
- int acpi, int k8)
+void __init initmem_init(void)
{
#ifdef CONFIG_HIGHMEM
highstart_pfn = highend_pfn = max_pfn;
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 71a59296af80..470cc4704a9a 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -314,7 +314,7 @@ void __init cleanup_highmap(void)
static __ref void *alloc_low_page(unsigned long *phys)
{
- unsigned long pfn = e820_table_end++;
+ unsigned long pfn = pgt_buf_end++;
void *adr;
if (after_bootmem) {
@@ -324,7 +324,7 @@ static __ref void *alloc_low_page(unsigned long *phys)
return adr;
}
- if (pfn >= e820_table_top)
+ if (pfn >= pgt_buf_top)
panic("alloc_low_page: ran out of memory");
adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE);
@@ -333,12 +333,28 @@ static __ref void *alloc_low_page(unsigned long *phys)
return adr;
}
+static __ref void *map_low_page(void *virt)
+{
+ void *adr;
+ unsigned long phys, left;
+
+ if (after_bootmem)
+ return virt;
+
+ phys = __pa(virt);
+ left = phys & (PAGE_SIZE - 1);
+ adr = early_memremap(phys & PAGE_MASK, PAGE_SIZE);
+ adr = (void *)(((unsigned long)adr) | left);
+
+ return adr;
+}
+
static __ref void unmap_low_page(void *adr)
{
if (after_bootmem)
return;
- early_iounmap(adr, PAGE_SIZE);
+ early_iounmap((void *)((unsigned long)adr & PAGE_MASK), PAGE_SIZE);
}
static unsigned long __meminit
@@ -386,15 +402,6 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end,
}
static unsigned long __meminit
-phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end,
- pgprot_t prot)
-{
- pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd);
-
- return phys_pte_init(pte, address, end, prot);
-}
-
-static unsigned long __meminit
phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
unsigned long page_size_mask, pgprot_t prot)
{
@@ -420,8 +427,10 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
if (pmd_val(*pmd)) {
if (!pmd_large(*pmd)) {
spin_lock(&init_mm.page_table_lock);
- last_map_addr = phys_pte_update(pmd, address,
+ pte = map_low_page((pte_t *)pmd_page_vaddr(*pmd));
+ last_map_addr = phys_pte_init(pte, address,
end, prot);
+ unmap_low_page(pte);
spin_unlock(&init_mm.page_table_lock);
continue;
}
@@ -468,18 +477,6 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
}
static unsigned long __meminit
-phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end,
- unsigned long page_size_mask, pgprot_t prot)
-{
- pmd_t *pmd = pmd_offset(pud, 0);
- unsigned long last_map_addr;
-
- last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask, prot);
- __flush_tlb_all();
- return last_map_addr;
-}
-
-static unsigned long __meminit
phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
unsigned long page_size_mask)
{
@@ -504,8 +501,11 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
if (pud_val(*pud)) {
if (!pud_large(*pud)) {
- last_map_addr = phys_pmd_update(pud, addr, end,
+ pmd = map_low_page(pmd_offset(pud, 0));
+ last_map_addr = phys_pmd_init(pmd, addr, end,
page_size_mask, prot);
+ unmap_low_page(pmd);
+ __flush_tlb_all();
continue;
}
/*
@@ -553,17 +553,6 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
return last_map_addr;
}
-static unsigned long __meminit
-phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end,
- unsigned long page_size_mask)
-{
- pud_t *pud;
-
- pud = (pud_t *)pgd_page_vaddr(*pgd);
-
- return phys_pud_init(pud, addr, end, page_size_mask);
-}
-
unsigned long __meminit
kernel_physical_mapping_init(unsigned long start,
unsigned long end,
@@ -587,8 +576,10 @@ kernel_physical_mapping_init(unsigned long start,
next = end;
if (pgd_val(*pgd)) {
- last_map_addr = phys_pud_update(pgd, __pa(start),
+ pud = map_low_page((pud_t *)pgd_page_vaddr(*pgd));
+ last_map_addr = phys_pud_init(pud, __pa(start),
__pa(end), page_size_mask);
+ unmap_low_page(pud);
continue;
}
@@ -612,13 +603,66 @@ kernel_physical_mapping_init(unsigned long start,
}
#ifndef CONFIG_NUMA
-void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
- int acpi, int k8)
+void __init initmem_init(void)
{
- memblock_x86_register_active_regions(0, start_pfn, end_pfn);
+ memblock_x86_register_active_regions(0, 0, max_pfn);
+ init_memory_mapping_high();
}
#endif
+struct mapping_work_data {
+ unsigned long start;
+ unsigned long end;
+ unsigned long pfn_mapped;
+};
+
+static int __init_refok
+mapping_work_fn(unsigned long start_pfn, unsigned long end_pfn, void *datax)
+{
+ struct mapping_work_data *data = datax;
+ unsigned long pfn_mapped;
+ unsigned long final_start, final_end;
+
+ final_start = max_t(unsigned long, start_pfn<<PAGE_SHIFT, data->start);
+ final_end = min_t(unsigned long, end_pfn<<PAGE_SHIFT, data->end);
+
+ if (final_end <= final_start)
+ return 0;
+
+ pfn_mapped = init_memory_mapping(final_start, final_end);
+
+ if (pfn_mapped > data->pfn_mapped)
+ data->pfn_mapped = pfn_mapped;
+
+ return 0;
+}
+
+static unsigned long __init_refok
+init_memory_mapping_active_regions(unsigned long start, unsigned long end)
+{
+ struct mapping_work_data data;
+
+ data.start = start;
+ data.end = end;
+ data.pfn_mapped = 0;
+
+ work_with_active_regions(MAX_NUMNODES, mapping_work_fn, &data);
+
+ return data.pfn_mapped;
+}
+
+void __init_refok init_memory_mapping_high(void)
+{
+ if (max_pfn > max_low_pfn) {
+ max_pfn_mapped = init_memory_mapping_active_regions(1UL<<32,
+ max_pfn<<PAGE_SHIFT);
+ /* can we preserve max_low_pfn ? */
+ max_low_pfn = max_pfn;
+
+ memblock.current_limit = get_max_mapped();
+ }
+}
+
void __init paging_init(void)
{
unsigned long max_zone_pfns[MAX_NR_ZONES];
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index ebf6d7887a38..9559d360fde7 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -26,12 +26,50 @@ static __init int numa_setup(char *opt)
early_param("numa", numa_setup);
/*
- * Which logical CPUs are on which nodes
+ * apicid, cpu, node mappings
*/
+s16 __apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
+ [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
+};
+
cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
EXPORT_SYMBOL(node_to_cpumask_map);
/*
+ * Map cpu index to node index
+ */
+DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
+EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
+
+void __cpuinit numa_set_node(int cpu, int node)
+{
+ int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
+
+ /* early setting, no percpu area yet */
+ if (cpu_to_node_map) {
+ cpu_to_node_map[cpu] = node;
+ return;
+ }
+
+#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+ if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) {
+ printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu);
+ dump_stack();
+ return;
+ }
+#endif
+ per_cpu(x86_cpu_to_node_map, cpu) = node;
+
+ if (node != NUMA_NO_NODE)
+ set_cpu_numa_node(cpu, node);
+}
+
+void __cpuinit numa_clear_node(int cpu)
+{
+ numa_set_node(cpu, NUMA_NO_NODE);
+}
+
+/*
* Allocate node_to_cpumask_map based on number of available nodes
* Requires node_possible_map to be valid.
*
@@ -57,7 +95,174 @@ void __init setup_node_to_cpumask_map(void)
pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids);
}
-#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+/*
+ * There are unfortunately some poorly designed mainboards around that
+ * only connect memory to a single CPU. This breaks the 1:1 cpu->node
+ * mapping. To avoid this fill in the mapping for all possible CPUs,
+ * as the number of CPUs is not known yet. We round robin the existing
+ * nodes.
+ */
+void __init numa_init_array(void)
+{
+ int rr, i;
+
+ rr = first_node(node_online_map);
+ for (i = 0; i < nr_cpu_ids; i++) {
+ if (early_cpu_to_node(i) != NUMA_NO_NODE)
+ continue;
+ numa_set_node(i, rr);
+ rr = next_node(rr, node_online_map);
+ if (rr == MAX_NUMNODES)
+ rr = first_node(node_online_map);
+ }
+}
+
+static __init int find_near_online_node(int node)
+{
+ int n, val;
+ int min_val = INT_MAX;
+ int best_node = -1;
+
+ for_each_online_node(n) {
+ val = node_distance(node, n);
+
+ if (val < min_val) {
+ min_val = val;
+ best_node = n;
+ }
+ }
+
+ return best_node;
+}
+
+/*
+ * Setup early cpu_to_node.
+ *
+ * Populate cpu_to_node[] only if x86_cpu_to_apicid[],
+ * and apicid_to_node[] tables have valid entries for a CPU.
+ * This means we skip cpu_to_node[] initialisation for NUMA
+ * emulation and faking node case (when running a kernel compiled
+ * for NUMA on a non NUMA box), which is OK as cpu_to_node[]
+ * is already initialized in a round robin manner at numa_init_array,
+ * prior to this call, and this initialization is good enough
+ * for the fake NUMA cases.
+ *
+ * Called before the per_cpu areas are setup.
+ */
+void __init init_cpu_to_node(void)
+{
+ int cpu;
+ u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
+
+ BUG_ON(cpu_to_apicid == NULL);
+
+ for_each_possible_cpu(cpu) {
+ int node = numa_cpu_node(cpu);
+
+ if (node == NUMA_NO_NODE)
+ continue;
+ if (!node_online(node))
+ node = find_near_online_node(node);
+ numa_set_node(cpu, node);
+ }
+}
+
+#ifndef CONFIG_DEBUG_PER_CPU_MAPS
+
+# ifndef CONFIG_NUMA_EMU
+void __cpuinit numa_add_cpu(int cpu)
+{
+ cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
+}
+
+void __cpuinit numa_remove_cpu(int cpu)
+{
+ cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
+}
+# endif /* !CONFIG_NUMA_EMU */
+
+#else /* !CONFIG_DEBUG_PER_CPU_MAPS */
+
+int __cpu_to_node(int cpu)
+{
+ if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
+ printk(KERN_WARNING
+ "cpu_to_node(%d): usage too early!\n", cpu);
+ dump_stack();
+ return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
+ }
+ return per_cpu(x86_cpu_to_node_map, cpu);
+}
+EXPORT_SYMBOL(__cpu_to_node);
+
+/*
+ * Same function as cpu_to_node() but used if called before the
+ * per_cpu areas are setup.
+ */
+int early_cpu_to_node(int cpu)
+{
+ if (early_per_cpu_ptr(x86_cpu_to_node_map))
+ return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
+
+ if (!cpu_possible(cpu)) {
+ printk(KERN_WARNING
+ "early_cpu_to_node(%d): no per_cpu area!\n", cpu);
+ dump_stack();
+ return NUMA_NO_NODE;
+ }
+ return per_cpu(x86_cpu_to_node_map, cpu);
+}
+
+struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable)
+{
+ int node = early_cpu_to_node(cpu);
+ struct cpumask *mask;
+ char buf[64];
+
+ if (node == NUMA_NO_NODE) {
+ /* early_cpu_to_node() already emits a warning and trace */
+ return NULL;
+ }
+ mask = node_to_cpumask_map[node];
+ if (!mask) {
+ pr_err("node_to_cpumask_map[%i] NULL\n", node);
+ dump_stack();
+ return NULL;
+ }
+
+ cpulist_scnprintf(buf, sizeof(buf), mask);
+ printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
+ enable ? "numa_add_cpu" : "numa_remove_cpu",
+ cpu, node, buf);
+ return mask;
+}
+
+# ifndef CONFIG_NUMA_EMU
+static void __cpuinit numa_set_cpumask(int cpu, int enable)
+{
+ struct cpumask *mask;
+
+ mask = debug_cpumask_set_cpu(cpu, enable);
+ if (!mask)
+ return;
+
+ if (enable)
+ cpumask_set_cpu(cpu, mask);
+ else
+ cpumask_clear_cpu(cpu, mask);
+}
+
+void __cpuinit numa_add_cpu(int cpu)
+{
+ numa_set_cpumask(cpu, 1);
+}
+
+void __cpuinit numa_remove_cpu(int cpu)
+{
+ numa_set_cpumask(cpu, 0);
+}
+# endif /* !CONFIG_NUMA_EMU */
+
/*
* Returns a pointer to the bitmask of CPUs on Node 'node'.
*/
@@ -80,4 +285,5 @@ const struct cpumask *cpumask_of_node(int node)
return node_to_cpumask_map[node];
}
EXPORT_SYMBOL(cpumask_of_node);
-#endif
+
+#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 84a3e4c9f277..bde3906420df 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -110,6 +110,12 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
static unsigned long kva_start_pfn;
static unsigned long kva_pages;
+
+int __cpuinit numa_cpu_node(int cpu)
+{
+ return apic->x86_32_numa_cpu_node(cpu);
+}
+
/*
* FLAT - support for basic PC memory model with discontig enabled, essentially
* a single node with all available processors in it with a flat
@@ -346,8 +352,7 @@ static void init_remap_allocator(int nid)
(ulong) node_remap_end_vaddr[nid]);
}
-void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
- int acpi, int k8)
+void __init initmem_init(void)
{
int nid;
long kva_target_pfn;
@@ -361,6 +366,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
*/
get_memcfg_numa();
+ numa_init_array();
kva_pages = roundup(calculate_numa_remap_pages(), PTRS_PER_PTE);
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 95ea1551eebc..7757d2214fab 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -13,31 +13,30 @@
#include <linux/module.h>
#include <linux/nodemask.h>
#include <linux/sched.h>
+#include <linux/acpi.h>
#include <asm/e820.h>
#include <asm/proto.h>
#include <asm/dma.h>
-#include <asm/numa.h>
#include <asm/acpi.h>
#include <asm/amd_nb.h>
+#include "numa_internal.h"
+
struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
EXPORT_SYMBOL(node_data);
-struct memnode memnode;
+nodemask_t numa_nodes_parsed __initdata;
-s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
- [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
-};
+struct memnode memnode;
static unsigned long __initdata nodemap_addr;
static unsigned long __initdata nodemap_size;
-/*
- * Map cpu index to node index
- */
-DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
-EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
+static struct numa_meminfo numa_meminfo __initdata;
+
+static int numa_distance_cnt;
+static u8 *numa_distance;
/*
* Given a shift value, try to populate memnodemap[]
@@ -46,16 +45,15 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
* 0 if memnodmap[] too small (of shift too small)
* -1 if node overlap or lost ram (shift too big)
*/
-static int __init populate_memnodemap(const struct bootnode *nodes,
- int numnodes, int shift, int *nodeids)
+static int __init populate_memnodemap(const struct numa_meminfo *mi, int shift)
{
unsigned long addr, end;
int i, res = -1;
memset(memnodemap, 0xff, sizeof(s16)*memnodemapsize);
- for (i = 0; i < numnodes; i++) {
- addr = nodes[i].start;
- end = nodes[i].end;
+ for (i = 0; i < mi->nr_blks; i++) {
+ addr = mi->blk[i].start;
+ end = mi->blk[i].end;
if (addr >= end)
continue;
if ((end >> shift) >= memnodemapsize)
@@ -63,12 +61,7 @@ static int __init populate_memnodemap(const struct bootnode *nodes,
do {
if (memnodemap[addr >> shift] != NUMA_NO_NODE)
return -1;
-
- if (!nodeids)
- memnodemap[addr >> shift] = i;
- else
- memnodemap[addr >> shift] = nodeids[i];
-
+ memnodemap[addr >> shift] = mi->blk[i].nid;
addr += (1UL << shift);
} while (addr < end);
res = 1;
@@ -86,7 +79,7 @@ static int __init allocate_cachealigned_memnodemap(void)
addr = 0x8000;
nodemap_size = roundup(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES);
- nodemap_addr = memblock_find_in_range(addr, max_pfn<<PAGE_SHIFT,
+ nodemap_addr = memblock_find_in_range(addr, get_max_mapped(),
nodemap_size, L1_CACHE_BYTES);
if (nodemap_addr == MEMBLOCK_ERROR) {
printk(KERN_ERR
@@ -106,16 +99,15 @@ static int __init allocate_cachealigned_memnodemap(void)
* The LSB of all start and end addresses in the node map is the value of the
* maximum possible shift.
*/
-static int __init extract_lsb_from_nodes(const struct bootnode *nodes,
- int numnodes)
+static int __init extract_lsb_from_nodes(const struct numa_meminfo *mi)
{
int i, nodes_used = 0;
unsigned long start, end;
unsigned long bitfield = 0, memtop = 0;
- for (i = 0; i < numnodes; i++) {
- start = nodes[i].start;
- end = nodes[i].end;
+ for (i = 0; i < mi->nr_blks; i++) {
+ start = mi->blk[i].start;
+ end = mi->blk[i].end;
if (start >= end)
continue;
bitfield |= start;
@@ -131,18 +123,17 @@ static int __init extract_lsb_from_nodes(const struct bootnode *nodes,
return i;
}
-int __init compute_hash_shift(struct bootnode *nodes, int numnodes,
- int *nodeids)
+static int __init compute_hash_shift(const struct numa_meminfo *mi)
{
int shift;
- shift = extract_lsb_from_nodes(nodes, numnodes);
+ shift = extract_lsb_from_nodes(mi);
if (allocate_cachealigned_memnodemap())
return -1;
printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n",
shift);
- if (populate_memnodemap(nodes, numnodes, shift, nodeids) != 1) {
+ if (populate_memnodemap(mi, shift) != 1) {
printk(KERN_INFO "Your memory is not aligned you need to "
"rebuild your kernel with a bigger NODEMAPSIZE "
"shift=%d\n", shift);
@@ -188,6 +179,63 @@ static void * __init early_node_mem(int nodeid, unsigned long start,
return NULL;
}
+static int __init numa_add_memblk_to(int nid, u64 start, u64 end,
+ struct numa_meminfo *mi)
+{
+ /* ignore zero length blks */
+ if (start == end)
+ return 0;
+
+ /* whine about and ignore invalid blks */
+ if (start > end || nid < 0 || nid >= MAX_NUMNODES) {
+ pr_warning("NUMA: Warning: invalid memblk node %d (%Lx-%Lx)\n",
+ nid, start, end);
+ return 0;
+ }
+
+ if (mi->nr_blks >= NR_NODE_MEMBLKS) {
+ pr_err("NUMA: too many memblk ranges\n");
+ return -EINVAL;
+ }
+
+ mi->blk[mi->nr_blks].start = start;
+ mi->blk[mi->nr_blks].end = end;
+ mi->blk[mi->nr_blks].nid = nid;
+ mi->nr_blks++;
+ return 0;
+}
+
+/**
+ * numa_remove_memblk_from - Remove one numa_memblk from a numa_meminfo
+ * @idx: Index of memblk to remove
+ * @mi: numa_meminfo to remove memblk from
+ *
+ * Remove @idx'th numa_memblk from @mi by shifting @mi->blk[] and
+ * decrementing @mi->nr_blks.
+ */
+void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi)
+{
+ mi->nr_blks--;
+ memmove(&mi->blk[idx], &mi->blk[idx + 1],
+ (mi->nr_blks - idx) * sizeof(mi->blk[0]));
+}
+
+/**
+ * numa_add_memblk - Add one numa_memblk to numa_meminfo
+ * @nid: NUMA node ID of the new memblk
+ * @start: Start address of the new memblk
+ * @end: End address of the new memblk
+ *
+ * Add a new memblk to the default numa_meminfo.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+int __init numa_add_memblk(int nid, u64 start, u64 end)
+{
+ return numa_add_memblk_to(nid, start, end, &numa_meminfo);
+}
+
/* Initialize bootmem allocator for a node */
void __init
setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
@@ -234,696 +282,365 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
node_set_online(nodeid);
}
-/*
- * There are unfortunately some poorly designed mainboards around that
- * only connect memory to a single CPU. This breaks the 1:1 cpu->node
- * mapping. To avoid this fill in the mapping for all possible CPUs,
- * as the number of CPUs is not known yet. We round robin the existing
- * nodes.
+/**
+ * numa_cleanup_meminfo - Cleanup a numa_meminfo
+ * @mi: numa_meminfo to clean up
+ *
+ * Sanitize @mi by merging and removing unncessary memblks. Also check for
+ * conflicts and clear unused memblks.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
*/
-void __init numa_init_array(void)
+int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
{
- int rr, i;
-
- rr = first_node(node_online_map);
- for (i = 0; i < nr_cpu_ids; i++) {
- if (early_cpu_to_node(i) != NUMA_NO_NODE)
- continue;
- numa_set_node(i, rr);
- rr = next_node(rr, node_online_map);
- if (rr == MAX_NUMNODES)
- rr = first_node(node_online_map);
- }
-}
-
-#ifdef CONFIG_NUMA_EMU
-/* Numa emulation */
-static struct bootnode nodes[MAX_NUMNODES] __initdata;
-static struct bootnode physnodes[MAX_NUMNODES] __cpuinitdata;
-static char *cmdline __initdata;
+ const u64 low = 0;
+ const u64 high = (u64)max_pfn << PAGE_SHIFT;
+ int i, j, k;
-void __init numa_emu_cmdline(char *str)
-{
- cmdline = str;
-}
+ for (i = 0; i < mi->nr_blks; i++) {
+ struct numa_memblk *bi = &mi->blk[i];
-static int __init setup_physnodes(unsigned long start, unsigned long end,
- int acpi, int amd)
-{
- int ret = 0;
- int i;
+ /* make sure all blocks are inside the limits */
+ bi->start = max(bi->start, low);
+ bi->end = min(bi->end, high);
- memset(physnodes, 0, sizeof(physnodes));
-#ifdef CONFIG_ACPI_NUMA
- if (acpi)
- acpi_get_nodes(physnodes, start, end);
-#endif
-#ifdef CONFIG_AMD_NUMA
- if (amd)
- amd_get_nodes(physnodes);
-#endif
- /*
- * Basic sanity checking on the physical node map: there may be errors
- * if the SRAT or AMD code incorrectly reported the topology or the mem=
- * kernel parameter is used.
- */
- for (i = 0; i < MAX_NUMNODES; i++) {
- if (physnodes[i].start == physnodes[i].end)
- continue;
- if (physnodes[i].start > end) {
- physnodes[i].end = physnodes[i].start;
- continue;
- }
- if (physnodes[i].end < start) {
- physnodes[i].start = physnodes[i].end;
+ /* and there's no empty block */
+ if (bi->start == bi->end) {
+ numa_remove_memblk_from(i--, mi);
continue;
}
- if (physnodes[i].start < start)
- physnodes[i].start = start;
- if (physnodes[i].end > end)
- physnodes[i].end = end;
- ret++;
- }
-
- /*
- * If no physical topology was detected, a single node is faked to cover
- * the entire address space.
- */
- if (!ret) {
- physnodes[ret].start = start;
- physnodes[ret].end = end;
- ret = 1;
- }
- return ret;
-}
-
-static void __init fake_physnodes(int acpi, int amd, int nr_nodes)
-{
- int i;
-
- BUG_ON(acpi && amd);
-#ifdef CONFIG_ACPI_NUMA
- if (acpi)
- acpi_fake_nodes(nodes, nr_nodes);
-#endif
-#ifdef CONFIG_AMD_NUMA
- if (amd)
- amd_fake_nodes(nodes, nr_nodes);
-#endif
- if (!acpi && !amd)
- for (i = 0; i < nr_cpu_ids; i++)
- numa_set_node(i, 0);
-}
-
-/*
- * Setups up nid to range from addr to addr + size. If the end
- * boundary is greater than max_addr, then max_addr is used instead.
- * The return value is 0 if there is additional memory left for
- * allocation past addr and -1 otherwise. addr is adjusted to be at
- * the end of the node.
- */
-static int __init setup_node_range(int nid, u64 *addr, u64 size, u64 max_addr)
-{
- int ret = 0;
- nodes[nid].start = *addr;
- *addr += size;
- if (*addr >= max_addr) {
- *addr = max_addr;
- ret = -1;
- }
- nodes[nid].end = *addr;
- node_set(nid, node_possible_map);
- printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", nid,
- nodes[nid].start, nodes[nid].end,
- (nodes[nid].end - nodes[nid].start) >> 20);
- return ret;
-}
-
-/*
- * Sets up nr_nodes fake nodes interleaved over physical nodes ranging from addr
- * to max_addr. The return value is the number of nodes allocated.
- */
-static int __init split_nodes_interleave(u64 addr, u64 max_addr, int nr_nodes)
-{
- nodemask_t physnode_mask = NODE_MASK_NONE;
- u64 size;
- int big;
- int ret = 0;
- int i;
-
- if (nr_nodes <= 0)
- return -1;
- if (nr_nodes > MAX_NUMNODES) {
- pr_info("numa=fake=%d too large, reducing to %d\n",
- nr_nodes, MAX_NUMNODES);
- nr_nodes = MAX_NUMNODES;
- }
- size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) / nr_nodes;
- /*
- * Calculate the number of big nodes that can be allocated as a result
- * of consolidating the remainder.
- */
- big = ((size & ~FAKE_NODE_MIN_HASH_MASK) * nr_nodes) /
- FAKE_NODE_MIN_SIZE;
-
- size &= FAKE_NODE_MIN_HASH_MASK;
- if (!size) {
- pr_err("Not enough memory for each node. "
- "NUMA emulation disabled.\n");
- return -1;
- }
-
- for (i = 0; i < MAX_NUMNODES; i++)
- if (physnodes[i].start != physnodes[i].end)
- node_set(i, physnode_mask);
-
- /*
- * Continue to fill physical nodes with fake nodes until there is no
- * memory left on any of them.
- */
- while (nodes_weight(physnode_mask)) {
- for_each_node_mask(i, physnode_mask) {
- u64 end = physnodes[i].start + size;
- u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN);
-
- if (ret < big)
- end += FAKE_NODE_MIN_SIZE;
+ for (j = i + 1; j < mi->nr_blks; j++) {
+ struct numa_memblk *bj = &mi->blk[j];
+ unsigned long start, end;
/*
- * Continue to add memory to this fake node if its
- * non-reserved memory is less than the per-node size.
+ * See whether there are overlapping blocks. Whine
+ * about but allow overlaps of the same nid. They
+ * will be merged below.
*/
- while (end - physnodes[i].start -
- memblock_x86_hole_size(physnodes[i].start, end) < size) {
- end += FAKE_NODE_MIN_SIZE;
- if (end > physnodes[i].end) {
- end = physnodes[i].end;
- break;
+ if (bi->end > bj->start && bi->start < bj->end) {
+ if (bi->nid != bj->nid) {
+ pr_err("NUMA: node %d (%Lx-%Lx) overlaps with node %d (%Lx-%Lx)\n",
+ bi->nid, bi->start, bi->end,
+ bj->nid, bj->start, bj->end);
+ return -EINVAL;
}
+ pr_warning("NUMA: Warning: node %d (%Lx-%Lx) overlaps with itself (%Lx-%Lx)\n",
+ bi->nid, bi->start, bi->end,
+ bj->start, bj->end);
}
/*
- * If there won't be at least FAKE_NODE_MIN_SIZE of
- * non-reserved memory in ZONE_DMA32 for the next node,
- * this one must extend to the boundary.
- */
- if (end < dma32_end && dma32_end - end -
- memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
- end = dma32_end;
-
- /*
- * If there won't be enough non-reserved memory for the
- * next node, this one must extend to the end of the
- * physical node.
- */
- if (physnodes[i].end - end -
- memblock_x86_hole_size(end, physnodes[i].end) < size)
- end = physnodes[i].end;
-
- /*
- * Avoid allocating more nodes than requested, which can
- * happen as a result of rounding down each node's size
- * to FAKE_NODE_MIN_SIZE.
+ * Join together blocks on the same node, holes
+ * between which don't overlap with memory on other
+ * nodes.
*/
- if (nodes_weight(physnode_mask) + ret >= nr_nodes)
- end = physnodes[i].end;
-
- if (setup_node_range(ret++, &physnodes[i].start,
- end - physnodes[i].start,
- physnodes[i].end) < 0)
- node_clear(i, physnode_mask);
+ if (bi->nid != bj->nid)
+ continue;
+ start = max(min(bi->start, bj->start), low);
+ end = min(max(bi->end, bj->end), high);
+ for (k = 0; k < mi->nr_blks; k++) {
+ struct numa_memblk *bk = &mi->blk[k];
+
+ if (bi->nid == bk->nid)
+ continue;
+ if (start < bk->end && end > bk->start)
+ break;
+ }
+ if (k < mi->nr_blks)
+ continue;
+ printk(KERN_INFO "NUMA: Node %d [%Lx,%Lx) + [%Lx,%Lx) -> [%lx,%lx)\n",
+ bi->nid, bi->start, bi->end, bj->start, bj->end,
+ start, end);
+ bi->start = start;
+ bi->end = end;
+ numa_remove_memblk_from(j--, mi);
}
}
- return ret;
-}
-
-/*
- * Returns the end address of a node so that there is at least `size' amount of
- * non-reserved memory or `max_addr' is reached.
- */
-static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size)
-{
- u64 end = start + size;
- while (end - start - memblock_x86_hole_size(start, end) < size) {
- end += FAKE_NODE_MIN_SIZE;
- if (end > max_addr) {
- end = max_addr;
- break;
- }
+ for (i = mi->nr_blks; i < ARRAY_SIZE(mi->blk); i++) {
+ mi->blk[i].start = mi->blk[i].end = 0;
+ mi->blk[i].nid = NUMA_NO_NODE;
}
- return end;
+
+ return 0;
}
/*
- * Sets up fake nodes of `size' interleaved over physical nodes ranging from
- * `addr' to `max_addr'. The return value is the number of nodes allocated.
+ * Set nodes, which have memory in @mi, in *@nodemask.
*/
-static int __init split_nodes_size_interleave(u64 addr, u64 max_addr, u64 size)
+static void __init numa_nodemask_from_meminfo(nodemask_t *nodemask,
+ const struct numa_meminfo *mi)
{
- nodemask_t physnode_mask = NODE_MASK_NONE;
- u64 min_size;
- int ret = 0;
int i;
- if (!size)
- return -1;
- /*
- * The limit on emulated nodes is MAX_NUMNODES, so the size per node is
- * increased accordingly if the requested size is too small. This
- * creates a uniform distribution of node sizes across the entire
- * machine (but not necessarily over physical nodes).
- */
- min_size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) /
- MAX_NUMNODES;
- min_size = max(min_size, FAKE_NODE_MIN_SIZE);
- if ((min_size & FAKE_NODE_MIN_HASH_MASK) < min_size)
- min_size = (min_size + FAKE_NODE_MIN_SIZE) &
- FAKE_NODE_MIN_HASH_MASK;
- if (size < min_size) {
- pr_err("Fake node size %LuMB too small, increasing to %LuMB\n",
- size >> 20, min_size >> 20);
- size = min_size;
- }
- size &= FAKE_NODE_MIN_HASH_MASK;
-
- for (i = 0; i < MAX_NUMNODES; i++)
- if (physnodes[i].start != physnodes[i].end)
- node_set(i, physnode_mask);
- /*
- * Fill physical nodes with fake nodes of size until there is no memory
- * left on any of them.
- */
- while (nodes_weight(physnode_mask)) {
- for_each_node_mask(i, physnode_mask) {
- u64 dma32_end = MAX_DMA32_PFN << PAGE_SHIFT;
- u64 end;
-
- end = find_end_of_node(physnodes[i].start,
- physnodes[i].end, size);
- /*
- * If there won't be at least FAKE_NODE_MIN_SIZE of
- * non-reserved memory in ZONE_DMA32 for the next node,
- * this one must extend to the boundary.
- */
- if (end < dma32_end && dma32_end - end -
- memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
- end = dma32_end;
+ for (i = 0; i < ARRAY_SIZE(mi->blk); i++)
+ if (mi->blk[i].start != mi->blk[i].end &&
+ mi->blk[i].nid != NUMA_NO_NODE)
+ node_set(mi->blk[i].nid, *nodemask);
+}
- /*
- * If there won't be enough non-reserved memory for the
- * next node, this one must extend to the end of the
- * physical node.
- */
- if (physnodes[i].end - end -
- memblock_x86_hole_size(end, physnodes[i].end) < size)
- end = physnodes[i].end;
+/**
+ * numa_reset_distance - Reset NUMA distance table
+ *
+ * The current table is freed. The next numa_set_distance() call will
+ * create a new one.
+ */
+void __init numa_reset_distance(void)
+{
+ size_t size;
- /*
- * Setup the fake node that will be allocated as bootmem
- * later. If setup_node_range() returns non-zero, there
- * is no more memory available on this physical node.
- */
- if (setup_node_range(ret++, &physnodes[i].start,
- end - physnodes[i].start,
- physnodes[i].end) < 0)
- node_clear(i, physnode_mask);
- }
+ if (numa_distance_cnt) {
+ size = numa_distance_cnt * sizeof(numa_distance[0]);
+ memblock_x86_free_range(__pa(numa_distance),
+ __pa(numa_distance) + size);
+ numa_distance_cnt = 0;
}
- return ret;
+ numa_distance = NULL;
}
-/*
- * Sets up the system RAM area from start_pfn to last_pfn according to the
- * numa=fake command-line option.
- */
-static int __init numa_emulation(unsigned long start_pfn,
- unsigned long last_pfn, int acpi, int amd)
+static int __init numa_alloc_distance(void)
{
- u64 addr = start_pfn << PAGE_SHIFT;
- u64 max_addr = last_pfn << PAGE_SHIFT;
- int num_nodes;
- int i;
+ nodemask_t nodes_parsed;
+ size_t size;
+ int i, j, cnt = 0;
+ u64 phys;
- /*
- * If the numa=fake command-line contains a 'M' or 'G', it represents
- * the fixed node size. Otherwise, if it is just a single number N,
- * split the system RAM into N fake nodes.
- */
- if (strchr(cmdline, 'M') || strchr(cmdline, 'G')) {
- u64 size;
+ /* size the new table and allocate it */
+ nodes_parsed = numa_nodes_parsed;
+ numa_nodemask_from_meminfo(&nodes_parsed, &numa_meminfo);
- size = memparse(cmdline, &cmdline);
- num_nodes = split_nodes_size_interleave(addr, max_addr, size);
- } else {
- unsigned long n;
+ for_each_node_mask(i, nodes_parsed)
+ cnt = i;
+ cnt++;
+ size = cnt * cnt * sizeof(numa_distance[0]);
- n = simple_strtoul(cmdline, NULL, 0);
- num_nodes = split_nodes_interleave(addr, max_addr, n);
+ phys = memblock_find_in_range(0, (u64)max_pfn_mapped << PAGE_SHIFT,
+ size, PAGE_SIZE);
+ if (phys == MEMBLOCK_ERROR) {
+ pr_warning("NUMA: Warning: can't allocate distance table!\n");
+ /* don't retry until explicitly reset */
+ numa_distance = (void *)1LU;
+ return -ENOMEM;
}
+ memblock_x86_reserve_range(phys, phys + size, "NUMA DIST");
- if (num_nodes < 0)
- return num_nodes;
- memnode_shift = compute_hash_shift(nodes, num_nodes, NULL);
- if (memnode_shift < 0) {
- memnode_shift = 0;
- printk(KERN_ERR "No NUMA hash function found. NUMA emulation "
- "disabled.\n");
- return -1;
- }
+ numa_distance = __va(phys);
+ numa_distance_cnt = cnt;
+
+ /* fill with the default distances */
+ for (i = 0; i < cnt; i++)
+ for (j = 0; j < cnt; j++)
+ numa_distance[i * cnt + j] = i == j ?
+ LOCAL_DISTANCE : REMOTE_DISTANCE;
+ printk(KERN_DEBUG "NUMA: Initialized distance table, cnt=%d\n", cnt);
- /*
- * We need to vacate all active ranges that may have been registered for
- * the e820 memory map.
- */
- remove_all_active_ranges();
- for_each_node_mask(i, node_possible_map) {
- memblock_x86_register_active_regions(i, nodes[i].start >> PAGE_SHIFT,
- nodes[i].end >> PAGE_SHIFT);
- setup_node_bootmem(i, nodes[i].start, nodes[i].end);
- }
- setup_physnodes(addr, max_addr, acpi, amd);
- fake_physnodes(acpi, amd, num_nodes);
- numa_init_array();
return 0;
}
-#endif /* CONFIG_NUMA_EMU */
-void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn,
- int acpi, int amd)
+/**
+ * numa_set_distance - Set NUMA distance from one NUMA to another
+ * @from: the 'from' node to set distance
+ * @to: the 'to' node to set distance
+ * @distance: NUMA distance
+ *
+ * Set the distance from node @from to @to to @distance. If distance table
+ * doesn't exist, one which is large enough to accomodate all the currently
+ * known nodes will be created.
+ */
+void __init numa_set_distance(int from, int to, int distance)
{
- int i;
-
- nodes_clear(node_possible_map);
- nodes_clear(node_online_map);
-
-#ifdef CONFIG_NUMA_EMU
- setup_physnodes(start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT,
- acpi, amd);
- if (cmdline && !numa_emulation(start_pfn, last_pfn, acpi, amd))
+ if (!numa_distance && numa_alloc_distance() < 0)
return;
- setup_physnodes(start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT,
- acpi, amd);
- nodes_clear(node_possible_map);
- nodes_clear(node_online_map);
-#endif
-#ifdef CONFIG_ACPI_NUMA
- if (!numa_off && acpi && !acpi_scan_nodes(start_pfn << PAGE_SHIFT,
- last_pfn << PAGE_SHIFT))
+ if (from >= numa_distance_cnt || to >= numa_distance_cnt) {
+ printk_once(KERN_DEBUG "NUMA: Debug: distance out of bound, from=%d to=%d distance=%d\n",
+ from, to, distance);
return;
- nodes_clear(node_possible_map);
- nodes_clear(node_online_map);
-#endif
+ }
-#ifdef CONFIG_AMD_NUMA
- if (!numa_off && amd && !amd_scan_nodes())
+ if ((u8)distance != distance ||
+ (from == to && distance != LOCAL_DISTANCE)) {
+ pr_warn_once("NUMA: Warning: invalid distance parameter, from=%d to=%d distance=%d\n",
+ from, to, distance);
return;
- nodes_clear(node_possible_map);
- nodes_clear(node_online_map);
-#endif
- printk(KERN_INFO "%s\n",
- numa_off ? "NUMA turned off" : "No NUMA configuration found");
+ }
- printk(KERN_INFO "Faking a node at %016lx-%016lx\n",
- start_pfn << PAGE_SHIFT,
- last_pfn << PAGE_SHIFT);
- /* setup dummy node covering all memory */
- memnode_shift = 63;
- memnodemap = memnode.embedded_map;
- memnodemap[0] = 0;
- node_set_online(0);
- node_set(0, node_possible_map);
- for (i = 0; i < nr_cpu_ids; i++)
- numa_set_node(i, 0);
- memblock_x86_register_active_regions(0, start_pfn, last_pfn);
- setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT);
+ numa_distance[from * numa_distance_cnt + to] = distance;
}
-unsigned long __init numa_free_all_bootmem(void)
+int __node_distance(int from, int to)
{
- unsigned long pages = 0;
- int i;
-
- for_each_online_node(i)
- pages += free_all_bootmem_node(NODE_DATA(i));
-
- pages += free_all_memory_core_early(MAX_NUMNODES);
-
- return pages;
+ if (from >= numa_distance_cnt || to >= numa_distance_cnt)
+ return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE;
+ return numa_distance[from * numa_distance_cnt + to];
}
+EXPORT_SYMBOL(__node_distance);
-#ifdef CONFIG_NUMA
-
-static __init int find_near_online_node(int node)
+/*
+ * Sanity check to catch more bad NUMA configurations (they are amazingly
+ * common). Make sure the nodes cover all memory.
+ */
+static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
{
- int n, val;
- int min_val = INT_MAX;
- int best_node = -1;
-
- for_each_online_node(n) {
- val = node_distance(node, n);
+ unsigned long numaram, e820ram;
+ int i;
- if (val < min_val) {
- min_val = val;
- best_node = n;
- }
+ numaram = 0;
+ for (i = 0; i < mi->nr_blks; i++) {
+ unsigned long s = mi->blk[i].start >> PAGE_SHIFT;
+ unsigned long e = mi->blk[i].end >> PAGE_SHIFT;
+ numaram += e - s;
+ numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e);
+ if ((long)numaram < 0)
+ numaram = 0;
}
- return best_node;
+ e820ram = max_pfn - (memblock_x86_hole_size(0,
+ max_pfn << PAGE_SHIFT) >> PAGE_SHIFT);
+ /* We seem to lose 3 pages somewhere. Allow 1M of slack. */
+ if ((long)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) {
+ printk(KERN_ERR "NUMA: nodes only cover %luMB of your %luMB e820 RAM. Not used.\n",
+ (numaram << PAGE_SHIFT) >> 20,
+ (e820ram << PAGE_SHIFT) >> 20);
+ return false;
+ }
+ return true;
}
-/*
- * Setup early cpu_to_node.
- *
- * Populate cpu_to_node[] only if x86_cpu_to_apicid[],
- * and apicid_to_node[] tables have valid entries for a CPU.
- * This means we skip cpu_to_node[] initialisation for NUMA
- * emulation and faking node case (when running a kernel compiled
- * for NUMA on a non NUMA box), which is OK as cpu_to_node[]
- * is already initialized in a round robin manner at numa_init_array,
- * prior to this call, and this initialization is good enough
- * for the fake NUMA cases.
- *
- * Called before the per_cpu areas are setup.
- */
-void __init init_cpu_to_node(void)
+static int __init numa_register_memblks(struct numa_meminfo *mi)
{
- int cpu;
- u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
+ int i, nid;
- BUG_ON(cpu_to_apicid == NULL);
+ /* Account for nodes with cpus and no memory */
+ node_possible_map = numa_nodes_parsed;
+ numa_nodemask_from_meminfo(&node_possible_map, mi);
+ if (WARN_ON(nodes_empty(node_possible_map)))
+ return -EINVAL;
- for_each_possible_cpu(cpu) {
- int node;
- u16 apicid = cpu_to_apicid[cpu];
-
- if (apicid == BAD_APICID)
- continue;
- node = apicid_to_node[apicid];
- if (node == NUMA_NO_NODE)
- continue;
- if (!node_online(node))
- node = find_near_online_node(node);
- numa_set_node(cpu, node);
+ memnode_shift = compute_hash_shift(mi);
+ if (memnode_shift < 0) {
+ printk(KERN_ERR "NUMA: No NUMA node hash function found. Contact maintainer\n");
+ return -EINVAL;
}
-}
-#endif
+ for (i = 0; i < mi->nr_blks; i++)
+ memblock_x86_register_active_regions(mi->blk[i].nid,
+ mi->blk[i].start >> PAGE_SHIFT,
+ mi->blk[i].end >> PAGE_SHIFT);
-void __cpuinit numa_set_node(int cpu, int node)
-{
- int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
+ /* for out of order entries */
+ sort_node_map();
+ if (!numa_meminfo_cover_memory(mi))
+ return -EINVAL;
- /* early setting, no percpu area yet */
- if (cpu_to_node_map) {
- cpu_to_node_map[cpu] = node;
- return;
- }
+ init_memory_mapping_high();
-#ifdef CONFIG_DEBUG_PER_CPU_MAPS
- if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) {
- printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu);
- dump_stack();
- return;
+ /* Finally register nodes. */
+ for_each_node_mask(nid, node_possible_map) {
+ u64 start = (u64)max_pfn << PAGE_SHIFT;
+ u64 end = 0;
+
+ for (i = 0; i < mi->nr_blks; i++) {
+ if (nid != mi->blk[i].nid)
+ continue;
+ start = min(mi->blk[i].start, start);
+ end = max(mi->blk[i].end, end);
+ }
+
+ if (start < end)
+ setup_node_bootmem(nid, start, end);
}
-#endif
- per_cpu(x86_cpu_to_node_map, cpu) = node;
- if (node != NUMA_NO_NODE)
- set_cpu_numa_node(cpu, node);
+ return 0;
}
-void __cpuinit numa_clear_node(int cpu)
+static int __init dummy_numa_init(void)
{
- numa_set_node(cpu, NUMA_NO_NODE);
-}
+ printk(KERN_INFO "%s\n",
+ numa_off ? "NUMA turned off" : "No NUMA configuration found");
+ printk(KERN_INFO "Faking a node at %016lx-%016lx\n",
+ 0LU, max_pfn << PAGE_SHIFT);
-#ifndef CONFIG_DEBUG_PER_CPU_MAPS
+ node_set(0, numa_nodes_parsed);
+ numa_add_memblk(0, 0, (u64)max_pfn << PAGE_SHIFT);
-#ifndef CONFIG_NUMA_EMU
-void __cpuinit numa_add_cpu(int cpu)
-{
- cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
+ return 0;
}
-void __cpuinit numa_remove_cpu(int cpu)
-{
- cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
-}
-#else
-void __cpuinit numa_add_cpu(int cpu)
+void __init initmem_init(void)
{
- unsigned long addr;
- u16 apicid;
- int physnid;
- int nid = NUMA_NO_NODE;
+ int (*numa_init[])(void) = { [2] = dummy_numa_init };
+ int i, j;
- apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
- if (apicid != BAD_APICID)
- nid = apicid_to_node[apicid];
- if (nid == NUMA_NO_NODE)
- nid = early_cpu_to_node(cpu);
- BUG_ON(nid == NUMA_NO_NODE || !node_online(nid));
-
- /*
- * Use the starting address of the emulated node to find which physical
- * node it is allocated on.
- */
- addr = node_start_pfn(nid) << PAGE_SHIFT;
- for (physnid = 0; physnid < MAX_NUMNODES; physnid++)
- if (addr >= physnodes[physnid].start &&
- addr < physnodes[physnid].end)
- break;
-
- /*
- * Map the cpu to each emulated node that is allocated on the physical
- * node of the cpu's apic id.
- */
- for_each_online_node(nid) {
- addr = node_start_pfn(nid) << PAGE_SHIFT;
- if (addr >= physnodes[physnid].start &&
- addr < physnodes[physnid].end)
- cpumask_set_cpu(cpu, node_to_cpumask_map[nid]);
+ if (!numa_off) {
+#ifdef CONFIG_ACPI_NUMA
+ numa_init[0] = x86_acpi_numa_init;
+#endif
+#ifdef CONFIG_AMD_NUMA
+ numa_init[1] = amd_numa_init;
+#endif
}
-}
-void __cpuinit numa_remove_cpu(int cpu)
-{
- int i;
+ for (i = 0; i < ARRAY_SIZE(numa_init); i++) {
+ if (!numa_init[i])
+ continue;
- for_each_online_node(i)
- cpumask_clear_cpu(cpu, node_to_cpumask_map[i]);
-}
-#endif /* !CONFIG_NUMA_EMU */
+ for (j = 0; j < MAX_LOCAL_APIC; j++)
+ set_apicid_to_node(j, NUMA_NO_NODE);
-#else /* CONFIG_DEBUG_PER_CPU_MAPS */
-static struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable)
-{
- int node = early_cpu_to_node(cpu);
- struct cpumask *mask;
- char buf[64];
-
- mask = node_to_cpumask_map[node];
- if (!mask) {
- pr_err("node_to_cpumask_map[%i] NULL\n", node);
- dump_stack();
- return NULL;
- }
+ nodes_clear(numa_nodes_parsed);
+ nodes_clear(node_possible_map);
+ nodes_clear(node_online_map);
+ memset(&numa_meminfo, 0, sizeof(numa_meminfo));
+ remove_all_active_ranges();
+ numa_reset_distance();
- cpulist_scnprintf(buf, sizeof(buf), mask);
- printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
- enable ? "numa_add_cpu" : "numa_remove_cpu",
- cpu, node, buf);
- return mask;
-}
+ if (numa_init[i]() < 0)
+ continue;
-/*
- * --------- debug versions of the numa functions ---------
- */
-#ifndef CONFIG_NUMA_EMU
-static void __cpuinit numa_set_cpumask(int cpu, int enable)
-{
- struct cpumask *mask;
+ if (numa_cleanup_meminfo(&numa_meminfo) < 0)
+ continue;
- mask = debug_cpumask_set_cpu(cpu, enable);
- if (!mask)
- return;
+ numa_emulation(&numa_meminfo, numa_distance_cnt);
- if (enable)
- cpumask_set_cpu(cpu, mask);
- else
- cpumask_clear_cpu(cpu, mask);
-}
-#else
-static void __cpuinit numa_set_cpumask(int cpu, int enable)
-{
- int node = early_cpu_to_node(cpu);
- struct cpumask *mask;
- int i;
+ if (numa_register_memblks(&numa_meminfo) < 0)
+ continue;
- for_each_online_node(i) {
- unsigned long addr;
+ for (j = 0; j < nr_cpu_ids; j++) {
+ int nid = early_cpu_to_node(j);
- addr = node_start_pfn(i) << PAGE_SHIFT;
- if (addr < physnodes[node].start ||
- addr >= physnodes[node].end)
- continue;
- mask = debug_cpumask_set_cpu(cpu, enable);
- if (!mask)
- return;
-
- if (enable)
- cpumask_set_cpu(cpu, mask);
- else
- cpumask_clear_cpu(cpu, mask);
+ if (nid == NUMA_NO_NODE)
+ continue;
+ if (!node_online(nid))
+ numa_clear_node(j);
+ }
+ numa_init_array();
+ return;
}
+ BUG();
}
-#endif /* CONFIG_NUMA_EMU */
-void __cpuinit numa_add_cpu(int cpu)
+unsigned long __init numa_free_all_bootmem(void)
{
- numa_set_cpumask(cpu, 1);
-}
+ unsigned long pages = 0;
+ int i;
-void __cpuinit numa_remove_cpu(int cpu)
-{
- numa_set_cpumask(cpu, 0);
-}
+ for_each_online_node(i)
+ pages += free_all_bootmem_node(NODE_DATA(i));
-int __cpu_to_node(int cpu)
-{
- if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
- printk(KERN_WARNING
- "cpu_to_node(%d): usage too early!\n", cpu);
- dump_stack();
- return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
- }
- return per_cpu(x86_cpu_to_node_map, cpu);
-}
-EXPORT_SYMBOL(__cpu_to_node);
+ pages += free_all_memory_core_early(MAX_NUMNODES);
-/*
- * Same function as cpu_to_node() but used if called before the
- * per_cpu areas are setup.
- */
-int early_cpu_to_node(int cpu)
-{
- if (early_per_cpu_ptr(x86_cpu_to_node_map))
- return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
-
- if (!cpu_possible(cpu)) {
- printk(KERN_WARNING
- "early_cpu_to_node(%d): no per_cpu area!\n", cpu);
- dump_stack();
- return NUMA_NO_NODE;
- }
- return per_cpu(x86_cpu_to_node_map, cpu);
+ return pages;
}
-/*
- * --------- end of debug versions of the numa functions ---------
- */
+int __cpuinit numa_cpu_node(int cpu)
+{
+ int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
-#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
+ if (apicid != BAD_APICID)
+ return __apicid_to_node[apicid];
+ return NUMA_NO_NODE;
+}
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
new file mode 100644
index 000000000000..607a2e8bc87a
--- /dev/null
+++ b/arch/x86/mm/numa_emulation.c
@@ -0,0 +1,475 @@
+/*
+ * NUMA emulation
+ */
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/topology.h>
+#include <linux/memblock.h>
+#include <asm/dma.h>
+
+#include "numa_internal.h"
+
+static int emu_nid_to_phys[MAX_NUMNODES] __cpuinitdata;
+static char *emu_cmdline __initdata;
+
+void __init numa_emu_cmdline(char *str)
+{
+ emu_cmdline = str;
+}
+
+static int __init emu_find_memblk_by_nid(int nid, const struct numa_meminfo *mi)
+{
+ int i;
+
+ for (i = 0; i < mi->nr_blks; i++)
+ if (mi->blk[i].nid == nid)
+ return i;
+ return -ENOENT;
+}
+
+/*
+ * Sets up nid to range from @start to @end. The return value is -errno if
+ * something went wrong, 0 otherwise.
+ */
+static int __init emu_setup_memblk(struct numa_meminfo *ei,
+ struct numa_meminfo *pi,
+ int nid, int phys_blk, u64 size)
+{
+ struct numa_memblk *eb = &ei->blk[ei->nr_blks];
+ struct numa_memblk *pb = &pi->blk[phys_blk];
+
+ if (ei->nr_blks >= NR_NODE_MEMBLKS) {
+ pr_err("NUMA: Too many emulated memblks, failing emulation\n");
+ return -EINVAL;
+ }
+
+ ei->nr_blks++;
+ eb->start = pb->start;
+ eb->end = pb->start + size;
+ eb->nid = nid;
+
+ if (emu_nid_to_phys[nid] == NUMA_NO_NODE)
+ emu_nid_to_phys[nid] = pb->nid;
+
+ pb->start += size;
+ if (pb->start >= pb->end) {
+ WARN_ON_ONCE(pb->start > pb->end);
+ numa_remove_memblk_from(phys_blk, pi);
+ }
+
+ printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", nid,
+ eb->start, eb->end, (eb->end - eb->start) >> 20);
+ return 0;
+}
+
+/*
+ * Sets up nr_nodes fake nodes interleaved over physical nodes ranging from addr
+ * to max_addr. The return value is the number of nodes allocated.
+ */
+static int __init split_nodes_interleave(struct numa_meminfo *ei,
+ struct numa_meminfo *pi,
+ u64 addr, u64 max_addr, int nr_nodes)
+{
+ nodemask_t physnode_mask = NODE_MASK_NONE;
+ u64 size;
+ int big;
+ int nid = 0;
+ int i, ret;
+
+ if (nr_nodes <= 0)
+ return -1;
+ if (nr_nodes > MAX_NUMNODES) {
+ pr_info("numa=fake=%d too large, reducing to %d\n",
+ nr_nodes, MAX_NUMNODES);
+ nr_nodes = MAX_NUMNODES;
+ }
+
+ size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) / nr_nodes;
+ /*
+ * Calculate the number of big nodes that can be allocated as a result
+ * of consolidating the remainder.
+ */
+ big = ((size & ~FAKE_NODE_MIN_HASH_MASK) * nr_nodes) /
+ FAKE_NODE_MIN_SIZE;
+
+ size &= FAKE_NODE_MIN_HASH_MASK;
+ if (!size) {
+ pr_err("Not enough memory for each node. "
+ "NUMA emulation disabled.\n");
+ return -1;
+ }
+
+ for (i = 0; i < pi->nr_blks; i++)
+ node_set(pi->blk[i].nid, physnode_mask);
+
+ /*
+ * Continue to fill physical nodes with fake nodes until there is no
+ * memory left on any of them.
+ */
+ while (nodes_weight(physnode_mask)) {
+ for_each_node_mask(i, physnode_mask) {
+ u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN);
+ u64 start, limit, end;
+ int phys_blk;
+
+ phys_blk = emu_find_memblk_by_nid(i, pi);
+ if (phys_blk < 0) {
+ node_clear(i, physnode_mask);
+ continue;
+ }
+ start = pi->blk[phys_blk].start;
+ limit = pi->blk[phys_blk].end;
+ end = start + size;
+
+ if (nid < big)
+ end += FAKE_NODE_MIN_SIZE;
+
+ /*
+ * Continue to add memory to this fake node if its
+ * non-reserved memory is less than the per-node size.
+ */
+ while (end - start -
+ memblock_x86_hole_size(start, end) < size) {
+ end += FAKE_NODE_MIN_SIZE;
+ if (end > limit) {
+ end = limit;
+ break;
+ }
+ }
+
+ /*
+ * If there won't be at least FAKE_NODE_MIN_SIZE of
+ * non-reserved memory in ZONE_DMA32 for the next node,
+ * this one must extend to the boundary.
+ */
+ if (end < dma32_end && dma32_end - end -
+ memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
+ end = dma32_end;
+
+ /*
+ * If there won't be enough non-reserved memory for the
+ * next node, this one must extend to the end of the
+ * physical node.
+ */
+ if (limit - end -
+ memblock_x86_hole_size(end, limit) < size)
+ end = limit;
+
+ ret = emu_setup_memblk(ei, pi, nid++ % nr_nodes,
+ phys_blk,
+ min(end, limit) - start);
+ if (ret < 0)
+ return ret;
+ }
+ }
+ return 0;
+}
+
+/*
+ * Returns the end address of a node so that there is at least `size' amount of
+ * non-reserved memory or `max_addr' is reached.
+ */
+static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size)
+{
+ u64 end = start + size;
+
+ while (end - start - memblock_x86_hole_size(start, end) < size) {
+ end += FAKE_NODE_MIN_SIZE;
+ if (end > max_addr) {
+ end = max_addr;
+ break;
+ }
+ }
+ return end;
+}
+
+/*
+ * Sets up fake nodes of `size' interleaved over physical nodes ranging from
+ * `addr' to `max_addr'. The return value is the number of nodes allocated.
+ */
+static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
+ struct numa_meminfo *pi,
+ u64 addr, u64 max_addr, u64 size)
+{
+ nodemask_t physnode_mask = NODE_MASK_NONE;
+ u64 min_size;
+ int nid = 0;
+ int i, ret;
+
+ if (!size)
+ return -1;
+ /*
+ * The limit on emulated nodes is MAX_NUMNODES, so the size per node is
+ * increased accordingly if the requested size is too small. This
+ * creates a uniform distribution of node sizes across the entire
+ * machine (but not necessarily over physical nodes).
+ */
+ min_size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) /
+ MAX_NUMNODES;
+ min_size = max(min_size, FAKE_NODE_MIN_SIZE);
+ if ((min_size & FAKE_NODE_MIN_HASH_MASK) < min_size)
+ min_size = (min_size + FAKE_NODE_MIN_SIZE) &
+ FAKE_NODE_MIN_HASH_MASK;
+ if (size < min_size) {
+ pr_err("Fake node size %LuMB too small, increasing to %LuMB\n",
+ size >> 20, min_size >> 20);
+ size = min_size;
+ }
+ size &= FAKE_NODE_MIN_HASH_MASK;
+
+ for (i = 0; i < pi->nr_blks; i++)
+ node_set(pi->blk[i].nid, physnode_mask);
+
+ /*
+ * Fill physical nodes with fake nodes of size until there is no memory
+ * left on any of them.
+ */
+ while (nodes_weight(physnode_mask)) {
+ for_each_node_mask(i, physnode_mask) {
+ u64 dma32_end = MAX_DMA32_PFN << PAGE_SHIFT;
+ u64 start, limit, end;
+ int phys_blk;
+
+ phys_blk = emu_find_memblk_by_nid(i, pi);
+ if (phys_blk < 0) {
+ node_clear(i, physnode_mask);
+ continue;
+ }
+ start = pi->blk[phys_blk].start;
+ limit = pi->blk[phys_blk].end;
+
+ end = find_end_of_node(start, limit, size);
+ /*
+ * If there won't be at least FAKE_NODE_MIN_SIZE of
+ * non-reserved memory in ZONE_DMA32 for the next node,
+ * this one must extend to the boundary.
+ */
+ if (end < dma32_end && dma32_end - end -
+ memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
+ end = dma32_end;
+
+ /*
+ * If there won't be enough non-reserved memory for the
+ * next node, this one must extend to the end of the
+ * physical node.
+ */
+ if (limit - end -
+ memblock_x86_hole_size(end, limit) < size)
+ end = limit;
+
+ ret = emu_setup_memblk(ei, pi, nid++ % MAX_NUMNODES,
+ phys_blk,
+ min(end, limit) - start);
+ if (ret < 0)
+ return ret;
+ }
+ }
+ return 0;
+}
+
+/**
+ * numa_emulation - Emulate NUMA nodes
+ * @numa_meminfo: NUMA configuration to massage
+ * @numa_dist_cnt: The size of the physical NUMA distance table
+ *
+ * Emulate NUMA nodes according to the numa=fake kernel parameter.
+ * @numa_meminfo contains the physical memory configuration and is modified
+ * to reflect the emulated configuration on success. @numa_dist_cnt is
+ * used to determine the size of the physical distance table.
+ *
+ * On success, the following modifications are made.
+ *
+ * - @numa_meminfo is updated to reflect the emulated nodes.
+ *
+ * - __apicid_to_node[] is updated such that APIC IDs are mapped to the
+ * emulated nodes.
+ *
+ * - NUMA distance table is rebuilt to represent distances between emulated
+ * nodes. The distances are determined considering how emulated nodes
+ * are mapped to physical nodes and match the actual distances.
+ *
+ * - emu_nid_to_phys[] reflects how emulated nodes are mapped to physical
+ * nodes. This is used by numa_add_cpu() and numa_remove_cpu().
+ *
+ * If emulation is not enabled or fails, emu_nid_to_phys[] is filled with
+ * identity mapping and no other modification is made.
+ */
+void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
+{
+ static struct numa_meminfo ei __initdata;
+ static struct numa_meminfo pi __initdata;
+ const u64 max_addr = max_pfn << PAGE_SHIFT;
+ u8 *phys_dist = NULL;
+ int i, j, ret;
+
+ if (!emu_cmdline)
+ goto no_emu;
+
+ memset(&ei, 0, sizeof(ei));
+ pi = *numa_meminfo;
+
+ for (i = 0; i < MAX_NUMNODES; i++)
+ emu_nid_to_phys[i] = NUMA_NO_NODE;
+
+ /*
+ * If the numa=fake command-line contains a 'M' or 'G', it represents
+ * the fixed node size. Otherwise, if it is just a single number N,
+ * split the system RAM into N fake nodes.
+ */
+ if (strchr(emu_cmdline, 'M') || strchr(emu_cmdline, 'G')) {
+ u64 size;
+
+ size = memparse(emu_cmdline, &emu_cmdline);
+ ret = split_nodes_size_interleave(&ei, &pi, 0, max_addr, size);
+ } else {
+ unsigned long n;
+
+ n = simple_strtoul(emu_cmdline, NULL, 0);
+ ret = split_nodes_interleave(&ei, &pi, 0, max_addr, n);
+ }
+
+ if (ret < 0)
+ goto no_emu;
+
+ if (numa_cleanup_meminfo(&ei) < 0) {
+ pr_warning("NUMA: Warning: constructed meminfo invalid, disabling emulation\n");
+ goto no_emu;
+ }
+
+ /*
+ * Copy the original distance table. It's temporary so no need to
+ * reserve it.
+ */
+ if (numa_dist_cnt) {
+ size_t size = numa_dist_cnt * sizeof(phys_dist[0]);
+ u64 phys;
+
+ phys = memblock_find_in_range(0,
+ (u64)max_pfn_mapped << PAGE_SHIFT,
+ size, PAGE_SIZE);
+ if (phys == MEMBLOCK_ERROR) {
+ pr_warning("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n");
+ goto no_emu;
+ }
+ phys_dist = __va(phys);
+
+ for (i = 0; i < numa_dist_cnt; i++)
+ for (j = 0; j < numa_dist_cnt; j++)
+ phys_dist[i * numa_dist_cnt + j] =
+ node_distance(i, j);
+ }
+
+ /* commit */
+ *numa_meminfo = ei;
+
+ /*
+ * Transform __apicid_to_node table to use emulated nids by
+ * reverse-mapping phys_nid. The maps should always exist but fall
+ * back to zero just in case.
+ */
+ for (i = 0; i < ARRAY_SIZE(__apicid_to_node); i++) {
+ if (__apicid_to_node[i] == NUMA_NO_NODE)
+ continue;
+ for (j = 0; j < ARRAY_SIZE(emu_nid_to_phys); j++)
+ if (__apicid_to_node[i] == emu_nid_to_phys[j])
+ break;
+ __apicid_to_node[i] = j < ARRAY_SIZE(emu_nid_to_phys) ? j : 0;
+ }
+
+ /* make sure all emulated nodes are mapped to a physical node */
+ for (i = 0; i < ARRAY_SIZE(emu_nid_to_phys); i++)
+ if (emu_nid_to_phys[i] == NUMA_NO_NODE)
+ emu_nid_to_phys[i] = 0;
+
+ /* transform distance table */
+ numa_reset_distance();
+ for (i = 0; i < MAX_NUMNODES; i++) {
+ for (j = 0; j < MAX_NUMNODES; j++) {
+ int physi = emu_nid_to_phys[i];
+ int physj = emu_nid_to_phys[j];
+ int dist;
+
+ if (physi >= numa_dist_cnt || physj >= numa_dist_cnt)
+ dist = physi == physj ?
+ LOCAL_DISTANCE : REMOTE_DISTANCE;
+ else
+ dist = phys_dist[physi * numa_dist_cnt + physj];
+
+ numa_set_distance(i, j, dist);
+ }
+ }
+ return;
+
+no_emu:
+ /* No emulation. Build identity emu_nid_to_phys[] for numa_add_cpu() */
+ for (i = 0; i < ARRAY_SIZE(emu_nid_to_phys); i++)
+ emu_nid_to_phys[i] = i;
+}
+
+#ifndef CONFIG_DEBUG_PER_CPU_MAPS
+void __cpuinit numa_add_cpu(int cpu)
+{
+ int physnid, nid;
+
+ nid = numa_cpu_node(cpu);
+ if (nid == NUMA_NO_NODE)
+ nid = early_cpu_to_node(cpu);
+ BUG_ON(nid == NUMA_NO_NODE || !node_online(nid));
+
+ physnid = emu_nid_to_phys[nid];
+
+ /*
+ * Map the cpu to each emulated node that is allocated on the physical
+ * node of the cpu's apic id.
+ */
+ for_each_online_node(nid)
+ if (emu_nid_to_phys[nid] == physnid)
+ cpumask_set_cpu(cpu, node_to_cpumask_map[nid]);
+}
+
+void __cpuinit numa_remove_cpu(int cpu)
+{
+ int i;
+
+ for_each_online_node(i)
+ cpumask_clear_cpu(cpu, node_to_cpumask_map[i]);
+}
+#else /* !CONFIG_DEBUG_PER_CPU_MAPS */
+static void __cpuinit numa_set_cpumask(int cpu, int enable)
+{
+ struct cpumask *mask;
+ int nid, physnid, i;
+
+ nid = early_cpu_to_node(cpu);
+ if (nid == NUMA_NO_NODE) {
+ /* early_cpu_to_node() already emits a warning and trace */
+ return;
+ }
+
+ physnid = emu_nid_to_phys[nid];
+
+ for_each_online_node(i) {
+ if (emu_nid_to_phys[nid] != physnid)
+ continue;
+
+ mask = debug_cpumask_set_cpu(cpu, enable);
+ if (!mask)
+ return;
+
+ if (enable)
+ cpumask_set_cpu(cpu, mask);
+ else
+ cpumask_clear_cpu(cpu, mask);
+ }
+}
+
+void __cpuinit numa_add_cpu(int cpu)
+{
+ numa_set_cpumask(cpu, 1);
+}
+
+void __cpuinit numa_remove_cpu(int cpu)
+{
+ numa_set_cpumask(cpu, 0);
+}
+#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
diff --git a/arch/x86/mm/numa_internal.h b/arch/x86/mm/numa_internal.h
new file mode 100644
index 000000000000..ef2d97377d7c
--- /dev/null
+++ b/arch/x86/mm/numa_internal.h
@@ -0,0 +1,31 @@
+#ifndef __X86_MM_NUMA_INTERNAL_H
+#define __X86_MM_NUMA_INTERNAL_H
+
+#include <linux/types.h>
+#include <asm/numa.h>
+
+struct numa_memblk {
+ u64 start;
+ u64 end;
+ int nid;
+};
+
+struct numa_meminfo {
+ int nr_blks;
+ struct numa_memblk blk[NR_NODE_MEMBLKS];
+};
+
+void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi);
+int __init numa_cleanup_meminfo(struct numa_meminfo *mi);
+void __init numa_reset_distance(void);
+
+#ifdef CONFIG_NUMA_EMU
+void __init numa_emulation(struct numa_meminfo *numa_meminfo,
+ int numa_dist_cnt);
+#else
+static inline void numa_emulation(struct numa_meminfo *numa_meminfo,
+ int numa_dist_cnt)
+{ }
+#endif
+
+#endif /* __X86_MM_NUMA_INTERNAL_H */
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index ae96e7b8051d..48651c6f657d 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -57,7 +57,7 @@ struct node_memory_chunk_s {
static struct node_memory_chunk_s __initdata node_memory_chunk[MAXCHUNKS];
static int __initdata num_memory_chunks; /* total number of memory chunks */
-static u8 __initdata apicid_to_pxm[MAX_APICID];
+static u8 __initdata apicid_to_pxm[MAX_LOCAL_APIC];
int acpi_numa __initdata;
@@ -254,8 +254,8 @@ int __init get_memcfg_from_srat(void)
printk(KERN_DEBUG "Number of memory chunks in system = %d\n",
num_memory_chunks);
- for (i = 0; i < MAX_APICID; i++)
- apicid_2_node[i] = pxm_to_node(apicid_to_pxm[i]);
+ for (i = 0; i < MAX_LOCAL_APIC; i++)
+ set_apicid_to_node(i, pxm_to_node(apicid_to_pxm[i]));
for (j = 0; j < num_memory_chunks; j++){
struct node_memory_chunk_s * chunk = &node_memory_chunk[j];
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 603d285d1daa..8e9d3394f6d4 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -26,88 +26,34 @@
int acpi_numa __initdata;
-static struct acpi_table_slit *acpi_slit;
-
-static nodemask_t nodes_parsed __initdata;
-static nodemask_t cpu_nodes_parsed __initdata;
-static struct bootnode nodes[MAX_NUMNODES] __initdata;
static struct bootnode nodes_add[MAX_NUMNODES];
-static int num_node_memblks __initdata;
-static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
-static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata;
-
static __init int setup_node(int pxm)
{
return acpi_map_pxm_to_node(pxm);
}
-static __init int conflicting_memblks(unsigned long start, unsigned long end)
-{
- int i;
- for (i = 0; i < num_node_memblks; i++) {
- struct bootnode *nd = &node_memblk_range[i];
- if (nd->start == nd->end)
- continue;
- if (nd->end > start && nd->start < end)
- return memblk_nodeid[i];
- if (nd->end == end && nd->start == start)
- return memblk_nodeid[i];
- }
- return -1;
-}
-
-static __init void cutoff_node(int i, unsigned long start, unsigned long end)
-{
- struct bootnode *nd = &nodes[i];
-
- if (nd->start < start) {
- nd->start = start;
- if (nd->end < nd->start)
- nd->start = nd->end;
- }
- if (nd->end > end) {
- nd->end = end;
- if (nd->start > nd->end)
- nd->start = nd->end;
- }
-}
-
static __init void bad_srat(void)
{
- int i;
printk(KERN_ERR "SRAT: SRAT not used.\n");
acpi_numa = -1;
- for (i = 0; i < MAX_LOCAL_APIC; i++)
- apicid_to_node[i] = NUMA_NO_NODE;
- for (i = 0; i < MAX_NUMNODES; i++) {
- nodes[i].start = nodes[i].end = 0;
- nodes_add[i].start = nodes_add[i].end = 0;
- }
- remove_all_active_ranges();
+ memset(nodes_add, 0, sizeof(nodes_add));
}
static __init inline int srat_disabled(void)
{
- return numa_off || acpi_numa < 0;
+ return acpi_numa < 0;
}
/* Callback for SLIT parsing */
void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
{
- unsigned length;
- unsigned long phys;
-
- length = slit->header.length;
- phys = memblock_find_in_range(0, max_pfn_mapped<<PAGE_SHIFT, length,
- PAGE_SIZE);
-
- if (phys == MEMBLOCK_ERROR)
- panic(" Can not save slit!\n");
+ int i, j;
- acpi_slit = __va(phys);
- memcpy(acpi_slit, slit, length);
- memblock_x86_reserve_range(phys, phys + length, "ACPI SLIT");
+ for (i = 0; i < slit->locality_count; i++)
+ for (j = 0; j < slit->locality_count; j++)
+ numa_set_distance(pxm_to_node(i), pxm_to_node(j),
+ slit->entry[slit->locality_count * i + j]);
}
/* Callback for Proximity Domain -> x2APIC mapping */
@@ -138,8 +84,8 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
return;
}
- apicid_to_node[apic_id] = node;
- node_set(node, cpu_nodes_parsed);
+ set_apicid_to_node(apic_id, node);
+ node_set(node, numa_nodes_parsed);
acpi_numa = 1;
printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n",
pxm, apic_id, node);
@@ -178,8 +124,8 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
return;
}
- apicid_to_node[apic_id] = node;
- node_set(node, cpu_nodes_parsed);
+ set_apicid_to_node(apic_id, node);
+ node_set(node, numa_nodes_parsed);
acpi_numa = 1;
printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n",
pxm, apic_id, node);
@@ -241,7 +187,7 @@ update_nodes_add(int node, unsigned long start, unsigned long end)
}
if (changed) {
- node_set(node, cpu_nodes_parsed);
+ node_set(node, numa_nodes_parsed);
printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n",
nd->start, nd->end);
}
@@ -251,10 +197,8 @@ update_nodes_add(int node, unsigned long start, unsigned long end)
void __init
acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
{
- struct bootnode *nd, oldnode;
unsigned long start, end;
int node, pxm;
- int i;
if (srat_disabled())
return;
@@ -276,300 +220,31 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
bad_srat();
return;
}
- i = conflicting_memblks(start, end);
- if (i == node) {
- printk(KERN_WARNING
- "SRAT: Warning: PXM %d (%lx-%lx) overlaps with itself (%Lx-%Lx)\n",
- pxm, start, end, nodes[i].start, nodes[i].end);
- } else if (i >= 0) {
- printk(KERN_ERR
- "SRAT: PXM %d (%lx-%lx) overlaps with PXM %d (%Lx-%Lx)\n",
- pxm, start, end, node_to_pxm(i),
- nodes[i].start, nodes[i].end);
+
+ if (numa_add_memblk(node, start, end) < 0) {
bad_srat();
return;
}
- nd = &nodes[node];
- oldnode = *nd;
- if (!node_test_and_set(node, nodes_parsed)) {
- nd->start = start;
- nd->end = end;
- } else {
- if (start < nd->start)
- nd->start = start;
- if (nd->end < end)
- nd->end = end;
- }
printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm,
start, end);
- if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) {
+ if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)
update_nodes_add(node, start, end);
- /* restore nodes[node] */
- *nd = oldnode;
- if ((nd->start | nd->end) == 0)
- node_clear(node, nodes_parsed);
- }
-
- node_memblk_range[num_node_memblks].start = start;
- node_memblk_range[num_node_memblks].end = end;
- memblk_nodeid[num_node_memblks] = node;
- num_node_memblks++;
-}
-
-/* Sanity check to catch more bad SRATs (they are amazingly common).
- Make sure the PXMs cover all memory. */
-static int __init nodes_cover_memory(const struct bootnode *nodes)
-{
- int i;
- unsigned long pxmram, e820ram;
-
- pxmram = 0;
- for_each_node_mask(i, nodes_parsed) {
- unsigned long s = nodes[i].start >> PAGE_SHIFT;
- unsigned long e = nodes[i].end >> PAGE_SHIFT;
- pxmram += e - s;
- pxmram -= __absent_pages_in_range(i, s, e);
- if ((long)pxmram < 0)
- pxmram = 0;
- }
-
- e820ram = max_pfn - (memblock_x86_hole_size(0, max_pfn<<PAGE_SHIFT)>>PAGE_SHIFT);
- /* We seem to lose 3 pages somewhere. Allow 1M of slack. */
- if ((long)(e820ram - pxmram) >= (1<<(20 - PAGE_SHIFT))) {
- printk(KERN_ERR
- "SRAT: PXMs only cover %luMB of your %luMB e820 RAM. Not used.\n",
- (pxmram << PAGE_SHIFT) >> 20,
- (e820ram << PAGE_SHIFT) >> 20);
- return 0;
- }
- return 1;
}
void __init acpi_numa_arch_fixup(void) {}
-#ifdef CONFIG_NUMA_EMU
-void __init acpi_get_nodes(struct bootnode *physnodes, unsigned long start,
- unsigned long end)
-{
- int i;
-
- for_each_node_mask(i, nodes_parsed) {
- cutoff_node(i, start, end);
- physnodes[i].start = nodes[i].start;
- physnodes[i].end = nodes[i].end;
- }
-}
-#endif /* CONFIG_NUMA_EMU */
-
-/* Use the information discovered above to actually set up the nodes. */
-int __init acpi_scan_nodes(unsigned long start, unsigned long end)
+int __init x86_acpi_numa_init(void)
{
- int i;
-
- if (acpi_numa <= 0)
- return -1;
-
- /* First clean up the node list */
- for (i = 0; i < MAX_NUMNODES; i++)
- cutoff_node(i, start, end);
-
- /*
- * Join together blocks on the same node, holes between
- * which don't overlap with memory on other nodes.
- */
- for (i = 0; i < num_node_memblks; ++i) {
- int j, k;
-
- for (j = i + 1; j < num_node_memblks; ++j) {
- unsigned long start, end;
-
- if (memblk_nodeid[i] != memblk_nodeid[j])
- continue;
- start = min(node_memblk_range[i].end,
- node_memblk_range[j].end);
- end = max(node_memblk_range[i].start,
- node_memblk_range[j].start);
- for (k = 0; k < num_node_memblks; ++k) {
- if (memblk_nodeid[i] == memblk_nodeid[k])
- continue;
- if (start < node_memblk_range[k].end &&
- end > node_memblk_range[k].start)
- break;
- }
- if (k < num_node_memblks)
- continue;
- start = min(node_memblk_range[i].start,
- node_memblk_range[j].start);
- end = max(node_memblk_range[i].end,
- node_memblk_range[j].end);
- printk(KERN_INFO "SRAT: Node %d "
- "[%Lx,%Lx) + [%Lx,%Lx) -> [%lx,%lx)\n",
- memblk_nodeid[i],
- node_memblk_range[i].start,
- node_memblk_range[i].end,
- node_memblk_range[j].start,
- node_memblk_range[j].end,
- start, end);
- node_memblk_range[i].start = start;
- node_memblk_range[i].end = end;
- k = --num_node_memblks - j;
- memmove(memblk_nodeid + j, memblk_nodeid + j+1,
- k * sizeof(*memblk_nodeid));
- memmove(node_memblk_range + j, node_memblk_range + j+1,
- k * sizeof(*node_memblk_range));
- --j;
- }
- }
-
- memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks,
- memblk_nodeid);
- if (memnode_shift < 0) {
- printk(KERN_ERR
- "SRAT: No NUMA node hash function found. Contact maintainer\n");
- bad_srat();
- return -1;
- }
-
- for (i = 0; i < num_node_memblks; i++)
- memblock_x86_register_active_regions(memblk_nodeid[i],
- node_memblk_range[i].start >> PAGE_SHIFT,
- node_memblk_range[i].end >> PAGE_SHIFT);
-
- /* for out of order entries in SRAT */
- sort_node_map();
- if (!nodes_cover_memory(nodes)) {
- bad_srat();
- return -1;
- }
+ int ret;
- /* Account for nodes with cpus and no memory */
- nodes_or(node_possible_map, nodes_parsed, cpu_nodes_parsed);
-
- /* Finally register nodes */
- for_each_node_mask(i, node_possible_map)
- setup_node_bootmem(i, nodes[i].start, nodes[i].end);
- /* Try again in case setup_node_bootmem missed one due
- to missing bootmem */
- for_each_node_mask(i, node_possible_map)
- if (!node_online(i))
- setup_node_bootmem(i, nodes[i].start, nodes[i].end);
-
- for (i = 0; i < nr_cpu_ids; i++) {
- int node = early_cpu_to_node(i);
-
- if (node == NUMA_NO_NODE)
- continue;
- if (!node_online(node))
- numa_clear_node(i);
- }
- numa_init_array();
- return 0;
-}
-
-#ifdef CONFIG_NUMA_EMU
-static int fake_node_to_pxm_map[MAX_NUMNODES] __initdata = {
- [0 ... MAX_NUMNODES-1] = PXM_INVAL
-};
-static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = {
- [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
-};
-static int __init find_node_by_addr(unsigned long addr)
-{
- int ret = NUMA_NO_NODE;
- int i;
-
- for_each_node_mask(i, nodes_parsed) {
- /*
- * Find the real node that this emulated node appears on. For
- * the sake of simplicity, we only use a real node's starting
- * address to determine which emulated node it appears on.
- */
- if (addr >= nodes[i].start && addr < nodes[i].end) {
- ret = i;
- break;
- }
- }
- return ret;
+ ret = acpi_numa_init();
+ if (ret < 0)
+ return ret;
+ return srat_disabled() ? -EINVAL : 0;
}
-/*
- * In NUMA emulation, we need to setup proximity domain (_PXM) to node ID
- * mappings that respect the real ACPI topology but reflect our emulated
- * environment. For each emulated node, we find which real node it appears on
- * and create PXM to NID mappings for those fake nodes which mirror that
- * locality. SLIT will now represent the correct distances between emulated
- * nodes as a result of the real topology.
- */
-void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
-{
- int i, j;
-
- for (i = 0; i < num_nodes; i++) {
- int nid, pxm;
-
- nid = find_node_by_addr(fake_nodes[i].start);
- if (nid == NUMA_NO_NODE)
- continue;
- pxm = node_to_pxm(nid);
- if (pxm == PXM_INVAL)
- continue;
- fake_node_to_pxm_map[i] = pxm;
- /*
- * For each apicid_to_node mapping that exists for this real
- * node, it must now point to the fake node ID.
- */
- for (j = 0; j < MAX_LOCAL_APIC; j++)
- if (apicid_to_node[j] == nid &&
- fake_apicid_to_node[j] == NUMA_NO_NODE)
- fake_apicid_to_node[j] = i;
- }
-
- /*
- * If there are apicid-to-node mappings for physical nodes that do not
- * have a corresponding emulated node, it should default to a guaranteed
- * value.
- */
- for (i = 0; i < MAX_LOCAL_APIC; i++)
- if (apicid_to_node[i] != NUMA_NO_NODE &&
- fake_apicid_to_node[i] == NUMA_NO_NODE)
- fake_apicid_to_node[i] = 0;
-
- for (i = 0; i < num_nodes; i++)
- __acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i);
- memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
-
- nodes_clear(nodes_parsed);
- for (i = 0; i < num_nodes; i++)
- if (fake_nodes[i].start != fake_nodes[i].end)
- node_set(i, nodes_parsed);
-}
-
-static int null_slit_node_compare(int a, int b)
-{
- return node_to_pxm(a) == node_to_pxm(b);
-}
-#else
-static int null_slit_node_compare(int a, int b)
-{
- return a == b;
-}
-#endif /* CONFIG_NUMA_EMU */
-
-int __node_distance(int a, int b)
-{
- int index;
-
- if (!acpi_slit)
- return null_slit_node_compare(a, b) ? LOCAL_DISTANCE :
- REMOTE_DISTANCE;
- index = acpi_slit->locality_count * node_to_pxm(a);
- return acpi_slit->entry[index + node_to_pxm(b)];
-}
-
-EXPORT_SYMBOL(__node_distance);
-
#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || defined(CONFIG_ACPI_HOTPLUG_MEMORY)
int memory_add_physaddr_to_nid(u64 start)
{
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 6acc724d5d8f..55272d7c3b0b 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -179,12 +179,8 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
sender = this_cpu_read(tlb_vector_offset);
f = &flush_state[sender];
- /*
- * Could avoid this lock when
- * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is
- * probably not worth checking this for a cache-hot lock.
- */
- raw_spin_lock(&f->tlbstate_lock);
+ if (nr_cpu_ids > NUM_INVALIDATE_TLB_VECTORS)
+ raw_spin_lock(&f->tlbstate_lock);
f->flush_mm = mm;
f->flush_va = va;
@@ -202,7 +198,8 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
f->flush_mm = NULL;
f->flush_va = 0;
- raw_spin_unlock(&f->tlbstate_lock);
+ if (nr_cpu_ids > NUM_INVALIDATE_TLB_VECTORS)
+ raw_spin_unlock(&f->tlbstate_lock);
}
void native_flush_tlb_others(const struct cpumask *cpumask,
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index e27dffbbb1a7..026e4931d162 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -350,7 +350,7 @@ static int __init early_fill_mp_bus_info(void)
#define ENABLE_CF8_EXT_CFG (1ULL << 46)
-static void enable_pci_io_ecs(void *unused)
+static void __cpuinit enable_pci_io_ecs(void *unused)
{
u64 reg;
rdmsrl(MSR_AMD64_NB_CFG, reg);
diff --git a/arch/x86/pci/ce4100.c b/arch/x86/pci/ce4100.c
index 85b68ef5e809..c63c6d3dd8e8 100644
--- a/arch/x86/pci/ce4100.c
+++ b/arch/x86/pci/ce4100.c
@@ -254,7 +254,7 @@ int bridge_read(unsigned int devfn, int reg, int len, u32 *value)
static int ce4100_conf_read(unsigned int seg, unsigned int bus,
unsigned int devfn, int reg, int len, u32 *value)
{
- int i, retval = 1;
+ int i;
if (bus == 1) {
for (i = 0; i < ARRAY_SIZE(bus1_fixups); i++) {
diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c
index d2c0d51a7178..68c0dbcc95be 100644
--- a/arch/x86/platform/ce4100/ce4100.c
+++ b/arch/x86/platform/ce4100/ce4100.c
@@ -15,21 +15,19 @@
#include <linux/serial_reg.h>
#include <linux/serial_8250.h>
+#include <asm/prom.h>
#include <asm/setup.h>
+#include <asm/i8259.h>
#include <asm/io.h>
+#include <asm/io_apic.h>
static int ce4100_i8042_detect(void)
{
return 0;
}
-static void __init sdv_find_smp_config(void)
-{
-}
-
#ifdef CONFIG_SERIAL_8250
-
static unsigned int mem_serial_in(struct uart_port *p, int offset)
{
offset = offset << p->regshift;
@@ -118,6 +116,15 @@ static void __init sdv_arch_setup(void)
sdv_serial_fixup();
}
+#ifdef CONFIG_X86_IO_APIC
+static void __cpuinit sdv_pci_init(void)
+{
+ x86_of_pci_init();
+ /* We can't set this earlier, because we need to calibrate the timer */
+ legacy_pic = &null_legacy_pic;
+}
+#endif
+
/*
* CE4100 specific x86_init function overrides and early setup
* calls.
@@ -128,5 +135,10 @@ void __init x86_ce4100_early_setup(void)
x86_platform.i8042_detect = ce4100_i8042_detect;
x86_init.resources.probe_roms = x86_init_noop;
x86_init.mpparse.get_smp_config = x86_init_uint_noop;
- x86_init.mpparse.find_smp_config = sdv_find_smp_config;
+ x86_init.mpparse.find_smp_config = x86_init_noop;
+
+#ifdef CONFIG_X86_IO_APIC
+ x86_init.pci.init_irq = sdv_pci_init;
+ x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc_nocheck;
+#endif
}
diff --git a/arch/x86/platform/ce4100/falconfalls.dts b/arch/x86/platform/ce4100/falconfalls.dts
new file mode 100644
index 000000000000..dc701ea58546
--- /dev/null
+++ b/arch/x86/platform/ce4100/falconfalls.dts
@@ -0,0 +1,428 @@
+/*
+ * CE4100 on Falcon Falls
+ *
+ * (c) Copyright 2010 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2 of the License.
+ */
+/dts-v1/;
+/ {
+ model = "intel,falconfalls";
+ compatible = "intel,falconfalls";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu@0 {
+ device_type = "cpu";
+ compatible = "intel,ce4100";
+ reg = <0>;
+ lapic = <&lapic0>;
+ };
+ };
+
+ soc@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "intel,ce4100-cp";
+ ranges;
+
+ ioapic1: interrupt-controller@fec00000 {
+ #interrupt-cells = <2>;
+ compatible = "intel,ce4100-ioapic";
+ interrupt-controller;
+ reg = <0xfec00000 0x1000>;
+ };
+
+ timer@fed00000 {
+ compatible = "intel,ce4100-hpet";
+ reg = <0xfed00000 0x200>;
+ };
+
+ lapic0: interrupt-controller@fee00000 {
+ compatible = "intel,ce4100-lapic";
+ reg = <0xfee00000 0x1000>;
+ };
+
+ pci@3fc {
+ #address-cells = <3>;
+ #size-cells = <2>;
+ compatible = "intel,ce4100-pci", "pci";
+ device_type = "pci";
+ bus-range = <0 0>;
+ ranges = <0x2000000 0 0xbffff000 0xbffff000 0 0x1000
+ 0x2000000 0 0xdffe0000 0xdffe0000 0 0x1000
+ 0x0000000 0 0x0 0x0 0 0x100>;
+
+ /* Secondary IO-APIC */
+ ioapic2: interrupt-controller@0,1 {
+ #interrupt-cells = <2>;
+ compatible = "intel,ce4100-ioapic";
+ interrupt-controller;
+ reg = <0x100 0x0 0x0 0x0 0x0>;
+ assigned-addresses = <0x02000000 0x0 0xbffff000 0x0 0x1000>;
+ };
+
+ pci@1,0 {
+ #address-cells = <3>;
+ #size-cells = <2>;
+ compatible = "intel,ce4100-pci", "pci";
+ device_type = "pci";
+ bus-range = <1 1>;
+ ranges = <0x2000000 0 0xdffe0000 0x2000000 0 0xdffe0000 0 0x1000>;
+
+ interrupt-parent = <&ioapic2>;
+
+ display@2,0 {
+ compatible = "pci8086,2e5b.2",
+ "pci8086,2e5b",
+ "pciclass038000",
+ "pciclass0380";
+
+ reg = <0x11000 0x0 0x0 0x0 0x0>;
+ interrupts = <0 1>;
+ };
+
+ multimedia@3,0 {
+ compatible = "pci8086,2e5c.2",
+ "pci8086,2e5c",
+ "pciclass048000",
+ "pciclass0480";
+
+ reg = <0x11800 0x0 0x0 0x0 0x0>;
+ interrupts = <2 1>;
+ };
+
+ multimedia@4,0 {
+ compatible = "pci8086,2e5d.2",
+ "pci8086,2e5d",
+ "pciclass048000",
+ "pciclass0480";
+
+ reg = <0x12000 0x0 0x0 0x0 0x0>;
+ interrupts = <4 1>;
+ };
+
+ multimedia@4,1 {
+ compatible = "pci8086,2e5e.2",
+ "pci8086,2e5e",
+ "pciclass048000",
+ "pciclass0480";
+
+ reg = <0x12100 0x0 0x0 0x0 0x0>;
+ interrupts = <5 1>;
+ };
+
+ sound@6,0 {
+ compatible = "pci8086,2e5f.2",
+ "pci8086,2e5f",
+ "pciclass040100",
+ "pciclass0401";
+
+ reg = <0x13000 0x0 0x0 0x0 0x0>;
+ interrupts = <6 1>;
+ };
+
+ sound@6,1 {
+ compatible = "pci8086,2e5f.2",
+ "pci8086,2e5f",
+ "pciclass040100",
+ "pciclass0401";
+
+ reg = <0x13100 0x0 0x0 0x0 0x0>;
+ interrupts = <7 1>;
+ };
+
+ sound@6,2 {
+ compatible = "pci8086,2e60.2",
+ "pci8086,2e60",
+ "pciclass040100",
+ "pciclass0401";
+
+ reg = <0x13200 0x0 0x0 0x0 0x0>;
+ interrupts = <8 1>;
+ };
+
+ display@8,0 {
+ compatible = "pci8086,2e61.2",
+ "pci8086,2e61",
+ "pciclass038000",
+ "pciclass0380";
+
+ reg = <0x14000 0x0 0x0 0x0 0x0>;
+ interrupts = <9 1>;
+ };
+
+ display@8,1 {
+ compatible = "pci8086,2e62.2",
+ "pci8086,2e62",
+ "pciclass038000",
+ "pciclass0380";
+
+ reg = <0x14100 0x0 0x0 0x0 0x0>;
+ interrupts = <10 1>;
+ };
+
+ multimedia@8,2 {
+ compatible = "pci8086,2e63.2",
+ "pci8086,2e63",
+ "pciclass048000",
+ "pciclass0480";
+
+ reg = <0x14200 0x0 0x0 0x0 0x0>;
+ interrupts = <11 1>;
+ };
+
+ entertainment-encryption@9,0 {
+ compatible = "pci8086,2e64.2",
+ "pci8086,2e64",
+ "pciclass101000",
+ "pciclass1010";
+
+ reg = <0x14800 0x0 0x0 0x0 0x0>;
+ interrupts = <12 1>;
+ };
+
+ localbus@a,0 {
+ compatible = "pci8086,2e65.2",
+ "pci8086,2e65",
+ "pciclassff0000",
+ "pciclassff00";
+
+ reg = <0x15000 0x0 0x0 0x0 0x0>;
+ };
+
+ serial@b,0 {
+ compatible = "pci8086,2e66.2",
+ "pci8086,2e66",
+ "pciclass070003",
+ "pciclass0700";
+
+ reg = <0x15800 0x0 0x0 0x0 0x0>;
+ interrupts = <14 1>;
+ };
+
+ gpio@b,1 {
+ compatible = "pci8086,2e67.2",
+ "pci8086,2e67",
+ "pciclassff0000",
+ "pciclassff00";
+
+ #gpio-cells = <2>;
+ reg = <0x15900 0x0 0x0 0x0 0x0>;
+ interrupts = <15 1>;
+ gpio-controller;
+ };
+
+ i2c-controller@b,2 {
+ #address-cells = <2>;
+ #size-cells = <1>;
+ compatible = "pci8086,2e68.2",
+ "pci8086,2e68",
+ "pciclass,ff0000",
+ "pciclass,ff00";
+
+ reg = <0x15a00 0x0 0x0 0x0 0x0>;
+ interrupts = <16 1>;
+ ranges = <0 0 0x02000000 0 0xdffe0500 0x100
+ 1 0 0x02000000 0 0xdffe0600 0x100
+ 2 0 0x02000000 0 0xdffe0700 0x100>;
+
+ i2c@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "intel,ce4100-i2c-controller";
+ reg = <0 0 0x100>;
+ };
+
+ i2c@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "intel,ce4100-i2c-controller";
+ reg = <1 0 0x100>;
+
+ gpio@26 {
+ #gpio-cells = <2>;
+ compatible = "ti,pcf8575";
+ reg = <0x26>;
+ gpio-controller;
+ };
+ };
+
+ i2c@2 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "intel,ce4100-i2c-controller";
+ reg = <2 0 0x100>;
+
+ gpio@26 {
+ #gpio-cells = <2>;
+ compatible = "ti,pcf8575";
+ reg = <0x26>;
+ gpio-controller;
+ };
+ };
+ };
+
+ smard-card@b,3 {
+ compatible = "pci8086,2e69.2",
+ "pci8086,2e69",
+ "pciclass070500",
+ "pciclass0705";
+
+ reg = <0x15b00 0x0 0x0 0x0 0x0>;
+ interrupts = <15 1>;
+ };
+
+ spi-controller@b,4 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible =
+ "pci8086,2e6a.2",
+ "pci8086,2e6a",
+ "pciclass,ff0000",
+ "pciclass,ff00";
+
+ reg = <0x15c00 0x0 0x0 0x0 0x0>;
+ interrupts = <15 1>;
+
+ dac@0 {
+ compatible = "ti,pcm1755";
+ reg = <0>;
+ spi-max-frequency = <115200>;
+ };
+
+ dac@1 {
+ compatible = "ti,pcm1609a";
+ reg = <1>;
+ spi-max-frequency = <115200>;
+ };
+
+ eeprom@2 {
+ compatible = "atmel,at93c46";
+ reg = <2>;
+ spi-max-frequency = <115200>;
+ };
+ };
+
+ multimedia@b,7 {
+ compatible = "pci8086,2e6d.2",
+ "pci8086,2e6d",
+ "pciclassff0000",
+ "pciclassff00";
+
+ reg = <0x15f00 0x0 0x0 0x0 0x0>;
+ };
+
+ ethernet@c,0 {
+ compatible = "pci8086,2e6e.2",
+ "pci8086,2e6e",
+ "pciclass020000",
+ "pciclass0200";
+
+ reg = <0x16000 0x0 0x0 0x0 0x0>;
+ interrupts = <21 1>;
+ };
+
+ clock@c,1 {
+ compatible = "pci8086,2e6f.2",
+ "pci8086,2e6f",
+ "pciclassff0000",
+ "pciclassff00";
+
+ reg = <0x16100 0x0 0x0 0x0 0x0>;
+ interrupts = <3 1>;
+ };
+
+ usb@d,0 {
+ compatible = "pci8086,2e70.2",
+ "pci8086,2e70",
+ "pciclass0c0320",
+ "pciclass0c03";
+
+ reg = <0x16800 0x0 0x0 0x0 0x0>;
+ interrupts = <22 3>;
+ };
+
+ usb@d,1 {
+ compatible = "pci8086,2e70.2",
+ "pci8086,2e70",
+ "pciclass0c0320",
+ "pciclass0c03";
+
+ reg = <0x16900 0x0 0x0 0x0 0x0>;
+ interrupts = <22 3>;
+ };
+
+ sata@e,0 {
+ compatible = "pci8086,2e71.0",
+ "pci8086,2e71",
+ "pciclass010601",
+ "pciclass0106";
+
+ reg = <0x17000 0x0 0x0 0x0 0x0>;
+ interrupts = <23 3>;
+ };
+
+ flash@f,0 {
+ compatible = "pci8086,701.1",
+ "pci8086,701",
+ "pciclass050100",
+ "pciclass0501";
+
+ reg = <0x17800 0x0 0x0 0x0 0x0>;
+ interrupts = <13 1>;
+ };
+
+ entertainment-encryption@10,0 {
+ compatible = "pci8086,702.1",
+ "pci8086,702",
+ "pciclass101000",
+ "pciclass1010";
+
+ reg = <0x18000 0x0 0x0 0x0 0x0>;
+ };
+
+ co-processor@11,0 {
+ compatible = "pci8086,703.1",
+ "pci8086,703",
+ "pciclass0b4000",
+ "pciclass0b40";
+
+ reg = <0x18800 0x0 0x0 0x0 0x0>;
+ interrupts = <1 1>;
+ };
+
+ multimedia@12,0 {
+ compatible = "pci8086,704.0",
+ "pci8086,704",
+ "pciclass048000",
+ "pciclass0480";
+
+ reg = <0x19000 0x0 0x0 0x0 0x0>;
+ };
+ };
+
+ isa@1f,0 {
+ #address-cells = <2>;
+ #size-cells = <1>;
+ compatible = "isa";
+ ranges = <1 0 0 0 0 0x100>;
+
+ rtc@70 {
+ compatible = "intel,ce4100-rtc", "motorola,mc146818";
+ interrupts = <8 3>;
+ interrupt-parent = <&ioapic1>;
+ ctrl-reg = <2>;
+ freq-reg = <0x26>;
+ reg = <1 0x70 2>;
+ };
+ };
+ };
+ };
+};
diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c
index ea6529e93c6f..5c0207bf959b 100644
--- a/arch/x86/platform/mrst/mrst.c
+++ b/arch/x86/platform/mrst/mrst.c
@@ -31,6 +31,7 @@
#include <asm/apic.h>
#include <asm/io_apic.h>
#include <asm/mrst.h>
+#include <asm/mrst-vrtc.h>
#include <asm/io.h>
#include <asm/i8259.h>
#include <asm/intel_scu_ipc.h>
@@ -268,6 +269,7 @@ void __init x86_mrst_early_setup(void)
x86_platform.calibrate_tsc = mrst_calibrate_tsc;
x86_platform.i8042_detect = mrst_i8042_detect;
+ x86_init.timers.wallclock_init = mrst_rtc_init;
x86_init.pci.init = pci_mrst_init;
x86_init.pci.fixup_irqs = x86_init_noop;
diff --git a/arch/x86/platform/mrst/vrtc.c b/arch/x86/platform/mrst/vrtc.c
index 32cd7edd71a0..04cf645feb92 100644
--- a/arch/x86/platform/mrst/vrtc.c
+++ b/arch/x86/platform/mrst/vrtc.c
@@ -100,22 +100,14 @@ int vrtc_set_mmss(unsigned long nowtime)
void __init mrst_rtc_init(void)
{
- unsigned long rtc_paddr;
- void __iomem *virt_base;
+ unsigned long vrtc_paddr = sfi_mrtc_array[0].phys_addr;
sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc);
- if (!sfi_mrtc_num)
+ if (!sfi_mrtc_num || !vrtc_paddr)
return;
- rtc_paddr = sfi_mrtc_array[0].phys_addr;
-
- /* vRTC's register address may not be page aligned */
- set_fixmap_nocache(FIX_LNW_VRTC, rtc_paddr);
-
- virt_base = (void __iomem *)__fix_to_virt(FIX_LNW_VRTC);
- virt_base += rtc_paddr & ~PAGE_MASK;
- vrtc_virt_base = virt_base;
-
+ vrtc_virt_base = (void __iomem *)set_fixmap_offset_nocache(FIX_LNW_VRTC,
+ vrtc_paddr);
x86_platform.get_wallclock = vrtc_get_time;
x86_platform.set_wallclock = vrtc_set_mmss;
}
diff --git a/arch/x86/platform/olpc/Makefile b/arch/x86/platform/olpc/Makefile
index e797428b163b..c2a8cab65e5d 100644
--- a/arch/x86/platform/olpc/Makefile
+++ b/arch/x86/platform/olpc/Makefile
@@ -1,4 +1,4 @@
obj-$(CONFIG_OLPC) += olpc.o
obj-$(CONFIG_OLPC_XO1) += olpc-xo1.o
-obj-$(CONFIG_OLPC_OPENFIRMWARE) += olpc_ofw.o
-obj-$(CONFIG_OLPC_OPENFIRMWARE_DT) += olpc_dt.o
+obj-$(CONFIG_OLPC) += olpc_ofw.o
+obj-$(CONFIG_OF_PROMTREE) += olpc_dt.o
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 5e92b61ad574..13783a18c6f1 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1443,7 +1443,7 @@ static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
* early_ioremap fixmap slot, make sure it is RO.
*/
if (!is_early_ioremap_ptep(ptep) &&
- pfn >= e820_table_start && pfn < e820_table_end)
+ pfn >= pgt_buf_start && pfn < pgt_buf_end)
pte = pte_wrprotect(pte);
return pte;
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 1a5ff24e29c0..aaa7291c9259 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -28,9 +28,9 @@ ENTRY(startup_xen)
__FINIT
.pushsection .text
- .align PAGE_SIZE_asm
+ .align PAGE_SIZE
ENTRY(hypercall_page)
- .skip PAGE_SIZE_asm
+ .skip PAGE_SIZE
.popsection
ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux")
diff --git a/arch/xtensa/include/asm/rwsem.h b/arch/xtensa/include/asm/rwsem.h
index e39edf5c86f2..249619e7e7f2 100644
--- a/arch/xtensa/include/asm/rwsem.h
+++ b/arch/xtensa/include/asm/rwsem.h
@@ -17,44 +17,12 @@
#error "Please don't include <asm/rwsem.h> directly, use <linux/rwsem.h> instead."
#endif
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <asm/atomic.h>
-#include <asm/system.h>
-
-/*
- * the semaphore definition
- */
-struct rw_semaphore {
- signed long count;
#define RWSEM_UNLOCKED_VALUE 0x00000000
#define RWSEM_ACTIVE_BIAS 0x00000001
#define RWSEM_ACTIVE_MASK 0x0000ffff
#define RWSEM_WAITING_BIAS (-0x00010000)
#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
- spinlock_t wait_lock;
- struct list_head wait_list;
-};
-
-#define __RWSEM_INITIALIZER(name) \
- { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \
- LIST_HEAD_INIT((name).wait_list) }
-
-#define DECLARE_RWSEM(name) \
- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
-
-static inline void init_rwsem(struct rw_semaphore *sem)
-{
- sem->count = RWSEM_UNLOCKED_VALUE;
- spin_lock_init(&sem->wait_lock);
- INIT_LIST_HEAD(&sem->wait_list);
-}
/*
* lock for reading
@@ -160,9 +128,4 @@ static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
return atomic_add_return(delta, (atomic_t *)(&sem->count));
}
-static inline int rwsem_is_locked(struct rw_semaphore *sem)
-{
- return (sem->count != 0);
-}
-
#endif /* _XTENSA_RWSEM_H */
diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c
index 88ba96e2df25..db18807bb9e7 100644
--- a/arch/xtensa/kernel/time.c
+++ b/arch/xtensa/kernel/time.c
@@ -96,16 +96,12 @@ again:
update_process_times(user_mode(get_irq_regs()));
#endif
- write_seqlock(&xtime_lock);
-
- do_timer(1); /* Linux handler in kernel/timer.c */
+ xtime_update(1); /* Linux handler in kernel/time/timekeeping */
/* Note that writing CCOMPARE clears the interrupt. */
next += CCOUNT_PER_JIFFY;
set_linux_timer(next);
-
- write_sequnlock(&xtime_lock);
}
/* Allow platform to do something useful (Wdog). */