13 files changed, 521 insertions, 314 deletions
diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c
index 90350f838f05..4a700f4b79ce 100644
--- a/arch/sparc/kernel/ds.c
+++ b/arch/sparc/kernel/ds.c
@@ -544,7 +544,8 @@ static int __cpuinit dr_cpu_configure(struct ds_info *dp,
 			     resp_len, ncpus, mask,
 			     DR_CPU_STAT_CONFIGURED);
 
-	mdesc_fill_in_cpu_data(*mask);
+	mdesc_populate_present_mask(mask);
+	mdesc_fill_in_cpu_data(mask);
 
 	for_each_cpu_mask(cpu, *mask) {
 		int err;
diff --git a/arch/sparc/kernel/head_32.S b/arch/sparc/kernel/head_32.S
index f0b4b516304f..6b4d8acc4c83 100644
--- a/arch/sparc/kernel/head_32.S
+++ b/arch/sparc/kernel/head_32.S
@@ -72,7 +72,7 @@ sun4e_notsup:
 	.align 4
 
 	/* The Sparc trap table, bootloader gives us control at _start. */
-	.section .text.head,"ax"
+	__HEAD
 	.globl	start, _stext, _start, __stext
 	.globl  trapbase
 _start:   /* danger danger */
@@ -735,7 +735,7 @@ go_to_highmem:
 		 nop
 
 /* The code above should be at beginning and we have to take care about
- * short jumps, as branching to .text.init section from .text is usually
+ * short jumps, as branching to .init.text section from .text is usually
  * impossible */
 		__INIT
 /* Acquire boot time privileged register values, this will help debugging.
diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
index 3a1b7bf03cff..f8f21050448b 100644
--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S
@@ -467,7 +467,7 @@ jump_to_sun4u_init:
 	jmpl    %g2 + %g0, %g0
 	 nop
 
-	.section	.text.init.refok
+	__REF
 sun4u_init:
 	BRANCH_IF_SUN4V(g1, sun4v_init)
 
@@ -641,28 +641,6 @@ tlb_fixup_done:
 	/* Not reached... */
 
 1:
-	/* If we boot on a non-zero cpu, all of the per-cpu
-	 * variable references we make before setting up the
-	 * per-cpu areas will use a bogus offset.  Put a
-	 * compensating factor into __per_cpu_base to handle
-	 * this cleanly.
-	 *
-	 * What the per-cpu code calculates is:
-	 *
-	 *	__per_cpu_base + (cpu << __per_cpu_shift)
-	 *
-	 * These two variables are zero initially, so to
-	 * make it all cancel out to zero we need to put
-	 * "0 - (cpu << 0)" into __per_cpu_base so that the
-	 * above formula evaluates to zero.
-	 *
-	 * We cannot even perform a printk() until this stuff
-	 * is setup as that calls cpu_clock() which uses
-	 * per-cpu variables.
-	 */
-	sub	%g0, %o0, %o1
-	sethi	%hi(__per_cpu_base), %o2
-	stx	%o1, [%o2 + %lo(__per_cpu_base)]
 #else
 	mov	0, %o0
 #endif
diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
index 5deabe921a47..e5e78f9cfc95 100644
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@ -318,10 +318,12 @@ static void sun4u_irq_enable(unsigned int virt_irq)
 	}
 }
 
-static void sun4u_set_affinity(unsigned int virt_irq,
+static int sun4u_set_affinity(unsigned int virt_irq,
 			       const struct cpumask *mask)
 {
 	sun4u_irq_enable(virt_irq);
+
+	return 0;
 }
 
 /* Don't do anything.  The desc->status check for IRQ_DISABLED in
@@ -377,7 +379,7 @@ static void sun4v_irq_enable(unsigned int virt_irq)
 		       ino, err);
 }
 
-static void sun4v_set_affinity(unsigned int virt_irq,
+static int sun4v_set_affinity(unsigned int virt_irq,
 			       const struct cpumask *mask)
 {
 	unsigned int ino = virt_irq_table[virt_irq].dev_ino;
@@ -388,6 +390,8 @@ static void sun4v_set_affinity(unsigned int virt_irq,
 	if (err != HV_EOK)
 		printk(KERN_ERR "sun4v_intr_settarget(%x,%lu): "
 		       "err(%d)\n", ino, cpuid, err);
+
+	return 0;
 }
 
 static void sun4v_irq_disable(unsigned int virt_irq)
@@ -445,7 +449,7 @@ static void sun4v_virq_enable(unsigned int virt_irq)
 		       dev_handle, dev_ino, err);
 }
 
-static void sun4v_virt_set_affinity(unsigned int virt_irq,
+static int sun4v_virt_set_affinity(unsigned int virt_irq,
 				    const struct cpumask *mask)
 {
 	unsigned long cpuid, dev_handle, dev_ino;
@@ -461,6 +465,8 @@ static void sun4v_virt_set_affinity(unsigned int virt_irq,
 		printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): "
 		       "err(%d)\n",
 		       dev_handle, dev_ino, cpuid, err);
+
+	return 0;
 }
 
 static void sun4v_virq_disable(unsigned int virt_irq)
diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c
index f0e6ed23a468..938da19dc065 100644
--- a/arch/sparc/kernel/mdesc.c
+++ b/arch/sparc/kernel/mdesc.c
@@ -574,7 +574,7 @@ static void __init report_platform_properties(void)
 	mdesc_release(hp);
 }
 
-static void __devinit fill_in_one_cache(cpuinfo_sparc *c,
+static void __cpuinit fill_in_one_cache(cpuinfo_sparc *c,
 					struct mdesc_handle *hp,
 					u64 mp)
 {
@@ -619,8 +619,7 @@ static void __devinit fill_in_one_cache(cpuinfo_sparc *c,
 	}
 }
 
-static void __devinit mark_core_ids(struct mdesc_handle *hp, u64 mp,
-				    int core_id)
+static void __cpuinit mark_core_ids(struct mdesc_handle *hp, u64 mp, int core_id)
 {
 	u64 a;
 
@@ -653,7 +652,7 @@ static void __devinit mark_core_ids(struct mdesc_handle *hp, u64 mp,
 	}
 }
 
-static void __devinit set_core_ids(struct mdesc_handle *hp)
+static void __cpuinit set_core_ids(struct mdesc_handle *hp)
 {
 	int idx;
 	u64 mp;
@@ -678,8 +677,7 @@ static void __devinit set_core_ids(struct mdesc_handle *hp)
 	}
 }
 
-static void __devinit mark_proc_ids(struct mdesc_handle *hp, u64 mp,
-				    int proc_id)
+static void __cpuinit mark_proc_ids(struct mdesc_handle *hp, u64 mp, int proc_id)
 {
 	u64 a;
 
@@ -698,8 +696,7 @@ static void __devinit mark_proc_ids(struct mdesc_handle *hp, u64 mp,
 	}
 }
 
-static void __devinit __set_proc_ids(struct mdesc_handle *hp,
-				     const char *exec_unit_name)
+static void __cpuinit __set_proc_ids(struct mdesc_handle *hp, const char *exec_unit_name)
 {
 	int idx;
 	u64 mp;
@@ -720,13 +717,13 @@ static void __devinit __set_proc_ids(struct mdesc_handle *hp,
 	}
 }
 
-static void __devinit set_proc_ids(struct mdesc_handle *hp)
+static void __cpuinit set_proc_ids(struct mdesc_handle *hp)
 {
 	__set_proc_ids(hp, "exec_unit");
 	__set_proc_ids(hp, "exec-unit");
 }
 
-static void __devinit get_one_mondo_bits(const u64 *p, unsigned int *mask,
+static void __cpuinit get_one_mondo_bits(const u64 *p, unsigned int *mask,
 					 unsigned char def)
 {
 	u64 val;
@@ -745,7 +742,7 @@ use_default:
 	*mask = ((1U << def) * 64U) - 1U;
 }
 
-static void __devinit get_mondo_data(struct mdesc_handle *hp, u64 mp,
+static void __cpuinit get_mondo_data(struct mdesc_handle *hp, u64 mp,
 				     struct trap_per_cpu *tb)
 {
 	const u64 *val;
@@ -763,23 +760,15 @@ static void __devinit get_mondo_data(struct mdesc_handle *hp, u64 mp,
 	get_one_mondo_bits(val, &tb->nonresum_qmask, 2);
 }
 
-void __cpuinit mdesc_fill_in_cpu_data(cpumask_t mask)
+static void * __cpuinit mdesc_iterate_over_cpus(void *(*func)(struct mdesc_handle *, u64, int, void *), void *arg, cpumask_t *mask)
 {
 	struct mdesc_handle *hp = mdesc_grab();
+	void *ret = NULL;
 	u64 mp;
 
-	ncpus_probed = 0;
 	mdesc_for_each_node_by_name(hp, mp, "cpu") {
 		const u64 *id = mdesc_get_property(hp, mp, "id", NULL);
-		const u64 *cfreq = mdesc_get_property(hp, mp, "clock-frequency", NULL);
-		struct trap_per_cpu *tb;
-		cpuinfo_sparc *c;
-		int cpuid;
-		u64 a;
-
-		ncpus_probed++;
-
-		cpuid = *id;
+		int cpuid = *id;
 
 #ifdef CONFIG_SMP
 		if (cpuid >= NR_CPUS) {
@@ -788,62 +777,104 @@ void __cpuinit mdesc_fill_in_cpu_data(cpumask_t mask)
 			       cpuid, NR_CPUS);
 			continue;
 		}
-		if (!cpu_isset(cpuid, mask))
+		if (!cpu_isset(cpuid, *mask))
 			continue;
-#else
-		/* On uniprocessor we only want the values for the
-		 * real physical cpu the kernel booted onto, however
-		 * cpu_data() only has one entry at index 0.
-		 */
-		if (cpuid != real_hard_smp_processor_id())
-			continue;
-		cpuid = 0;
 #endif
 
-		c = &cpu_data(cpuid);
-		c->clock_tick = *cfreq;
+		ret = func(hp, mp, cpuid, arg);
+		if (ret)
+			goto out;
+	}
+out:
+	mdesc_release(hp);
+	return ret;
+}
 
-		tb = &trap_block[cpuid];
-		get_mondo_data(hp, mp, tb);
+static void * __cpuinit record_one_cpu(struct mdesc_handle *hp, u64 mp, int cpuid, void *arg)
+{
+	ncpus_probed++;
+#ifdef CONFIG_SMP
+	set_cpu_present(cpuid, true);
+#endif
+	return NULL;
+}
 
-		mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_FWD) {
-			u64 j, t = mdesc_arc_target(hp, a);
-			const char *t_name;
+void __cpuinit mdesc_populate_present_mask(cpumask_t *mask)
+{
+	if (tlb_type != hypervisor)
+		return;
 
-			t_name = mdesc_node_name(hp, t);
-			if (!strcmp(t_name, "cache")) {
-				fill_in_one_cache(c, hp, t);
-				continue;
-			}
+	ncpus_probed = 0;
+	mdesc_iterate_over_cpus(record_one_cpu, NULL, mask);
+}
 
-			mdesc_for_each_arc(j, hp, t, MDESC_ARC_TYPE_FWD) {
-				u64 n = mdesc_arc_target(hp, j);
-				const char *n_name;
+static void * __cpuinit fill_in_one_cpu(struct mdesc_handle *hp, u64 mp, int cpuid, void *arg)
+{
+	const u64 *cfreq = mdesc_get_property(hp, mp, "clock-frequency", NULL);
+	struct trap_per_cpu *tb;
+	cpuinfo_sparc *c;
+	u64 a;
 
-				n_name = mdesc_node_name(hp, n);
-				if (!strcmp(n_name, "cache"))
-					fill_in_one_cache(c, hp, n);
-			}
+#ifndef CONFIG_SMP
+	/* On uniprocessor we only want the values for the
+	 * real physical cpu the kernel booted onto, however
+	 * cpu_data() only has one entry at index 0.
+	 */
+	if (cpuid != real_hard_smp_processor_id())
+		return NULL;
+	cpuid = 0;
+#endif
+
+	c = &cpu_data(cpuid);
+	c->clock_tick = *cfreq;
+
+	tb = &trap_block[cpuid];
+	get_mondo_data(hp, mp, tb);
+
+	mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_FWD) {
+		u64 j, t = mdesc_arc_target(hp, a);
+		const char *t_name;
+
+		t_name = mdesc_node_name(hp, t);
+		if (!strcmp(t_name, "cache")) {
+			fill_in_one_cache(c, hp, t);
+			continue;
 		}
 
-#ifdef CONFIG_SMP
-		cpu_set(cpuid, cpu_present_map);
-#endif
+		mdesc_for_each_arc(j, hp, t, MDESC_ARC_TYPE_FWD) {
+			u64 n = mdesc_arc_target(hp, j);
+			const char *n_name;
 
-		c->core_id = 0;
-		c->proc_id = -1;
+			n_name = mdesc_node_name(hp, n);
+			if (!strcmp(n_name, "cache"))
+				fill_in_one_cache(c, hp, n);
+		}
 	}
 
+	c->core_id = 0;
+	c->proc_id = -1;
+
+	return NULL;
+}
+
+void __cpuinit mdesc_fill_in_cpu_data(cpumask_t *mask)
+{
+	struct mdesc_handle *hp;
+
+	mdesc_iterate_over_cpus(fill_in_one_cpu, NULL, mask);
+
 #ifdef CONFIG_SMP
 	sparc64_multi_core = 1;
 #endif
 
+	hp = mdesc_grab();
+
 	set_core_ids(hp);
 	set_proc_ids(hp);
 
-	smp_fill_in_sib_core_maps();
-
 	mdesc_release(hp);
+
+	smp_fill_in_sib_core_maps();
 }
 
 static ssize_t mdesc_read(struct file *file, char __user *buf,
@@ -887,7 +918,6 @@ void __init sun4v_mdesc_init(void)
 {
 	struct mdesc_handle *hp;
 	unsigned long len, real_len, status;
-	cpumask_t mask;
 
 	(void) sun4v_mach_desc(0UL, 0UL, &len);
 
@@ -911,7 +941,4 @@ void __init sun4v_mdesc_init(void)
 	cur_mdesc = hp;
 
 	report_platform_properties();
-
-	cpus_setall(mask);
-	mdesc_fill_in_cpu_data(mask);
 }
diff --git a/arch/sparc/kernel/of_device_32.c b/arch/sparc/kernel/of_device_32.c
index 0a83bd737654..c8f14c1dc521 100644
--- a/arch/sparc/kernel/of_device_32.c
+++ b/arch/sparc/kernel/of_device_32.c
@@ -246,8 +246,25 @@ static unsigned long of_bus_pci_get_flags(const u32 *addr, unsigned long flags)
 
 static int of_bus_sbus_match(struct device_node *np)
 {
-	return !strcmp(np->name, "sbus") ||
-		!strcmp(np->name, "sbi");
+	struct device_node *cur;
+
+	/* Have a look at use_1to1_mapping().  SBUS should match when
+	 * it is the top-level bus above @np, i.e. no intervening real
+	 * bus provides ranges based translations.  So climb towards
+	 * the root and give up at the first non-SBUS node that has a
+	 * "ranges" property.
+	 */
+	for (cur = np; cur; cur = cur->parent) {
+		if (!strcmp(cur->name, "sbus"))
+			return 1;
+		if (!strcmp(cur->name, "sbi"))
+			return 1;
+
+		if (of_find_property(cur, "ranges", NULL))
+			return 0;
+	}
+
+	return 0;
 }
 
 static void of_bus_sbus_count_cells(struct device_node *child,
diff --git a/arch/sparc/kernel/of_device_64.c b/arch/sparc/kernel/of_device_64.c
index 27381f1baffc..5ac287ac03de 100644
--- a/arch/sparc/kernel/of_device_64.c
+++ b/arch/sparc/kernel/of_device_64.c
@@ -300,8 +300,25 @@ static unsigned long of_bus_pci_get_flags(const u32 *addr, unsigned long flags)
 
 static int of_bus_sbus_match(struct device_node *np)
 {
-	return !strcmp(np->name, "sbus") ||
-		!strcmp(np->name, "sbi");
+	struct device_node *cur;
+
+	/* Have a look at use_1to1_mapping().  SBUS should match when
+	 * it is the top-level bus above @np, i.e. no intervening real
+	 * bus provides ranges based translations.  So climb towards
+	 * the root and give up at the first non-SBUS node that has a
+	 * "ranges" property.
+	 */
+	for (cur = np; cur; cur = cur->parent) {
+		if (!strcmp(cur->name, "sbus"))
+			return 1;
+		if (!strcmp(cur->name, "sbi"))
+			return 1;
+
+		if (of_find_property(cur, "ranges", NULL))
+			return 0;
+	}
+
+	return 0;
 }
 
 static void of_bus_sbus_count_cells(struct device_node *child,
diff --git a/arch/sparc/kernel/prom.h b/arch/sparc/kernel/prom.h
index bb0f0fda6cab..453397fe5e14 100644
--- a/arch/sparc/kernel/prom.h
+++ b/arch/sparc/kernel/prom.h
@@ -22,7 +22,6 @@ static inline int is_root_node(const struct device_node *dp)
 
 extern char *build_path_component(struct device_node *dp);
 extern void of_console_init(void);
-extern void of_fill_in_cpu_data(void);
 
 extern unsigned int prom_early_allocated;
 
diff --git a/arch/sparc/kernel/prom_64.c b/arch/sparc/kernel/prom_64.c
index ca55c7012f77..f65d62385cb4 100644
--- a/arch/sparc/kernel/prom_64.c
+++ b/arch/sparc/kernel/prom_64.c
@@ -374,75 +374,26 @@ static const char *get_mid_prop(void)
 	return (tlb_type == spitfire ? "upa-portid" : "portid");
 }
 
-struct device_node *of_find_node_by_cpuid(int cpuid)
-{
-	struct device_node *dp;
-	const char *mid_prop = get_mid_prop();
-
-	for_each_node_by_type(dp, "cpu") {
-		int id = of_getintprop_default(dp, mid_prop, -1);
-		const char *this_mid_prop = mid_prop;
-
-		if (id < 0) {
-			this_mid_prop = "cpuid";
-			id = of_getintprop_default(dp, this_mid_prop, -1);
-		}
-
-		if (id < 0) {
-			prom_printf("OF: Serious problem, cpu lacks "
-				    "%s property", this_mid_prop);
-			prom_halt();
-		}
-		if (cpuid == id)
-			return dp;
-	}
-	return NULL;
-}
-
-void __init of_fill_in_cpu_data(void)
+static void *of_iterate_over_cpus(void *(*func)(struct device_node *, int, void *), void *arg)
 {
 	struct device_node *dp;
 	const char *mid_prop;
 
-	if (tlb_type == hypervisor)
-		return;
-
 	mid_prop = get_mid_prop();
-	ncpus_probed = 0;
 	for_each_node_by_type(dp, "cpu") {
 		int cpuid = of_getintprop_default(dp, mid_prop, -1);
 		const char *this_mid_prop = mid_prop;
-		struct device_node *portid_parent;
-		int portid = -1;
+		void *ret;
 
-		portid_parent = NULL;
 		if (cpuid < 0) {
 			this_mid_prop = "cpuid";
 			cpuid = of_getintprop_default(dp, this_mid_prop, -1);
-			if (cpuid >= 0) {
-				int limit = 2;
-
-				portid_parent = dp;
-				while (limit--) {
-					portid_parent = portid_parent->parent;
-					if (!portid_parent)
-						break;
-					portid = of_getintprop_default(portid_parent,
-								       "portid", -1);
-					if (portid >= 0)
-						break;
-				}
-			}
 		}
-
 		if (cpuid < 0) {
 			prom_printf("OF: Serious problem, cpu lacks "
 				    "%s property", this_mid_prop);
 			prom_halt();
 		}
-
-		ncpus_probed++;
-
 #ifdef CONFIG_SMP
 		if (cpuid >= NR_CPUS) {
 			printk(KERN_WARNING "Ignoring CPU %d which is "
@@ -450,79 +401,144 @@ void __init of_fill_in_cpu_data(void)
 			       cpuid, NR_CPUS);
 			continue;
 		}
-#else
-		/* On uniprocessor we only want the values for the
-		 * real physical cpu the kernel booted onto, however
-		 * cpu_data() only has one entry at index 0.
-		 */
-		if (cpuid != real_hard_smp_processor_id())
-			continue;
-		cpuid = 0;
 #endif
+		ret = func(dp, cpuid, arg);
+		if (ret)
+			return ret;
+	}
+	return NULL;
+}
 
-		cpu_data(cpuid).clock_tick =
-			of_getintprop_default(dp, "clock-frequency", 0);
-
-		if (portid_parent) {
-			cpu_data(cpuid).dcache_size =
-				of_getintprop_default(dp, "l1-dcache-size",
-						      16 * 1024);
-			cpu_data(cpuid).dcache_line_size =
-				of_getintprop_default(dp, "l1-dcache-line-size",
-						      32);
-			cpu_data(cpuid).icache_size =
-				of_getintprop_default(dp, "l1-icache-size",
-						      8 * 1024);
-			cpu_data(cpuid).icache_line_size =
-				of_getintprop_default(dp, "l1-icache-line-size",
-						      32);
-			cpu_data(cpuid).ecache_size =
-				of_getintprop_default(dp, "l2-cache-size", 0);
-			cpu_data(cpuid).ecache_line_size =
-				of_getintprop_default(dp, "l2-cache-line-size", 0);
-			if (!cpu_data(cpuid).ecache_size ||
-			    !cpu_data(cpuid).ecache_line_size) {
-				cpu_data(cpuid).ecache_size =
-					of_getintprop_default(portid_parent,
-							      "l2-cache-size",
-							      (4 * 1024 * 1024));
-				cpu_data(cpuid).ecache_line_size =
-					of_getintprop_default(portid_parent,
-							      "l2-cache-line-size", 64);
-			}
-
-			cpu_data(cpuid).core_id = portid + 1;
-			cpu_data(cpuid).proc_id = portid;
+/* Iterator callback: yield @dp when its cpuid equals the id packed in @arg. */
+static void *check_cpu_node(struct device_node *dp, int cpuid, void *arg)
+{
+	int wanted = (int) (long) arg;
+
+	/* A non-NULL return stops of_iterate_over_cpus() and is its result. */
+	return (wanted == cpuid) ? dp : NULL;
+}
+
+struct device_node *of_find_node_by_cpuid(int cpuid)
+{
+	return of_iterate_over_cpus(check_cpu_node, (void *) (long) cpuid);
+}
+
+static void *record_one_cpu(struct device_node *dp, int cpuid, void *arg)
+{
+	ncpus_probed++;
 #ifdef CONFIG_SMP
-			sparc64_multi_core = 1;
+	set_cpu_present(cpuid, true);
+	set_cpu_possible(cpuid, true);
 #endif
-		} else {
-			cpu_data(cpuid).dcache_size =
-				of_getintprop_default(dp, "dcache-size", 16 * 1024);
-			cpu_data(cpuid).dcache_line_size =
-				of_getintprop_default(dp, "dcache-line-size", 32);
+	return NULL;
+}
+
+/* Recount the OF "cpu" nodes via record_one_cpu(); no-op on hypervisor. */
+void __init of_populate_present_mask(void)
+{
+	if (tlb_type != hypervisor) {
+		ncpus_probed = 0;
+		of_iterate_over_cpus(record_one_cpu, NULL);
+	}
+}
 
-			cpu_data(cpuid).icache_size =
-				of_getintprop_default(dp, "icache-size", 16 * 1024);
-			cpu_data(cpuid).icache_line_size =
-				of_getintprop_default(dp, "icache-line-size", 32);
+static void *fill_in_one_cpu(struct device_node *dp, int cpuid, void *arg)
+{
+	struct device_node *portid_parent = NULL;
+	int portid = -1;
+
+	if (of_find_property(dp, "cpuid", NULL)) {
+		int limit = 2;
+
+		portid_parent = dp;
+		while (limit--) {
+			portid_parent = portid_parent->parent;
+			if (!portid_parent)
+				break;
+			portid = of_getintprop_default(portid_parent,
+						       "portid", -1);
+			if (portid >= 0)
+				break;
+		}
+	}
 
+#ifndef CONFIG_SMP
+	/* On uniprocessor we only want the values for the
+	 * real physical cpu the kernel booted onto, however
+	 * cpu_data() only has one entry at index 0.
+	 */
+	if (cpuid != real_hard_smp_processor_id())
+		return NULL;
+	cpuid = 0;
+#endif
+
+	cpu_data(cpuid).clock_tick =
+		of_getintprop_default(dp, "clock-frequency", 0);
+
+	if (portid_parent) {
+		cpu_data(cpuid).dcache_size =
+			of_getintprop_default(dp, "l1-dcache-size",
+					      16 * 1024);
+		cpu_data(cpuid).dcache_line_size =
+			of_getintprop_default(dp, "l1-dcache-line-size",
+					      32);
+		cpu_data(cpuid).icache_size =
+			of_getintprop_default(dp, "l1-icache-size",
+					      8 * 1024);
+		cpu_data(cpuid).icache_line_size =
+			of_getintprop_default(dp, "l1-icache-line-size",
+					      32);
+		cpu_data(cpuid).ecache_size =
+			of_getintprop_default(dp, "l2-cache-size", 0);
+		cpu_data(cpuid).ecache_line_size =
+			of_getintprop_default(dp, "l2-cache-line-size", 0);
+		if (!cpu_data(cpuid).ecache_size ||
+		    !cpu_data(cpuid).ecache_line_size) {
 			cpu_data(cpuid).ecache_size =
-				of_getintprop_default(dp, "ecache-size",
+				of_getintprop_default(portid_parent,
+						      "l2-cache-size",
 						      (4 * 1024 * 1024));
 			cpu_data(cpuid).ecache_line_size =
-				of_getintprop_default(dp, "ecache-line-size", 64);
-
-			cpu_data(cpuid).core_id = 0;
-			cpu_data(cpuid).proc_id = -1;
+				of_getintprop_default(portid_parent,
+						      "l2-cache-line-size", 64);
 		}
 
+		cpu_data(cpuid).core_id = portid + 1;
+		cpu_data(cpuid).proc_id = portid;
 #ifdef CONFIG_SMP
-		set_cpu_present(cpuid, true);
-		set_cpu_possible(cpuid, true);
+		sparc64_multi_core = 1;
 #endif
+	} else {
+		cpu_data(cpuid).dcache_size =
+			of_getintprop_default(dp, "dcache-size", 16 * 1024);
+		cpu_data(cpuid).dcache_line_size =
+			of_getintprop_default(dp, "dcache-line-size", 32);
+
+		cpu_data(cpuid).icache_size =
+			of_getintprop_default(dp, "icache-size", 16 * 1024);
+		cpu_data(cpuid).icache_line_size =
+			of_getintprop_default(dp, "icache-line-size", 32);
+
+		cpu_data(cpuid).ecache_size =
+			of_getintprop_default(dp, "ecache-size",
+					      (4 * 1024 * 1024));
+		cpu_data(cpuid).ecache_line_size =
+			of_getintprop_default(dp, "ecache-line-size", 64);
+
+		cpu_data(cpuid).core_id = 0;
+		cpu_data(cpuid).proc_id = -1;
 	}
 
+	return NULL;
+}
+
+void __init of_fill_in_cpu_data(void)
+{
+	/* Hypervisor systems are covered by mdesc_fill_in_cpu_data(). */
+	if (tlb_type == hypervisor)
+		return;
+
+	of_iterate_over_cpus(fill_in_one_cpu, NULL);
 	smp_fill_in_sib_core_maps();
 }
 
diff --git a/arch/sparc/kernel/prom_common.c b/arch/sparc/kernel/prom_common.c
index ff7b591c8946..0fb5789d43c8 100644
--- a/arch/sparc/kernel/prom_common.c
+++ b/arch/sparc/kernel/prom_common.c
@@ -313,6 +313,4 @@ void __init prom_build_devicetree(void)
 
 	printk("PROM: Built device tree with %u bytes of memory.\n",
 	       prom_early_allocated);
-
-	of_fill_in_cpu_data();
 }
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index f7642e5a94db..1de47d2169c8 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -20,7 +20,8 @@
 #include <linux/cache.h>
 #include <linux/jiffies.h>
 #include <linux/profile.h>
-#include <linux/lmb.h>
+#include <linux/bootmem.h>
+#include <linux/vmalloc.h>
 #include <linux/cpu.h>
 
 #include <asm/head.h>
@@ -278,7 +279,7 @@ static unsigned long kimage_addr_to_ra(void *p)
 	return kern_base + (val - KERNBASE);
 }
 
-static void __cpuinit ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg)
+static void __cpuinit ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg, void **descrp)
 {
 	extern unsigned long sparc64_ttable_tl0;
 	extern unsigned long kern_locked_tte_data;
@@ -298,12 +299,12 @@ static void __cpuinit ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread
 		       "hvtramp_descr.\n");
 		return;
 	}
+	*descrp = hdesc;
 
 	hdesc->cpu = cpu;
 	hdesc->num_mappings = num_kernel_image_mappings;
 
 	tb = &trap_block[cpu];
-	tb->hdesc = hdesc;
 
 	hdesc->fault_info_va = (unsigned long) &tb->fault_info;
 	hdesc->fault_info_pa = kimage_addr_to_ra(&tb->fault_info);
@@ -341,12 +342,12 @@ static struct thread_info *cpu_new_thread = NULL;
 
 static int __cpuinit smp_boot_one_cpu(unsigned int cpu)
 {
-	struct trap_per_cpu *tb = &trap_block[cpu];
 	unsigned long entry =
 		(unsigned long)(&sparc64_cpu_startup);
 	unsigned long cookie =
 		(unsigned long)(&cpu_new_thread);
 	struct task_struct *p;
+	void *descr = NULL;
 	int timeout, ret;
 
 	p = fork_idle(cpu);
@@ -359,7 +360,8 @@ static int __cpuinit smp_boot_one_cpu(unsigned int cpu)
 #if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU)
 		if (ldom_domaining_enabled)
 			ldom_startcpu_cpuid(cpu,
-					    (unsigned long) cpu_new_thread);
+					    (unsigned long) cpu_new_thread,
+					    &descr);
 		else
 #endif
 			prom_startcpu_cpuid(cpu, entry, cookie);
@@ -383,10 +385,7 @@ static int __cpuinit smp_boot_one_cpu(unsigned int cpu)
 	}
 	cpu_new_thread = NULL;
 
-	if (tb->hdesc) {
-		kfree(tb->hdesc);
-		tb->hdesc = NULL;
-	}
+	kfree(descr);
 
 	return ret;
 }
@@ -1373,36 +1372,171 @@ void smp_send_stop(void)
 {
 }
 
-unsigned long __per_cpu_base __read_mostly;
-unsigned long __per_cpu_shift __read_mostly;
+/**
+ * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
+ * @cpu: cpu to allocate for
+ * @size: size of the allocation in bytes
+ * @align: alignment
+ *
+ * Allocate @size bytes aligned at @align for cpu @cpu.  With multiple
+ * nodes the cpu's own node is used if it is online and has node data;
+ * otherwise a node-agnostic __alloc_bootmem() is done.
+ *
+ * RETURNS:
+ * Pointer to the allocated area on success, NULL on failure.
+ */
+static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
+					unsigned long align)
+{
+	const unsigned long goal = __pa(MAX_DMA_ADDRESS);
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+	int node = cpu_to_node(cpu);
+	void *ptr;
+
+	if (!node_online(node) || !NODE_DATA(node)) {
+		ptr = __alloc_bootmem(size, align, goal);
+		pr_info("cpu %d has no node %d or node-local memory\n",
+			cpu, node);
+		pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
+			 cpu, size, __pa(ptr));
+	} else {
+		ptr = __alloc_bootmem_node(NODE_DATA(node),
+					   size, align, goal);
+		pr_debug("per cpu data for cpu%d %lu bytes on node%d at "
+			 "%016lx\n", cpu, size, node, __pa(ptr));
+	}
+	return ptr;
+#else
+	return __alloc_bootmem(size, align, goal);
+#endif
+}
+
+static size_t pcpur_size __initdata;
+static void **pcpur_ptrs __initdata;
 
-EXPORT_SYMBOL(__per_cpu_base);
-EXPORT_SYMBOL(__per_cpu_shift);
+static struct page * __init pcpur_get_page(unsigned int cpu, int pageno)
+{
+	size_t byte_off = (size_t)pageno << PAGE_SHIFT;
+
+	/* Only the first pcpur_size bytes of each cpu's area are kept. */
+	if (byte_off < pcpur_size)
+		return virt_to_page(pcpur_ptrs[cpu] + byte_off);
+	return NULL;
+}
+
+#define PCPU_CHUNK_SIZE (4UL * 1024UL * 1024UL)
 
-void __init real_setup_per_cpu_areas(void)
+static void __init pcpu_map_range(unsigned long start, unsigned long end,
+				  struct page *page)
 {
-	unsigned long paddr, goal, size, i;
-	char *ptr;
+	unsigned long pfn = page_to_pfn(page);
+	unsigned long pte_base;
+
+	BUG_ON((pfn<<PAGE_SHIFT)&(PCPU_CHUNK_SIZE - 1UL));
 
-	/* Copy section for each CPU (we discard the original) */
-	goal = PERCPU_ENOUGH_ROOM;
+	pte_base = (_PAGE_VALID | _PAGE_SZ4MB_4U |
+		    _PAGE_CP_4U | _PAGE_CV_4U |
+		    _PAGE_P_4U | _PAGE_W_4U);
+	if (tlb_type == hypervisor)
+		pte_base = (_PAGE_VALID | _PAGE_SZ4MB_4V |
+			    _PAGE_CP_4V | _PAGE_CV_4V |
+			    _PAGE_P_4V | _PAGE_W_4V);
+
+	while (start < end) {
+		pgd_t *pgd = pgd_offset_k(start);
+		unsigned long this_end;
+		pud_t *pud;
+		pmd_t *pmd;
+		pte_t *pte;
+
+		pud = pud_offset(pgd, start);
+		if (pud_none(*pud)) {
+			pmd_t *new;
+
+			new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+			pud_populate(&init_mm, pud, new);
+		}
+
+		pmd = pmd_offset(pud, start);
+		if (!pmd_present(*pmd)) {
+			pte_t *new;
+
+			new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+			pmd_populate_kernel(&init_mm, pmd, new);
+		}
 
-	__per_cpu_shift = PAGE_SHIFT;
-	for (size = PAGE_SIZE; size < goal; size <<= 1UL)
-		__per_cpu_shift++;
+		pte = pte_offset_kernel(pmd, start);
+		this_end = (start + PMD_SIZE) & PMD_MASK;
+		if (this_end > end)
+			this_end = end;
 
-	paddr = lmb_alloc(size * NR_CPUS, PAGE_SIZE);
-	if (!paddr) {
-		prom_printf("Cannot allocate per-cpu memory.\n");
-		prom_halt();
+		while (start < this_end) {
+			unsigned long paddr = pfn << PAGE_SHIFT;
+
+			pte_val(*pte) = (paddr | pte_base);
+
+			start += PAGE_SIZE;
+			pte++;
+			pfn++;
+		}
 	}
+}
+
+void __init setup_per_cpu_areas(void)
+{
+	size_t dyn_size, static_size = __per_cpu_end - __per_cpu_start;
+	static struct vm_struct vm;
+	unsigned long delta, cpu;
+	size_t pcpu_unit_size;
+	size_t ptrs_size;
 
-	ptr = __va(paddr);
-	__per_cpu_base = ptr - __per_cpu_start;
+	pcpur_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE +
+			       PERCPU_DYNAMIC_RESERVE);
+	dyn_size = pcpur_size - static_size - PERCPU_MODULE_RESERVE;
 
-	for (i = 0; i < NR_CPUS; i++, ptr += size)
-		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
+
+	ptrs_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpur_ptrs[0]));
+	pcpur_ptrs = alloc_bootmem(ptrs_size);
+
+	for_each_possible_cpu(cpu) {
+		pcpur_ptrs[cpu] = pcpu_alloc_bootmem(cpu, PCPU_CHUNK_SIZE,
+						     PCPU_CHUNK_SIZE);
+
+		free_bootmem(__pa(pcpur_ptrs[cpu] + pcpur_size),
+			     PCPU_CHUNK_SIZE - pcpur_size);
+
+		memcpy(pcpur_ptrs[cpu], __per_cpu_load, static_size);
+	}
+
+	/* allocate address and map */
+	vm.flags = VM_ALLOC;
+	vm.size = num_possible_cpus() * PCPU_CHUNK_SIZE;
+	vm_area_register_early(&vm, PCPU_CHUNK_SIZE);
+
+	for_each_possible_cpu(cpu) {
+		unsigned long start = (unsigned long) vm.addr;
+		unsigned long end;
+
+		start += cpu * PCPU_CHUNK_SIZE;
+		end = start + PCPU_CHUNK_SIZE;
+		pcpu_map_range(start, end, virt_to_page(pcpur_ptrs[cpu]));
+	}
+
+	pcpu_unit_size = pcpu_setup_first_chunk(pcpur_get_page, static_size,
+						PERCPU_MODULE_RESERVE, dyn_size,
+						PCPU_CHUNK_SIZE, vm.addr, NULL);
+
+	free_bootmem(__pa(pcpur_ptrs), ptrs_size);
+
+	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
+	for_each_possible_cpu(cpu) {
+		__per_cpu_offset(cpu) = delta + cpu * pcpu_unit_size;
+	}
 
 	/* Setup %g5 for the boot cpu.  */
 	__local_per_cpu_offset = __per_cpu_offset(smp_processor_id());
+
+	of_fill_in_cpu_data();
+	if (tlb_type == hypervisor)
+		mdesc_fill_in_cpu_data(CPU_MASK_ALL_PTR);
 }
diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c
index d809c4ebb48f..10f7bb9fc140 100644
--- a/arch/sparc/kernel/traps_64.c
+++ b/arch/sparc/kernel/traps_64.c
@@ -2509,6 +2509,7 @@ void do_getpsr(struct pt_regs *regs)
 }
 
 struct trap_per_cpu trap_block[NR_CPUS];
+EXPORT_SYMBOL(trap_block);
 
 /* This can get invoked before sched_init() so play it super safe
  * and use hard_smp_processor_id().
@@ -2530,84 +2531,97 @@ extern void tsb_config_offsets_are_bolixed_dave(void);
 void __init trap_init(void)
 {
 	/* Compile time sanity check. */
-	if (TI_TASK != offsetof(struct thread_info, task) ||
-	    TI_FLAGS != offsetof(struct thread_info, flags) ||
-	    TI_CPU != offsetof(struct thread_info, cpu) ||
-	    TI_FPSAVED != offsetof(struct thread_info, fpsaved) ||
-	    TI_KSP != offsetof(struct thread_info, ksp) ||
-	    TI_FAULT_ADDR != offsetof(struct thread_info, fault_address) ||
-	    TI_KREGS != offsetof(struct thread_info, kregs) ||
-	    TI_UTRAPS != offsetof(struct thread_info, utraps) ||
-	    TI_EXEC_DOMAIN != offsetof(struct thread_info, exec_domain) ||
-	    TI_REG_WINDOW != offsetof(struct thread_info, reg_window) ||
-	    TI_RWIN_SPTRS != offsetof(struct thread_info, rwbuf_stkptrs) ||
-	    TI_GSR != offsetof(struct thread_info, gsr) ||
-	    TI_XFSR != offsetof(struct thread_info, xfsr) ||
-	    TI_USER_CNTD0 != offsetof(struct thread_info, user_cntd0) ||
-	    TI_USER_CNTD1 != offsetof(struct thread_info, user_cntd1) ||
-	    TI_KERN_CNTD0 != offsetof(struct thread_info, kernel_cntd0) ||
-	    TI_KERN_CNTD1 != offsetof(struct thread_info, kernel_cntd1) ||
-	    TI_PCR != offsetof(struct thread_info, pcr_reg) ||
-	    TI_PRE_COUNT != offsetof(struct thread_info, preempt_count) ||
-	    TI_NEW_CHILD != offsetof(struct thread_info, new_child) ||
-	    TI_SYS_NOERROR != offsetof(struct thread_info, syscall_noerror) ||
-	    TI_RESTART_BLOCK != offsetof(struct thread_info, restart_block) ||
-	    TI_KUNA_REGS != offsetof(struct thread_info, kern_una_regs) ||
-	    TI_KUNA_INSN != offsetof(struct thread_info, kern_una_insn) ||
-	    TI_FPREGS != offsetof(struct thread_info, fpregs) ||
-	    (TI_FPREGS & (64 - 1)))
-		thread_info_offsets_are_bolixed_dave();
-
-	if (TRAP_PER_CPU_THREAD != offsetof(struct trap_per_cpu, thread) ||
-	    (TRAP_PER_CPU_PGD_PADDR !=
-	     offsetof(struct trap_per_cpu, pgd_paddr)) ||
-	    (TRAP_PER_CPU_CPU_MONDO_PA !=
-	     offsetof(struct trap_per_cpu, cpu_mondo_pa)) ||
-	    (TRAP_PER_CPU_DEV_MONDO_PA !=
-	     offsetof(struct trap_per_cpu, dev_mondo_pa)) ||
-	    (TRAP_PER_CPU_RESUM_MONDO_PA !=
-	     offsetof(struct trap_per_cpu, resum_mondo_pa)) ||
-	    (TRAP_PER_CPU_RESUM_KBUF_PA !=
-	     offsetof(struct trap_per_cpu, resum_kernel_buf_pa)) ||
-	    (TRAP_PER_CPU_NONRESUM_MONDO_PA !=
-	     offsetof(struct trap_per_cpu, nonresum_mondo_pa)) ||
-	    (TRAP_PER_CPU_NONRESUM_KBUF_PA !=
-	     offsetof(struct trap_per_cpu, nonresum_kernel_buf_pa)) ||
-	    (TRAP_PER_CPU_FAULT_INFO !=
-	     offsetof(struct trap_per_cpu, fault_info)) ||
-	    (TRAP_PER_CPU_CPU_MONDO_BLOCK_PA !=
-	     offsetof(struct trap_per_cpu, cpu_mondo_block_pa)) ||
-	    (TRAP_PER_CPU_CPU_LIST_PA !=
-	     offsetof(struct trap_per_cpu, cpu_list_pa)) ||
-	    (TRAP_PER_CPU_TSB_HUGE !=
-	     offsetof(struct trap_per_cpu, tsb_huge)) ||
-	    (TRAP_PER_CPU_TSB_HUGE_TEMP !=
-	     offsetof(struct trap_per_cpu, tsb_huge_temp)) ||
-	    (TRAP_PER_CPU_IRQ_WORKLIST_PA !=
-	     offsetof(struct trap_per_cpu, irq_worklist_pa)) ||
-	    (TRAP_PER_CPU_CPU_MONDO_QMASK !=
-	     offsetof(struct trap_per_cpu, cpu_mondo_qmask)) ||
-	    (TRAP_PER_CPU_DEV_MONDO_QMASK !=
-	     offsetof(struct trap_per_cpu, dev_mondo_qmask)) ||
-	    (TRAP_PER_CPU_RESUM_QMASK !=
-	     offsetof(struct trap_per_cpu, resum_qmask)) ||
-	    (TRAP_PER_CPU_NONRESUM_QMASK !=
-	     offsetof(struct trap_per_cpu, nonresum_qmask)))
-		trap_per_cpu_offsets_are_bolixed_dave();
-
-	if ((TSB_CONFIG_TSB !=
-	     offsetof(struct tsb_config, tsb)) ||
-	    (TSB_CONFIG_RSS_LIMIT !=
-	     offsetof(struct tsb_config, tsb_rss_limit)) ||
-	    (TSB_CONFIG_NENTRIES !=
-	     offsetof(struct tsb_config, tsb_nentries)) ||
-	    (TSB_CONFIG_REG_VAL !=
-	     offsetof(struct tsb_config, tsb_reg_val)) ||
-	    (TSB_CONFIG_MAP_VADDR !=
-	     offsetof(struct tsb_config, tsb_map_vaddr)) ||
-	    (TSB_CONFIG_MAP_PTE !=
-	     offsetof(struct tsb_config, tsb_map_pte)))
-		tsb_config_offsets_are_bolixed_dave();
+	BUILD_BUG_ON(TI_TASK != offsetof(struct thread_info, task) ||
+		     TI_FLAGS != offsetof(struct thread_info, flags) ||
+		     TI_CPU != offsetof(struct thread_info, cpu) ||
+		     TI_FPSAVED != offsetof(struct thread_info, fpsaved) ||
+		     TI_KSP != offsetof(struct thread_info, ksp) ||
+		     TI_FAULT_ADDR != offsetof(struct thread_info,
+					       fault_address) ||
+		     TI_KREGS != offsetof(struct thread_info, kregs) ||
+		     TI_UTRAPS != offsetof(struct thread_info, utraps) ||
+		     TI_EXEC_DOMAIN != offsetof(struct thread_info,
+						exec_domain) ||
+		     TI_REG_WINDOW != offsetof(struct thread_info,
+					       reg_window) ||
+		     TI_RWIN_SPTRS != offsetof(struct thread_info,
+					       rwbuf_stkptrs) ||
+		     TI_GSR != offsetof(struct thread_info, gsr) ||
+		     TI_XFSR != offsetof(struct thread_info, xfsr) ||
+		     TI_USER_CNTD0 != offsetof(struct thread_info,
+					       user_cntd0) ||
+		     TI_USER_CNTD1 != offsetof(struct thread_info,
+					       user_cntd1) ||
+		     TI_KERN_CNTD0 != offsetof(struct thread_info,
+					       kernel_cntd0) ||
+		     TI_KERN_CNTD1 != offsetof(struct thread_info,
+					       kernel_cntd1) ||
+		     TI_PCR != offsetof(struct thread_info, pcr_reg) ||
+		     TI_PRE_COUNT != offsetof(struct thread_info,
+					      preempt_count) ||
+		     TI_NEW_CHILD != offsetof(struct thread_info, new_child) ||
+		     TI_SYS_NOERROR != offsetof(struct thread_info,
+						syscall_noerror) ||
+		     TI_RESTART_BLOCK != offsetof(struct thread_info,
+						  restart_block) ||
+		     TI_KUNA_REGS != offsetof(struct thread_info,
+					      kern_una_regs) ||
+		     TI_KUNA_INSN != offsetof(struct thread_info,
+					      kern_una_insn) ||
+		     TI_FPREGS != offsetof(struct thread_info, fpregs) ||
+		     (TI_FPREGS & (64 - 1)));
+
+	BUILD_BUG_ON(TRAP_PER_CPU_THREAD != offsetof(struct trap_per_cpu,
+						     thread) ||
+		     (TRAP_PER_CPU_PGD_PADDR !=
+		      offsetof(struct trap_per_cpu, pgd_paddr)) ||
+		     (TRAP_PER_CPU_CPU_MONDO_PA !=
+		      offsetof(struct trap_per_cpu, cpu_mondo_pa)) ||
+		     (TRAP_PER_CPU_DEV_MONDO_PA !=
+		      offsetof(struct trap_per_cpu, dev_mondo_pa)) ||
+		     (TRAP_PER_CPU_RESUM_MONDO_PA !=
+		      offsetof(struct trap_per_cpu, resum_mondo_pa)) ||
+		     (TRAP_PER_CPU_RESUM_KBUF_PA !=
+		      offsetof(struct trap_per_cpu, resum_kernel_buf_pa)) ||
+		     (TRAP_PER_CPU_NONRESUM_MONDO_PA !=
+		      offsetof(struct trap_per_cpu, nonresum_mondo_pa)) ||
+		     (TRAP_PER_CPU_NONRESUM_KBUF_PA !=
+		      offsetof(struct trap_per_cpu, nonresum_kernel_buf_pa)) ||
+		     (TRAP_PER_CPU_FAULT_INFO !=
+		      offsetof(struct trap_per_cpu, fault_info)) ||
+		     (TRAP_PER_CPU_CPU_MONDO_BLOCK_PA !=
+		      offsetof(struct trap_per_cpu, cpu_mondo_block_pa)) ||
+		     (TRAP_PER_CPU_CPU_LIST_PA !=
+		      offsetof(struct trap_per_cpu, cpu_list_pa)) ||
+		     (TRAP_PER_CPU_TSB_HUGE !=
+		      offsetof(struct trap_per_cpu, tsb_huge)) ||
+		     (TRAP_PER_CPU_TSB_HUGE_TEMP !=
+		      offsetof(struct trap_per_cpu, tsb_huge_temp)) ||
+		     (TRAP_PER_CPU_IRQ_WORKLIST_PA !=
+		      offsetof(struct trap_per_cpu, irq_worklist_pa)) ||
+		     (TRAP_PER_CPU_CPU_MONDO_QMASK !=
+		      offsetof(struct trap_per_cpu, cpu_mondo_qmask)) ||
+		     (TRAP_PER_CPU_DEV_MONDO_QMASK !=
+		      offsetof(struct trap_per_cpu, dev_mondo_qmask)) ||
+		     (TRAP_PER_CPU_RESUM_QMASK !=
+		      offsetof(struct trap_per_cpu, resum_qmask)) ||
+		     (TRAP_PER_CPU_NONRESUM_QMASK !=
+		      offsetof(struct trap_per_cpu, nonresum_qmask)) ||
+		     (TRAP_PER_CPU_PER_CPU_BASE !=
+		      offsetof(struct trap_per_cpu, __per_cpu_base)));
+
+	BUILD_BUG_ON((TSB_CONFIG_TSB !=
+		      offsetof(struct tsb_config, tsb)) ||
+		     (TSB_CONFIG_RSS_LIMIT !=
+		      offsetof(struct tsb_config, tsb_rss_limit)) ||
+		     (TSB_CONFIG_NENTRIES !=
+		      offsetof(struct tsb_config, tsb_nentries)) ||
+		     (TSB_CONFIG_REG_VAL !=
+		      offsetof(struct tsb_config, tsb_reg_val)) ||
+		     (TSB_CONFIG_MAP_VADDR !=
+		      offsetof(struct tsb_config, tsb_map_vaddr)) ||
+		     (TSB_CONFIG_MAP_PTE !=
+		      offsetof(struct tsb_config, tsb_map_pte)));
 
 	/* Attach to the address space of init_task.  On SMP we
 	 * do this in smp.c:smp_callin for other cpus.
diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S
index 76267085b13b..fcbbd000ec08 100644
--- a/arch/sparc/kernel/vmlinux.lds.S
+++ b/arch/sparc/kernel/vmlinux.lds.S
@@ -41,7 +41,7 @@ SECTIONS
 	.text TEXTSTART :
 	{
 		_text = .;
-		*(.text.head)
+		HEAD_TEXT
 		TEXT_TEXT
 		SCHED_TEXT
 		LOCK_TEXT