15 files changed, 560 insertions, 425 deletions
diff --git a/arch/sh/kernel/cpu/Makefile b/arch/sh/kernel/cpu/Makefile
index d97c803719ec..0e48bc61c272 100644
--- a/arch/sh/kernel/cpu/Makefile
+++ b/arch/sh/kernel/cpu/Makefile
@@ -17,5 +17,7 @@ obj-$(CONFIG_ARCH_SHMOBILE)	+= shmobile/
 
 obj-$(CONFIG_SH_ADC)		+= adc.o
 obj-$(CONFIG_SH_CLK_CPG)	+= clock-cpg.o
+obj-$(CONFIG_SH_FPU)		+= fpu.o
+obj-$(CONFIG_SH_FPU_EMU)	+= fpu.o
 
 obj-y	+= irq/ init.o clock.o hwblk.o
diff --git a/arch/sh/kernel/cpu/clock-cpg.c b/arch/sh/kernel/cpu/clock-cpg.c
index 6dfe2cced3fc..2827abb5d2ab 100644
--- a/arch/sh/kernel/cpu/clock-cpg.c
+++ b/arch/sh/kernel/cpu/clock-cpg.c
@@ -160,13 +160,81 @@ static unsigned long sh_clk_div4_recalc(struct clk *clk)
 	return clk->freq_table[idx].frequency;
 }
 
+static int sh_clk_div4_set_parent(struct clk *clk, struct clk *parent)
+{
+	struct clk_div_mult_table *table = clk->priv;
+	u32 value;
+	int ret;
+
+	if (!strcmp("pll_clk", parent->name))
+		value = __raw_readl(clk->enable_reg) & ~(1 << 7);
+	else
+		value = __raw_readl(clk->enable_reg) | (1 << 7);
+
+	ret = clk_reparent(clk, parent);
+	if (ret < 0)
+		return ret;
+
+	__raw_writel(value, clk->enable_reg);
+
+	/* Rebiuld the frequency table */
+	clk_rate_table_build(clk, clk->freq_table, table->nr_divisors,
+			     table, &clk->arch_flags);
+
+	return 0;
+}
+
+static int sh_clk_div4_set_rate(struct clk *clk, unsigned long rate, int algo_id)
+{
+	unsigned long value;
+	int idx = clk_rate_table_find(clk, clk->freq_table, rate);
+	if (idx < 0)
+		return idx;
+
+	value = __raw_readl(clk->enable_reg);
+	value &= ~0xf;
+	value |= idx;
+	__raw_writel(value, clk->enable_reg);
+
+	return 0;
+}
+
+static int sh_clk_div4_enable(struct clk *clk)
+{
+	__raw_writel(__raw_readl(clk->enable_reg) & ~(1 << 8), clk->enable_reg);
+	return 0;
+}
+
+static void sh_clk_div4_disable(struct clk *clk)
+{
+	__raw_writel(__raw_readl(clk->enable_reg) | (1 << 8), clk->enable_reg);
+}
+
 static struct clk_ops sh_clk_div4_clk_ops = {
 	.recalc		= sh_clk_div4_recalc,
+	.set_rate	= sh_clk_div4_set_rate,
 	.round_rate	= sh_clk_div_round_rate,
 };
 
-int __init sh_clk_div4_register(struct clk *clks, int nr,
-				struct clk_div_mult_table *table)
+static struct clk_ops sh_clk_div4_enable_clk_ops = {
+	.recalc		= sh_clk_div4_recalc,
+	.set_rate	= sh_clk_div4_set_rate,
+	.round_rate	= sh_clk_div_round_rate,
+	.enable		= sh_clk_div4_enable,
+	.disable	= sh_clk_div4_disable,
+};
+
+static struct clk_ops sh_clk_div4_reparent_clk_ops = {
+	.recalc		= sh_clk_div4_recalc,
+	.set_rate	= sh_clk_div4_set_rate,
+	.round_rate	= sh_clk_div_round_rate,
+	.enable		= sh_clk_div4_enable,
+	.disable	= sh_clk_div4_disable,
+	.set_parent	= sh_clk_div4_set_parent,
+};
+
+static int __init sh_clk_div4_register_ops(struct clk *clks, int nr,
+			struct clk_div_mult_table *table, struct clk_ops *ops)
 {
 	struct clk *clkp;
 	void *freq_table;
@@ -185,7 +253,7 @@ int __init sh_clk_div4_register(struct clk *clks, int nr,
 	for (k = 0; !ret && (k < nr); k++) {
 		clkp = clks + k;
 
-		clkp->ops = &sh_clk_div4_clk_ops;
+		clkp->ops = ops;
 		clkp->id = -1;
 		clkp->priv = table;
 
@@ -198,6 +266,26 @@ int __init sh_clk_div4_register(struct clk *clks, int nr,
 	return ret;
 }
 
+int __init sh_clk_div4_register(struct clk *clks, int nr,
+				struct clk_div_mult_table *table)
+{
+	return sh_clk_div4_register_ops(clks, nr, table, &sh_clk_div4_clk_ops);
+}
+
+int __init sh_clk_div4_enable_register(struct clk *clks, int nr,
+				struct clk_div_mult_table *table)
+{
+	return sh_clk_div4_register_ops(clks, nr, table,
+					&sh_clk_div4_enable_clk_ops);
+}
+
+int __init sh_clk_div4_reparent_register(struct clk *clks, int nr,
+				struct clk_div_mult_table *table)
+{
+	return sh_clk_div4_register_ops(clks, nr, table,
+					&sh_clk_div4_reparent_clk_ops);
+}
+
 #ifdef CONFIG_SH_CLK_CPG_LEGACY
 static struct clk master_clk = {
 	.name		= "master_clk",
diff --git a/arch/sh/kernel/cpu/fpu.c b/arch/sh/kernel/cpu/fpu.c
new file mode 100644
index 000000000000..f059ed62cf57
--- /dev/null
+++ b/arch/sh/kernel/cpu/fpu.c
@@ -0,0 +1,84 @@
+#include <linux/sched.h>
+#include <asm/processor.h>
+#include <asm/fpu.h>
+
+int init_fpu(struct task_struct *tsk)
+{
+	if (tsk_used_math(tsk)) {
+		if ((boot_cpu_data.flags & CPU_HAS_FPU) && tsk == current)
+			unlazy_fpu(tsk, task_pt_regs(tsk));
+		return 0;
+	}
+
+	/*
+	 * Memory allocation at the first usage of the FPU and other state.
+	 */
+	if (!tsk->thread.xstate) {
+		tsk->thread.xstate = kmem_cache_alloc(task_xstate_cachep,
+						      GFP_KERNEL);
+		if (!tsk->thread.xstate)
+			return -ENOMEM;
+	}
+
+	if (boot_cpu_data.flags & CPU_HAS_FPU) {
+		struct sh_fpu_hard_struct *fp = &tsk->thread.xstate->hardfpu;
+		memset(fp, 0, xstate_size);
+		fp->fpscr = FPSCR_INIT;
+	} else {
+		struct sh_fpu_soft_struct *fp = &tsk->thread.xstate->softfpu;
+		memset(fp, 0, xstate_size);
+		fp->fpscr = FPSCR_INIT;
+	}
+
+	set_stopped_child_used_math(tsk);
+	return 0;
+}
+
+#ifdef CONFIG_SH_FPU
+void __fpu_state_restore(void)
+{
+	struct task_struct *tsk = current;
+
+	restore_fpu(tsk);
+
+	task_thread_info(tsk)->status |= TS_USEDFPU;
+	tsk->fpu_counter++;
+}
+
+void fpu_state_restore(struct pt_regs *regs)
+{
+	struct task_struct *tsk = current;
+
+	if (unlikely(!user_mode(regs))) {
+		printk(KERN_ERR "BUG: FPU is used in kernel mode.\n");
+		BUG();
+		return;
+	}
+
+	if (!tsk_used_math(tsk)) {
+		local_irq_enable();
+		/*
+		 * does a slab alloc which can sleep
+		 */
+		if (init_fpu(tsk)) {
+			/*
+			 * ran out of memory!
+			 */
+			do_group_exit(SIGKILL);
+			return;
+		}
+		local_irq_disable();
+	}
+
+	grab_fpu(regs);
+
+	__fpu_state_restore();
+}
+
+BUILD_TRAP_HANDLER(fpu_state_restore)
+{
+	TRAP_HANDLER_DECL;
+
+	fpu_state_restore(regs);
+}
+#endif /* CONFIG_SH_FPU */
diff --git a/arch/sh/kernel/cpu/init.c b/arch/sh/kernel/cpu/init.c
index 89b4b76c0d76..a5bb0550bbf3 100644
--- a/arch/sh/kernel/cpu/init.c
+++ b/arch/sh/kernel/cpu/init.c
@@ -24,22 +24,31 @@
 #include <asm/elf.h>
 #include <asm/io.h>
 #include <asm/smp.h>
-#ifdef CONFIG_SUPERH32
-#include <asm/ubc.h>
+
+#ifdef CONFIG_SH_FPU
+#define cpu_has_fpu	1
+#else
+#define cpu_has_fpu	0
+#endif
+
+#ifdef CONFIG_SH_DSP
+#define cpu_has_dsp	1
+#else
+#define cpu_has_dsp	0
 #endif
 
 /*
  * Generic wrapper for command line arguments to disable on-chip
  * peripherals (nofpu, nodsp, and so forth).
  */
-#define onchip_setup(x)				\
-static int x##_disabled __initdata = 0;		\
-						\
-static int __init x##_setup(char *opts)		\
-{						\
-	x##_disabled = 1;			\
-	return 1;				\
-}						\
+#define onchip_setup(x)					\
+static int x##_disabled __initdata = !cpu_has_##x;	\
+							\
+static int __init x##_setup(char *opts)			\
+{							\
+	x##_disabled = 1;				\
+	return 1;					\
+}							\
 __setup("no" __stringify(x), x##_setup);
 
 onchip_setup(fpu);
@@ -207,6 +216,18 @@ static void detect_cache_shape(void)
 		l2_cache_shape = -1; /* No S-cache */
 }
 
+static void __init fpu_init(void)
+{
+	/* Disable the FPU */
+	if (fpu_disabled && (current_cpu_data.flags & CPU_HAS_FPU)) {
+		printk("FPU Disabled\n");
+		current_cpu_data.flags &= ~CPU_HAS_FPU;
+	}
+
+	disable_fpu();
+	clear_used_math();
+}
+
 #ifdef CONFIG_SH_DSP
 static void __init release_dsp(void)
 {
@@ -244,28 +265,35 @@ static void __init dsp_init(void)
 	if (sr & SR_DSP)
 		current_cpu_data.flags |= CPU_HAS_DSP;
 
+	/* Disable the DSP */
+	if (dsp_disabled && (current_cpu_data.flags & CPU_HAS_DSP)) {
+		printk("DSP Disabled\n");
+		current_cpu_data.flags &= ~CPU_HAS_DSP;
+	}
+
 	/* Now that we've determined the DSP status, clear the DSP bit. */
 	release_dsp();
 }
+#else
+static inline void __init dsp_init(void) { }
 #endif /* CONFIG_SH_DSP */
 
 /**
  * sh_cpu_init
  *
- * This is our initial entry point for each CPU, and is invoked on the boot
- * CPU prior to calling start_kernel(). For SMP, a combination of this and
- * start_secondary() will bring up each processor to a ready state prior
- * to hand forking the idle loop.
+ * This is our initial entry point for each CPU, and is invoked on the
+ * boot CPU prior to calling start_kernel(). For SMP, a combination of
+ * this and start_secondary() will bring up each processor to a ready
+ * state prior to hand forking the idle loop.
  *
- * We do all of the basic processor init here, including setting up the
- * caches, FPU, DSP, kicking the UBC, etc. By the time start_kernel() is
- * hit (and subsequently platform_setup()) things like determining the
- * CPU subtype and initial configuration will all be done.
+ * We do all of the basic processor init here, including setting up
+ * the caches, FPU, DSP, etc. By the time start_kernel() is hit (and
+ * subsequently platform_setup()) things like determining the CPU
+ * subtype and initial configuration will all be done.
  *
  * Each processor family is still responsible for doing its own probing
  * and cache configuration in detect_cpu_and_cache_system().
  */
-
 asmlinkage void __init sh_cpu_init(void)
 {
 	current_thread_info()->cpu = hard_smp_processor_id();
@@ -302,18 +330,8 @@ asmlinkage void __init sh_cpu_init(void)
 		detect_cache_shape();
 	}
 
-	/* Disable the FPU */
-	if (fpu_disabled) {
-		printk("FPU Disabled\n");
-		current_cpu_data.flags &= ~CPU_HAS_FPU;
-	}
-
-	/* FPU initialization */
-	disable_fpu();
-	if ((current_cpu_data.flags & CPU_HAS_FPU)) {
-		current_thread_info()->status &= ~TS_USEDFPU;
-		clear_used_math();
-	}
+	fpu_init();
+	dsp_init();
 
 	/*
 	 * Initialize the per-CPU ASID cache very early, since the
@@ -321,18 +339,12 @@ asmlinkage void __init sh_cpu_init(void)
 	 */
 	current_cpu_data.asid_cache = NO_CONTEXT;
 
-#ifdef CONFIG_SH_DSP
-	/* Probe for DSP */
-	dsp_init();
-
-	/* Disable the DSP */
-	if (dsp_disabled) {
-		printk("DSP Disabled\n");
-		current_cpu_data.flags &= ~CPU_HAS_DSP;
-		release_dsp();
-	}
-#endif
-
 	speculative_execution_init();
 	expmask_init();
+
+	/*
+	 * Boot processor to setup the FP and extended state context info.
+	 */
+	if (raw_smp_processor_id() == 0)
+		init_thread_xstate();
 }
diff --git a/arch/sh/kernel/cpu/sh2a/fpu.c b/arch/sh/kernel/cpu/sh2a/fpu.c
index d395ce5740e7..488d24e0cdf0 100644
--- a/arch/sh/kernel/cpu/sh2a/fpu.c
+++ b/arch/sh/kernel/cpu/sh2a/fpu.c
@@ -26,8 +26,7 @@
 /*
  * Save FPU registers onto task structure.
  */
-void
-save_fpu(struct task_struct *tsk)
+void save_fpu(struct task_struct *tsk)
 {
 	unsigned long dummy;
 
@@ -52,7 +51,7 @@ save_fpu(struct task_struct *tsk)
 		     "fmov.s	fr0, @-%0\n\t"
 		     "lds	%3, fpscr\n\t"
 		     : "=r" (dummy)
-		     : "0" ((char *)(&tsk->thread.fpu.hard.status)),
+		     : "0" ((char *)(&tsk->thread.xstate->hardfpu.status)),
 		       "r" (FPSCR_RCHG),
 		       "r" (FPSCR_INIT)
 		     : "memory");
@@ -60,8 +59,7 @@ save_fpu(struct task_struct *tsk)
 	disable_fpu();
 }
 
-static void
-restore_fpu(struct task_struct *tsk)
+void restore_fpu(struct task_struct *tsk)
 {
 	unsigned long dummy;
 
@@ -85,45 +83,12 @@ restore_fpu(struct task_struct *tsk)
 		     "lds.l	@%0+, fpscr\n\t"
 		     "lds.l	@%0+, fpul\n\t"
 		     : "=r" (dummy)
-		     : "0" (&tsk->thread.fpu), "r" (FPSCR_RCHG)
+		     : "0" (tsk->thread.xstate), "r" (FPSCR_RCHG)
 		     : "memory");
 	disable_fpu();
 }
 
 /*
- * Load the FPU with signalling NANS.  This bit pattern we're using
- * has the property that no matter wether considered as single or as
- * double precission represents signaling NANS.
- */
-
-static void
-fpu_init(void)
-{
-	enable_fpu();
-	asm volatile("lds	%0, fpul\n\t"
-		     "fsts	fpul, fr0\n\t"
-		     "fsts	fpul, fr1\n\t"
-		     "fsts	fpul, fr2\n\t"
-		     "fsts	fpul, fr3\n\t"
-		     "fsts	fpul, fr4\n\t"
-		     "fsts	fpul, fr5\n\t"
-		     "fsts	fpul, fr6\n\t"
-		     "fsts	fpul, fr7\n\t"
-		     "fsts	fpul, fr8\n\t"
-		     "fsts	fpul, fr9\n\t"
-		     "fsts	fpul, fr10\n\t"
-		     "fsts	fpul, fr11\n\t"
-		     "fsts	fpul, fr12\n\t"
-		     "fsts	fpul, fr13\n\t"
-		     "fsts	fpul, fr14\n\t"
-		     "fsts	fpul, fr15\n\t"
-		     "lds	%2, fpscr\n\t"
-		     : /* no output */
-		     : "r" (0), "r" (FPSCR_RCHG), "r" (FPSCR_INIT));
-	disable_fpu();
-}
-
-/*
  *	Emulate arithmetic ops on denormalized number for some FPU insns.
  */
 
@@ -490,9 +455,9 @@ ieee_fpe_handler (struct pt_regs *regs)
 	if ((finsn & 0xf1ff) == 0xf0ad) { /* fcnvsd */
 		struct task_struct *tsk = current;
 
-		if ((tsk->thread.fpu.hard.fpscr & FPSCR_FPU_ERROR)) {
+		if ((tsk->thread.xstate->hardfpu.fpscr & FPSCR_FPU_ERROR)) {
 			/* FPU error */
-			denormal_to_double (&tsk->thread.fpu.hard,
+			denormal_to_double (&tsk->thread.xstate->hardfpu,
 					    (finsn >> 8) & 0xf);
 		} else
 			return 0;
@@ -507,9 +472,9 @@ ieee_fpe_handler (struct pt_regs *regs)
 
 		n = (finsn >> 8) & 0xf;
 		m = (finsn >> 4) & 0xf;
-		hx = tsk->thread.fpu.hard.fp_regs[n];
-		hy = tsk->thread.fpu.hard.fp_regs[m];
-		fpscr = tsk->thread.fpu.hard.fpscr;
+		hx = tsk->thread.xstate->hardfpu.fp_regs[n];
+		hy = tsk->thread.xstate->hardfpu.fp_regs[m];
+		fpscr = tsk->thread.xstate->hardfpu.fpscr;
 		prec = fpscr & (1 << 19);
 
 		if ((fpscr & FPSCR_FPU_ERROR)
@@ -519,15 +484,15 @@ ieee_fpe_handler (struct pt_regs *regs)
 
 			/* FPU error because of denormal */
 			llx = ((long long) hx << 32)
-			       | tsk->thread.fpu.hard.fp_regs[n+1];
+			       | tsk->thread.xstate->hardfpu.fp_regs[n+1];
 			lly = ((long long) hy << 32)
-			       | tsk->thread.fpu.hard.fp_regs[m+1];
+			       | tsk->thread.xstate->hardfpu.fp_regs[m+1];
 			if ((hx & 0x7fffffff) >= 0x00100000)
 				llx = denormal_muld(lly, llx);
 			else
 				llx = denormal_muld(llx, lly);
-			tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
-			tsk->thread.fpu.hard.fp_regs[n+1] = llx & 0xffffffff;
+			tsk->thread.xstate->hardfpu.fp_regs[n] = llx >> 32;
+			tsk->thread.xstate->hardfpu.fp_regs[n+1] = llx & 0xffffffff;
 		} else if ((fpscr & FPSCR_FPU_ERROR)
 		     && (!prec && ((hx & 0x7fffffff) < 0x00800000
 				   || (hy & 0x7fffffff) < 0x00800000))) {
@@ -536,7 +501,7 @@ ieee_fpe_handler (struct pt_regs *regs)
 				hx = denormal_mulf(hy, hx);
 			else
 				hx = denormal_mulf(hx, hy);
-			tsk->thread.fpu.hard.fp_regs[n] = hx;
+			tsk->thread.xstate->hardfpu.fp_regs[n] = hx;
 		} else
 			return 0;
 
@@ -550,9 +515,9 @@ ieee_fpe_handler (struct pt_regs *regs)
 
 		n = (finsn >> 8) & 0xf;
 		m = (finsn >> 4) & 0xf;
-		hx = tsk->thread.fpu.hard.fp_regs[n];
-		hy = tsk->thread.fpu.hard.fp_regs[m];
-		fpscr = tsk->thread.fpu.hard.fpscr;
+		hx = tsk->thread.xstate->hardfpu.fp_regs[n];
+		hy = tsk->thread.xstate->hardfpu.fp_regs[m];
+		fpscr = tsk->thread.xstate->hardfpu.fpscr;
 		prec = fpscr & (1 << 19);
 
 		if ((fpscr & FPSCR_FPU_ERROR)
@@ -562,15 +527,15 @@ ieee_fpe_handler (struct pt_regs *regs)
 
 			/* FPU error because of denormal */
 			llx = ((long long) hx << 32)
-			       | tsk->thread.fpu.hard.fp_regs[n+1];
+			       | tsk->thread.xstate->hardfpu.fp_regs[n+1];
 			lly = ((long long) hy << 32)
-			       | tsk->thread.fpu.hard.fp_regs[m+1];
+			       | tsk->thread.xstate->hardfpu.fp_regs[m+1];
 			if ((finsn & 0xf00f) == 0xf000)
 				llx = denormal_addd(llx, lly);
 			else
 				llx = denormal_addd(llx, lly ^ (1LL << 63));
-			tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
-			tsk->thread.fpu.hard.fp_regs[n+1] = llx & 0xffffffff;
+			tsk->thread.xstate->hardfpu.fp_regs[n] = llx >> 32;
+			tsk->thread.xstate->hardfpu.fp_regs[n+1] = llx & 0xffffffff;
 		} else if ((fpscr & FPSCR_FPU_ERROR)
 		     && (!prec && ((hx & 0x7fffffff) < 0x00800000
 				   || (hy & 0x7fffffff) < 0x00800000))) {
@@ -579,7 +544,7 @@ ieee_fpe_handler (struct pt_regs *regs)
 				hx = denormal_addf(hx, hy);
 			else
 				hx = denormal_addf(hx, hy ^ 0x80000000);
-			tsk->thread.fpu.hard.fp_regs[n] = hx;
+			tsk->thread.xstate->hardfpu.fp_regs[n] = hx;
 		} else
 			return 0;
 
@@ -597,7 +562,7 @@ BUILD_TRAP_HANDLER(fpu_error)
 
 	__unlazy_fpu(tsk, regs);
 	if (ieee_fpe_handler(regs)) {
-		tsk->thread.fpu.hard.fpscr &=
+		tsk->thread.xstate->hardfpu.fpscr &=
 			~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK);
 		grab_fpu(regs);
 		restore_fpu(tsk);
@@ -607,33 +572,3 @@ BUILD_TRAP_HANDLER(fpu_error)
 
 	force_sig(SIGFPE, tsk);
 }
-
-void fpu_state_restore(struct pt_regs *regs)
-{
-	struct task_struct *tsk = current;
-
-	grab_fpu(regs);
-	if (unlikely(!user_mode(regs))) {
-		printk(KERN_ERR "BUG: FPU is used in kernel mode.\n");
-		BUG();
-		return;
-	}
-
-	if (likely(used_math())) {
-		/* Using the FPU again.  */
-		restore_fpu(tsk);
-	} else	{
-		/* First time FPU user.  */
-		fpu_init();
-		set_used_math();
-	}
-	task_thread_info(tsk)->status |= TS_USEDFPU;
-	tsk->fpu_counter++;
-}
-
-BUILD_TRAP_HANDLER(fpu_state_restore)
-{
-	TRAP_HANDLER_DECL;
-
-	fpu_state_restore(regs);
-}
diff --git a/arch/sh/kernel/cpu/sh3/ex.S b/arch/sh/kernel/cpu/sh3/ex.S
index 46610c35c232..99b4d020179a 100644
--- a/arch/sh/kernel/cpu/sh3/ex.S
+++ b/arch/sh/kernel/cpu/sh3/ex.S
@@ -49,7 +49,7 @@ ENTRY(exception_handling_table)
 	.long	exception_error	! reserved_instruction (filled by trap_init) /* 180 */
 	.long	exception_error	! illegal_slot_instruction (filled by trap_init) /*1A0*/
 	.long	nmi_trap_handler	/* 1C0 */	! Allow trap to debugger
-	.long	break_point_trap	/* 1E0 */
+	.long	breakpoint_trap_handler	/* 1E0 */
 
 	/*
 	 * Pad the remainder of the table out, exceptions residing in far
diff --git a/arch/sh/kernel/cpu/sh4/fpu.c b/arch/sh/kernel/cpu/sh4/fpu.c
index e97857aec8a0..447482d7f65e 100644
--- a/arch/sh/kernel/cpu/sh4/fpu.c
+++ b/arch/sh/kernel/cpu/sh4/fpu.c
@@ -85,14 +85,14 @@ void save_fpu(struct task_struct *tsk)
 		      "fmov.s	fr1, @-%0\n\t"
 		      "fmov.s	fr0, @-%0\n\t"
 		      "lds	%3, fpscr\n\t":"=r" (dummy)
-		      :"0"((char *)(&tsk->thread.fpu.hard.status)),
+		      :"0"((char *)(&tsk->thread.xstate->hardfpu.status)),
 		      "r"(FPSCR_RCHG), "r"(FPSCR_INIT)
 		      :"memory");
 
 	disable_fpu();
 }
 
-static void restore_fpu(struct task_struct *tsk)
+void restore_fpu(struct task_struct *tsk)
 {
 	unsigned long dummy;
 
@@ -135,62 +135,11 @@ static void restore_fpu(struct task_struct *tsk)
 		      "lds.l	@%0+, fpscr\n\t"
 		      "lds.l	@%0+, fpul\n\t"
 		      :"=r" (dummy)
-		      :"0"(&tsk->thread.fpu), "r"(FPSCR_RCHG)
+		      :"0" (tsk->thread.xstate), "r" (FPSCR_RCHG)
 		      :"memory");
 	disable_fpu();
 }
 
-/*
- * Load the FPU with signalling NANS.  This bit pattern we're using
- * has the property that no matter wether considered as single or as
- * double precision represents signaling NANS.
- */
-
-static void fpu_init(void)
-{
-	enable_fpu();
-	asm volatile (	"lds	%0, fpul\n\t"
-			"lds	%1, fpscr\n\t"
-			"fsts	fpul, fr0\n\t"
-			"fsts	fpul, fr1\n\t"
-			"fsts	fpul, fr2\n\t"
-			"fsts	fpul, fr3\n\t"
-			"fsts	fpul, fr4\n\t"
-			"fsts	fpul, fr5\n\t"
-			"fsts	fpul, fr6\n\t"
-			"fsts	fpul, fr7\n\t"
-			"fsts	fpul, fr8\n\t"
-			"fsts	fpul, fr9\n\t"
-			"fsts	fpul, fr10\n\t"
-			"fsts	fpul, fr11\n\t"
-			"fsts	fpul, fr12\n\t"
-			"fsts	fpul, fr13\n\t"
-			"fsts	fpul, fr14\n\t"
-			"fsts	fpul, fr15\n\t"
-			"frchg\n\t"
-			"fsts	fpul, fr0\n\t"
-			"fsts	fpul, fr1\n\t"
-			"fsts	fpul, fr2\n\t"
-			"fsts	fpul, fr3\n\t"
-			"fsts	fpul, fr4\n\t"
-			"fsts	fpul, fr5\n\t"
-			"fsts	fpul, fr6\n\t"
-			"fsts	fpul, fr7\n\t"
-			"fsts	fpul, fr8\n\t"
-			"fsts	fpul, fr9\n\t"
-			"fsts	fpul, fr10\n\t"
-			"fsts	fpul, fr11\n\t"
-			"fsts	fpul, fr12\n\t"
-			"fsts	fpul, fr13\n\t"
-			"fsts	fpul, fr14\n\t"
-			"fsts	fpul, fr15\n\t"
-			"frchg\n\t"
-			"lds	%2, fpscr\n\t"
-			:	/* no output */
-			:"r" (0), "r"(FPSCR_RCHG), "r"(FPSCR_INIT));
-	disable_fpu();
-}
-
 /**
  *      denormal_to_double - Given denormalized float number,
  *                           store double float
@@ -282,9 +231,9 @@ static int ieee_fpe_handler(struct pt_regs *regs)
 		/* fcnvsd */
 		struct task_struct *tsk = current;
 
-		if ((tsk->thread.fpu.hard.fpscr & FPSCR_CAUSE_ERROR))
+		if ((tsk->thread.xstate->hardfpu.fpscr & FPSCR_CAUSE_ERROR))
 			/* FPU error */
-			denormal_to_double(&tsk->thread.fpu.hard,
+			denormal_to_double(&tsk->thread.xstate->hardfpu,
 					   (finsn >> 8) & 0xf);
 		else
 			return 0;
@@ -300,9 +249,9 @@ static int ieee_fpe_handler(struct pt_regs *regs)
 
 		n = (finsn >> 8) & 0xf;
 		m = (finsn >> 4) & 0xf;
-		hx = tsk->thread.fpu.hard.fp_regs[n];
-		hy = tsk->thread.fpu.hard.fp_regs[m];
-		fpscr = tsk->thread.fpu.hard.fpscr;
+		hx = tsk->thread.xstate->hardfpu.fp_regs[n];
+		hy = tsk->thread.xstate->hardfpu.fp_regs[m];
+		fpscr = tsk->thread.xstate->hardfpu.fpscr;
 		prec = fpscr & FPSCR_DBL_PRECISION;
 
 		if ((fpscr & FPSCR_CAUSE_ERROR)
@@ -312,18 +261,18 @@ static int ieee_fpe_handler(struct pt_regs *regs)
 
 			/* FPU error because of denormal (doubles) */
 			llx = ((long long)hx << 32)
-			    | tsk->thread.fpu.hard.fp_regs[n + 1];
+			    | tsk->thread.xstate->hardfpu.fp_regs[n + 1];
 			lly = ((long long)hy << 32)
-			    | tsk->thread.fpu.hard.fp_regs[m + 1];
+			    | tsk->thread.xstate->hardfpu.fp_regs[m + 1];
 			llx = float64_mul(llx, lly);
-			tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
-			tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
+			tsk->thread.xstate->hardfpu.fp_regs[n] = llx >> 32;
+			tsk->thread.xstate->hardfpu.fp_regs[n + 1] = llx & 0xffffffff;
 		} else if ((fpscr & FPSCR_CAUSE_ERROR)
 			   && (!prec && ((hx & 0x7fffffff) < 0x00800000
 					 || (hy & 0x7fffffff) < 0x00800000))) {
 			/* FPU error because of denormal (floats) */
 			hx = float32_mul(hx, hy);
-			tsk->thread.fpu.hard.fp_regs[n] = hx;
+			tsk->thread.xstate->hardfpu.fp_regs[n] = hx;
 		} else
 			return 0;
 
@@ -338,9 +287,9 @@ static int ieee_fpe_handler(struct pt_regs *regs)
 
 		n = (finsn >> 8) & 0xf;
 		m = (finsn >> 4) & 0xf;
-		hx = tsk->thread.fpu.hard.fp_regs[n];
-		hy = tsk->thread.fpu.hard.fp_regs[m];
-		fpscr = tsk->thread.fpu.hard.fpscr;
+		hx = tsk->thread.xstate->hardfpu.fp_regs[n];
+		hy = tsk->thread.xstate->hardfpu.fp_regs[m];
+		fpscr = tsk->thread.xstate->hardfpu.fpscr;
 		prec = fpscr & FPSCR_DBL_PRECISION;
 
 		if ((fpscr & FPSCR_CAUSE_ERROR)
@@ -350,15 +299,15 @@ static int ieee_fpe_handler(struct pt_regs *regs)
 
 			/* FPU error because of denormal (doubles) */
 			llx = ((long long)hx << 32)
-			    | tsk->thread.fpu.hard.fp_regs[n + 1];
+			    | tsk->thread.xstate->hardfpu.fp_regs[n + 1];
 			lly = ((long long)hy << 32)
-			    | tsk->thread.fpu.hard.fp_regs[m + 1];
+			    | tsk->thread.xstate->hardfpu.fp_regs[m + 1];
 			if ((finsn & 0xf00f) == 0xf000)
 				llx = float64_add(llx, lly);
 			else
 				llx = float64_sub(llx, lly);
-			tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
-			tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
+			tsk->thread.xstate->hardfpu.fp_regs[n] = llx >> 32;
+			tsk->thread.xstate->hardfpu.fp_regs[n + 1] = llx & 0xffffffff;
 		} else if ((fpscr & FPSCR_CAUSE_ERROR)
 			   && (!prec && ((hx & 0x7fffffff) < 0x00800000
 					 || (hy & 0x7fffffff) < 0x00800000))) {
@@ -367,7 +316,7 @@ static int ieee_fpe_handler(struct pt_regs *regs)
 				hx = float32_add(hx, hy);
 			else
 				hx = float32_sub(hx, hy);
-			tsk->thread.fpu.hard.fp_regs[n] = hx;
+			tsk->thread.xstate->hardfpu.fp_regs[n] = hx;
 		} else
 			return 0;
 
@@ -382,9 +331,9 @@ static int ieee_fpe_handler(struct pt_regs *regs)
 
 		n = (finsn >> 8) & 0xf;
 		m = (finsn >> 4) & 0xf;
-		hx = tsk->thread.fpu.hard.fp_regs[n];
-		hy = tsk->thread.fpu.hard.fp_regs[m];
-		fpscr = tsk->thread.fpu.hard.fpscr;
+		hx = tsk->thread.xstate->hardfpu.fp_regs[n];
+		hy = tsk->thread.xstate->hardfpu.fp_regs[m];
+		fpscr = tsk->thread.xstate->hardfpu.fpscr;
 		prec = fpscr & FPSCR_DBL_PRECISION;
 
 		if ((fpscr & FPSCR_CAUSE_ERROR)
@@ -394,20 +343,20 @@ static int ieee_fpe_handler(struct pt_regs *regs)
 
 			/* FPU error because of denormal (doubles) */
 			llx = ((long long)hx << 32)
-			    | tsk->thread.fpu.hard.fp_regs[n + 1];
+			    | tsk->thread.xstate->hardfpu.fp_regs[n + 1];
 			lly = ((long long)hy << 32)
-			    | tsk->thread.fpu.hard.fp_regs[m + 1];
+			    | tsk->thread.xstate->hardfpu.fp_regs[m + 1];
 
 			llx = float64_div(llx, lly);
 
-			tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
-			tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
+			tsk->thread.xstate->hardfpu.fp_regs[n] = llx >> 32;
+			tsk->thread.xstate->hardfpu.fp_regs[n + 1] = llx & 0xffffffff;
 		} else if ((fpscr & FPSCR_CAUSE_ERROR)
 			   && (!prec && ((hx & 0x7fffffff) < 0x00800000
 					 || (hy & 0x7fffffff) < 0x00800000))) {
 			/* FPU error because of denormal (floats) */
 			hx = float32_div(hx, hy);
-			tsk->thread.fpu.hard.fp_regs[n] = hx;
+			tsk->thread.xstate->hardfpu.fp_regs[n] = hx;
 		} else
 			return 0;
 
@@ -420,17 +369,17 @@ static int ieee_fpe_handler(struct pt_regs *regs)
 		unsigned int hx;
 
 		m = (finsn >> 8) & 0x7;
-		hx = tsk->thread.fpu.hard.fp_regs[m];
+		hx = tsk->thread.xstate->hardfpu.fp_regs[m];
 
-		if ((tsk->thread.fpu.hard.fpscr & FPSCR_CAUSE_ERROR)
+		if ((tsk->thread.xstate->hardfpu.fpscr & FPSCR_CAUSE_ERROR)
 			&& ((hx & 0x7fffffff) < 0x00100000)) {
 			/* subnormal double to float conversion */
 			long long llx;
 
-			llx = ((long long)tsk->thread.fpu.hard.fp_regs[m] << 32)
-			    | tsk->thread.fpu.hard.fp_regs[m + 1];
+			llx = ((long long)tsk->thread.xstate->hardfpu.fp_regs[m] << 32)
+			    | tsk->thread.xstate->hardfpu.fp_regs[m + 1];
 
-			tsk->thread.fpu.hard.fpul = float64_to_float32(llx);
+			tsk->thread.xstate->hardfpu.fpul = float64_to_float32(llx);
 		} else
 			return 0;
 
@@ -449,7 +398,7 @@ void float_raise(unsigned int flags)
 int float_rounding_mode(void)
 {
 	struct task_struct *tsk = current;
-	int roundingMode = FPSCR_ROUNDING_MODE(tsk->thread.fpu.hard.fpscr);
+	int roundingMode = FPSCR_ROUNDING_MODE(tsk->thread.xstate->hardfpu.fpscr);
 	return roundingMode;
 }
 
@@ -461,16 +410,16 @@ BUILD_TRAP_HANDLER(fpu_error)
 	__unlazy_fpu(tsk, regs);
 	fpu_exception_flags = 0;
 	if (ieee_fpe_handler(regs)) {
-		tsk->thread.fpu.hard.fpscr &=
+		tsk->thread.xstate->hardfpu.fpscr &=
 		    ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK);
-		tsk->thread.fpu.hard.fpscr |= fpu_exception_flags;
+		tsk->thread.xstate->hardfpu.fpscr |= fpu_exception_flags;
 		/* Set the FPSCR flag as well as cause bits - simply
 		 * replicate the cause */
-		tsk->thread.fpu.hard.fpscr |= (fpu_exception_flags >> 10);
+		tsk->thread.xstate->hardfpu.fpscr |= (fpu_exception_flags >> 10);
 		grab_fpu(regs);
 		restore_fpu(tsk);
 		task_thread_info(tsk)->status |= TS_USEDFPU;
-		if ((((tsk->thread.fpu.hard.fpscr & FPSCR_ENABLE_MASK) >> 7) &
+		if ((((tsk->thread.xstate->hardfpu.fpscr & FPSCR_ENABLE_MASK) >> 7) &
 		     (fpu_exception_flags >> 2)) == 0) {
 			return;
 		}
@@ -478,33 +427,3 @@ BUILD_TRAP_HANDLER(fpu_error)
 
 	force_sig(SIGFPE, tsk);
 }
-
-void fpu_state_restore(struct pt_regs *regs)
-{
-	struct task_struct *tsk = current;
-
-	grab_fpu(regs);
-	if (unlikely(!user_mode(regs))) {
-		printk(KERN_ERR "BUG: FPU is used in kernel mode.\n");
-		BUG();
-		return;
-	}
-
-	if (likely(used_math())) {
-		/* Using the FPU again.  */
-		restore_fpu(tsk);
-	} else {
-		/* First time FPU user.  */
-		fpu_init();
-		set_used_math();
-	}
-	task_thread_info(tsk)->status |= TS_USEDFPU;
-	tsk->fpu_counter++;
-}
-
-BUILD_TRAP_HANDLER(fpu_state_restore)
-{
-	TRAP_HANDLER_DECL;
-
-	fpu_state_restore(regs);
-}
diff --git a/arch/sh/kernel/cpu/sh4/probe.c b/arch/sh/kernel/cpu/sh4/probe.c
index d36f0c45f55f..cc02b3145cca 100644
--- a/arch/sh/kernel/cpu/sh4/probe.c
+++ b/arch/sh/kernel/cpu/sh4/probe.c
@@ -71,11 +71,11 @@ int __init detect_cpu_and_cache_system(void)
 		boot_cpu_data.dcache.ways = 4;
 	} else {
 		/* And some SH-4 defaults.. */
-		boot_cpu_data.flags |= CPU_HAS_PTEA;
+		boot_cpu_data.flags |= CPU_HAS_PTEA | CPU_HAS_FPU;
 		boot_cpu_data.family = CPU_FAMILY_SH4;
 	}
 
-	/* FPU detection works for everyone */
+	/* FPU detection works for almost everyone */
 	if ((cvr & 0x20000000))
 		boot_cpu_data.flags |= CPU_HAS_FPU;
 
@@ -124,6 +124,7 @@ int __init detect_cpu_and_cache_system(void)
 		boot_cpu_data.type = CPU_SH7785;
 		break;
 	case 0x4004:
+	case 0x4005:
 		boot_cpu_data.type = CPU_SH7786;
 		boot_cpu_data.flags |= CPU_HAS_PTEAEX | CPU_HAS_L2_CACHE;
 		break;
@@ -160,6 +161,7 @@ int __init detect_cpu_and_cache_system(void)
 		break;
 	case 0x700:
 		boot_cpu_data.type = CPU_SH4_501;
+		boot_cpu_data.flags &= ~CPU_HAS_FPU;
 		boot_cpu_data.icache.ways = 2;
 		boot_cpu_data.dcache.ways = 2;
 		break;
@@ -227,7 +229,7 @@ int __init detect_cpu_and_cache_system(void)
 			 * Size calculation is much more sensible
 			 * than it is for the L1.
 			 *
-			 * Sizes are 128KB, 258KB, 512KB, and 1MB.
+			 * Sizes are 128KB, 256KB, 512KB, and 1MB.
 			 */
 			size = (cvr & 0xf) << 17;
 
diff --git a/arch/sh/kernel/cpu/sh4a/Makefile b/arch/sh/kernel/cpu/sh4a/Makefile
index 33bab477d2e2..b144e8af89dc 100644
--- a/arch/sh/kernel/cpu/sh4a/Makefile
+++ b/arch/sh/kernel/cpu/sh4a/Makefile
@@ -41,7 +41,8 @@ pinmux-$(CONFIG_CPU_SUBTYPE_SH7757)	:= pinmux-sh7757.o
 pinmux-$(CONFIG_CPU_SUBTYPE_SH7785)	:= pinmux-sh7785.o
 pinmux-$(CONFIG_CPU_SUBTYPE_SH7786)	:= pinmux-sh7786.o
 
-obj-y				+= $(clock-y)
-obj-$(CONFIG_SMP)		+= $(smp-y)
-obj-$(CONFIG_GENERIC_GPIO)	+= $(pinmux-y)
-obj-$(CONFIG_PERF_EVENTS)	+= perf_event.o
+obj-y					+= $(clock-y)
+obj-$(CONFIG_SMP)			+= $(smp-y)
+obj-$(CONFIG_GENERIC_GPIO)		+= $(pinmux-y)
+obj-$(CONFIG_PERF_EVENTS)		+= perf_event.o
+obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= ubc.o
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7722.c b/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
index ea38b554dc05..860ee2bf4bf0 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7722.c
@@ -117,12 +117,11 @@ static struct clk_div_mult_table div4_table = {
 	.nr_multipliers = ARRAY_SIZE(multipliers),
 };
 
-enum { DIV4_I, DIV4_U, DIV4_SH, DIV4_B, DIV4_B3, DIV4_P,
-       DIV4_SIUA, DIV4_SIUB, DIV4_IRDA, DIV4_NR };
-
 #define DIV4(_str, _reg, _bit, _mask, _flags) \
   SH_CLK_DIV4(_str, &pll_clk, _reg, _bit, _mask, _flags)
 
+enum { DIV4_I, DIV4_U, DIV4_SH, DIV4_B, DIV4_B3, DIV4_P, DIV4_NR };
+
 struct clk div4_clks[DIV4_NR] = {
 	[DIV4_I] = DIV4("cpu_clk", FRQCR, 20, 0x1fef, CLK_ENABLE_ON_INIT),
 	[DIV4_U] = DIV4("umem_clk", FRQCR, 16, 0x1fff, CLK_ENABLE_ON_INIT),
@@ -130,9 +129,19 @@ struct clk div4_clks[DIV4_NR] = {
 	[DIV4_B] = DIV4("bus_clk", FRQCR, 8, 0x1fff, CLK_ENABLE_ON_INIT),
 	[DIV4_B3] = DIV4("b3_clk", FRQCR, 4, 0x1fff, CLK_ENABLE_ON_INIT),
 	[DIV4_P] = DIV4("peripheral_clk", FRQCR, 0, 0x1fff, 0),
+};
+
+enum { DIV4_IRDA, DIV4_ENABLE_NR };
+
+struct clk div4_enable_clks[DIV4_ENABLE_NR] = {
+	[DIV4_IRDA] = DIV4("irda_clk", IRDACLKCR, 0, 0x1fff, 0),
+};
+
+enum { DIV4_SIUA, DIV4_SIUB, DIV4_REPARENT_NR };
+
+struct clk div4_reparent_clks[DIV4_REPARENT_NR] = {
 	[DIV4_SIUA] = DIV4("siua_clk", SCLKACR, 0, 0x1fff, 0),
 	[DIV4_SIUB] = DIV4("siub_clk", SCLKBCR, 0, 0x1fff, 0),
-	[DIV4_IRDA] = DIV4("irda_clk", IRDACLKCR, 0, 0x1fff, 0),
 };
 
 struct clk div6_clks[] = {
@@ -189,6 +198,14 @@ int __init arch_clk_init(void)
 		ret = sh_clk_div4_register(div4_clks, DIV4_NR, &div4_table);
 
 	if (!ret)
+		ret = sh_clk_div4_enable_register(div4_enable_clks,
+					DIV4_ENABLE_NR, &div4_table);
+
+	if (!ret)
+		ret = sh_clk_div4_reparent_register(div4_reparent_clks,
+					DIV4_REPARENT_NR, &div4_table);
+
+	if (!ret)
 		ret = sh_clk_div6_register(div6_clks, ARRAY_SIZE(div6_clks));
 
 	if (!ret)
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7786.c b/arch/sh/kernel/cpu/sh4a/clock-sh7786.c
index a0e8869071ac..494c636012bb 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7786.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7786.c
@@ -3,11 +3,7 @@
  *
  * SH7786 support for the clock framework
  *
- * Copyright (C) 2008, 2009  Renesas Solutions Corp.
- * Kuninori Morimoto <morimoto.kuninori@renesas.com>
- *
- * Based on SH7785
- *  Copyright (C) 2007  Paul Mundt
+ *  Copyright (C) 2010  Paul Mundt
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
@@ -15,127 +11,123 @@
  */
 #include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/clk.h>
+#include <linux/io.h>
 #include <asm/clock.h>
 #include <asm/freq.h>
-#include <asm/io.h>
-
-static int ifc_divisors[] = { 1, 2, 4, 1 };
-static int sfc_divisors[] = { 1, 1, 4, 1 };
-static int bfc_divisors[] = { 1, 1, 1, 1, 1, 12, 16, 1,
-			     24, 32, 1, 1, 1, 1, 1, 1 };
-static int mfc_divisors[] = { 1, 1, 4, 1 };
-static int pfc_divisors[] = { 1, 1, 1, 1, 1, 1, 16, 1,
-			      24, 32, 1, 48, 1, 1, 1, 1 };
-
-static void master_clk_init(struct clk *clk)
-{
-	clk->rate *= pfc_divisors[ctrl_inl(FRQMR1) & 0x000f];
-}
 
-static struct clk_ops sh7786_master_clk_ops = {
-	.init		= master_clk_init,
+/*
+ * Default rate for the root input clock, reset this with clk_set_rate()
+ * from the platform code.
+ */
+static struct clk extal_clk = {
+	.name		= "extal",
+	.id		= -1,
+	.rate		= 33333333,
 };
 
-static unsigned long module_clk_recalc(struct clk *clk)
+static unsigned long pll_recalc(struct clk *clk)
 {
-	int idx = (ctrl_inl(FRQMR1) & 0x000f);
-	return clk->parent->rate / pfc_divisors[idx];
-}
+	int multiplier;
 
-static struct clk_ops sh7786_module_clk_ops = {
-	.recalc		= module_clk_recalc,
-};
+	/*
+	 * Clock modes 0, 1, and 2 use an x64 multiplier against PLL1,
+	 * while modes 3, 4, and 5 use an x32.
+	 */
+	multiplier = (sh_mv.mv_mode_pins() & 0xf) < 3 ? 64 : 32;
 
-static unsigned long bus_clk_recalc(struct clk *clk)
-{
-	int idx = ((ctrl_inl(FRQMR1) >> 16) & 0x000f);
-	return clk->parent->rate / bfc_divisors[idx];
+	return clk->parent->rate * multiplier;
 }
 
-static struct clk_ops sh7786_bus_clk_ops = {
-	.recalc		= bus_clk_recalc,
+static struct clk_ops pll_clk_ops = {
+	.recalc		= pll_recalc,
 };
 
-static unsigned long cpu_clk_recalc(struct clk *clk)
-{
-	int idx = ((ctrl_inl(FRQMR1) >> 28) & 0x0003);
-	return clk->parent->rate / ifc_divisors[idx];
-}
-
-static struct clk_ops sh7786_cpu_clk_ops = {
-	.recalc		= cpu_clk_recalc,
+static struct clk pll_clk = {
+	.name		= "pll_clk",
+	.id		= -1,
+	.ops		= &pll_clk_ops,
+	.parent		= &extal_clk,
+	.flags		= CLK_ENABLE_ON_INIT,
 };
 
-static struct clk_ops *sh7786_clk_ops[] = {
-	&sh7786_master_clk_ops,
-	&sh7786_module_clk_ops,
-	&sh7786_bus_clk_ops,
-	&sh7786_cpu_clk_ops,
+static struct clk *clks[] = {
+	&extal_clk,
+	&pll_clk,
 };
 
-void __init arch_init_clk_ops(struct clk_ops **ops, int idx)
-{
-	if (idx < ARRAY_SIZE(sh7786_clk_ops))
-		*ops = sh7786_clk_ops[idx];
-}
-
-static unsigned long shyway_clk_recalc(struct clk *clk)
-{
-	int idx = ((ctrl_inl(FRQMR1) >> 20) & 0x0003);
-	return clk->parent->rate / sfc_divisors[idx];
-}
+static unsigned int div2[] = { 1, 2, 4, 6, 8, 12, 16, 18,
+			       24, 32, 36, 48 };
 
-static struct clk_ops sh7786_shyway_clk_ops = {
-	.recalc		= shyway_clk_recalc,
+static struct clk_div_mult_table div4_table = {
+	.divisors = div2,
+	.nr_divisors = ARRAY_SIZE(div2),
 };
 
-static struct clk sh7786_shyway_clk = {
-	.name		= "shyway_clk",
-	.flags		= CLK_ENABLE_ON_INIT,
-	.ops		= &sh7786_shyway_clk_ops,
-};
+enum { DIV4_I, DIV4_SH, DIV4_B, DIV4_DDR, DIV4_DU, DIV4_P, DIV4_NR };
 
-static unsigned long ddr_clk_recalc(struct clk *clk)
-{
-	int idx = ((ctrl_inl(FRQMR1) >> 12) & 0x0003);
-	return clk->parent->rate / mfc_divisors[idx];
-}
+#define DIV4(_str, _bit, _mask, _flags) \
+  SH_CLK_DIV4(_str, &pll_clk, FRQMR1, _bit, _mask, _flags)
 
-static struct clk_ops sh7786_ddr_clk_ops = {
-	.recalc		= ddr_clk_recalc,
+struct clk div4_clks[DIV4_NR] = {
+	[DIV4_P] = DIV4("peripheral_clk", 0, 0x0b40, 0),
+	[DIV4_DU] = DIV4("du_clk", 4, 0x0010, 0),
+	[DIV4_DDR] = DIV4("ddr_clk", 12, 0x0002, CLK_ENABLE_ON_INIT),
+	[DIV4_B] = DIV4("bus_clk", 16, 0x0360, CLK_ENABLE_ON_INIT),
+	[DIV4_SH] = DIV4("shyway_clk", 20, 0x0002, CLK_ENABLE_ON_INIT),
+	[DIV4_I] = DIV4("cpu_clk", 28, 0x0006, CLK_ENABLE_ON_INIT),
 };
 
-static struct clk sh7786_ddr_clk = {
-	.name		= "ddr_clk",
-	.flags		= CLK_ENABLE_ON_INIT,
-	.ops		= &sh7786_ddr_clk_ops,
-};
-
-/*
- * Additional SH7786-specific on-chip clocks that aren't already part of the
- * clock framework
- */
-static struct clk *sh7786_onchip_clocks[] = {
-	&sh7786_shyway_clk,
-	&sh7786_ddr_clk,
+#define MSTPCR0		0xffc40030
+#define MSTPCR1		0xffc40034
+
+static struct clk mstp_clks[] = {
+	/* MSTPCR0 */
+	SH_CLK_MSTP32("scif_fck", 5, &div4_clks[DIV4_P], MSTPCR0, 29, 0),
+	SH_CLK_MSTP32("scif_fck", 4, &div4_clks[DIV4_P], MSTPCR0, 28, 0),
+	SH_CLK_MSTP32("scif_fck", 3, &div4_clks[DIV4_P], MSTPCR0, 27, 0),
+	SH_CLK_MSTP32("scif_fck", 2, &div4_clks[DIV4_P], MSTPCR0, 26, 0),
+	SH_CLK_MSTP32("scif_fck", 1, &div4_clks[DIV4_P], MSTPCR0, 25, 0),
+	SH_CLK_MSTP32("scif_fck", 0, &div4_clks[DIV4_P], MSTPCR0, 24, 0),
+	SH_CLK_MSTP32("ssi_fck", 3, &div4_clks[DIV4_P], MSTPCR0, 23, 0),
+	SH_CLK_MSTP32("ssi_fck", 2, &div4_clks[DIV4_P], MSTPCR0, 22, 0),
+	SH_CLK_MSTP32("ssi_fck", 1, &div4_clks[DIV4_P], MSTPCR0, 21, 0),
+	SH_CLK_MSTP32("ssi_fck", 0, &div4_clks[DIV4_P], MSTPCR0, 20, 0),
+	SH_CLK_MSTP32("hac_fck", 1, &div4_clks[DIV4_P], MSTPCR0, 17, 0),
+	SH_CLK_MSTP32("hac_fck", 0, &div4_clks[DIV4_P], MSTPCR0, 16, 0),
+	SH_CLK_MSTP32("i2c_fck", 1, &div4_clks[DIV4_P], MSTPCR0, 15, 0),
+	SH_CLK_MSTP32("i2c_fck", 0, &div4_clks[DIV4_P], MSTPCR0, 14, 0),
+	SH_CLK_MSTP32("tmu9_11_fck", -1, &div4_clks[DIV4_P], MSTPCR0, 11, 0),
+	SH_CLK_MSTP32("tmu678_fck", -1, &div4_clks[DIV4_P], MSTPCR0, 10, 0),
+	SH_CLK_MSTP32("tmu345_fck", -1, &div4_clks[DIV4_P], MSTPCR0, 9, 0),
+	SH_CLK_MSTP32("tmu012_fck", -1, &div4_clks[DIV4_P], MSTPCR0, 8, 0),
+	SH_CLK_MSTP32("sdif_fck", 1, &div4_clks[DIV4_P], MSTPCR0, 5, 0),
+	SH_CLK_MSTP32("sdif_fck", 0, &div4_clks[DIV4_P], MSTPCR0, 4, 0),
+	SH_CLK_MSTP32("hspi_fck", -1, &div4_clks[DIV4_P], MSTPCR0, 2, 0),
+
+	/* MSTPCR1 */
+	SH_CLK_MSTP32("usb_fck", -1, NULL, MSTPCR1, 12, 0),
+	SH_CLK_MSTP32("pcie_fck", 2, NULL, MSTPCR1, 10, 0),
+	SH_CLK_MSTP32("pcie_fck", 1, NULL, MSTPCR1, 9, 0),
+	SH_CLK_MSTP32("pcie_fck", 0, NULL, MSTPCR1, 8, 0),
+	SH_CLK_MSTP32("dmac_11_6_fck", -1, NULL, MSTPCR1, 5, 0),
+	SH_CLK_MSTP32("dmac_5_0_fck", -1, NULL, MSTPCR1, 4, 0),
+	SH_CLK_MSTP32("du_fck", -1, NULL, MSTPCR1, 3, 0),
+	SH_CLK_MSTP32("ether_fck", -1, NULL, MSTPCR1, 2, 0),
 };
 
 int __init arch_clk_init(void)
 {
-	struct clk *clk;
 	int i, ret = 0;
 
-	cpg_clk_init();
-
-	clk = clk_get(NULL, "master_clk");
-	for (i = 0; i < ARRAY_SIZE(sh7786_onchip_clocks); i++) {
-		struct clk *clkp = sh7786_onchip_clocks[i];
-
-		clkp->parent = clk;
-		ret |= clk_register(clkp);
-	}
+	for (i = 0; i < ARRAY_SIZE(clks); i++)
+		ret |= clk_register(clks[i]);
 
-	clk_put(clk);
+	if (!ret)
+		ret = sh_clk_div4_register(div4_clks, ARRAY_SIZE(div4_clks),
+					   &div4_table);
+	if (!ret)
+		ret = sh_clk_mstp32_register(mstp_clks, ARRAY_SIZE(mstp_clks));
 
 	return ret;
 }
diff --git a/arch/sh/kernel/cpu/sh4a/smp-shx3.c b/arch/sh/kernel/cpu/sh4a/smp-shx3.c
index 5863e0c4d02f..11bf4c1e25c0 100644
--- a/arch/sh/kernel/cpu/sh4a/smp-shx3.c
+++ b/arch/sh/kernel/cpu/sh4a/smp-shx3.c
@@ -78,7 +78,10 @@ void __init plat_prepare_cpus(unsigned int max_cpus)
 
 void plat_start_cpu(unsigned int cpu, unsigned long entry_point)
 {
-	__raw_writel(entry_point, RESET_REG(cpu));
+	if (__in_29bit_mode())
+		__raw_writel(entry_point, RESET_REG(cpu));
+	else
+		__raw_writel(virt_to_phys(entry_point), RESET_REG(cpu));
 
 	if (!(__raw_readl(STBCR_REG(cpu)) & STBCR_MSTP))
 		__raw_writel(STBCR_MSTP, STBCR_REG(cpu));
diff --git a/arch/sh/kernel/cpu/sh4a/ubc.c b/arch/sh/kernel/cpu/sh4a/ubc.c
new file mode 100644
index 000000000000..efb2745bcb36
--- /dev/null
+++ b/arch/sh/kernel/cpu/sh4a/ubc.c
@@ -0,0 +1,133 @@
+/*
+ * arch/sh/kernel/cpu/sh4a/ubc.c
+ *
+ * On-chip UBC support for SH-4A CPUs.
+ *
+ * Copyright (C) 2009 - 2010  Paul Mundt
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/init.h>
+#include <linux/err.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <asm/hw_breakpoint.h>
+
+#define UBC_CBR(idx)	(0xff200000 + (0x20 * idx))
+#define UBC_CRR(idx)	(0xff200004 + (0x20 * idx))
+#define UBC_CAR(idx)	(0xff200008 + (0x20 * idx))
+#define UBC_CAMR(idx)	(0xff20000c + (0x20 * idx))
+
+#define UBC_CCMFR	0xff200600
+#define UBC_CBCR	0xff200620
+
+/* CRR */
+#define UBC_CRR_PCB	(1 << 1)
+#define UBC_CRR_BIE	(1 << 0)
+
+/* CBR */
+#define UBC_CBR_CE	(1 << 0)
+
+static struct sh_ubc sh4a_ubc;
+
+static void sh4a_ubc_enable(struct arch_hw_breakpoint *info, int idx)
+{
+	__raw_writel(UBC_CBR_CE | info->len | info->type, UBC_CBR(idx));
+	__raw_writel(info->address, UBC_CAR(idx));
+}
+
+static void sh4a_ubc_disable(struct arch_hw_breakpoint *info, int idx)
+{
+	__raw_writel(0, UBC_CBR(idx));
+	__raw_writel(0, UBC_CAR(idx));
+}
+
+static void sh4a_ubc_enable_all(unsigned long mask)
+{
+	int i;
+
+	for (i = 0; i < sh4a_ubc.num_events; i++)
+		if (mask & (1 << i))
+			__raw_writel(__raw_readl(UBC_CBR(i)) | UBC_CBR_CE,
+				     UBC_CBR(i));
+}
+
+static void sh4a_ubc_disable_all(void)
+{
+	int i;
+
+	for (i = 0; i < sh4a_ubc.num_events; i++)
+		__raw_writel(__raw_readl(UBC_CBR(i)) & ~UBC_CBR_CE,
+			     UBC_CBR(i));
+}
+
+static unsigned long sh4a_ubc_active_mask(void)
+{
+	unsigned long active = 0;
+	int i;
+
+	for (i = 0; i < sh4a_ubc.num_events; i++)
+		if (__raw_readl(UBC_CBR(i)) & UBC_CBR_CE)
+			active |= (1 << i);
+
+	return active;
+}
+
+static unsigned long sh4a_ubc_triggered_mask(void)
+{
+	return __raw_readl(UBC_CCMFR);
+}
+
+static void sh4a_ubc_clear_triggered_mask(unsigned long mask)
+{
+	__raw_writel(__raw_readl(UBC_CCMFR) & ~mask, UBC_CCMFR);
+}
+
+static struct sh_ubc sh4a_ubc = {
+	.name			= "SH-4A",
+	.num_events		= 2,
+	.trap_nr		= 0x1e0,
+	.enable			= sh4a_ubc_enable,
+	.disable		= sh4a_ubc_disable,
+	.enable_all		= sh4a_ubc_enable_all,
+	.disable_all		= sh4a_ubc_disable_all,
+	.active_mask		= sh4a_ubc_active_mask,
+	.triggered_mask		= sh4a_ubc_triggered_mask,
+	.clear_triggered_mask	= sh4a_ubc_clear_triggered_mask,
+};
+
+static int __init sh4a_ubc_init(void)
+{
+	struct clk *ubc_iclk = clk_get(NULL, "ubc0");
+	int i;
+
+	/*
+	 * The UBC MSTP bit is optional, as not all platforms will have
+	 * it. Just ignore it if we can't find it.
+	 */
+	if (IS_ERR(ubc_iclk))
+		ubc_iclk = NULL;
+
+	clk_enable(ubc_iclk);
+
+	__raw_writel(0, UBC_CBCR);
+
+	for (i = 0; i < sh4a_ubc.num_events; i++) {
+		__raw_writel(0, UBC_CAMR(i));
+		__raw_writel(0, UBC_CBR(i));
+
+		__raw_writel(UBC_CRR_BIE | UBC_CRR_PCB, UBC_CRR(i));
+
+		/* dummy read for write posting */
+		(void)__raw_readl(UBC_CRR(i));
+	}
+
+	clk_disable(ubc_iclk);
+
+	sh4a_ubc.clk = ubc_iclk;
+
+	return register_sh_ubc(&sh4a_ubc);
+}
+arch_initcall(sh4a_ubc_init);
diff --git a/arch/sh/kernel/cpu/sh5/entry.S b/arch/sh/kernel/cpu/sh5/entry.S
index 8f13f73cb2cb..6b80295dd7a4 100644
--- a/arch/sh/kernel/cpu/sh5/entry.S
+++ b/arch/sh/kernel/cpu/sh5/entry.S
@@ -187,7 +187,7 @@ trap_jtable:
 	.rept 6
 		.long do_exception_error	/* 0x880 - 0x920 */
 	.endr
-	.long	do_software_break_point	/* 0x940 */
+	.long	breakpoint_trap_handler	/* 0x940 */
 	.long	do_exception_error		/* 0x960 */
 	.long	do_single_step		/* 0x980 */
 
@@ -1124,7 +1124,7 @@ fpu_error_or_IRQA:
 	pta	its_IRQ, tr0
 	beqi/l	r4, EVENT_INTERRUPT, tr0
 #ifdef CONFIG_SH_FPU
-	movi	do_fpu_state_restore, r6
+	movi	fpu_state_restore_trap_handler, r6
 #else
 	movi	do_exception_error, r6
 #endif
@@ -1135,7 +1135,7 @@ fpu_error_or_IRQB:
 	pta	its_IRQ, tr0
 	beqi/l	r4, EVENT_INTERRUPT, tr0
 #ifdef CONFIG_SH_FPU
-	movi	do_fpu_state_restore, r6
+	movi	fpu_state_restore_trap_handler, r6
 #else
 	movi	do_exception_error, r6
 #endif
diff --git a/arch/sh/kernel/cpu/sh5/fpu.c b/arch/sh/kernel/cpu/sh5/fpu.c
index 4648ccee6c4d..4b3bb35e99f3 100644
--- a/arch/sh/kernel/cpu/sh5/fpu.c
+++ b/arch/sh/kernel/cpu/sh5/fpu.c
@@ -15,24 +15,6 @@
 #include <linux/sched.h>
 #include <linux/signal.h>
 #include <asm/processor.h>
-#include <asm/user.h>
-#include <asm/io.h>
-#include <asm/fpu.h>
-
-/*
- * Initially load the FPU with signalling NANS.  This bit pattern
- * has the property that no matter whether considered as single or as
- * double precision, it still represents a signalling NAN.
- */
-#define sNAN64		0xFFFFFFFFFFFFFFFFULL
-#define sNAN32		0xFFFFFFFFUL
-
-static union sh_fpu_union init_fpuregs = {
-	.hard = {
-		.fp_regs = { [0 ... 63] = sNAN32 },
-		.fpscr = FPSCR_INIT
-	}
-};
 
 void save_fpu(struct task_struct *tsk)
 {
@@ -72,12 +54,11 @@ void save_fpu(struct task_struct *tsk)
 		     "fgetscr   fr63\n\t"
 		     "fst.s     %0, (32*8), fr63\n\t"
 		: /* no output */
-		: "r" (&tsk->thread.fpu.hard)
+		: "r" (&tsk->thread.xstate->hardfpu)
 		: "memory");
 }
 
-static inline void
-fpload(struct sh_fpu_hard_struct *fpregs)
+void restore_fpu(struct task_struct *tsk)
 {
 	asm volatile("fld.p     %0, (0*8), fp0\n\t"
 		     "fld.p     %0, (1*8), fp2\n\t"
@@ -116,16 +97,11 @@ fpload(struct sh_fpu_hard_struct *fpregs)
 
 		     "fld.p     %0, (31*8), fp62\n\t"
 		: /* no output */
-		: "r" (fpregs) );
-}
-
-void fpinit(struct sh_fpu_hard_struct *fpregs)
-{
-	*fpregs = init_fpuregs.hard;
+		: "r" (&tsk->thread.xstate->hardfpu)
+		: "memory");
 }
 
-asmlinkage void
-do_fpu_error(unsigned long ex, struct pt_regs *regs)
+asmlinkage void do_fpu_error(unsigned long ex, struct pt_regs *regs)
 {
 	struct task_struct *tsk = current;
 
@@ -133,35 +109,6 @@ do_fpu_error(unsigned long ex, struct pt_regs *regs)
 
 	tsk->thread.trap_no = 11;
 	tsk->thread.error_code = 0;
-	force_sig(SIGFPE, tsk);
-}
-
-
-asmlinkage void
-do_fpu_state_restore(unsigned long ex, struct pt_regs *regs)
-{
-	void die(const char *str, struct pt_regs *regs, long err);
-
-	if (! user_mode(regs))
-		die("FPU used in kernel", regs, ex);
 
-	regs->sr &= ~SR_FD;
-
-	if (last_task_used_math == current)
-		return;
-
-	enable_fpu();
-	if (last_task_used_math != NULL)
-		/* Other processes fpu state, save away */
-		save_fpu(last_task_used_math);
-
-        last_task_used_math = current;
-        if (used_math()) {
-                fpload(&current->thread.fpu.hard);
-        } else {
-		/* First time FPU user.  */
-		fpload(&init_fpuregs.hard);
-                set_used_math();
-        }
-	disable_fpu();
+	force_sig(SIGFPE, tsk);
 }