Merge remote-tracking branch 'arc/for-next'

author: Stephen Rothwell <sfr@canb.auug.org.au> 2013-08-01 10:54:39 +1000
committer: Stephen Rothwell <sfr@canb.auug.org.au> 2013-08-01 10:54:39 +1000
commit: 30d2e943e732372bfde208edfe3af56cf2e35358 (patch)
tree: 951e7b92c4752edfbd61fb1936999db9ae4aab57 /arch
parent: 9e2ba7744ebf1aab59c64c827fca391b3f2fb11e (diff)
parent: 524a105dead4d424faaa6a78a72bf3343a747ecf (diff)
15 files changed, 415 insertions, 415 deletions
diff --git a/arch/arc/boot/.gitignore b/arch/arc/boot/.gitignore
new file mode 100644
index 000000000000..5d65b54bf17a
--- /dev/null
+++ b/arch/arc/boot/.gitignore
@@ -0,0 +1 @@
+*.dtb*
diff --git a/arch/arc/include/asm/entry.h b/arch/arc/include/asm/entry.h
index df57611652e5..884081099f80 100644
--- a/arch/arc/include/asm/entry.h
+++ b/arch/arc/include/asm/entry.h
@@ -365,7 +365,7 @@
  * it to memory (non-SMP case) or SCRATCH0 Aux Reg (SMP).
  *
  * Before saving the full regfile - this reg is restored back, only
- * to be saved again on kernel mode stack, as part of ptregs.
+ * to be saved again on kernel mode stack, as part of pt_regs.
  *-------------------------------------------------------------*/
 .macro EXCPN_PROLOG_FREEUP_REG	reg
 #ifdef CONFIG_SMP
@@ -384,6 +384,28 @@
 .endm
 
 /*--------------------------------------------------------------
+ * Exception Entry prologue
+ * -Switches stack to K mode (if not already)
+ * -Saves the register file
+ *
+ * After this it is safe to call the "C" handlers
+ *-------------------------------------------------------------*/
+.macro EXCEPTION_PROLOGUE
+
+	/* Need at least 1 reg to code the early exception prologue */
+	EXCPN_PROLOG_FREEUP_REG r9
+
+	/* U/K mode at time of exception (stack not switched if already K) */
+	lr  r9, [erstatus]
+
+	/* ARC700 doesn't provide auto-stack switching */
+	SWITCH_TO_KERNEL_STK
+
+	/* save the regfile */
+	SAVE_ALL_SYS
+.endm
+
+/*--------------------------------------------------------------
  * Save all registers used by Exceptions (TLB Miss, Prot-V, Mem err etc)
  * Requires SP to be already switched to kernel mode Stack
  * sp points to the next free element on the stack at exit of this macro.
diff --git a/arch/arc/include/asm/irqflags.h b/arch/arc/include/asm/irqflags.h
index d99f79bcf865..b68b53f458d1 100644
--- a/arch/arc/include/asm/irqflags.h
+++ b/arch/arc/include/asm/irqflags.h
@@ -157,13 +157,6 @@ static inline void arch_unmask_irq(unsigned int irq)
 	flag	\scratch
 .endm
 
-.macro IRQ_DISABLE_SAVE  scratch, save
-	lr	\scratch, [status32]
-	mov	\save, \scratch		/* Make a copy */
-	bic	\scratch, \scratch, (STATUS_E1_MASK | STATUS_E2_MASK)
-	flag	\scratch
-.endm
-
 .macro IRQ_ENABLE  scratch
 	lr	\scratch, [status32]
 	or	\scratch, \scratch, (STATUS_E1_MASK | STATUS_E2_MASK)
diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h
index 7c03fe61759c..baf923f689c1 100644
--- a/arch/arc/include/asm/mmu.h
+++ b/arch/arc/include/asm/mmu.h
@@ -32,6 +32,8 @@
 /* Error code if probe fails */
 #define TLB_LKUP_ERR		0x80000000
 
+#define TLB_DUP_ERR	(TLB_LKUP_ERR | 0x00000001)
+
 /* TLB Commands */
 #define TLBWrite    0x1
 #define TLBRead     0x2
@@ -46,10 +48,7 @@
 #ifndef __ASSEMBLY__
 
 typedef struct {
-	unsigned long asid;	/* Pvt Addr-Space ID for mm */
-#ifdef CONFIG_ARC_TLB_DBG
-	struct task_struct *tsk;
-#endif
+	unsigned long asid[NR_CPUS];	/* Hw PID + Generation cycle */
 } mm_context_t;
 
 #ifdef CONFIG_ARC_DBG_TLB_PARANOIA
diff --git a/arch/arc/include/asm/mmu_context.h b/arch/arc/include/asm/mmu_context.h
index 0d71fb11b57c..97949e70d690 100644
--- a/arch/arc/include/asm/mmu_context.h
+++ b/arch/arc/include/asm/mmu_context.h
@@ -30,99 +30,67 @@
  * "Fast Context Switch" i.e. no TLB flush on ctxt-switch
  *
  * Linux assigns each task a unique ASID. A simple round-robin allocation
- * of H/w ASID is done using software tracker @asid_cache.
+ * of H/w ASID is done using software tracker @asid_cpu.
  * When it reaches max 255, the allocation cycle starts afresh by flushing
  * the entire TLB and wrapping ASID back to zero.
  *
- * For book-keeping, Linux uses a couple of data-structures:
- *  -mm_struct has an @asid field to keep a note of task's ASID (needed at the
- *   time of say switch_mm( )
- *  -An array of mm structs @asid_mm_map[] for asid->mm the reverse mapping,
- *  given an ASID, finding the mm struct associated.
- *
- * The round-robin allocation algorithm allows for ASID stealing.
- * If asid tracker is at "x-1", a new req will allocate "x", even if "x" was
- * already assigned to another (switched-out) task. Obviously the prev owner
- * is marked with an invalid ASID to make it request for a new ASID when it
- * gets scheduled next time. However its TLB entries (with ASID "x") could
- * exist, which must be cleared before the same ASID is used by the new owner.
- * Flushing them would be plausible but costly solution. Instead we force a
- * allocation policy quirk, which ensures that a stolen ASID won't have any
- * TLB entries associates, alleviating the need to flush.
- * The quirk essentially is not allowing ASID allocated in prev cycle
- * to be used past a roll-over in the next cycle.
- * When this happens (i.e. task ASID > asid tracker), task needs to refresh
- * its ASID, aligning it to current value of tracker. If the task doesn't get
- * scheduled past a roll-over, hence its ASID is not yet realigned with
- * tracker, such ASID is anyways safely reusable because it is
- * gauranteed that TLB entries with that ASID wont exist.
+ * A new allocation cycle, post rollover, could potentially reassign an ASID
+ * to a different task. Thus the rule is to refresh the ASID in a new cycle.
+ * The 32 bit @asid_cpu (and mm->asid) have 8 bits MMU PID and rest 24 bits
+ * serve as cycle/generation indicator and natural 32 bit unsigned math
+ * automagically increments the generation when lower 8 bits rollover.
  */
 
-#define FIRST_ASID  0
-#define MAX_ASID    255			/* 8 bit PID field in PID Aux reg */
-#define NO_ASID     (MAX_ASID + 1)	/* ASID Not alloc to mmu ctxt */
-#define NUM_ASID    ((MAX_ASID - FIRST_ASID) + 1)
+#define MM_CTXT_ASID_MASK	0x000000ff /* MMU PID reg :8 bit PID */
+#define MM_CTXT_CYCLE_MASK	(~MM_CTXT_ASID_MASK)
+
+#define MM_CTXT_FIRST_CYCLE	(MM_CTXT_ASID_MASK + 1)
+#define MM_CTXT_NO_ASID		0UL
 
-/* ASID to mm struct mapping */
-extern struct mm_struct *asid_mm_map[NUM_ASID + 1];
+#define asid_mm(mm, cpu)	mm->context.asid[cpu]
+#define hw_pid(mm, cpu)		(asid_mm(mm, cpu) & MM_CTXT_ASID_MASK)
 
-extern int asid_cache;
+DECLARE_PER_CPU(unsigned int, asid_cache);
+#define asid_cpu(cpu)		per_cpu(asid_cache, cpu)
 
 /*
- * Assign a new ASID to task. If the task already has an ASID, it is
- * relinquished.
+ * Get a new ASID if task doesn't have a valid one (unalloc or from prev cycle)
+ * Also set the MMU PID register to existing/updated ASID
  */
 static inline void get_new_mmu_context(struct mm_struct *mm)
 {
-	struct mm_struct *prev_owner;
+	const unsigned int cpu = smp_processor_id();
 	unsigned long flags;
 
 	local_irq_save(flags);
 
 	/*
-	 * Relinquish the currently owned ASID (if any).
-	 * Doing unconditionally saves a cmp-n-branch; for already unused
-	 * ASID slot, the value was/remains NULL
+	 * Move to new ASID if it was not from current alloc cycle/generation.
+	 * Callers needing new ASID unconditionally, independent of alloc-cycle
+	 * (local_flush_tlb_mm() for forking  parent) first need to destroy the
+	 * context, setting it to invalid value, which the check below would
+	 * catch too
 	 */
-	asid_mm_map[mm->context.asid] = (struct mm_struct *)NULL;
+	if (!((asid_mm(mm, cpu) ^ asid_cpu(cpu)) & MM_CTXT_CYCLE_MASK))
+		goto set_hw;
 
 	/* move to new ASID */
-	if (++asid_cache > MAX_ASID) {	/* ASID roll-over */
-		asid_cache = FIRST_ASID;
-		flush_tlb_all();
+	if (!(++asid_cpu(cpu) & MM_CTXT_ASID_MASK)) {	/* ASID roll-over */
+		local_flush_tlb_all();
 	}
 
-	/*
-	 * Is next ASID already owned by some-one else (we are stealing it).
-	 * If so, let the orig owner be aware of this, so when it runs, it
-	 * asks for a brand new ASID. This would only happen for a long-lived
-	 * task with ASID from prev allocation cycle (before ASID roll-over).
-	 *
-	 * This might look wrong - if we are re-using some other task's ASID,
-	 * won't we use it's stale TLB entries too. Actually switch_mm( ) takes
-	 * care of such a case: it ensures that task with ASID from prev alloc
-	 * cycle, when scheduled will refresh it's ASID: see switch_mm( ) below
-	 * The stealing scenario described here will only happen if that task
-	 * didn't get a chance to refresh it's ASID - implying stale entries
-	 * won't exist.
+	/* Above was rollover of 8 bit ASID in 32 bit container.
+	 * If the container itself wrapped around, set it to a non zero
+	 * "generation" to distinguish from no context
 	 */
-	prev_owner = asid_mm_map[asid_cache];
-	if (prev_owner)
-		prev_owner->context.asid = NO_ASID;
+	if (!asid_cpu(cpu))
+		asid_cpu(cpu) = MM_CTXT_FIRST_CYCLE;
 
 	/* Assign new ASID to tsk */
-	asid_mm_map[asid_cache] = mm;
-	mm->context.asid = asid_cache;
-
-#ifdef CONFIG_ARC_TLB_DBG
-	pr_info("ARC_TLB_DBG: NewMM=0x%x OldMM=0x%x task_struct=0x%x Task: %s,"
-	       " pid:%u, assigned asid:%lu\n",
-	       (unsigned int)mm, (unsigned int)prev_owner,
-	       (unsigned int)(mm->context.tsk), (mm->context.tsk)->comm,
-	       (mm->context.tsk)->pid, mm->context.asid);
-#endif
+	asid_mm(mm, cpu) = asid_cpu(cpu);
 
-	write_aux_reg(ARC_REG_PID, asid_cache | MMU_ENABLE);
+set_hw:
+	write_aux_reg(ARC_REG_PID, hw_pid(mm, cpu) | MMU_ENABLE);
 
 	local_irq_restore(flags);
 }
@@ -134,60 +102,66 @@ static inline void get_new_mmu_context(struct mm_struct *mm)
 static inline int
 init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 {
-	mm->context.asid = NO_ASID;
-#ifdef CONFIG_ARC_TLB_DBG
-	mm->context.tsk = tsk;
-#endif
+	int i;
+
+	for (i = 0; i < NR_CPUS; i++)
+		asid_mm(mm, i) = MM_CTXT_NO_ASID;
+
 	return 0;
 }
 
+static inline void destroy_context(struct mm_struct *mm)
+{
+	asid_mm(mm, smp_processor_id()) = MM_CTXT_NO_ASID;
+}
+
 /* Prepare the MMU for task: setup PID reg with allocated ASID
     If task doesn't have an ASID (never alloc or stolen, get a new ASID)
 */
 static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 			     struct task_struct *tsk)
 {
-#ifndef CONFIG_SMP
-	/* PGD cached in MMU reg to avoid 3 mem lookups: task->mm->pgd */
-	write_aux_reg(ARC_REG_SCRATCH_DATA0, next->pgd);
-#endif
+	int migrating = 0;
+
+#ifdef CONFIG_SMP
+	const int cpu = smp_processor_id();
 
 	/*
-	 * Get a new ASID if task doesn't have a valid one. Possible when
-	 *  -task never had an ASID (fresh after fork)
-	 *  -it's ASID was stolen - past an ASID roll-over.
-	 *  -There's a third obscure scenario (if this task is running for the
-	 *   first time afer an ASID rollover), where despite having a valid
-	 *   ASID, we force a get for new ASID - see comments at top.
-	 *
-	 * Both the non-alloc scenario and first-use-after-rollover can be
-	 * detected using the single condition below:  NO_ASID = 256
-	 * while asid_cache is always a valid ASID value (0-255).
+	 * If @next is migrating to a different CPU, force an ASID refresh (by
+	 * 	relinquishing current value as required by new implementation
+	 * 	of get_new_mmu_context()
+	 * Use Case:
+	 *	Task t1 migrates to a different core, forks, migrates back to
+	 *	orig core. COW semantics requires it to have a new ASID now
+	 *	so that pre-fork TLB entries can't be used.
 	 */
-	if (next->context.asid > asid_cache) {
-		get_new_mmu_context(next);
-	} else {
-		/*
-		 * XXX: This will never happen given the chks above
-		 * BUG_ON(next->context.asid > MAX_ASID);
-		 */
-		write_aux_reg(ARC_REG_PID, next->context.asid | MMU_ENABLE);
-	}
-
-}
-
-static inline void destroy_context(struct mm_struct *mm)
-{
-	unsigned long flags;
+	cpumask_clear_cpu(cpu, mm_cpumask(prev));
+	migrating = !cpumask_test_and_set_cpu(cpu, mm_cpumask(next));
+	if (migrating)
+		destroy_context(next);
+#endif
 
-	local_irq_save(flags);
+	/* threads of same process (and not migrating cores in SMP) */
+	if ((prev == next) && !migrating)
+		return;
 
-	asid_mm_map[mm->context.asid] = NULL;
-	mm->context.asid = NO_ASID;
+#ifndef CONFIG_SMP
+	/* PGD cached in MMU reg to avoid 3 mem lookups: task->mm->pgd */
+	write_aux_reg(ARC_REG_SCRATCH_DATA0, next->pgd);
+#endif
 
-	local_irq_restore(flags);
+	get_new_mmu_context(next);
 }
 
+/*
+ * Called at the time of execve() to get a new ASID
+ * Note the subtlety here: get_new_mmu_context() behaves differently here
+ * vs. in switch_mm(). Here it always returns a new ASID, because mm has
+ * an unallocated "initial" value, while in latter, it moves to a new ASID,
+ * only if it was unallocated
+ */
+#define activate_mm(prev, next)		switch_mm(prev, next, NULL)
+
 /* it seemed that deactivate_mm( ) is a reasonable place to do book-keeping
  * for retiring-mm. However destroy_context( ) still needs to do that because
  * between mm_release( ) = >deactive_mm( ) and
@@ -197,17 +171,6 @@ static inline void destroy_context(struct mm_struct *mm)
  */
 #define deactivate_mm(tsk, mm)   do { } while (0)
 
-static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
-{
-#ifndef CONFIG_SMP
-	write_aux_reg(ARC_REG_SCRATCH_DATA0, next->pgd);
-#endif
-
-	/* Unconditionally get a new ASID */
-	get_new_mmu_context(next);
-
-}
-
 #define enter_lazy_tlb(mm, tsk)
 
 #endif /* __ASM_ARC_MMU_CONTEXT_H */
diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
index 4749a0eee1cf..6b0b7f7ef783 100644
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -57,43 +57,31 @@
 
 #define _PAGE_ACCESSED      (1<<1)	/* Page is accessed (S) */
 #define _PAGE_CACHEABLE     (1<<2)	/* Page is cached (H) */
-#define _PAGE_U_EXECUTE     (1<<3)	/* Page has user execute perm (H) */
-#define _PAGE_U_WRITE       (1<<4)	/* Page has user write perm (H) */
-#define _PAGE_U_READ        (1<<5)	/* Page has user read perm (H) */
-#define _PAGE_K_EXECUTE     (1<<6)	/* Page has kernel execute perm (H) */
-#define _PAGE_K_WRITE       (1<<7)	/* Page has kernel write perm (H) */
-#define _PAGE_K_READ        (1<<8)	/* Page has kernel perm (H) */
-#define _PAGE_GLOBAL        (1<<9)	/* Page is global (H) */
-#define _PAGE_MODIFIED      (1<<10)	/* Page modified (dirty) (S) */
-#define _PAGE_FILE          (1<<10)	/* page cache/ swap (S) */
-#define _PAGE_PRESENT       (1<<11)	/* TLB entry is valid (H) */
+#define _PAGE_EXECUTE       (1<<3)	/* Page has user execute perm (H) */
+#define _PAGE_WRITE         (1<<4)	/* Page has user write perm (H) */
+#define _PAGE_READ          (1<<5)	/* Page has user read perm (H) */
+#define _PAGE_MODIFIED      (1<<6)	/* Page modified (dirty) (S) */
+#define _PAGE_FILE          (1<<7)	/* page cache/ swap (S) */
+#define _PAGE_GLOBAL        (1<<8)	/* Page is global (H) */
+#define _PAGE_PRESENT       (1<<10)	/* TLB entry is valid (H) */
 
-#else
+#else	/* MMU v3 onwards */
 
-/* PD1 */
 #define _PAGE_CACHEABLE     (1<<0)	/* Page is cached (H) */
-#define _PAGE_U_EXECUTE     (1<<1)	/* Page has user execute perm (H) */
-#define _PAGE_U_WRITE       (1<<2)	/* Page has user write perm (H) */
-#define _PAGE_U_READ        (1<<3)	/* Page has user read perm (H) */
-#define _PAGE_K_EXECUTE     (1<<4)	/* Page has kernel execute perm (H) */
-#define _PAGE_K_WRITE       (1<<5)	/* Page has kernel write perm (H) */
-#define _PAGE_K_READ        (1<<6)	/* Page has kernel perm (H) */
-#define _PAGE_ACCESSED      (1<<7)	/* Page is accessed (S) */
-
-/* PD0 */
+#define _PAGE_EXECUTE       (1<<1)	/* Page has user execute perm (H) */
+#define _PAGE_WRITE         (1<<2)	/* Page has user write perm (H) */
+#define _PAGE_READ          (1<<3)	/* Page has user read perm (H) */
+#define _PAGE_ACCESSED      (1<<4)	/* Page is accessed (S) */
+#define _PAGE_MODIFIED      (1<<5)	/* Page modified (dirty) (S) */
+#define _PAGE_FILE          (1<<6)	/* page cache/ swap (S) */
 #define _PAGE_GLOBAL        (1<<8)	/* Page is global (H) */
 #define _PAGE_PRESENT       (1<<9)	/* TLB entry is valid (H) */
-#define _PAGE_SHARED_CODE   (1<<10)	/* Shared Code page with cmn vaddr
+#define _PAGE_SHARED_CODE   (1<<11)	/* Shared Code page with cmn vaddr
 					   usable for shared TLB entries (H) */
-
-#define _PAGE_MODIFIED      (1<<11)	/* Page modified (dirty) (S) */
-#define _PAGE_FILE          (1<<12)	/* page cache/ swap (S) */
-
-#define _PAGE_SHARED_CODE_H (1<<31)	/* Hardware counterpart of above */
 #endif
 
-/* Kernel allowed all permissions for all pages */
-#define _K_PAGE_PERMS  (_PAGE_K_EXECUTE | _PAGE_K_WRITE | _PAGE_K_READ | \
+/* vmalloc permissions */
+#define _K_PAGE_PERMS  (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ | \
 			_PAGE_GLOBAL | _PAGE_PRESENT)
 
 #ifdef CONFIG_ARC_CACHE_PAGES
@@ -109,10 +97,6 @@
  */
 #define ___DEF (_PAGE_PRESENT | _PAGE_DEF_CACHEABLE)
 
-#define _PAGE_READ	(_PAGE_U_READ    | _PAGE_K_READ)
-#define _PAGE_WRITE	(_PAGE_U_WRITE   | _PAGE_K_WRITE)
-#define _PAGE_EXECUTE	(_PAGE_U_EXECUTE | _PAGE_K_EXECUTE)
-
 /* Set of bits not changed in pte_modify */
 #define _PAGE_CHG_MASK	(PAGE_MASK | _PAGE_ACCESSED | _PAGE_MODIFIED)
 
@@ -126,8 +110,8 @@
 
 #define PAGE_SHARED	PAGE_U_W_R
 
-/* While kernel runs out of unstrslated space, vmalloc/modules use a chunk of
- * kernel vaddr space - visible in all addr spaces, but kernel mode only
+/* While kernel runs out of unstranslated space, vmalloc/modules use a chunk of
+ * user vaddr space - visible in all addr spaces, but kernel mode only
  * Thus Global, all-kernel-access, no-user-access, cached
  */
 #define PAGE_KERNEL          __pgprot(_K_PAGE_PERMS | _PAGE_DEF_CACHEABLE)
@@ -136,10 +120,9 @@
 #define PAGE_KERNEL_NO_CACHE __pgprot(_K_PAGE_PERMS)
 
 /* Masks for actual TLB "PD"s */
-#define PTE_BITS_IN_PD0	(_PAGE_GLOBAL | _PAGE_PRESENT)
-#define PTE_BITS_IN_PD1	(PAGE_MASK | _PAGE_CACHEABLE | \
-			 _PAGE_U_EXECUTE | _PAGE_U_WRITE | _PAGE_U_READ | \
-			 _PAGE_K_EXECUTE | _PAGE_K_WRITE | _PAGE_K_READ)
+#define PTE_BITS_IN_PD0		(_PAGE_GLOBAL | _PAGE_PRESENT)
+#define PTE_BITS_RWX		(_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ)
+#define PTE_BITS_NON_RWX_IN_PD1	(PAGE_MASK | _PAGE_CACHEABLE)
 
 /**************************************************************************
  * Mapping of vm_flags (Generic VM) to PTE flags (arch specific)
diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h
index c9938e7a7dbd..1bfeec2c0558 100644
--- a/arch/arc/include/asm/ptrace.h
+++ b/arch/arc/include/asm/ptrace.h
@@ -20,27 +20,17 @@ struct pt_regs {
 
 	/* Real registers */
 	long bta;	/* bta_l1, bta_l2, erbta */
-	long lp_start;
-	long lp_end;
-	long lp_count;
+
+	long lp_start, lp_end, lp_count;
+
 	long status32;	/* status32_l1, status32_l2, erstatus */
 	long ret;	/* ilink1, ilink2 or eret */
 	long blink;
 	long fp;
 	long r26;	/* gp */
-	long r12;
-	long r11;
-	long r10;
-	long r9;
-	long r8;
-	long r7;
-	long r6;
-	long r5;
-	long r4;
-	long r3;
-	long r2;
-	long r1;
-	long r0;
+
+	long r12, r11, r10, r9, r8, r7, r6, r5, r4, r3, r2, r1, r0;
+
 	long sp;	/* user/kernel sp depending on where we came from  */
 	long orig_r0;
 
@@ -70,19 +60,7 @@ struct pt_regs {
 /* Callee saved registers - need to be saved only when you are scheduled out */
 
 struct callee_regs {
-	long r25;
-	long r24;
-	long r23;
-	long r22;
-	long r21;
-	long r20;
-	long r19;
-	long r18;
-	long r17;
-	long r16;
-	long r15;
-	long r14;
-	long r13;
+	long r25, r24, r23, r22, r21, r20, r19, r18, r17, r16, r15, r14, r13;
 };
 
 #define instruction_pointer(regs)	((regs)->ret)
diff --git a/arch/arc/include/asm/tlbflush.h b/arch/arc/include/asm/tlbflush.h
index b2f9bc7f68c8..71c7b2e4b874 100644
--- a/arch/arc/include/asm/tlbflush.h
+++ b/arch/arc/include/asm/tlbflush.h
@@ -18,11 +18,18 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end);
 void local_flush_tlb_range(struct vm_area_struct *vma,
 			   unsigned long start, unsigned long end);
 
-/* XXX: Revisit for SMP */
+#ifndef CONFIG_SMP
 #define flush_tlb_range(vma, s, e)	local_flush_tlb_range(vma, s, e)
 #define flush_tlb_page(vma, page)	local_flush_tlb_page(vma, page)
 #define flush_tlb_kernel_range(s, e)	local_flush_tlb_kernel_range(s, e)
 #define flush_tlb_all()			local_flush_tlb_all()
 #define flush_tlb_mm(mm)		local_flush_tlb_mm(mm)
-
+#else
+extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+							 unsigned long end);
+extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long page);
+extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
+extern void flush_tlb_all(void);
+extern void flush_tlb_mm(struct mm_struct *mm);
+#endif /* CONFIG_SMP */
 #endif
diff --git a/arch/arc/kernel/.gitignore b/arch/arc/kernel/.gitignore
new file mode 100644
index 000000000000..c5f676c3c224
--- /dev/null
+++ b/arch/arc/kernel/.gitignore
@@ -0,0 +1 @@
+vmlinux.lds
diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S
index 1d7165156e17..b908dde8a331 100644
--- a/arch/arc/kernel/entry.S
+++ b/arch/arc/kernel/entry.S
@@ -267,12 +267,7 @@ ARC_EXIT handle_interrupt_level1
 
 ARC_ENTRY instr_service
 
-	EXCPN_PROLOG_FREEUP_REG r9
-
-	lr  r9, [erstatus]
-
-	SWITCH_TO_KERNEL_STK
-	SAVE_ALL_SYS
+	EXCEPTION_PROLOGUE
 
 	lr  r0, [efa]
 	mov r1, sp
@@ -289,15 +284,13 @@ ARC_EXIT instr_service
 
 ARC_ENTRY mem_service
 
-	EXCPN_PROLOG_FREEUP_REG r9
-
-	lr  r9, [erstatus]
-
-	SWITCH_TO_KERNEL_STK
-	SAVE_ALL_SYS
+	EXCEPTION_PROLOGUE
 
 	lr  r0, [efa]
 	mov r1, sp
+
+	FAKE_RET_FROM_EXCPN r9
+
 	bl  do_memory_error
 	b   ret_from_exception
 ARC_EXIT mem_service
@@ -308,11 +301,7 @@ ARC_EXIT mem_service
 
 ARC_ENTRY EV_MachineCheck
 
-	EXCPN_PROLOG_FREEUP_REG r9
-	lr  r9, [erstatus]
-
-	SWITCH_TO_KERNEL_STK
-	SAVE_ALL_SYS
+	EXCEPTION_PROLOGUE
 
 	lr  r2, [ecr]
 	lr  r0, [efa]
@@ -342,13 +331,7 @@ ARC_EXIT EV_MachineCheck
 
 ARC_ENTRY EV_TLBProtV
 
-	EXCPN_PROLOG_FREEUP_REG r9
-
-	;Which mode (user/kernel) was the system in when Exception occured
-	lr  r9, [erstatus]
-
-	SWITCH_TO_KERNEL_STK
-	SAVE_ALL_SYS
+	EXCEPTION_PROLOGUE
 
 	;---------(3) Save some more regs-----------------
 	;  vineetg: Mar 6th: Random Seg Fault issue #1
@@ -406,12 +389,7 @@ ARC_EXIT EV_TLBProtV
 ; ---------------------------------------------
 ARC_ENTRY EV_PrivilegeV
 
-	EXCPN_PROLOG_FREEUP_REG r9
-
-	lr  r9, [erstatus]
-
-	SWITCH_TO_KERNEL_STK
-	SAVE_ALL_SYS
+	EXCEPTION_PROLOGUE
 
 	lr  r0, [efa]
 	mov r1, sp
@@ -427,14 +405,13 @@ ARC_EXIT EV_PrivilegeV
 ; ---------------------------------------------
 ARC_ENTRY EV_Extension
 
-	EXCPN_PROLOG_FREEUP_REG r9
-	lr  r9, [erstatus]
-
-	SWITCH_TO_KERNEL_STK
-	SAVE_ALL_SYS
+	EXCEPTION_PROLOGUE
 
 	lr  r0, [efa]
 	mov r1, sp
+
+	FAKE_RET_FROM_EXCPN r9
+
 	bl  do_extension_fault
 	b   ret_from_exception
 ARC_EXIT EV_Extension
@@ -526,14 +503,7 @@ trap_with_param:
 
 ARC_ENTRY EV_Trap
 
-	; Need at least 1 reg to code the early exception prolog
-	EXCPN_PROLOG_FREEUP_REG r9
-
-	;Which mode (user/kernel) was the system in when intr occured
-	lr  r9, [erstatus]
-
-	SWITCH_TO_KERNEL_STK
-	SAVE_ALL_SYS
+	EXCEPTION_PROLOGUE
 
 	;------- (4) What caused the Trap --------------
 	lr     r12, [ecr]
@@ -642,6 +612,9 @@ resume_kernel_mode:
 
 #ifdef CONFIG_PREEMPT
 
+	; This is a must for preempt_schedule_irq()
+	IRQ_DISABLE	r9
+
 	; Can't preempt if preemption disabled
 	GET_CURR_THR_INFO_FROM_SP   r10
 	ld  r8, [r10, THREAD_INFO_PREEMPT_COUNT]
@@ -651,8 +624,6 @@ resume_kernel_mode:
 	ld  r9, [r10, THREAD_INFO_FLAGS]
 	bbit0  r9, TIF_NEED_RESCHED, restore_regs
 
-	IRQ_DISABLE	r9
-
 	; Invoke PREEMPTION
 	bl      preempt_schedule_irq
 
@@ -665,12 +636,11 @@ resume_kernel_mode:
 ;
 ; Restore the saved sys context (common exit-path for EXCPN/IRQ/Trap)
 ; IRQ shd definitely not happen between now and rtie
+; All 2 entry points to here already disable interrupts
 
 restore_regs :
 
-	; Disable Interrupts while restoring reg-file back
-	; XXX can this be optimised out
-	IRQ_DISABLE_SAVE    r9, r10	;@r10 has prisitine (pre-disable) copy
+	lr	r10, [status32]
 
 	; Restore REG File. In case multiple Events outstanding,
 	; use the same priorty as rtie: EXCPN, L2 IRQ, L1 IRQ, None
diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c
index bca3052c956d..482a42bdc051 100644
--- a/arch/arc/kernel/smp.c
+++ b/arch/arc/kernel/smp.c
@@ -128,6 +128,7 @@ void start_kernel_secondary(void)
 	atomic_inc(&mm->mm_users);
 	atomic_inc(&mm->mm_count);
 	current->active_mm = mm;
+	cpumask_set_cpu(cpu, mm_cpumask(mm));
 
 	notify_cpu_starting(cpu);
 	set_cpu_online(cpu, true);
diff --git a/arch/arc/mm/Makefile b/arch/arc/mm/Makefile
index ac95cc239c1e..618561270518 100644
--- a/arch/arc/mm/Makefile
+++ b/arch/arc/mm/Makefile
@@ -8,3 +8,4 @@
 
 obj-y	:= extable.o ioremap.o dma.o fault.o init.o
 obj-y	+= tlb.o tlbex.o cache_arc700.o mmap.o
+obj-$(CONFIG_SMP)		+= tlbflush.o
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index 7957dc4e4d4a..8fed015a7116 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -52,6 +52,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/bug.h>
 #include <asm/arcregs.h>
 #include <asm/setup.h>
 #include <asm/mmu_context.h>
@@ -99,48 +100,45 @@
 
 
 /* A copy of the ASID from the PID reg is kept in asid_cache */
-int asid_cache = FIRST_ASID;
-
-/* ASID to mm struct mapping. We have one extra entry corresponding to
- * NO_ASID to save us a compare when clearing the mm entry for old asid
- * see get_new_mmu_context (asm-arc/mmu_context.h)
- */
-struct mm_struct *asid_mm_map[NUM_ASID + 1];
+DEFINE_PER_CPU(unsigned int, asid_cache) = MM_CTXT_FIRST_CYCLE;
 
 /*
  * Utility Routine to erase a J-TLB entry
- * The procedure is to look it up in the MMU. If found, ERASE it by
- *  issuing a TlbWrite CMD with PD0 = PD1 = 0
+ * Caller needs to setup Index Reg (manually or via getIndex)
  */
-
-static void __tlb_entry_erase(void)
+static inline void __tlb_entry_erase(void)
 {
 	write_aux_reg(ARC_REG_TLBPD1, 0);
 	write_aux_reg(ARC_REG_TLBPD0, 0);
 	write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
 }
 
-static void tlb_entry_erase(unsigned int vaddr_n_asid)
+static inline unsigned int tlb_entry_lkup(unsigned long vaddr_n_asid)
 {
 	unsigned int idx;
 
-	/* Locate the TLB entry for this vaddr + ASID */
 	write_aux_reg(ARC_REG_TLBPD0, vaddr_n_asid);
+
 	write_aux_reg(ARC_REG_TLBCOMMAND, TLBProbe);
 	idx = read_aux_reg(ARC_REG_TLBINDEX);
 
+	return idx;
+}
+
+static void tlb_entry_erase(unsigned int vaddr_n_asid)
+{
+	unsigned int idx;
+
+	/* Locate the TLB entry for this vaddr + ASID */
+	idx = tlb_entry_lkup(vaddr_n_asid);
+
 	/* No error means entry found, zero it out */
 	if (likely(!(idx & TLB_LKUP_ERR))) {
 		__tlb_entry_erase();
-	} else {		/* Some sort of Error */
-
+	} else {
 		/* Duplicate entry error */
-		if (idx & 0x1) {
-			/* TODO we need to handle this case too */
-			pr_emerg("unhandled Duplicate flush for %x\n",
-			       vaddr_n_asid);
-		}
-		/* else entry not found so nothing to do */
+		WARN(idx == TLB_DUP_ERR, "Probe returned Dup PD for %x\n",
+					   vaddr_n_asid);
 	}
 }
 
@@ -159,7 +157,7 @@ static void utlb_invalidate(void)
 {
 #if (CONFIG_ARC_MMU_VER >= 2)
 
-#if (CONFIG_ARC_MMU_VER < 3)
+#if (CONFIG_ARC_MMU_VER == 2)
 	/* MMU v2 introduced the uTLB Flush command.
 	 * There was however an obscure hardware bug, where uTLB flush would
 	 * fail when a prior probe for J-TLB (both totally unrelated) would
@@ -182,6 +180,37 @@ static void utlb_invalidate(void)
 
 }
 
+static void tlb_entry_insert(unsigned int pd0, unsigned int pd1)
+{
+	unsigned int idx;
+
+	/*
+	 * First verify if entry for this vaddr+ASID already exists
+	 * This also sets up PD0 (vaddr, ASID..) for final commit
+	 */
+	idx = tlb_entry_lkup(pd0);
+
+	/*
+	 * If Not already present get a free slot from MMU.
+	 * Otherwise, Probe would have located the entry and set INDEX Reg
+	 * with existing location. This will cause Write CMD to over-write
+	 * existing entry with new PD0 and PD1
+	 */
+	if (likely(idx & TLB_LKUP_ERR))
+		write_aux_reg(ARC_REG_TLBCOMMAND, TLBGetIndex);
+
+	/* setup the other half of TLB entry (pfn, rwx..) */
+	write_aux_reg(ARC_REG_TLBPD1, pd1);
+
+	/*
+	 * Commit the Entry to MMU
+	 * It doesnt sound safe to use the TLBWriteNI cmd here
+	 * which doesn't flush uTLBs. I'd rather be safe than sorry.
+	 */
+	write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
+}
+
+
 /*
  * Un-conditionally (without lookup) erase the entire MMU contents
  */
@@ -224,13 +253,14 @@ noinline void local_flush_tlb_mm(struct mm_struct *mm)
 		return;
 
 	/*
-	 * Workaround for Android weirdism:
-	 * A binder VMA could end up in a task such that vma->mm != tsk->mm
-	 * old code would cause h/w - s/w ASID to get out of sync
+	 * - Move to a new ASID, but only if the mm is still wired in
+	 *   (Android Binders ended up calling this for vma->mm != tsk->mm,
+	 *    causing h/w - s/w ASID to get out of sync)
+	 * - Also get_new_mmu_context() new implementation allocates a new
+	 *   ASID only if it is not allocated already - so unallocate first
 	 */
-	if (current->mm != mm)
-		destroy_context(mm);
-	else
+	destroy_context(mm);
+	if (current->mm == mm)
 		get_new_mmu_context(mm);
 }
 
@@ -245,8 +275,8 @@ noinline void local_flush_tlb_mm(struct mm_struct *mm)
 void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 			   unsigned long end)
 {
+	const unsigned int cpu = smp_processor_id();
 	unsigned long flags;
-	unsigned int asid;
 
 	/* If range @start to @end is more than 32 TLB entries deep,
 	 * its better to move to a new ASID rather than searching for
@@ -268,11 +298,10 @@ void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 	start &= PAGE_MASK;
 
 	local_irq_save(flags);
-	asid = vma->vm_mm->context.asid;
 
-	if (asid != NO_ASID) {
+	if (asid_mm(vma->vm_mm, cpu) != MM_CTXT_NO_ASID) {
 		while (start < end) {
-			tlb_entry_erase(start | (asid & 0xff));
+			tlb_entry_erase(start | hw_pid(vma->vm_mm, cpu));
 			start += PAGE_SIZE;
 		}
 	}
@@ -319,6 +348,7 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
 
 void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
 {
+	const unsigned int cpu = smp_processor_id();
 	unsigned long flags;
 
 	/* Note that it is critical that interrupts are DISABLED between
@@ -326,9 +356,8 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
 	 */
 	local_irq_save(flags);
 
-	if (vma->vm_mm->context.asid != NO_ASID) {
-		tlb_entry_erase((page & PAGE_MASK) |
-				(vma->vm_mm->context.asid & 0xff));
+	if (asid_mm(vma->vm_mm, cpu) != MM_CTXT_NO_ASID) {
+		tlb_entry_erase((page & PAGE_MASK) | hw_pid(vma->vm_mm, cpu));
 		utlb_invalidate();
 	}
 
@@ -341,8 +370,8 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
 void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
 {
 	unsigned long flags;
-	unsigned int idx, asid_or_sasid;
-	unsigned long pd0_flags;
+	unsigned int asid_or_sasid, rwx;
+	unsigned long pd0, pd1;
 
 	/*
 	 * create_tlb() assumes that current->mm == vma->mm, since
@@ -374,47 +403,30 @@ void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
 
 	local_irq_save(flags);
 
-	tlb_paranoid_check(vma->vm_mm->context.asid, address);
+	tlb_paranoid_check(hw_pid(vma->vm_mm), address);
 
 	address &= PAGE_MASK;
 
 	/* update this PTE credentials */
 	pte_val(*ptep) |= (_PAGE_PRESENT | _PAGE_ACCESSED);
 
-	/* Create HW TLB entry Flags (in PD0) from PTE Flags */
-#if (CONFIG_ARC_MMU_VER <= 2)
-	pd0_flags = ((pte_val(*ptep) & PTE_BITS_IN_PD0) >> 1);
-#else
-	pd0_flags = ((pte_val(*ptep) & PTE_BITS_IN_PD0));
-#endif
+	/* Create HW TLB(PD0,PD1) from PTE  */
 
 	/* ASID for this task */
 	asid_or_sasid = read_aux_reg(ARC_REG_PID) & 0xff;
 
-	write_aux_reg(ARC_REG_TLBPD0, address | pd0_flags | asid_or_sasid);
+	pd0 = address | asid_or_sasid | (pte_val(*ptep) & PTE_BITS_IN_PD0);
 
-	/* Load remaining info in PD1 (Page Frame Addr and Kx/Kw/Kr Flags) */
-	write_aux_reg(ARC_REG_TLBPD1, (pte_val(*ptep) & PTE_BITS_IN_PD1));
+	rwx = pte_val(*ptep) & PTE_BITS_RWX;
 
-	/* First verify if entry for this vaddr+ASID already exists */
-	write_aux_reg(ARC_REG_TLBCOMMAND, TLBProbe);
-	idx = read_aux_reg(ARC_REG_TLBINDEX);
+	if (pte_val(*ptep) & _PAGE_GLOBAL)
+		rwx <<= 3;		/* r w x => Kr Kw Kx 0 0 0 */
+	else
+		rwx |= (rwx << 3);	/* r w x => Kr Kw Kx Ur Uw Ux */
 
-	/*
-	 * If Not already present get a free slot from MMU.
-	 * Otherwise, Probe would have located the entry and set INDEX Reg
-	 * with existing location. This will cause Write CMD to over-write
-	 * existing entry with new PD0 and PD1
-	 */
-	if (likely(idx & TLB_LKUP_ERR))
-		write_aux_reg(ARC_REG_TLBCOMMAND, TLBGetIndex);
+	pd1 = rwx | (pte_val(*ptep) & PTE_BITS_NON_RWX_IN_PD1);
 
-	/*
-	 * Commit the Entry to MMU
-	 * It doesnt sound safe to use the TLBWriteNI cmd here
-	 * which doesn't flush uTLBs. I'd rather be safe than sorry.
-	 */
-	write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
+	tlb_entry_insert(pd0, pd1);
 
 	local_irq_restore(flags);
 }
@@ -553,11 +565,6 @@ void arc_mmu_init(void)
 	if (mmu->pg_sz != PAGE_SIZE)
 		panic("MMU pg size != PAGE_SIZE (%luk)\n", TO_KB(PAGE_SIZE));
 
-	/*
-	 * ASID mgmt data structures are compile time init
-	 *  asid_cache = FIRST_ASID and asid_mm_map[] all zeroes
-	 */
-
 	local_flush_tlb_all();
 
 	/* Enable the MMU */
@@ -674,7 +681,7 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address,
 void print_asid_mismatch(int is_fast_path)
 {
 	int pid_sw, pid_hw;
-	pid_sw = current->active_mm->context.asid;
+	pid_sw = hw_pid(current->active_mm, smp_processor_id());
 	pid_hw = read_aux_reg(ARC_REG_PID) & 0xff;
 
 	pr_emerg("ASID Mismatch in %s Path Handler: sw-pid=0x%x hw-pid=0x%x\n",
@@ -689,7 +696,8 @@ void tlb_paranoid_check(unsigned int pid_sw, unsigned long addr)
 
 	pid_hw = read_aux_reg(ARC_REG_PID) & 0xff;
 
-	if (addr < 0x70000000 && ((pid_hw != pid_sw) || (pid_sw == NO_ASID)))
+	if (addr < 0x70000000 && ((pid_hw != pid_sw) ||
+	    (pid_sw == MM_CTXT_NO_ASID)))
 		print_asid_mismatch(0);
 }
 #endif
diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S
index 5c5bb23001b0..50e83ca96b96 100644
--- a/arch/arc/mm/tlbex.S
+++ b/arch/arc/mm/tlbex.S
@@ -44,17 +44,36 @@
 #include <asm/arcregs.h>
 #include <asm/cache.h>
 #include <asm/processor.h>
-#if (CONFIG_ARC_MMU_VER == 1)
 #include <asm/tlb-mmu1.h>
-#endif
 
-;--------------------------------------------------------------------------
-; scratch memory to save the registers (r0-r3) used to code TLB refill Handler
-; For details refer to comments before TLBMISS_FREEUP_REGS below
+;-----------------------------------------------------------------
+; ARC700 Exception Handling doesn't auto-switch stack and it only provides
+; ONE scratch AUX reg "ARC_REG_SCRATCH_DATA0"
+;
+; For Non-SMP, the scratch AUX reg is repurposed to cache task PGD, so a
+; "global" is used to free-up FIRST core reg to be able to code the rest of
+; exception prologue (IRQ auto-disabled on Exceptions, so it's IRQ-safe).
+; Since the Fast Path TLB Miss handler is coded with 4 regs, the remaining 3
+; need to be saved as well by extending the "global" to be 4 words. Hence
+;	".size   ex_saved_reg1, 16"
+; [All of this dance is to avoid stack switching for each TLB Miss, since we
+; only need to save only a handful of regs, as opposed to complete reg file]
+;
+; For ARC700 SMP, the "global" obviously can't be used for free up the FIRST
+; core reg as it will not be SMP safe.
+; Thus scratch AUX reg is used (and no longer used to cache task PGD).
+; To save the rest of 3 regs - per cpu, the global is made "per-cpu".
+; Epilogue thus has to locate the "per-cpu" storage for regs.
+; To avoid cache line bouncing the per-cpu global is aligned/sized per
+; L1_CACHE_SHIFT, despite fundamentally needing to be 12 bytes only. Hence
+;	".size   ex_saved_reg1, (CONFIG_NR_CPUS << L1_CACHE_SHIFT)"
+
+; As simple as that....
 ;--------------------------------------------------------------------------
 
+; scratch memory to save [r0-r3] used to code TLB refill Handler
 ARCFP_DATA ex_saved_reg1
-	.align 1 << L1_CACHE_SHIFT	; IMP: Must be Cache Line aligned
+	.align 1 << L1_CACHE_SHIFT
 	.type   ex_saved_reg1, @object
 #ifdef CONFIG_SMP
 	.size   ex_saved_reg1, (CONFIG_NR_CPUS << L1_CACHE_SHIFT)
@@ -66,6 +85,44 @@ ex_saved_reg1:
 	.zero 16
 #endif
 
+.macro TLBMISS_FREEUP_REGS
+#ifdef CONFIG_SMP
+	sr  r0, [ARC_REG_SCRATCH_DATA0]	; freeup r0 to code with
+	GET_CPU_ID  r0			; get to per cpu scratch mem,
+	lsl r0, r0, L1_CACHE_SHIFT	; cache line wide per cpu
+	add r0, @ex_saved_reg1, r0
+#else
+	st    r0, [@ex_saved_reg1]
+	mov_s r0, @ex_saved_reg1
+#endif
+	st_s  r1, [r0, 4]
+	st_s  r2, [r0, 8]
+	st_s  r3, [r0, 12]
+
+	; VERIFY if the ASID in MMU-PID Reg is same as
+	; one in Linux data structures
+
+	DBG_ASID_MISMATCH
+.endm
+
+.macro TLBMISS_RESTORE_REGS
+#ifdef CONFIG_SMP
+	GET_CPU_ID  r0			; get to per cpu scratch mem
+	lsl r0, r0, L1_CACHE_SHIFT	; each is cache line wide
+	add r0, @ex_saved_reg1, r0
+	ld_s  r3, [r0,12]
+	ld_s  r2, [r0, 8]
+	ld_s  r1, [r0, 4]
+	lr    r0, [ARC_REG_SCRATCH_DATA0]
+#else
+	mov_s r0, @ex_saved_reg1
+	ld_s  r3, [r0,12]
+	ld_s  r2, [r0, 8]
+	ld_s  r1, [r0, 4]
+	ld_s  r0, [r0]
+#endif
+.endm
+
 ;============================================================================
 ;  Troubleshooting Stuff
 ;============================================================================
@@ -161,13 +218,17 @@ ex_saved_reg1:
 ; IN: r0 = PTE, r1 = ptr to PTE
 
 .macro CONV_PTE_TO_TLB
-	and r3, r0, PTE_BITS_IN_PD1 ; Extract permission flags+PFN from PTE
-	sr  r3, [ARC_REG_TLBPD1]    ; these go in PD1
+	and    r3, r0, PTE_BITS_RWX	;       r w x
+	lsl    r2, r3, 3		; r w x 0 0 0
+	and.f  0,  r0, _PAGE_GLOBAL
+	or.z   r2, r2, r3		; r w x r w x
+
+	and r3, r0, PTE_BITS_NON_RWX_IN_PD1 ; Extract PFN+cache bits from PTE
+	or  r3, r3, r2
+
+	sr  r3, [ARC_REG_TLBPD1]    	; these go in PD1
 
 	and r2, r0, PTE_BITS_IN_PD0 ; Extract other PTE flags: (V)alid, (G)lb
-#if (CONFIG_ARC_MMU_VER <= 2)   /* Neednot be done with v3 onwards */
-	lsr r2, r2                  ; shift PTE flags to match layout in PD0
-#endif
 
 	lr  r3,[ARC_REG_TLBPD0]     ; MMU prepares PD0 with vaddr and asid
 
@@ -191,68 +252,6 @@ ex_saved_reg1:
 #endif
 .endm
 
-;-----------------------------------------------------------------
-; ARC700 Exception Handling doesn't auto-switch stack and it only provides
-; ONE scratch AUX reg "ARC_REG_SCRATCH_DATA0"
-;
-; For Non-SMP, the scratch AUX reg is repurposed to cache task PGD, so a
-; "global" is used to free-up FIRST core reg to be able to code the rest of
-; exception prologue (IRQ auto-disabled on Exceptions, so it's IRQ-safe).
-; Since the Fast Path TLB Miss handler is coded with 4 regs, the remaining 3
-; need to be saved as well by extending the "global" to be 4 words. Hence
-;	".size   ex_saved_reg1, 16"
-; [All of this dance is to avoid stack switching for each TLB Miss, since we
-; only need to save only a handful of regs, as opposed to complete reg file]
-;
-; For ARC700 SMP, the "global" obviously can't be used for free up the FIRST
-; core reg as it will not be SMP safe.
-; Thus scratch AUX reg is used (and no longer used to cache task PGD).
-; To save the rest of 3 regs - per cpu, the global is made "per-cpu".
-; Epilogue thus has to locate the "per-cpu" storage for regs.
-; To avoid cache line bouncing the per-cpu global is aligned/sized per
-; L1_CACHE_SHIFT, despite fundamentally needing to be 12 bytes only. Hence
-;	".size   ex_saved_reg1, (CONFIG_NR_CPUS << L1_CACHE_SHIFT)"
-
-; As simple as that....
-
-.macro TLBMISS_FREEUP_REGS
-#ifdef CONFIG_SMP
-	sr  r0, [ARC_REG_SCRATCH_DATA0]	; freeup r0 to code with
-	GET_CPU_ID  r0			; get to per cpu scratch mem,
-	lsl r0, r0, L1_CACHE_SHIFT	; cache line wide per cpu
-	add r0, @ex_saved_reg1, r0
-#else
-	st    r0, [@ex_saved_reg1]
-	mov_s r0, @ex_saved_reg1
-#endif
-	st_s  r1, [r0, 4]
-	st_s  r2, [r0, 8]
-	st_s  r3, [r0, 12]
-
-	; VERIFY if the ASID in MMU-PID Reg is same as
-	; one in Linux data structures
-
-	DBG_ASID_MISMATCH
-.endm
-
-;-----------------------------------------------------------------
-.macro TLBMISS_RESTORE_REGS
-#ifdef CONFIG_SMP
-	GET_CPU_ID  r0			; get to per cpu scratch mem
-	lsl r0, r0, L1_CACHE_SHIFT	; each is cache line wide
-	add r0, @ex_saved_reg1, r0
-	ld_s  r3, [r0,12]
-	ld_s  r2, [r0, 8]
-	ld_s  r1, [r0, 4]
-	lr    r0, [ARC_REG_SCRATCH_DATA0]
-#else
-	mov_s r0, @ex_saved_reg1
-	ld_s  r3, [r0,12]
-	ld_s  r2, [r0, 8]
-	ld_s  r1, [r0, 4]
-	ld_s  r0, [r0]
-#endif
-.endm
 
 ARCFP_CODE	;Fast Path Code, candidate for ICCM
 
@@ -277,8 +276,8 @@ ARC_ENTRY EV_TLBMissI
 	;----------------------------------------------------------------
 	; VERIFY_PTE: Check if PTE permissions approp for executing code
 	cmp_s   r2, VMALLOC_START
-	mov.lo  r2, (_PAGE_PRESENT | _PAGE_U_EXECUTE)
-	mov.hs  r2, (_PAGE_PRESENT | _PAGE_K_EXECUTE)
+	mov_s   r2, (_PAGE_PRESENT | _PAGE_EXECUTE)
+	or.hs   r2, r2, _PAGE_GLOBAL
 
 	and     r3, r0, r2  ; Mask out NON Flag bits from PTE
 	xor.f   r3, r3, r2  ; check ( ( pte & flags_test ) == flags_test )
@@ -317,26 +316,21 @@ ARC_ENTRY EV_TLBMissD
 	;----------------------------------------------------------------
 	; VERIFY_PTE: Chk if PTE permissions approp for data access (R/W/R+W)
 
-	mov_s   r2, 0
+	cmp_s	r2, VMALLOC_START
+	mov_s   r2, _PAGE_PRESENT	; common bit for K/U PTE
+	or.hs	r2, r2, _PAGE_GLOBAL	; kernel PTE only
+
+	; Linux PTE [RWX] bits are semantically overloaded:
+	; -If PAGE_GLOBAL set, they refer to kernel-only flags (vmalloc)
+	; -Otherwise they are user-mode permissions, and those are exactly
+	;  same for kernel mode as well (e.g. copy_(to|from)_user)
+
 	lr      r3, [ecr]
 	btst_s  r3, ECR_C_BIT_DTLB_LD_MISS	; Read Access
-	or.nz   r2, r2, _PAGE_U_READ      	; chk for Read flag in PTE
+	or.nz   r2, r2, _PAGE_READ      	; chk for Read flag in PTE
 	btst_s  r3, ECR_C_BIT_DTLB_ST_MISS	; Write Access
-	or.nz   r2, r2, _PAGE_U_WRITE     	; chk for Write flag in PTE
-	; Above laddering takes care of XCHG access
-	;   which is both Read and Write
-
-	; If kernel mode access, ; make _PAGE_xx flags as _PAGE_K_xx
-	; For copy_(to|from)_user, despite exception taken in kernel mode,
-	; this code is not hit, because EFA would still be the user mode
-	; address (EFA < 0x6000_0000).
-	; This code is for legit kernel mode faults, vmalloc specifically
-	; (EFA: 0x7000_0000 to 0x7FFF_FFFF)
-
-	lr      r3, [efa]
-	cmp     r3, VMALLOC_START - 1   ; If kernel mode access
-	asl.hi  r2, r2, 3               ; make _PAGE_xx flags as _PAGE_K_xx
-	or      r2, r2, _PAGE_PRESENT   ; Common flag for K/U mode
+	or.nz   r2, r2, _PAGE_WRITE     	; chk for Write flag in PTE
+	; Above laddering takes care of XCHG access (both R and W)
 
 	; By now, r2 setup with all the Flags we need to check in PTE
 	and     r3, r0, r2              ; Mask out NON Flag bits from PTE
@@ -371,13 +365,7 @@ do_slow_path_pf:
 
 	; Slow path TLB Miss handled as a regular ARC Exception
 	; (stack switching / save the complete reg-file).
-	; That requires freeing up r9
-	EXCPN_PROLOG_FREEUP_REG r9
-
-	lr  r9, [erstatus]
-
-	SWITCH_TO_KERNEL_STK
-	SAVE_ALL_SYS
+	EXCEPTION_PROLOGUE
 
 	; ------- setup args for Linux Page fault Hanlder ---------
 	mov_s r0, sp
diff --git a/arch/arc/mm/tlbflush.c b/arch/arc/mm/tlbflush.c
new file mode 100644
index 000000000000..a0abe481815e
--- /dev/null
+++ b/arch/arc/mm/tlbflush.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/sched.h>
+#include <asm/tlbflush.h>
+
+struct tlb_args {
+       struct vm_area_struct *ta_vma;
+       unsigned long ta_start;
+       unsigned long ta_end;
+};
+
+static inline void ipi_flush_tlb_all(void *ignored)
+{
+       local_flush_tlb_all();
+}
+
+static inline void ipi_flush_tlb_mm(void *arg)
+{
+       struct mm_struct *mm = (struct mm_struct *)arg;
+
+       local_flush_tlb_mm(mm);
+}
+
+static inline void ipi_flush_tlb_page(void *arg)
+{
+       struct tlb_args *ta = (struct tlb_args *)arg;
+
+       local_flush_tlb_page(ta->ta_vma, ta->ta_start);
+}
+
+static inline void ipi_flush_tlb_range(void *arg)
+{
+       struct tlb_args *ta = (struct tlb_args *)arg;
+
+       local_flush_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end);
+}
+
+static inline void ipi_flush_tlb_kernel_range(void *arg)
+{
+       struct tlb_args *ta = (struct tlb_args *)arg;
+
+       local_flush_tlb_kernel_range(ta->ta_start, ta->ta_end);
+}
+
+void flush_tlb_all(void)
+{
+       on_each_cpu(ipi_flush_tlb_all, NULL, 1);
+}
+
+void flush_tlb_mm(struct mm_struct *mm)
+{
+       on_each_cpu_mask(mm_cpumask(mm), ipi_flush_tlb_mm, mm, 1);
+}
+
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
+{
+       struct tlb_args ta;
+       ta.ta_vma = vma;
+       ta.ta_start = uaddr;
+       on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_page, &ta, 1);
+}
+
+void flush_tlb_range(struct vm_area_struct *vma,
+                     unsigned long start, unsigned long end)
+{
+       struct tlb_args ta;
+       ta.ta_vma = vma;
+       ta.ta_start = start;
+       ta.ta_end = end;
+       on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_range, &ta, 1);
+}
+
+void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+       struct tlb_args ta;
+       ta.ta_start = start;
+       ta.ta_end = end;
+       on_each_cpu(ipi_flush_tlb_kernel_range, &ta, 1);
+}
author	Stephen Rothwell <sfr@canb.auug.org.au>	2013-08-01 10:54:39 +1000
committer	Stephen Rothwell <sfr@canb.auug.org.au>	2013-08-01 10:54:39 +1000
commit	30d2e943e732372bfde208edfe3af56cf2e35358 (patch)
tree	951e7b92c4752edfbd61fb1936999db9ae4aab57 /arch
parent	9e2ba7744ebf1aab59c64c827fca391b3f2fb11e (diff)
parent	524a105dead4d424faaa6a78a72bf3343a747ecf (diff)