From f1f3347da9440eedd2350f4f5d13d8860f570b92 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 18 Jan 2013 15:12:19 +0530 Subject: ARC: MMU Context Management ARC700 MMU provides for tagging TLB entries with a 8-bit ASID to avoid having to flush the TLB every task switch. It also allows for a quick way to invalidate all the TLB entries for task useful for: * COW sementics during fork() * task exit()ing Signed-off-by: Vineet Gupta --- arch/arc/mm/tlb.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 arch/arc/mm/tlb.c (limited to 'arch/arc/mm/tlb.c') diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c new file mode 100644 index 000000000000..f1edae2410a7 --- /dev/null +++ b/arch/arc/mm/tlb.c @@ -0,0 +1,23 @@ +/* + * TLB Management (flush/create/diagnostics) for ARC700 + * + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include + +/* A copy of the ASID from the PID reg is kept in asid_cache */ +int asid_cache = FIRST_ASID; + +/* ASID to mm struct mapping. We have one extra entry corresponding to + * NO_ASID to save us a compare when clearing the mm entry for old asid + * see get_new_mmu_context (asm-arc/mmu_context.h) + */ +struct mm_struct *asid_mm_map[NUM_ASID + 1]; -- cgit v1.2.3 From cc562d2eae93bc2768a6575d31c089719e8939e8 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 18 Jan 2013 15:12:19 +0530 Subject: ARC: MMU Exception Handling * MMU I-TLB / D-TLB Miss Exceptions - Fast Path TLB Refill Handler - slowpath TLB creation via do_page_fault() -> update_mmu_cache() * Duplicate PD Exception Handler Signed-off-by: Vineet Gupta --- arch/arc/include/asm/arcregs.h | 91 +++++++++++ arch/arc/include/asm/tlb-mmu1.h | 104 ++++++++++++ arch/arc/include/asm/tlb.h | 41 +++++ arch/arc/mm/tlb.c | 267 ++++++++++++++++++++++++++++++ arch/arc/mm/tlbex.S | 351 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 854 insertions(+) create mode 100644 arch/arc/include/asm/tlb-mmu1.h create mode 100644 arch/arc/include/asm/tlb.h create mode 100644 arch/arc/mm/tlbex.S (limited to 'arch/arc/mm/tlb.c') diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index c12eb9b4f449..1c24485fd04b 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -13,6 +13,7 @@ /* Build Configuration Registers */ #define ARC_REG_VECBASE_BCR 0x68 +#define ARC_REG_MMU_BCR 0x6f /* status32 Bits Positions */ #define STATUS_H_BIT 0 /* CPU Halted */ @@ -36,6 +37,35 @@ #define STATUS_U_MASK (1<= 2) +#define TLBWriteNI 0x5 /* write JTLB without inv uTLBs */ +#define TLBIVUTLB 0x6 /* explicitly inv uTLBs */ +#else +#undef TLBWriteNI /* These cmds don't exist on older MMU */ +#undef TLBIVUTLB +#endif + /* Instruction cache related Auxiliary registers */ #define ARC_REG_IC_BCR 0x77 /* Build Config reg */ #define ARC_REG_IC_IVIC 0x10 @@ -205,6 +273,24 @@ struct arc_fpu { * Build Configuration Registers, with encoded hardware config */ +struct bcr_mmu_1_2 { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int ver:8, ways:4, sets:4, u_itlb:8, u_dtlb:8; +#else + unsigned int u_dtlb:8, u_itlb:8, sets:4, ways:4, ver:8; +#endif +}; + +struct bcr_mmu_3 { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int ver:8, ways:4, sets:4, osm:1, reserv:3, pg_sz:4, + u_itlb:4, u_dtlb:4; +#else + unsigned int u_dtlb:4, u_itlb:4, pg_sz:4, reserv:3, osm:1, sets:4, + ways:4, ver:8; +#endif +}; + struct bcr_cache { #ifdef CONFIG_CPU_BIG_ENDIAN unsigned int pad:12, line_len:4, sz:4, config:4, ver:8; @@ -218,12 +304,17 @@ struct bcr_cache { * Generic structures to hold build configuration used at runtime */ +struct cpuinfo_arc_mmu { + unsigned int ver, pg_sz, sets, ways, u_dtlb, u_itlb, num_tlb; +}; + struct cpuinfo_arc_cache { unsigned int has_aliasing, sz, line_len, assoc, ver; }; struct cpuinfo_arc { struct cpuinfo_arc_cache icache, dcache; + struct cpuinfo_arc_mmu mmu; }; extern struct cpuinfo_arc cpuinfo_arc700[]; diff --git a/arch/arc/include/asm/tlb-mmu1.h b/arch/arc/include/asm/tlb-mmu1.h new file mode 100644 index 000000000000..a5ff961b1efc --- /dev/null +++ b/arch/arc/include/asm/tlb-mmu1.h @@ -0,0 +1,104 @@ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __ASM_TLB_MMU_V1_H__ +#define __ASM_TLB_MMU_V1_H__ + +#if defined(__ASSEMBLY__) && defined(CONFIG_ARC_MMU_VER == 1) + +#include + +.macro TLB_WRITE_HEURISTICS + +#define JH_HACK1 +#undef JH_HACK2 +#undef JH_HACK3 + +#ifdef JH_HACK3 +; Calculate set index for 2-way MMU +; -avoiding use of GetIndex from MMU +; and its unpleasant LFSR pseudo-random sequence +; +; r1 = TLBPD0 from TLB_RELOAD above +; +; -- jh_ex_way_set not cleared on startup +; didn't want to change setup.c +; hence extra instruction to clean +; +; -- should be in cache since in same line +; as r0/r1 saves above +; +ld r0,[jh_ex_way_sel] ; victim pointer +and r0,r0,1 ; clean +xor.f r0,r0,1 ; flip +st r0,[jh_ex_way_sel] ; store back +asr r0,r1,12 ; get set # <<1, note bit 12=R=0 +or.nz r0,r0,1 ; set way bit +and r0,r0,0xff ; clean +sr r0,[ARC_REG_TLBINDEX] +#endif + +#ifdef JH_HACK2 +; JH hack #2 +; Faster than hack #1 in non-thrash case, but hard-coded for 2-way MMU +; Slower in thrash case (where it matters) because more code is executed +; Inefficient due to two-register paradigm of this miss handler +; +/* r1 = data TLBPD0 at this point */ +lr r0,[eret] /* instruction address */ +xor r0,r0,r1 /* compare set # */ +and.f r0,r0,0x000fe000 /* 2-way MMU mask */ +bne 88f /* not in same set - no need to probe */ + +lr r0,[eret] /* instruction address */ +and r0,r0,PAGE_MASK /* VPN of instruction address */ +; lr r1,[ARC_REG_TLBPD0] /* Data VPN+ASID - already in r1 from TLB_RELOAD*/ +and r1,r1,0xff /* Data ASID */ +or r0,r0,r1 /* Instruction address + Data ASID */ + +lr r1,[ARC_REG_TLBPD0] /* save TLBPD0 containing data TLB*/ +sr r0,[ARC_REG_TLBPD0] /* write instruction address to TLBPD0 */ +sr TLBProbe, [ARC_REG_TLBCOMMAND] /* Look for instruction */ +lr r0,[ARC_REG_TLBINDEX] /* r0 = index where instruction is, if at all */ +sr r1,[ARC_REG_TLBPD0] /* restore TLBPD0 */ + +xor r0,r0,1 /* flip bottom bit of data index */ +b.d 89f +sr r0,[ARC_REG_TLBINDEX] /* and put it back */ +88: +sr TLBGetIndex, [ARC_REG_TLBCOMMAND] +89: +#endif + +#ifdef JH_HACK1 +; +; Always checks whether instruction will be kicked out by dtlb miss +; +mov_s r3, r1 ; save PD0 prepared by TLB_RELOAD in r3 +lr r0,[eret] /* instruction address */ +and r0,r0,PAGE_MASK /* VPN of instruction address */ +bmsk r1,r3,7 /* Data ASID, bits 7-0 */ +or_s r0,r0,r1 /* Instruction address + Data ASID */ + +sr r0,[ARC_REG_TLBPD0] /* write instruction address to TLBPD0 */ +sr TLBProbe, [ARC_REG_TLBCOMMAND] /* Look for instruction */ +lr r0,[ARC_REG_TLBINDEX] /* r0 = index where instruction is, if at all */ +sr r3,[ARC_REG_TLBPD0] /* restore TLBPD0 */ + +sr TLBGetIndex, [ARC_REG_TLBCOMMAND] +lr r1,[ARC_REG_TLBINDEX] /* r1 = index where MMU wants to put data */ +cmp r0,r1 /* if no match on indices, go around */ +xor.eq r1,r1,1 /* flip bottom bit of data index */ +sr r1,[ARC_REG_TLBINDEX] /* and put it back */ +#endif + +.endm + +#endif + +#endif diff --git a/arch/arc/include/asm/tlb.h b/arch/arc/include/asm/tlb.h new file mode 100644 index 000000000000..b571e121929b --- /dev/null +++ b/arch/arc/include/asm/tlb.h @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _ASM_ARC_TLB_H +#define _ASM_ARC_TLB_H + +#ifdef __KERNEL__ + +#include + +/* Masks for actual TLB "PD"s */ +#define PTE_BITS_IN_PD0 (_PAGE_GLOBAL | _PAGE_PRESENT) +#define PTE_BITS_IN_PD1 (PAGE_MASK | _PAGE_CACHEABLE | \ + _PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ | \ + _PAGE_K_EXECUTE | _PAGE_K_WRITE | _PAGE_K_READ) + +#ifndef __ASSEMBLY__ + +#include +#include + +#ifdef CONFIG_ARC_DBG_TLB_PARANOIA +void tlb_paranoid_check(unsigned int pid_sw, unsigned long address); +#else +#define tlb_paranoid_check(a, b) +#endif + +void arc_mmu_init(void); +extern char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len); +void __init read_decode_mmu_bcr(void); + +#endif /* __ASSEMBLY__ */ + +#endif /* __KERNEL__ */ + +#endif /* _ASM_ARC_TLB_H */ diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index f1edae2410a7..404e5be4f704 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -21,3 +21,270 @@ int asid_cache = FIRST_ASID; * see get_new_mmu_context (asm-arc/mmu_context.h) */ struct mm_struct *asid_mm_map[NUM_ASID + 1]; + + +/* + * Routine to create a TLB entry + */ +void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) +{ + unsigned long flags; + unsigned int idx, asid_or_sasid; + unsigned long pd0_flags; + + /* + * create_tlb() assumes that current->mm == vma->mm, since + * -it ASID for TLB entry is fetched from MMU ASID reg (valid for curr) + * -completes the lazy write to SASID reg (again valid for curr tsk) + * + * Removing the assumption involves + * -Using vma->mm->context{ASID,SASID}, as opposed to MMU reg. + * -Fix the TLB paranoid debug code to not trigger false negatives. + * -More importantly it makes this handler inconsistent with fast-path + * TLB Refill handler which always deals with "current" + * + * Lets see the use cases when current->mm != vma->mm and we land here + * 1. execve->copy_strings()->__get_user_pages->handle_mm_fault + * Here VM wants to pre-install a TLB entry for user stack while + * current->mm still points to pre-execve mm (hence the condition). + * However the stack vaddr is soon relocated (randomization) and + * move_page_tables() tries to undo that TLB entry. + * Thus not creating TLB entry is not any worse. + * + * 2. ptrace(POKETEXT) causes a CoW - debugger(current) inserting a + * breakpoint in debugged task. Not creating a TLB now is not + * performance critical. + * + * Both the cases above are not good enough for code churn. + */ + if (current->active_mm != vma->vm_mm) + return; + + local_irq_save(flags); + + tlb_paranoid_check(vma->vm_mm->context.asid, address); + + address &= PAGE_MASK; + + /* update this PTE credentials */ + pte_val(*ptep) |= (_PAGE_PRESENT | _PAGE_ACCESSED); + + /* Create HW TLB entry Flags (in PD0) from PTE Flags */ +#if (CONFIG_ARC_MMU_VER <= 2) + pd0_flags = ((pte_val(*ptep) & PTE_BITS_IN_PD0) >> 1); +#else + pd0_flags = ((pte_val(*ptep) & PTE_BITS_IN_PD0)); +#endif + + /* ASID for this task */ + asid_or_sasid = read_aux_reg(ARC_REG_PID) & 0xff; + + write_aux_reg(ARC_REG_TLBPD0, address | pd0_flags | asid_or_sasid); + + /* Load remaining info in PD1 (Page Frame Addr and Kx/Kw/Kr Flags) */ + write_aux_reg(ARC_REG_TLBPD1, (pte_val(*ptep) & PTE_BITS_IN_PD1)); + + /* First verify if entry for this vaddr+ASID already exists */ + write_aux_reg(ARC_REG_TLBCOMMAND, TLBProbe); + idx = read_aux_reg(ARC_REG_TLBINDEX); + + /* + * If Not already present get a free slot from MMU. + * Otherwise, Probe would have located the entry and set INDEX Reg + * with existing location. This will cause Write CMD to over-write + * existing entry with new PD0 and PD1 + */ + if (likely(idx & TLB_LKUP_ERR)) + write_aux_reg(ARC_REG_TLBCOMMAND, TLBGetIndex); + + /* + * Commit the Entry to MMU + * It doesnt sound safe to use the TLBWriteNI cmd here + * which doesn't flush uTLBs. I'd rather be safe than sorry. + */ + write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); + + local_irq_restore(flags); +} + +/* arch hook called by core VM at the end of handle_mm_fault( ), + * when a new PTE is entered in Page Tables or an existing one + * is modified. We aggresively pre-install a TLB entry + */ + +void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddress, + pte_t *ptep) +{ + + create_tlb(vma, vaddress, ptep); +} + +/* Read the Cache Build Confuration Registers, Decode them and save into + * the cpuinfo structure for later use. + * No Validation is done here, simply read/convert the BCRs + */ +void __init read_decode_mmu_bcr(void) +{ + unsigned int tmp; + struct bcr_mmu_1_2 *mmu2; /* encoded MMU2 attr */ + struct bcr_mmu_3 *mmu3; /* encoded MMU3 attr */ + struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; + + tmp = read_aux_reg(ARC_REG_MMU_BCR); + mmu->ver = (tmp >> 24); + + if (mmu->ver <= 2) { + mmu2 = (struct bcr_mmu_1_2 *)&tmp; + mmu->pg_sz = PAGE_SIZE; + mmu->sets = 1 << mmu2->sets; + mmu->ways = 1 << mmu2->ways; + mmu->u_dtlb = mmu2->u_dtlb; + mmu->u_itlb = mmu2->u_itlb; + } else { + mmu3 = (struct bcr_mmu_3 *)&tmp; + mmu->pg_sz = 512 << mmu3->pg_sz; + mmu->sets = 1 << mmu3->sets; + mmu->ways = 1 << mmu3->ways; + mmu->u_dtlb = mmu3->u_dtlb; + mmu->u_itlb = mmu3->u_itlb; + } + + mmu->num_tlb = mmu->sets * mmu->ways; +} + +void __init arc_mmu_init(void) +{ + /* + * ASID mgmt data structures are compile time init + * asid_cache = FIRST_ASID and asid_mm_map[] all zeroes + */ + + local_flush_tlb_all(); + + /* Enable the MMU */ + write_aux_reg(ARC_REG_PID, MMU_ENABLE); +} + +/* + * TLB Programmer's Model uses Linear Indexes: 0 to {255, 511} for 128 x {2,4} + * The mapping is Column-first. + * --------------------- ----------- + * |way0|way1|way2|way3| |way0|way1| + * --------------------- ----------- + * [set0] | 0 | 1 | 2 | 3 | | 0 | 1 | + * [set1] | 4 | 5 | 6 | 7 | | 2 | 3 | + * ~ ~ ~ ~ + * [set127] | 508| 509| 510| 511| | 254| 255| + * --------------------- ----------- + * For normal operations we don't(must not) care how above works since + * MMU cmd getIndex(vaddr) abstracts that out. + * However for walking WAYS of a SET, we need to know this + */ +#define SET_WAY_TO_IDX(mmu, set, way) ((set) * mmu->ways + (way)) + +/* Handling of Duplicate PD (TLB entry) in MMU. + * -Could be due to buggy customer tapeouts or obscure kernel bugs + * -MMU complaints not at the time of duplicate PD installation, but at the + * time of lookup matching multiple ways. + * -Ideally these should never happen - but if they do - workaround by deleting + * the duplicate one. + * -Knob to be verbose abt it.(TODO: hook them up to debugfs) + */ +volatile int dup_pd_verbose = 1;/* Be slient abt it or complain (default) */ + +void do_tlb_overlap_fault(unsigned long cause, unsigned long address, + struct pt_regs *regs) +{ + int set, way, n; + unsigned int pd0[4], pd1[4]; /* assume max 4 ways */ + unsigned long flags, is_valid; + struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; + + local_irq_save(flags); + + /* re-enable the MMU */ + write_aux_reg(ARC_REG_PID, MMU_ENABLE | read_aux_reg(ARC_REG_PID)); + + /* loop thru all sets of TLB */ + for (set = 0; set < mmu->sets; set++) { + + /* read out all the ways of current set */ + for (way = 0, is_valid = 0; way < mmu->ways; way++) { + write_aux_reg(ARC_REG_TLBINDEX, + SET_WAY_TO_IDX(mmu, set, way)); + write_aux_reg(ARC_REG_TLBCOMMAND, TLBRead); + pd0[way] = read_aux_reg(ARC_REG_TLBPD0); + pd1[way] = read_aux_reg(ARC_REG_TLBPD1); + is_valid |= pd0[way] & _PAGE_PRESENT; + } + + /* If all the WAYS in SET are empty, skip to next SET */ + if (!is_valid) + continue; + + /* Scan the set for duplicate ways: needs a nested loop */ + for (way = 0; way < mmu->ways; way++) { + if (!pd0[way]) + continue; + + for (n = way + 1; n < mmu->ways; n++) { + if ((pd0[way] & PAGE_MASK) == + (pd0[n] & PAGE_MASK)) { + + if (dup_pd_verbose) { + pr_info("Duplicate PD's @" + "[%d:%d]/[%d:%d]\n", + set, way, set, n); + pr_info("TLBPD0[%u]: %08x\n", + way, pd0[way]); + } + + /* + * clear entry @way and not @n. This is + * critical to our optimised loop + */ + pd0[way] = pd1[way] = 0; + write_aux_reg(ARC_REG_TLBINDEX, + SET_WAY_TO_IDX(mmu, set, way)); + __tlb_entry_erase(); + } + } + } + } + + local_irq_restore(flags); +} + +/*********************************************************************** + * Diagnostic Routines + * -Called from Low Level TLB Hanlders if things don;t look good + **********************************************************************/ + +#ifdef CONFIG_ARC_DBG_TLB_PARANOIA + +/* + * Low Level ASM TLB handler calls this if it finds that HW and SW ASIDS + * don't match + */ +void print_asid_mismatch(int is_fast_path) +{ + int pid_sw, pid_hw; + pid_sw = current->active_mm->context.asid; + pid_hw = read_aux_reg(ARC_REG_PID) & 0xff; + + pr_emerg("ASID Mismatch in %s Path Handler: sw-pid=0x%x hw-pid=0x%x\n", + is_fast_path ? "Fast" : "Slow", pid_sw, pid_hw); + + __asm__ __volatile__("flag 1"); +} + +void tlb_paranoid_check(unsigned int pid_sw, unsigned long addr) +{ + unsigned int pid_hw; + + pid_hw = read_aux_reg(ARC_REG_PID) & 0xff; + + if (addr < 0x70000000 && ((pid_hw != pid_sw) || (pid_sw == NO_ASID))) + print_asid_mismatch(0); +} +#endif diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S new file mode 100644 index 000000000000..fc5b97129e55 --- /dev/null +++ b/arch/arc/mm/tlbex.S @@ -0,0 +1,351 @@ +/* + * TLB Exception Handling for ARC + * + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Vineetg: April 2011 : + * -MMU v1: moved out legacy code into a seperate file + * -MMU v3: PD{0,1} bits layout changed: They don't overlap anymore, + * helps avoid a shift when preparing PD0 from PTE + * + * Vineetg: July 2009 + * -For MMU V2, we need not do heuristics at the time of commiting a D-TLB + * entry, so that it doesn't knock out it's I-TLB entry + * -Some more fine tuning: + * bmsk instead of add, asl.cc instead of branch, delay slot utilise etc + * + * Vineetg: July 2009 + * -Practically rewrote the I/D TLB Miss handlers + * Now 40 and 135 instructions a peice as compared to 131 and 449 resp. + * Hence Leaner by 1.5 K + * Used Conditional arithmetic to replace excessive branching + * Also used short instructions wherever possible + * + * Vineetg: Aug 13th 2008 + * -Passing ECR (Exception Cause REG) to do_page_fault( ) for printing + * more information in case of a Fatality + * + * Vineetg: March 25th Bug #92690 + * -Added Debug Code to check if sw-ASID == hw-ASID + + * Rahul Trivedi, Amit Bhor: Codito Technologies 2004 + */ + + .cpu A7 + +#include +#include +#include +#include +#include +#include +#include +#if (CONFIG_ARC_MMU_VER == 1) +#include +#endif + +;-------------------------------------------------------------------------- +; scratch memory to save the registers (r0-r3) used to code TLB refill Handler +; For details refer to comments before TLBMISS_FREEUP_REGS below +;-------------------------------------------------------------------------- + + .section .data + .global ex_saved_reg1 + .align 1 << L1_CACHE_SHIFT ; IMP: Must be Cache Line aligned + .type ex_saved_reg1, @object + .size ex_saved_reg1, 16 +ex_saved_reg1: + .zero 16 + +;============================================================================ +; Troubleshooting Stuff +;============================================================================ + +; Linux keeps ASID (Address Space ID) in task->active_mm->context.asid +; When Creating TLB Entries, instead of doing 3 dependent loads from memory, +; we use the MMU PID Reg to get current ASID. +; In bizzare scenrios SW and HW ASID can get out-of-sync which is trouble. +; So we try to detect this in TLB Mis shandler + + +.macro DBG_ASID_MISMATCH + +#ifdef CONFIG_ARC_DBG_TLB_PARANOIA + + ; make sure h/w ASID is same as s/w ASID + + GET_CURR_TASK_ON_CPU r3 + ld r0, [r3, TASK_ACT_MM] + ld r0, [r0, MM_CTXT+MM_CTXT_ASID] + + lr r1, [ARC_REG_PID] + and r1, r1, 0xFF + breq r1, r0, 5f + + ; Error if H/w and S/w ASID don't match, but NOT if in kernel mode + lr r0, [erstatus] + bbit0 r0, STATUS_U_BIT, 5f + + ; We sure are in troubled waters, Flag the error, but to do so + ; need to switch to kernel mode stack to call error routine + GET_TSK_STACK_BASE r3, sp + + ; Call printk to shoutout aloud + mov r0, 1 + j print_asid_mismatch + +5: ; ASIDs match so proceed normally + nop + +#endif + +.endm + +;============================================================================ +;TLB Miss handling Code +;============================================================================ + +;----------------------------------------------------------------------------- +; This macro does the page-table lookup for the faulting address. +; OUT: r0 = PTE faulted on, r1 = ptr to PTE, r2 = Faulting V-address +.macro LOAD_FAULT_PTE + + lr r2, [efa] + + lr r1, [ARC_REG_SCRATCH_DATA0] ; current pgd + + lsr r0, r2, PGDIR_SHIFT ; Bits for indexing into PGD + ld.as r1, [r1, r0] ; PGD entry corresp to faulting addr + and.f r1, r1, PAGE_MASK ; Ignoring protection and other flags + ; contains Ptr to Page Table + bz.d do_slow_path_pf ; if no Page Table, do page fault + + ; Get the PTE entry: The idea is + ; (1) x = addr >> PAGE_SHIFT -> masks page-off bits from @fault-addr + ; (2) y = x & (PTRS_PER_PTE - 1) -> to get index + ; (3) z = pgtbl[y] + ; To avoid the multiply by in end, we do the -2, <<2 below + + lsr r0, r2, (PAGE_SHIFT - 2) + and r0, r0, ( (PTRS_PER_PTE - 1) << 2) + ld.aw r0, [r1, r0] ; get PTE and PTE ptr for fault addr + +.endm + +;----------------------------------------------------------------- +; Convert Linux PTE entry into TLB entry +; A one-word PTE entry is programmed as two-word TLB Entry [PD0:PD1] in mmu +; IN: r0 = PTE, r1 = ptr to PTE + +.macro CONV_PTE_TO_TLB + and r3, r0, PTE_BITS_IN_PD1 ; Extract permission flags+PFN from PTE + sr r3, [ARC_REG_TLBPD1] ; these go in PD1 + + and r2, r0, PTE_BITS_IN_PD0 ; Extract other PTE flags: (V)alid, (G)lb +#if (CONFIG_ARC_MMU_VER <= 2) /* Neednot be done with v3 onwards */ + lsr r2, r2 ; shift PTE flags to match layout in PD0 +#endif + + lr r3,[ARC_REG_TLBPD0] ; MMU prepares PD0 with vaddr and asid + + or r3, r3, r2 ; S | vaddr | {sasid|asid} + sr r3,[ARC_REG_TLBPD0] ; rewrite PD0 +.endm + +;----------------------------------------------------------------- +; Commit the TLB entry into MMU + +.macro COMMIT_ENTRY_TO_MMU + + /* Get free TLB slot: Set = computed from vaddr, way = random */ + sr TLBGetIndex, [ARC_REG_TLBCOMMAND] + + /* Commit the Write */ +#if (CONFIG_ARC_MMU_VER >= 2) /* introduced in v2 */ + sr TLBWriteNI, [ARC_REG_TLBCOMMAND] +#else + sr TLBWrite, [ARC_REG_TLBCOMMAND] +#endif +.endm + +;----------------------------------------------------------------- +; ARC700 Exception Handling doesn't auto-switch stack and it only provides +; ONE scratch AUX reg "ARC_REG_SCRATCH_DATA0" +; +; For Non-SMP, the scratch AUX reg is repurposed to cache task PGD, so a +; "global" is used to free-up FIRST core reg to be able to code the rest of +; exception prologue (IRQ auto-disabled on Exceptions, so it's IRQ-safe). +; Since the Fast Path TLB Miss handler is coded with 4 regs, the remaining 3 +; need to be saved as well by extending the "global" to be 4 words. Hence +; ".size ex_saved_reg1, 16" +; [All of this dance is to avoid stack switching for each TLB Miss, since we +; only need to save only a handful of regs, as opposed to complete reg file] + +; As simple as that.... + +.macro TLBMISS_FREEUP_REGS + st r0, [@ex_saved_reg1] + mov_s r0, @ex_saved_reg1 + st_s r1, [r0, 4] + st_s r2, [r0, 8] + st_s r3, [r0, 12] + + ; VERIFY if the ASID in MMU-PID Reg is same as + ; one in Linux data structures + + DBG_ASID_MISMATCH +.endm + +;----------------------------------------------------------------- +.macro TLBMISS_RESTORE_REGS + mov_s r0, @ex_saved_reg1 + ld_s r3, [r0,12] + ld_s r2, [r0, 8] + ld_s r1, [r0, 4] + ld_s r0, [r0] +.endm + +.section .text, "ax",@progbits ;Fast Path Code, candidate for ICCM + +;----------------------------------------------------------------------------- +; I-TLB Miss Exception Handler +;----------------------------------------------------------------------------- + +ARC_ENTRY EV_TLBMissI + + TLBMISS_FREEUP_REGS + + ;---------------------------------------------------------------- + ; Get the PTE corresponding to V-addr accessed + LOAD_FAULT_PTE + + ;---------------------------------------------------------------- + ; VERIFY_PTE: Check if PTE permissions approp for executing code + cmp_s r2, VMALLOC_START + mov.lo r2, (_PAGE_PRESENT | _PAGE_READ | _PAGE_EXECUTE) + mov.hs r2, (_PAGE_PRESENT | _PAGE_K_READ | _PAGE_K_EXECUTE) + + and r3, r0, r2 ; Mask out NON Flag bits from PTE + xor.f r3, r3, r2 ; check ( ( pte & flags_test ) == flags_test ) + bnz do_slow_path_pf + + ; Let Linux VM know that the page was accessed + or r0, r0, (_PAGE_PRESENT | _PAGE_ACCESSED) ; set Accessed Bit + st_s r0, [r1] ; Write back PTE + + CONV_PTE_TO_TLB + COMMIT_ENTRY_TO_MMU + TLBMISS_RESTORE_REGS + rtie + +ARC_EXIT EV_TLBMissI + +;----------------------------------------------------------------------------- +; D-TLB Miss Exception Handler +;----------------------------------------------------------------------------- + +ARC_ENTRY EV_TLBMissD + + TLBMISS_FREEUP_REGS + + ;---------------------------------------------------------------- + ; Get the PTE corresponding to V-addr accessed + ; If PTE exists, it will setup, r0 = PTE, r1 = Ptr to PTE + LOAD_FAULT_PTE + + ;---------------------------------------------------------------- + ; VERIFY_PTE: Chk if PTE permissions approp for data access (R/W/R+W) + + mov_s r2, 0 + lr r3, [ecr] + btst_s r3, ECR_C_BIT_DTLB_LD_MISS ; Read Access + or.nz r2, r2, _PAGE_READ ; chk for Read flag in PTE + btst_s r3, ECR_C_BIT_DTLB_ST_MISS ; Write Access + or.nz r2, r2, _PAGE_WRITE ; chk for Write flag in PTE + ; Above laddering takes care of XCHG access + ; which is both Read and Write + + ; If kernel mode access, ; make _PAGE_xx flags as _PAGE_K_xx + ; For copy_(to|from)_user, despite exception taken in kernel mode, + ; this code is not hit, because EFA would still be the user mode + ; address (EFA < 0x6000_0000). + ; This code is for legit kernel mode faults, vmalloc specifically + ; (EFA: 0x7000_0000 to 0x7FFF_FFFF) + + lr r3, [efa] + cmp r3, VMALLOC_START - 1 ; If kernel mode access + asl.hi r2, r2, 3 ; make _PAGE_xx flags as _PAGE_K_xx + or r2, r2, _PAGE_PRESENT ; Common flag for K/U mode + + ; By now, r2 setup with all the Flags we need to check in PTE + and r3, r0, r2 ; Mask out NON Flag bits from PTE + brne.d r3, r2, do_slow_path_pf ; is ((pte & flags_test) == flags_test) + + ;---------------------------------------------------------------- + ; UPDATE_PTE: Let Linux VM know that page was accessed/dirty + lr r3, [ecr] + or r0, r0, (_PAGE_PRESENT | _PAGE_ACCESSED) ; Accessed bit always + btst_s r3, ECR_C_BIT_DTLB_ST_MISS ; See if it was a Write Access ? + or.nz r0, r0, _PAGE_MODIFIED ; if Write, set Dirty bit as well + st_s r0, [r1] ; Write back PTE + + CONV_PTE_TO_TLB + +#if (CONFIG_ARC_MMU_VER == 1) + ; MMU with 2 way set assoc J-TLB, needs some help in pathetic case of + ; memcpy where 3 parties contend for 2 ways, ensuing a livelock. + ; But only for old MMU or one with Metal Fix + TLB_WRITE_HEURISTICS +#endif + + COMMIT_ENTRY_TO_MMU + TLBMISS_RESTORE_REGS + rtie + +;-------- Common routine to call Linux Page Fault Handler ----------- +do_slow_path_pf: + + ; Restore the 4-scratch regs saved by fast path miss handler + TLBMISS_RESTORE_REGS + + ; Slow path TLB Miss handled as a regular ARC Exception + ; (stack switching / save the complete reg-file). + ; That requires freeing up r9 + EXCPN_PROLOG_FREEUP_REG r9 + + lr r9, [erstatus] + + SWITCH_TO_KERNEL_STK + SAVE_ALL_SYS + + ; ------- setup args for Linux Page fault Hanlder --------- + mov_s r0, sp + lr r2, [efa] + lr r3, [ecr] + + ; Both st and ex imply WRITE access of some sort, hence do_page_fault( ) + ; invoked with write=1 for DTLB-st/ex Miss and write=0 for ITLB miss or + ; DTLB-ld Miss + ; DTLB Miss Cause code is ld = 0x01 , st = 0x02, ex = 0x03 + ; Following code uses that fact that st/ex have one bit in common + + btst_s r3, ECR_C_BIT_DTLB_ST_MISS + mov.z r1, 0 + mov.nz r1, 1 + + ; We don't want exceptions to be disabled while the fault is handled. + ; Now that we have saved the context we return from exception hence + ; exceptions get re-enable + + FAKE_RET_FROM_EXCPN r9 + + bl do_page_fault + b ret_from_exception + +ARC_EXIT EV_TLBMissD + +ARC_ENTRY EV_TLBMissB ; Bogus entry to measure sz of DTLBMiss hdlr -- cgit v1.2.3 From d79e678d746d3d4234477f08ce7d27d55ebe283a Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 18 Jan 2013 15:12:20 +0530 Subject: ARC: TLB flush Handling Signed-off-by: Vineet Gupta --- arch/arc/include/asm/tlb.h | 17 +++ arch/arc/include/asm/tlbflush.h | 28 ++++ arch/arc/mm/tlb.c | 311 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 356 insertions(+) create mode 100644 arch/arc/include/asm/tlbflush.h (limited to 'arch/arc/mm/tlb.c') diff --git a/arch/arc/include/asm/tlb.h b/arch/arc/include/asm/tlb.h index b571e121929b..3eb2ce0bdfa3 100644 --- a/arch/arc/include/asm/tlb.h +++ b/arch/arc/include/asm/tlb.h @@ -21,6 +21,23 @@ #ifndef __ASSEMBLY__ +#define tlb_flush(tlb) local_flush_tlb_mm((tlb)->mm) + +/* + * This pair is called at time of munmap/exit to flush cache and TLB entries + * for mappings being torn down. + * 1) cache-flush part -implemented via tlb_start_vma( ) can be NOP (for now) + * as we don't support aliasing configs in our VIPT D$. + * 2) tlb-flush part - implemted via tlb_end_vma( ) can be NOP as well- + * albiet for difft reasons - its better handled by moving to new ASID + * + * Note, read http://lkml.org/lkml/2004/1/15/6 + */ +#define tlb_start_vma(tlb, vma) +#define tlb_end_vma(tlb, vma) + +#define __tlb_remove_tlb_entry(tlb, ptep, address) + #include #include diff --git a/arch/arc/include/asm/tlbflush.h b/arch/arc/include/asm/tlbflush.h new file mode 100644 index 000000000000..b2f9bc7f68c8 --- /dev/null +++ b/arch/arc/include/asm/tlbflush.h @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __ASM_ARC_TLBFLUSH__ +#define __ASM_ARC_TLBFLUSH__ + +#include + +void local_flush_tlb_all(void); +void local_flush_tlb_mm(struct mm_struct *mm); +void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page); +void local_flush_tlb_kernel_range(unsigned long start, unsigned long end); +void local_flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end); + +/* XXX: Revisit for SMP */ +#define flush_tlb_range(vma, s, e) local_flush_tlb_range(vma, s, e) +#define flush_tlb_page(vma, page) local_flush_tlb_page(vma, page) +#define flush_tlb_kernel_range(s, e) local_flush_tlb_kernel_range(s, e) +#define flush_tlb_all() local_flush_tlb_all() +#define flush_tlb_mm(mm) local_flush_tlb_mm(mm) + +#endif diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index 404e5be4f704..232a0ff80a5e 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -6,13 +6,97 @@ * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. + * + * vineetg: Aug 2011 + * -Reintroduce duplicate PD fixup - some customer chips still have the issue + * + * vineetg: May 2011 + * -No need to flush_cache_page( ) for each call to update_mmu_cache() + * some of the LMBench tests improved amazingly + * = page-fault thrice as fast (75 usec to 28 usec) + * = mmap twice as fast (9.6 msec to 4.6 msec), + * = fork (5.3 msec to 3.7 msec) + * + * vineetg: April 2011 : + * -MMU v3: PD{0,1} bits layout changed: They don't overlap anymore, + * helps avoid a shift when preparing PD0 from PTE + * + * vineetg: April 2011 : Preparing for MMU V3 + * -MMU v2/v3 BCRs decoded differently + * -Remove TLB_SIZE hardcoding as it's variable now: 256 or 512 + * -tlb_entry_erase( ) can be void + * -local_flush_tlb_range( ): + * = need not "ceil" @end + * = walks MMU only if range spans < 32 entries, as opposed to 256 + * + * Vineetg: Sept 10th 2008 + * -Changes related to MMU v2 (Rel 4.8) + * + * Vineetg: Aug 29th 2008 + * -In TLB Flush operations (Metal Fix MMU) there is a explict command to + * flush Micro-TLBS. If TLB Index Reg is invalid prior to TLBIVUTLB cmd, + * it fails. Thus need to load it with ANY valid value before invoking + * TLBIVUTLB cmd + * + * Vineetg: Aug 21th 2008: + * -Reduced the duration of IRQ lockouts in TLB Flush routines + * -Multiple copies of TLB erase code seperated into a "single" function + * -In TLB Flush routines, interrupt disabling moved UP to retrieve ASID + * in interrupt-safe region. + * + * Vineetg: April 23rd Bug #93131 + * Problem: tlb_flush_kernel_range() doesnt do anything if the range to + * flush is more than the size of TLB itself. + * + * Rahul Trivedi : Codito Technologies 2004 */ #include #include +#include #include #include +/* Need for ARC MMU v2 + * + * ARC700 MMU-v1 had a Joint-TLB for Code and Data and is 2 way set-assoc. + * For a memcpy operation with 3 players (src/dst/code) such that all 3 pages + * map into same set, there would be contention for the 2 ways causing severe + * Thrashing. + * + * Although J-TLB is 2 way set assoc, ARC700 caches J-TLB into uTLBS which has + * much higher associativity. u-D-TLB is 8 ways, u-I-TLB is 4 ways. + * Given this, the thrasing problem should never happen because once the 3 + * J-TLB entries are created (even though 3rd will knock out one of the prev + * two), the u-D-TLB and u-I-TLB will have what is required to accomplish memcpy + * + * Yet we still see the Thrashing because a J-TLB Write cause flush of u-TLBs. + * This is a simple design for keeping them in sync. So what do we do? + * The solution which James came up was pretty neat. It utilised the assoc + * of uTLBs by not invalidating always but only when absolutely necessary. + * + * - Existing TLB commands work as before + * - New command (TLBWriteNI) for TLB write without clearing uTLBs + * - New command (TLBIVUTLB) to invalidate uTLBs. + * + * The uTLBs need only be invalidated when pages are being removed from the + * OS page table. If a 'victim' TLB entry is being overwritten in the main TLB + * as a result of a miss, the removed entry is still allowed to exist in the + * uTLBs as it is still valid and present in the OS page table. This allows the + * full associativity of the uTLBs to hide the limited associativity of the main + * TLB. + * + * During a miss handler, the new "TLBWriteNI" command is used to load + * entries without clearing the uTLBs. + * + * When the OS page table is updated, TLB entries that may be associated with a + * removed page are removed (flushed) from the TLB using TLBWrite. In this + * circumstance, the uTLBs must also be cleared. This is done by using the + * existing TLBWrite command. An explicit IVUTLB is also required for those + * corner cases when TLBWrite was not executed at all because the corresp + * J-TLB entry got evicted/replaced. + */ + /* A copy of the ASID from the PID reg is kept in asid_cache */ int asid_cache = FIRST_ASID; @@ -22,6 +106,233 @@ int asid_cache = FIRST_ASID; */ struct mm_struct *asid_mm_map[NUM_ASID + 1]; +/* + * Utility Routine to erase a J-TLB entry + * The procedure is to look it up in the MMU. If found, ERASE it by + * issuing a TlbWrite CMD with PD0 = PD1 = 0 + */ + +static void __tlb_entry_erase(void) +{ + write_aux_reg(ARC_REG_TLBPD1, 0); + write_aux_reg(ARC_REG_TLBPD0, 0); + write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); +} + +static void tlb_entry_erase(unsigned int vaddr_n_asid) +{ + unsigned int idx; + + /* Locate the TLB entry for this vaddr + ASID */ + write_aux_reg(ARC_REG_TLBPD0, vaddr_n_asid); + write_aux_reg(ARC_REG_TLBCOMMAND, TLBProbe); + idx = read_aux_reg(ARC_REG_TLBINDEX); + + /* No error means entry found, zero it out */ + if (likely(!(idx & TLB_LKUP_ERR))) { + __tlb_entry_erase(); + } else { /* Some sort of Error */ + + /* Duplicate entry error */ + if (idx & 0x1) { + /* TODO we need to handle this case too */ + pr_emerg("unhandled Duplicate flush for %x\n", + vaddr_n_asid); + } + /* else entry not found so nothing to do */ + } +} + +/**************************************************************************** + * ARC700 MMU caches recently used J-TLB entries (RAM) as uTLBs (FLOPs) + * + * New IVUTLB cmd in MMU v2 explictly invalidates the uTLB + * + * utlb_invalidate ( ) + * -For v2 MMU calls Flush uTLB Cmd + * -For v1 MMU does nothing (except for Metal Fix v1 MMU) + * This is because in v1 TLBWrite itself invalidate uTLBs + ***************************************************************************/ + +static void utlb_invalidate(void) +{ +#if (CONFIG_ARC_MMU_VER >= 2) + +#if (CONFIG_ARC_MMU_VER < 3) + /* MMU v2 introduced the uTLB Flush command. + * There was however an obscure hardware bug, where uTLB flush would + * fail when a prior probe for J-TLB (both totally unrelated) would + * return lkup err - because the entry didnt exist in MMU. + * The Workround was to set Index reg with some valid value, prior to + * flush. This was fixed in MMU v3 hence not needed any more + */ + unsigned int idx; + + /* make sure INDEX Reg is valid */ + idx = read_aux_reg(ARC_REG_TLBINDEX); + + /* If not write some dummy val */ + if (unlikely(idx & TLB_LKUP_ERR)) + write_aux_reg(ARC_REG_TLBINDEX, 0xa); +#endif + + write_aux_reg(ARC_REG_TLBCOMMAND, TLBIVUTLB); +#endif + +} + +/* + * Un-conditionally (without lookup) erase the entire MMU contents + */ + +noinline void local_flush_tlb_all(void) +{ + unsigned long flags; + unsigned int entry; + struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; + + local_irq_save(flags); + + /* Load PD0 and PD1 with template for a Blank Entry */ + write_aux_reg(ARC_REG_TLBPD1, 0); + write_aux_reg(ARC_REG_TLBPD0, 0); + + for (entry = 0; entry < mmu->num_tlb; entry++) { + /* write this entry to the TLB */ + write_aux_reg(ARC_REG_TLBINDEX, entry); + write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); + } + + utlb_invalidate(); + + local_irq_restore(flags); +} + +/* + * Flush the entrie MM for userland. The fastest way is to move to Next ASID + */ +noinline void local_flush_tlb_mm(struct mm_struct *mm) +{ + /* + * Small optimisation courtesy IA64 + * flush_mm called during fork,exit,munmap etc, multiple times as well. + * Only for fork( ) do we need to move parent to a new MMU ctxt, + * all other cases are NOPs, hence this check. + */ + if (atomic_read(&mm->mm_users) == 0) + return; + + /* + * Workaround for Android weirdism: + * A binder VMA could end up in a task such that vma->mm != tsk->mm + * old code would cause h/w - s/w ASID to get out of sync + */ + if (current->mm != mm) + destroy_context(mm); + else + get_new_mmu_context(mm); +} + +/* + * Flush a Range of TLB entries for userland. + * @start is inclusive, while @end is exclusive + * Difference between this and Kernel Range Flush is + * -Here the fastest way (if range is too large) is to move to next ASID + * without doing any explicit Shootdown + * -In case of kernel Flush, entry has to be shot down explictly + */ +void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + unsigned long flags; + unsigned int asid; + + /* If range @start to @end is more than 32 TLB entries deep, + * its better to move to a new ASID rather than searching for + * individual entries and then shooting them down + * + * The calc above is rough, doesn't account for unaligned parts, + * since this is heuristics based anyways + */ + if (unlikely((end - start) >= PAGE_SIZE * 32)) { + local_flush_tlb_mm(vma->vm_mm); + return; + } + + /* + * @start moved to page start: this alone suffices for checking + * loop end condition below, w/o need for aligning @end to end + * e.g. 2000 to 4001 will anyhow loop twice + */ + start &= PAGE_MASK; + + local_irq_save(flags); + asid = vma->vm_mm->context.asid; + + if (asid != NO_ASID) { + while (start < end) { + tlb_entry_erase(start | (asid & 0xff)); + start += PAGE_SIZE; + } + } + + utlb_invalidate(); + + local_irq_restore(flags); +} + +/* Flush the kernel TLB entries - vmalloc/modules (Global from MMU perspective) + * @start, @end interpreted as kvaddr + * Interestingly, shared TLB entries can also be flushed using just + * @start,@end alone (interpreted as user vaddr), although technically SASID + * is also needed. However our smart TLbProbe lookup takes care of that. + */ +void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + unsigned long flags; + + /* exactly same as above, except for TLB entry not taking ASID */ + + if (unlikely((end - start) >= PAGE_SIZE * 32)) { + local_flush_tlb_all(); + return; + } + + start &= PAGE_MASK; + + local_irq_save(flags); + while (start < end) { + tlb_entry_erase(start); + start += PAGE_SIZE; + } + + utlb_invalidate(); + + local_irq_restore(flags); +} + +/* + * Delete TLB entry in MMU for a given page (??? address) + * NOTE One TLB entry contains translation for single PAGE + */ + +void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) +{ + unsigned long flags; + + /* Note that it is critical that interrupts are DISABLED between + * checking the ASID and using it flush the TLB entry + */ + local_irq_save(flags); + + if (vma->vm_mm->context.asid != NO_ASID) { + tlb_entry_erase((page & PAGE_MASK) | + (vma->vm_mm->context.asid & 0xff)); + utlb_invalidate(); + } + + local_irq_restore(flags); +} /* * Routine to create a TLB entry -- cgit v1.2.3 From 41195d236e84458bebd4fdc218610a92231ac791 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 18 Jan 2013 15:12:23 +0530 Subject: ARC: SMP support ARC common code to enable a SMP system + ISS provided SMP extensions. ARC700 natively lacks SMP support, hence some of the core features are are only enabled if SoCs have the necessary h/w pixie-dust. This includes: -Inter Processor Interrupts (IPI) -Cache coherency -load-locked/store-conditional ... The low level exception handling would be completely broken in SMP because we don't have hardware assisted stack switching. Thus a fair bit of this code is repurposing the MMU_SCRATCH reg for event handler prologues to keep them re-entrant. Many thanks to Rajeshwar Ranga for his initial "major" contributions to SMP Port (back in 2008), and to Noam Camus and Gilad Ben-Yossef for help with resurrecting that in 3.2 kernel (2012). Note that this platform code is again singleton design pattern - so multiple SMP platforms won't build at the moment - this deficiency is addressed in subsequent patches within this series. Signed-off-by: Vineet Gupta Cc: Arnd Bergmann Cc: Thomas Gleixner Cc: Rajeshwar Ranga Cc: Noam Camus Cc: Gilad Ben-Yossef --- arch/arc/Kconfig | 39 +++- arch/arc/Makefile | 3 + arch/arc/include/asm/entry.h | 49 +++++ arch/arc/include/asm/mmu_context.h | 4 + arch/arc/include/asm/mutex.h | 9 + arch/arc/include/asm/pgtable.h | 4 + arch/arc/include/asm/processor.h | 8 + arch/arc/include/asm/smp.h | 107 +++++++++++ arch/arc/kernel/Makefile | 1 + arch/arc/kernel/ctx_sw.c | 11 ++ arch/arc/kernel/entry.S | 4 + arch/arc/kernel/head.S | 33 ++++ arch/arc/kernel/irq.c | 5 + arch/arc/kernel/setup.c | 4 + arch/arc/kernel/smp.c | 320 +++++++++++++++++++++++++++++++ arch/arc/mm/tlb.c | 6 + arch/arc/mm/tlbex.S | 38 ++++ arch/arc/plat-arcfpga/Kconfig | 14 ++ arch/arc/plat-arcfpga/Makefile | 1 + arch/arc/plat-arcfpga/include/plat/irq.h | 10 +- arch/arc/plat-arcfpga/include/plat/smp.h | 115 +++++++++++ arch/arc/plat-arcfpga/irq.c | 10 + arch/arc/plat-arcfpga/smp.c | 167 ++++++++++++++++ 23 files changed, 960 insertions(+), 2 deletions(-) create mode 100644 arch/arc/kernel/smp.c create mode 100644 arch/arc/plat-arcfpga/include/plat/smp.h create mode 100644 arch/arc/plat-arcfpga/smp.c (limited to 'arch/arc/mm/tlb.c') diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 68350aa3d297..52f5c072f6da 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -116,9 +116,42 @@ config CPU_BIG_ENDIAN help Build kernel for Big Endian Mode of ARC CPU +config SMP + bool "Symmetric Multi-Processing (Incomplete)" + default n + select USE_GENERIC_SMP_HELPERS + help + This enables support for systems with more than one CPU. If you have + a system with only one CPU, like most personal computers, say N. If + you have a system with more than one CPU, say Y. + +if SMP + +config ARC_HAS_COH_CACHES + def_bool n + +config ARC_HAS_COH_LLSC + def_bool n + +config ARC_HAS_COH_RTSC + def_bool n + +config ARC_HAS_REENTRANT_IRQ_LV2 + def_bool n + +endif + +config NR_CPUS + int "Maximum number of CPUs (2-32)" + range 2 32 + depends on SMP + default "2" + menuconfig ARC_CACHE bool "Enable Cache Support" default y + # if SMP, cache enabled ONLY if ARC implementation has cache coherency + depends on !SMP || ARC_HAS_COH_CACHES if ARC_CACHE @@ -213,6 +246,8 @@ config ARC_COMPACT_IRQ_LEVELS default n # Timer HAS to be high priority, for any other high priority config select ARC_IRQ3_LV2 + # if SMP, LV2 enabled ONLY if ARC implementation has LV2 re-entrancy + depends on !SMP || ARC_HAS_REENTRANT_IRQ_LV2 if ARC_COMPACT_IRQ_LEVELS @@ -261,6 +296,8 @@ config ARC_HAS_RTSC bool "Insn: RTSC (64-bit r/o cycle counter)" default y depends on ARC_CPU_REL_4_10 + # if SMP, enable RTSC only if counter is coherent across cores + depends on !SMP || ARC_HAS_COH_RTSC endmenu # "ARC CPU Configuration" @@ -309,7 +346,7 @@ menuconfig ARC_DBG config ARC_DBG_TLB_PARANOIA bool "Paranoia Checks in Low Level TLB Handlers" - depends on ARC_DBG + depends on ARC_DBG && !SMP default n config ARC_DBG_TLB_MISS_COUNT diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 642c0406d600..fae66bfc2bdb 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -133,3 +133,6 @@ archclean: # Thus forcing all exten calls in this file to be long calls export CFLAGS_decompress_inflate.o = -mmedium-calls export CFLAGS_initramfs.o = -mmedium-calls +ifdef CONFIG_SMP +export CFLAGS_core.o = -mmedium-calls +endif diff --git a/arch/arc/include/asm/entry.h b/arch/arc/include/asm/entry.h index 23ef2de1e09f..23daa326fc9b 100644 --- a/arch/arc/include/asm/entry.h +++ b/arch/arc/include/asm/entry.h @@ -389,11 +389,19 @@ * to be saved again on kernel mode stack, as part of ptregs. *-------------------------------------------------------------*/ .macro EXCPN_PROLOG_FREEUP_REG reg +#ifdef CONFIG_SMP + sr \reg, [ARC_REG_SCRATCH_DATA0] +#else st \reg, [@ex_saved_reg1] +#endif .endm .macro EXCPN_PROLOG_RESTORE_REG reg +#ifdef CONFIG_SMP + lr \reg, [ARC_REG_SCRATCH_DATA0] +#else ld \reg, [@ex_saved_reg1] +#endif .endm /*-------------------------------------------------------------- @@ -508,7 +516,11 @@ /* restore original r9 , saved in int1_saved_reg * It will be saved on stack in macro: SAVE_CALLER_SAVED */ +#ifdef CONFIG_SMP + lr r9, [ARC_REG_SCRATCH_DATA0] +#else ld r9, [@int1_saved_reg] +#endif /* now we are ready to save the remaining context :) */ st orig_r8_IS_IRQ1, [sp, 8] /* Event Type */ @@ -639,6 +651,41 @@ bmsk \reg, \reg, 7 .endm +#ifdef CONFIG_SMP + +/*------------------------------------------------- + * Retrieve the current running task on this CPU + * 1. Determine curr CPU id. + * 2. Use it to index into _current_task[ ] + */ +.macro GET_CURR_TASK_ON_CPU reg + GET_CPU_ID \reg + ld.as \reg, [@_current_task, \reg] +.endm + +/*------------------------------------------------- + * Save a new task as the "current" task on this CPU + * 1. Determine curr CPU id. + * 2. Use it to index into _current_task[ ] + * + * Coded differently than GET_CURR_TASK_ON_CPU (which uses LD.AS) + * because ST r0, [r1, offset] can ONLY have s9 @offset + * while LD can take s9 (4 byte insn) or LIMM (8 byte insn) + */ + +.macro SET_CURR_TASK_ON_CPU tsk, tmp + GET_CPU_ID \tmp + add2 \tmp, @_current_task, \tmp + st \tsk, [\tmp] +#ifdef CONFIG_ARC_CURR_IN_REG + mov r25, \tsk +#endif + +.endm + + +#else /* Uniprocessor implementation of macros */ + .macro GET_CURR_TASK_ON_CPU reg ld \reg, [@_current_task] .endm @@ -650,6 +697,8 @@ #endif .endm +#endif /* SMP / UNI */ + /* ------------------------------------------------------------------ * Get the ptr to some field of Current Task at @off in task struct * -Uses r25 for Current task ptr if that is enabled diff --git a/arch/arc/include/asm/mmu_context.h b/arch/arc/include/asm/mmu_context.h index d12f3dec8b70..0d71fb11b57c 100644 --- a/arch/arc/include/asm/mmu_context.h +++ b/arch/arc/include/asm/mmu_context.h @@ -147,8 +147,10 @@ init_new_context(struct task_struct *tsk, struct mm_struct *mm) static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { +#ifndef CONFIG_SMP /* PGD cached in MMU reg to avoid 3 mem lookups: task->mm->pgd */ write_aux_reg(ARC_REG_SCRATCH_DATA0, next->pgd); +#endif /* * Get a new ASID if task doesn't have a valid one. Possible when @@ -197,7 +199,9 @@ static inline void destroy_context(struct mm_struct *mm) static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) { +#ifndef CONFIG_SMP write_aux_reg(ARC_REG_SCRATCH_DATA0, next->pgd); +#endif /* Unconditionally get a new ASID */ get_new_mmu_context(next); diff --git a/arch/arc/include/asm/mutex.h b/arch/arc/include/asm/mutex.h index 3be5e64da139..a2f88ff9f506 100644 --- a/arch/arc/include/asm/mutex.h +++ b/arch/arc/include/asm/mutex.h @@ -6,4 +6,13 @@ * published by the Free Software Foundation. */ +/* + * xchg() based mutex fast path maintains a state of 0 or 1, as opposed to + * atomic dec based which can "count" any number of lock contenders. + * This ideally needs to be fixed in core, but for now switching to dec ver. + */ +#if defined(CONFIG_SMP) && (CONFIG_NR_CPUS > 2) +#include +#else #include +#endif diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index dcb0701528aa..b7e36684c091 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -354,11 +354,15 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, * Thus use this macro only when you are certain that "current" is current * e.g. when dealing with signal frame setup code etc */ +#ifndef CONFIG_SMP #define pgd_offset_fast(mm, addr) \ ({ \ pgd_t *pgd_base = (pgd_t *) read_aux_reg(ARC_REG_SCRATCH_DATA0); \ pgd_base + pgd_index(addr); \ }) +#else +#define pgd_offset_fast(mm, addr) pgd_offset(mm, addr) +#endif extern void paging_init(void); extern pgd_t swapper_pg_dir[] __aligned(PAGE_SIZE); diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h index b7b155610067..5f26b2c1cba0 100644 --- a/arch/arc/include/asm/processor.h +++ b/arch/arc/include/asm/processor.h @@ -58,7 +58,15 @@ unsigned long thread_saved_pc(struct task_struct *t); /* Prepare to copy thread state - unlazy all lazy status */ #define prepare_to_copy(tsk) do { } while (0) +/* + * A lot of busy-wait loops in SMP are based off of non-volatile data otherwise + * get optimised away by gcc + */ +#ifdef CONFIG_SMP +#define cpu_relax() __asm__ __volatile__ ("" : : : "memory") +#else #define cpu_relax() do { } while (0) +#endif #define copy_segments(tsk, mm) do { } while (0) #define release_segments(mm) do { } while (0) diff --git a/arch/arc/include/asm/smp.h b/arch/arc/include/asm/smp.h index 4341f3ba7d92..f91f1946272f 100644 --- a/arch/arc/include/asm/smp.h +++ b/arch/arc/include/asm/smp.h @@ -9,6 +9,69 @@ #ifndef __ASM_ARC_SMP_H #define __ASM_ARC_SMP_H +#ifdef CONFIG_SMP + +#include +#include +#include + +#define raw_smp_processor_id() (current_thread_info()->cpu) + +/* including cpumask.h leads to cyclic deps hence this Forward declaration */ +struct cpumask; + +/* + * APIs provided by arch SMP code to generic code + */ +extern void arch_send_call_function_single_ipi(int cpu); +extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); + +/* + * APIs provided by arch SMP code to rest of arch code + */ +extern void __init smp_init_cpus(void); +extern void __init first_lines_of_secondary(void); + +/* + * API expected BY platform smp code (FROM arch smp code) + * + * smp_ipi_irq_setup: + * Takes @cpu and @irq to which the arch-common ISR is hooked up + */ +extern int smp_ipi_irq_setup(int cpu, int irq); + +/* + * APIs expected FROM platform smp code + * + * arc_platform_smp_cpuinfo: + * returns a string containing info for /proc/cpuinfo + * + * arc_platform_smp_init_cpu: + * Called from start_kernel_secondary to do any CPU local setup + * such as starting a timer, setting up IPI etc + * + * arc_platform_smp_wait_to_boot: + * Called from early bootup code for non-Master CPUs to "park" them + * + * arc_platform_smp_wakeup_cpu: + * Called from __cpu_up (Master CPU) to kick start another one + * + * arc_platform_ipi_send: + * Takes @cpumask to which IPI(s) would be sent. + * The actual msg-id/buffer is manager in arch-common code + * + * arc_platform_ipi_clear: + * Takes @cpu which got IPI at @irq to do any IPI clearing + */ +extern const char *arc_platform_smp_cpuinfo(void); +extern void arc_platform_smp_init_cpu(void); +extern void arc_platform_smp_wait_to_boot(int cpu); +extern void arc_platform_smp_wakeup_cpu(int cpu, unsigned long pc); +extern void arc_platform_ipi_send(const struct cpumask *callmap); +extern void arc_platform_ipi_clear(int cpu, int irq); + +#endif /* CONFIG_SMP */ + /* * ARC700 doesn't support atomic Read-Modify-Write ops. * Originally Interrupts had to be disabled around code to gaurantee atomicity. @@ -18,10 +81,52 @@ * * (1) These insn were introduced only in 4.10 release. So for older released * support needed. + * + * (2) In a SMP setup, the LLOCK/SCOND atomiticity across CPUs needs to be + * gaurantted by the platform (not something which core handles). + * Assuming a platform won't, SMP Linux needs to use spinlocks + local IRQ + * disabling for atomicity. + * + * However exported spinlock API is not usable due to cyclic hdr deps + * (even after system.h disintegration upstream) + * asm/bitops.h -> linux/spinlock.h -> linux/preempt.h + * -> linux/thread_info.h -> linux/bitops.h -> asm/bitops.h + * + * So the workaround is to use the lowest level arch spinlock API. + * The exported spinlock API is smart enough to be NOP for !CONFIG_SMP, + * but same is not true for ARCH backend, hence the need for 2 variants */ #ifndef CONFIG_ARC_HAS_LLSC #include +#ifdef CONFIG_SMP + +#include + +extern arch_spinlock_t smp_atomic_ops_lock; +extern arch_spinlock_t smp_bitops_lock; + +#define atomic_ops_lock(flags) do { \ + local_irq_save(flags); \ + arch_spin_lock(&smp_atomic_ops_lock); \ +} while (0) + +#define atomic_ops_unlock(flags) do { \ + arch_spin_unlock(&smp_atomic_ops_lock); \ + local_irq_restore(flags); \ +} while (0) + +#define bitops_lock(flags) do { \ + local_irq_save(flags); \ + arch_spin_lock(&smp_bitops_lock); \ +} while (0) + +#define bitops_unlock(flags) do { \ + arch_spin_unlock(&smp_bitops_lock); \ + local_irq_restore(flags); \ +} while (0) + +#else /* !CONFIG_SMP */ #define atomic_ops_lock(flags) local_irq_save(flags) #define atomic_ops_unlock(flags) local_irq_restore(flags) @@ -29,6 +134,8 @@ #define bitops_lock(flags) local_irq_save(flags) #define bitops_unlock(flags) local_irq_restore(flags) +#endif /* !CONFIG_SMP */ + #endif /* !CONFIG_ARC_HAS_LLSC */ #endif diff --git a/arch/arc/kernel/Makefile b/arch/arc/kernel/Makefile index f32f65f98850..46c15ff97e97 100644 --- a/arch/arc/kernel/Makefile +++ b/arch/arc/kernel/Makefile @@ -13,6 +13,7 @@ obj-y += signal.o traps.o sys.o troubleshoot.o stacktrace.o clk.o obj-y += devtree.o obj-$(CONFIG_MODULES) += arcksyms.o module.o +obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_ARC_FPU_SAVE_RESTORE) += fpu.o CFLAGS_fpu.o += -mdpfp diff --git a/arch/arc/kernel/ctx_sw.c b/arch/arc/kernel/ctx_sw.c index fbf739cbaf7d..60844dac6132 100644 --- a/arch/arc/kernel/ctx_sw.c +++ b/arch/arc/kernel/ctx_sw.c @@ -58,7 +58,18 @@ __switch_to(struct task_struct *prev_task, struct task_struct *next_task) * For SMP extra work to get to &_current_task[cpu] * (open coded SET_CURR_TASK_ON_CPU) */ +#ifndef CONFIG_SMP "st %2, [@_current_task] \n\t" +#else + "lr r24, [identity] \n\t" + "lsr r24, r24, 8 \n\t" + "bmsk r24, r24, 7 \n\t" + "add2 r24, @_current_task, r24 \n\t" + "st %2, [r24] \n\t" +#endif +#ifdef CONFIG_ARC_CURR_IN_REG + "mov r25, %2 \n\t" +#endif /* get ksp of incoming task from tsk->thread.ksp */ "ld.as sp, [%2, %1] \n\t" diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S index e33a0bf45589..3f6ce98fea11 100644 --- a/arch/arc/kernel/entry.S +++ b/arch/arc/kernel/entry.S @@ -232,7 +232,11 @@ ARC_EXIT handle_interrupt_level2 ARC_ENTRY handle_interrupt_level1 /* free up r9 as scratchpad */ +#ifdef CONFIG_SMP + sr r9, [ARC_REG_SCRATCH_DATA0] +#else st r9, [@int1_saved_reg] +#endif ;Which mode (user/kernel) was the system in when intr occured lr r9, [status32_l1] diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S index e63f6a43abb1..006dec3fc353 100644 --- a/arch/arc/kernel/head.S +++ b/arch/arc/kernel/head.S @@ -27,6 +27,15 @@ stext: ; Don't clobber r0-r4 yet. It might have bootloader provided info ;------------------------------------------------------------------- +#ifdef CONFIG_SMP + ; Only Boot (Master) proceeds. Others wait in platform dependent way + ; IDENTITY Reg [ 3 2 1 0 ] + ; (cpu-id) ^^^ => Zero for UP ARC700 + ; => #Core-ID if SMP (Master 0) + GET_CPU_ID r5 + cmp r5, 0 + jnz arc_platform_smp_wait_to_boot +#endif ; Clear BSS before updating any globals ; XXX: use ZOL here mov r5, __bss_start @@ -76,3 +85,27 @@ stext: GET_TSK_STACK_BASE r9, sp ; r9 = tsk, sp = stack base(output) j start_kernel ; "C" entry point + +#ifdef CONFIG_SMP +;---------------------------------------------------------------- +; First lines of code run by secondary before jumping to 'C' +;---------------------------------------------------------------- + .section .init.text, "ax",@progbits + .type first_lines_of_secondary, @function + .globl first_lines_of_secondary + +first_lines_of_secondary: + + ; setup per-cpu idle task as "current" on this CPU + ld r0, [@secondary_idle_tsk] + SET_CURR_TASK_ON_CPU r0, r1 + + ; setup stack (fp, sp) + mov fp, 0 + + ; set it's stack base to tsk->thread_info bottom + GET_TSK_STACK_BASE r0, sp + + j start_kernel_secondary + +#endif diff --git a/arch/arc/kernel/irq.c b/arch/arc/kernel/irq.c index ca70894e2309..df7da2b5a5bd 100644 --- a/arch/arc/kernel/irq.c +++ b/arch/arc/kernel/irq.c @@ -124,6 +124,11 @@ void __init init_IRQ(void) { init_onchip_IRQ(); plat_init_IRQ(); + +#ifdef CONFIG_SMP + /* Master CPU can initialize it's side of IPI */ + arc_platform_smp_init_cpu(); +#endif } /* diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 27aebd6d9513..4026b5a004d2 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -86,6 +86,10 @@ void __init setup_arch(char **cmdline_p) setup_processor(); +#ifdef CONFIG_SMP + smp_init_cpus(); +#endif + setup_arch_memory(); unflatten_device_tree(); diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c new file mode 100644 index 000000000000..1f762ad6969b --- /dev/null +++ b/arch/arc/kernel/smp.c @@ -0,0 +1,320 @@ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * RajeshwarR: Dec 11, 2007 + * -- Added support for Inter Processor Interrupts + * + * Vineetg: Nov 1st, 2007 + * -- Initial Write (Borrowed heavily from ARM) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +arch_spinlock_t smp_atomic_ops_lock = __ARCH_SPIN_LOCK_UNLOCKED; +arch_spinlock_t smp_bitops_lock = __ARCH_SPIN_LOCK_UNLOCKED; + +/* XXX: per cpu ? Only needed once in early seconday boot */ +struct task_struct *secondary_idle_tsk; + +/* Called from start_kernel */ +void __init smp_prepare_boot_cpu(void) +{ +} + +/* + * Initialise the CPU possible map early - this describes the CPUs + * which may be present or become present in the system. + */ +void __init smp_init_cpus(void) +{ + unsigned int i; + + for (i = 0; i < NR_CPUS; i++) + set_cpu_possible(i, true); +} + +/* called from init ( ) => process 1 */ +void __init smp_prepare_cpus(unsigned int max_cpus) +{ + int i; + + /* + * Initialise the present map, which describes the set of CPUs + * actually populated at the present time. + */ + for (i = 0; i < max_cpus; i++) + set_cpu_present(i, true); +} + +void __init smp_cpus_done(unsigned int max_cpus) +{ + +} + +/* + * After power-up, a non Master CPU needs to wait for Master to kick start it + * + * The default implementation halts + * + * This relies on platform specific support allowing Master to directly set + * this CPU's PC (to be @first_lines_of_secondary() and kick start it. + * + * In lack of such h/w assist, platforms can override this function + * - make this function busy-spin on a token, eventually set by Master + * (from arc_platform_smp_wakeup_cpu()) + * - Once token is available, jump to @first_lines_of_secondary + * (using inline asm). + * + * Alert: can NOT use stack here as it has not been determined/setup for CPU. + * If it turns out to be elaborate, it's better to code it in assembly + * + */ +void __attribute__((weak)) arc_platform_smp_wait_to_boot(int cpu) +{ + /* + * As a hack for debugging - since debugger will single-step over the + * FLAG insn - wrap the halt itself it in a self loop + */ + __asm__ __volatile__( + "1: \n" + " flag 1 \n" + " b 1b \n"); +} + +/* + * The very first "C" code executed by secondary + * Called from asm stub in head.S + * "current"/R25 already setup by low level boot code + */ +void __cpuinit start_kernel_secondary(void) +{ + struct mm_struct *mm = &init_mm; + unsigned int cpu = smp_processor_id(); + + /* MMU, Caches, Vector Table, Interrupts etc */ + setup_processor(); + + atomic_inc(&mm->mm_users); + atomic_inc(&mm->mm_count); + current->active_mm = mm; + + notify_cpu_starting(cpu); + set_cpu_online(cpu, true); + + pr_info("## CPU%u LIVE ##: Executing Code...\n", cpu); + + arc_platform_smp_init_cpu(); + + arc_local_timer_setup(cpu); + + local_irq_enable(); + preempt_disable(); + cpu_idle(); +} + +/* + * Called from kernel_init( ) -> smp_init( ) - for each CPU + * + * At this point, Secondary Processor is "HALT"ed: + * -It booted, but was halted in head.S + * -It was configured to halt-on-reset + * So need to wake it up. + * + * Essential requirements being where to run from (PC) and stack (SP) +*/ +int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle) +{ + unsigned long wait_till; + + secondary_idle_tsk = idle; + + pr_info("Idle Task [%d] %p", cpu, idle); + pr_info("Trying to bring up CPU%u ...\n", cpu); + + arc_platform_smp_wakeup_cpu(cpu, + (unsigned long)first_lines_of_secondary); + + /* wait for 1 sec after kicking the secondary */ + wait_till = jiffies + HZ; + while (time_before(jiffies, wait_till)) { + if (cpu_online(cpu)) + break; + } + + if (!cpu_online(cpu)) { + pr_info("Timeout: CPU%u FAILED to comeup !!!\n", cpu); + return -1; + } + + secondary_idle_tsk = NULL; + + return 0; +} + +/* + * not supported here + */ +int __init setup_profiling_timer(unsigned int multiplier) +{ + return -EINVAL; +} + +/*****************************************************************************/ +/* Inter Processor Interrupt Handling */ +/*****************************************************************************/ + +/* + * structures for inter-processor calls + * A Collection of single bit ipi messages + * + */ + +/* + * TODO_rajesh investigate tlb message types. + * IPI Timer not needed because each ARC has an individual Interrupting Timer + */ +enum ipi_msg_type { + IPI_NOP = 0, + IPI_RESCHEDULE = 1, + IPI_CALL_FUNC, + IPI_CALL_FUNC_SINGLE, + IPI_CPU_STOP +}; + +struct ipi_data { + unsigned long bits; +}; + +static DEFINE_PER_CPU(struct ipi_data, ipi_data); + +static void ipi_send_msg(const struct cpumask *callmap, enum ipi_msg_type msg) +{ + unsigned long flags; + unsigned int cpu; + + local_irq_save(flags); + + for_each_cpu(cpu, callmap) { + struct ipi_data *ipi = &per_cpu(ipi_data, cpu); + set_bit(msg, &ipi->bits); + } + + /* Call the platform specific cross-CPU call function */ + arc_platform_ipi_send(callmap); + + local_irq_restore(flags); +} + +void smp_send_reschedule(int cpu) +{ + ipi_send_msg(cpumask_of(cpu), IPI_RESCHEDULE); +} + +void smp_send_stop(void) +{ + struct cpumask targets; + cpumask_copy(&targets, cpu_online_mask); + cpumask_clear_cpu(smp_processor_id(), &targets); + ipi_send_msg(&targets, IPI_CPU_STOP); +} + +void arch_send_call_function_single_ipi(int cpu) +{ + ipi_send_msg(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE); +} + +void arch_send_call_function_ipi_mask(const struct cpumask *mask) +{ + ipi_send_msg(mask, IPI_CALL_FUNC); +} + +/* + * ipi_cpu_stop - handle IPI from smp_send_stop() + */ +static void ipi_cpu_stop(unsigned int cpu) +{ + machine_halt(); +} + +static inline void __do_IPI(unsigned long *ops, struct ipi_data *ipi, int cpu) +{ + unsigned long msg = 0; + + do { + msg = find_next_bit(ops, BITS_PER_LONG, msg+1); + + switch (msg) { + case IPI_RESCHEDULE: + scheduler_ipi(); + break; + + case IPI_CALL_FUNC: + generic_smp_call_function_interrupt(); + break; + + case IPI_CALL_FUNC_SINGLE: + generic_smp_call_function_single_interrupt(); + break; + + case IPI_CPU_STOP: + ipi_cpu_stop(cpu); + break; + } + } while (msg < BITS_PER_LONG); + +} + +/* + * arch-common ISR to handle for inter-processor interrupts + * Has hooks for platform specific IPI + */ +irqreturn_t do_IPI(int irq, void *dev_id) +{ + int cpu = smp_processor_id(); + struct ipi_data *ipi = &per_cpu(ipi_data, cpu); + unsigned long ops; + + arc_platform_ipi_clear(cpu, irq); + + /* + * XXX: is this loop really needed + * And do we need to move ipi_clean inside + */ + while ((ops = xchg(&ipi->bits, 0)) != 0) + __do_IPI(&ops, ipi, cpu); + + return IRQ_HANDLED; +} + +/* + * API called by platform code to hookup arch-common ISR to their IPI IRQ + */ +static DEFINE_PER_CPU(int, ipi_dev); +int smp_ipi_irq_setup(int cpu, int irq) +{ + int *dev_id = &per_cpu(ipi_dev, smp_processor_id()); + return request_percpu_irq(irq, do_IPI, "IPI Interrupt", dev_id); +} diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index 232a0ff80a5e..e96030c13b52 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -474,6 +474,12 @@ void __init arc_mmu_init(void) /* Enable the MMU */ write_aux_reg(ARC_REG_PID, MMU_ENABLE); + + /* In smp we use this reg for interrupt 1 scratch */ +#ifndef CONFIG_SMP + /* swapper_pg_dir is the pgd for the kernel, used by vmalloc */ + write_aux_reg(ARC_REG_SCRATCH_DATA0, swapper_pg_dir); +#endif } /* diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S index 164b02169498..4b1ad2d905ca 100644 --- a/arch/arc/mm/tlbex.S +++ b/arch/arc/mm/tlbex.S @@ -57,9 +57,15 @@ .global ex_saved_reg1 .align 1 << L1_CACHE_SHIFT ; IMP: Must be Cache Line aligned .type ex_saved_reg1, @object +#ifdef CONFIG_SMP + .size ex_saved_reg1, (CONFIG_NR_CPUS << L1_CACHE_SHIFT) +ex_saved_reg1: + .zero (CONFIG_NR_CPUS << L1_CACHE_SHIFT) +#else .size ex_saved_reg1, 16 ex_saved_reg1: .zero 16 +#endif ;============================================================================ ; Troubleshooting Stuff @@ -116,7 +122,13 @@ ex_saved_reg1: lr r2, [efa] +#ifndef CONFIG_SMP lr r1, [ARC_REG_SCRATCH_DATA0] ; current pgd +#else + GET_CURR_TASK_ON_CPU r1 + ld r1, [r1, TASK_ACT_MM] + ld r1, [r1, MM_PGD] +#endif lsr r0, r2, PGDIR_SHIFT ; Bits for indexing into PGD ld.as r1, [r1, r0] ; PGD entry corresp to faulting addr @@ -192,12 +204,28 @@ ex_saved_reg1: ; ".size ex_saved_reg1, 16" ; [All of this dance is to avoid stack switching for each TLB Miss, since we ; only need to save only a handful of regs, as opposed to complete reg file] +; +; For ARC700 SMP, the "global" obviously can't be used for free up the FIRST +; core reg as it will not be SMP safe. +; Thus scratch AUX reg is used (and no longer used to cache task PGD). +; To save the rest of 3 regs - per cpu, the global is made "per-cpu". +; Epilogue thus has to locate the "per-cpu" storage for regs. +; To avoid cache line bouncing the per-cpu global is aligned/sized per +; L1_CACHE_SHIFT, despite fundamentally needing to be 12 bytes only. Hence +; ".size ex_saved_reg1, (CONFIG_NR_CPUS << L1_CACHE_SHIFT)" ; As simple as that.... .macro TLBMISS_FREEUP_REGS +#ifdef CONFIG_SMP + sr r0, [ARC_REG_SCRATCH_DATA0] ; freeup r0 to code with + GET_CPU_ID r0 ; get to per cpu scratch mem, + lsl r0, r0, L1_CACHE_SHIFT ; cache line wide per cpu + add r0, @ex_saved_reg1, r0 +#else st r0, [@ex_saved_reg1] mov_s r0, @ex_saved_reg1 +#endif st_s r1, [r0, 4] st_s r2, [r0, 8] st_s r3, [r0, 12] @@ -210,11 +238,21 @@ ex_saved_reg1: ;----------------------------------------------------------------- .macro TLBMISS_RESTORE_REGS +#ifdef CONFIG_SMP + GET_CPU_ID r0 ; get to per cpu scratch mem + lsl r0, r0, L1_CACHE_SHIFT ; each is cache line wide + add r0, @ex_saved_reg1, r0 + ld_s r3, [r0,12] + ld_s r2, [r0, 8] + ld_s r1, [r0, 4] + lr r0, [ARC_REG_SCRATCH_DATA0] +#else mov_s r0, @ex_saved_reg1 ld_s r3, [r0,12] ld_s r2, [r0, 8] ld_s r1, [r0, 4] ld_s r0, [r0] +#endif .endm .section .text, "ax",@progbits ;Fast Path Code, candidate for ICCM diff --git a/arch/arc/plat-arcfpga/Kconfig b/arch/arc/plat-arcfpga/Kconfig index 7af3a4e7f7d2..38752bfb91e0 100644 --- a/arch/arc/plat-arcfpga/Kconfig +++ b/arch/arc/plat-arcfpga/Kconfig @@ -13,6 +13,7 @@ choice config ARC_BOARD_ANGEL4 bool "ARC Angel4" + select ISS_SMP_EXTN if SMP help ARC Angel4 FPGA Ref Platform (Xilinx Virtex Based) @@ -21,6 +22,19 @@ config ARC_BOARD_ML509 help ARC ML509 FPGA Ref Platform (Xilinx Virtex-5 Based) +config ISS_SMP_EXTN + bool "ARC SMP Extensions (ISS Models only)" + default n + depends on SMP + select ARC_HAS_COH_RTSC + help + SMP Extensions to ARC700, in a "simulation only" Model, supported in + ARC ISS (Instruction Set Simulator). + The SMP extensions include: + -IDU (Interrupt Distribution Unit) + -XTL (To enable CPU start/stop/set-PC for another CPU) + It doesn't provide coherent Caches and/or Atomic Ops (LLOCK/SCOND) + endchoice config ARC_SERIAL_BAUD diff --git a/arch/arc/plat-arcfpga/Makefile b/arch/arc/plat-arcfpga/Makefile index 385eb9d83b63..2a828bec8212 100644 --- a/arch/arc/plat-arcfpga/Makefile +++ b/arch/arc/plat-arcfpga/Makefile @@ -7,3 +7,4 @@ # obj-y := platform.o irq.o +obj-$(CONFIG_SMP) += smp.o diff --git a/arch/arc/plat-arcfpga/include/plat/irq.h b/arch/arc/plat-arcfpga/include/plat/irq.h index b34e08734c65..255b90e973ee 100644 --- a/arch/arc/plat-arcfpga/include/plat/irq.h +++ b/arch/arc/plat-arcfpga/include/plat/irq.h @@ -12,7 +12,11 @@ #ifndef __PLAT_IRQ_H #define __PLAT_IRQ_H -#define NR_IRQS 16 +#ifdef CONFIG_SMP +#define NR_IRQS 32 +#else +#define NR_IRQS 16 +#endif #define UART0_IRQ 5 #define UART1_IRQ 10 @@ -24,4 +28,8 @@ #define PCI_IRQ 14 #define PS2_IRQ 15 +#ifdef CONFIG_SMP +#define IDU_INTERRUPT_0 16 +#endif + #endif diff --git a/arch/arc/plat-arcfpga/include/plat/smp.h b/arch/arc/plat-arcfpga/include/plat/smp.h new file mode 100644 index 000000000000..8c5e46c01b15 --- /dev/null +++ b/arch/arc/plat-arcfpga/include/plat/smp.h @@ -0,0 +1,115 @@ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Rajeshwar Ranga: Interrupt Distribution Unit API's + */ + +#ifndef __PLAT_ARCFPGA_SMP_H +#define __PLAT_ARCFPGA_SMP_H + +#ifdef CONFIG_SMP + +#include +#include + +#define ARC_AUX_IDU_REG_CMD 0x2000 +#define ARC_AUX_IDU_REG_PARAM 0x2001 + +#define ARC_AUX_XTL_REG_CMD 0x2002 +#define ARC_AUX_XTL_REG_PARAM 0x2003 + +#define ARC_REG_MP_BCR 0x2021 + +#define ARC_XTL_CMD_WRITE_PC 0x04 +#define ARC_XTL_CMD_CLEAR_HALT 0x02 + +/* + * Build Configuration Register which identifies the sub-components + */ +struct bcr_mp { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int mp_arch:16, pad:5, sdu:1, idu:1, scu:1, ver:8; +#else + unsigned int ver:8, scu:1, idu:1, sdu:1, pad:5, mp_arch:16; +#endif +}; + +/* IDU supports 256 common interrupts */ +#define NR_IDU_IRQS 256 + +/* + * The Aux Regs layout is same bit-by-bit in both BE/LE modes. + * However when casted as a bitfield encoded "C" struct, gcc treats it as + * memory, generating different code for BE/LE, requiring strcture adj (see + * include/asm/arcregs.h) + * + * However when manually "carving" the value for a Aux, no special handling + * of BE is needed because of the property discribed above + */ +#define IDU_SET_COMMAND(irq, cmd) \ +do { \ + uint32_t __val; \ + __val = (((irq & 0xFF) << 8) | (cmd & 0xFF)); \ + write_aux_reg(ARC_AUX_IDU_REG_CMD, __val); \ +} while (0) + +#define IDU_SET_PARAM(par) write_aux_reg(ARC_AUX_IDU_REG_PARAM, par) +#define IDU_GET_PARAM() read_aux_reg(ARC_AUX_IDU_REG_PARAM) + +/* IDU Commands */ +#define IDU_DISABLE 0x00 +#define IDU_ENABLE 0x01 +#define IDU_IRQ_CLEAR 0x02 +#define IDU_IRQ_ASSERT 0x03 +#define IDU_IRQ_WMODE 0x04 +#define IDU_IRQ_STATUS 0x05 +#define IDU_IRQ_ACK 0x06 +#define IDU_IRQ_PEND 0x07 +#define IDU_IRQ_RMODE 0x08 +#define IDU_IRQ_WBITMASK 0x09 +#define IDU_IRQ_RBITMASK 0x0A + +#define idu_enable() IDU_SET_COMMAND(0, IDU_ENABLE) +#define idu_disable() IDU_SET_COMMAND(0, IDU_DISABLE) + +#define idu_irq_assert(irq) IDU_SET_COMMAND((irq), IDU_IRQ_ASSERT) +#define idu_irq_clear(irq) IDU_SET_COMMAND((irq), IDU_IRQ_CLEAR) + +/* IDU Interrupt Mode - Destination Encoding */ +#define IDU_IRQ_MOD_DISABLE 0x00 +#define IDU_IRQ_MOD_ROUND_RECP 0x01 +#define IDU_IRQ_MOD_TCPU_FIRSTRECP 0x02 +#define IDU_IRQ_MOD_TCPU_ALLRECP 0x03 + +/* IDU Interrupt Mode - Triggering Mode */ +#define IDU_IRQ_MODE_LEVEL_TRIG 0x00 +#define IDU_IRQ_MODE_PULSE_TRIG 0x01 + +#define IDU_IRQ_MODE_PARAM(dest_mode, trig_mode) \ + (((trig_mode & 0x01) << 15) | (dest_mode & 0xFF)) + +struct idu_irq_config { + uint8_t irq; + uint8_t dest_mode; + uint8_t trig_mode; +}; + +struct idu_irq_status { + uint8_t irq; + bool enabled; + bool status; + bool ack; + bool pend; + uint8_t next_rr; +}; + +extern void idu_irq_set_tgtcpu(uint8_t irq, uint32_t mask); +extern void idu_irq_set_mode(uint8_t irq, uint8_t dest_mode, uint8_t trig_mode); + +#endif /* CONFIG_SMP */ + +#endif diff --git a/arch/arc/plat-arcfpga/irq.c b/arch/arc/plat-arcfpga/irq.c index ed726360b9f6..590edd174c47 100644 --- a/arch/arc/plat-arcfpga/irq.c +++ b/arch/arc/plat-arcfpga/irq.c @@ -9,7 +9,17 @@ */ #include +#include void __init plat_init_IRQ(void) { + /* + * SMP Hack because UART IRQ hardwired to cpu0 (boot-cpu) but if the + * request_irq() comes from any other CPU, the low level IRQ unamsking + * essential for getting Interrupts won't be enabled on cpu0, locking + * up the UART state machine. + */ +#ifdef CONFIG_SMP + arch_unmask_irq(UART0_IRQ); +#endif } diff --git a/arch/arc/plat-arcfpga/smp.c b/arch/arc/plat-arcfpga/smp.c new file mode 100644 index 000000000000..a95fcdb29033 --- /dev/null +++ b/arch/arc/plat-arcfpga/smp.c @@ -0,0 +1,167 @@ +/* + * ARC700 Simulation-only Extensions for SMP + * + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Vineet Gupta - 2012 : split off arch common and plat specific SMP + * Rajeshwar Ranga - 2007 : Interrupt Distribution Unit API's + */ + +#include +#include +#include + +static char smp_cpuinfo_buf[128]; + +/* + *------------------------------------------------------------------- + * Platform specific callbacks expected by arch SMP code + *------------------------------------------------------------------- + */ + +const char *arc_platform_smp_cpuinfo(void) +{ +#define IS_AVAIL1(var, str) ((var) ? str : "") + + struct bcr_mp mp; + + READ_BCR(ARC_REG_MP_BCR, mp); + + sprintf(smp_cpuinfo_buf, "Extn [700-SMP]: v%d, arch(%d) %s %s %s\n", + mp.ver, mp.mp_arch, IS_AVAIL1(mp.scu, "SCU"), + IS_AVAIL1(mp.idu, "IDU"), IS_AVAIL1(mp.sdu, "SDU")); + + return smp_cpuinfo_buf; +} + +/* + * Master kick starting another CPU + */ +void arc_platform_smp_wakeup_cpu(int cpu, unsigned long pc) +{ + /* setup the start PC */ + write_aux_reg(ARC_AUX_XTL_REG_PARAM, pc); + + /* Trigger WRITE_PC cmd for this cpu */ + write_aux_reg(ARC_AUX_XTL_REG_CMD, + (ARC_XTL_CMD_WRITE_PC | (cpu << 8))); + + /* Take the cpu out of Halt */ + write_aux_reg(ARC_AUX_XTL_REG_CMD, + (ARC_XTL_CMD_CLEAR_HALT | (cpu << 8))); + +} + +/* + * Any SMP specific init any CPU does when it comes up. + * Here we setup the CPU to enable Inter-Processor-Interrupts + * Called for each CPU + * -Master : init_IRQ() + * -Other(s) : start_kernel_secondary() + */ +void arc_platform_smp_init_cpu(void) +{ + int cpu = smp_processor_id(); + + /* Check if CPU is configured for more than 16 interrupts */ + if (NR_IRQS <= 16 || get_hw_config_num_irq() <= 16) + panic("[arcfpga] IRQ system can't support IDU IPI\n"); + + idu_disable(); + + /**************************************************************** + * IDU provides a set of Common IRQs, each of which can be dynamically + * attached to (1|many|all) CPUs. + * The Common IRQs [0-15] are mapped as CPU pvt [16-31] + * + * Here we use a simple 1:1 mapping: + * A CPU 'x' is wired to Common IRQ 'x'. + * So an IDU ASSERT on IRQ 'x' will trigger Interupt on CPU 'x', which + * makes up for our simple IPI plumbing. + * + * TBD: Have a dedicated multicast IRQ for sending IPIs to all CPUs + * w/o having to do one-at-a-time + ******************************************************************/ + + /* + * Claim an IRQ which would trigger IPI on this CPU. + * In IDU parlance it involves setting up a cpu bitmask for the IRQ + * The bitmap here contains only 1 CPU (self). + */ + idu_irq_set_tgtcpu(cpu, 0x1 << cpu); + + /* Set the IRQ destination to use the bitmask above */ + idu_irq_set_mode(cpu, 7, /* XXX: IDU_IRQ_MOD_TCPU_ALLRECP: ISS bug */ + IDU_IRQ_MODE_PULSE_TRIG); + + idu_enable(); + + /* Attach the arch-common IPI ISR to our IDU IRQ */ + smp_ipi_irq_setup(cpu, IDU_INTERRUPT_0 + cpu); +} + +void arc_platform_ipi_send(const struct cpumask *callmap) +{ + unsigned int cpu; + + for_each_cpu(cpu, callmap) + idu_irq_assert(cpu); +} + +void arc_platform_ipi_clear(int cpu, int irq) +{ + idu_irq_clear(IDU_INTERRUPT_0 + cpu); +} + +/* + *------------------------------------------------------------------- + * Low level Platform IPI Providers + *------------------------------------------------------------------- + */ + +/* Set the Mode for the Common IRQ */ +void idu_irq_set_mode(uint8_t irq, uint8_t dest_mode, uint8_t trig_mode) +{ + uint32_t par = IDU_IRQ_MODE_PARAM(dest_mode, trig_mode); + + IDU_SET_PARAM(par); + IDU_SET_COMMAND(irq, IDU_IRQ_WMODE); +} + +/* Set the target cpu Bitmask for Common IRQ */ +void idu_irq_set_tgtcpu(uint8_t irq, uint32_t mask) +{ + IDU_SET_PARAM(mask); + IDU_SET_COMMAND(irq, IDU_IRQ_WBITMASK); +} + +/* Get the Interrupt Acknowledged status for IRQ (as CPU Bitmask) */ +bool idu_irq_get_ack(uint8_t irq) +{ + uint32_t val; + + IDU_SET_COMMAND(irq, IDU_IRQ_ACK); + val = IDU_GET_PARAM(); + + return val & (1 << irq); +} + +/* + * Get the Interrupt Pending status for IRQ (as CPU Bitmask) + * -Pending means CPU has not yet noticed the IRQ (e.g. disabled) + * -After Interrupt has been taken, the IPI expcitily needs to be + * cleared, to be acknowledged. + */ +bool idu_irq_get_pend(uint8_t irq) +{ + uint32_t val; + + IDU_SET_COMMAND(irq, IDU_IRQ_PEND); + val = IDU_GET_PARAM(); + + return val & (1 << irq); +} -- cgit v1.2.3 From af61742813aa9dde65ca796801e36d03b83fa79f Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 18 Jan 2013 15:12:24 +0530 Subject: ARC: Boot #2: Verbose Boot reporting / feature verification Signed-off-by: Vineet Gupta --- arch/arc/Makefile | 2 + arch/arc/include/asm/arcregs.h | 122 ++++++++++++++++++++-- arch/arc/include/asm/defines.h | 56 +++++++++++ arch/arc/include/asm/setup.h | 14 +++ arch/arc/kernel/setup.c | 223 ++++++++++++++++++++++++++++++++++++++++- arch/arc/mm/cache_arc700.c | 46 +++++++++ arch/arc/mm/tlb.c | 38 +++++++ 7 files changed, 490 insertions(+), 11 deletions(-) create mode 100644 arch/arc/include/asm/defines.h (limited to 'arch/arc/mm/tlb.c') diff --git a/arch/arc/Makefile b/arch/arc/Makefile index fae66bfc2bdb..9a36c04e4306 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -20,6 +20,8 @@ export PLATFORM cflags-y += -Iarch/arc/plat-$(PLATFORM)/include cflags-y += -mA7 -fno-common -pipe -fno-builtin -D__linux__ +LINUXINCLUDE += -include ${src}/arch/arc/include/asm/defines.h + ifdef CONFIG_ARC_CURR_IN_REG # For a global register defintion, make sure it gets passed to every file # We had a customer reported bug where some code built in kernel was NOT using diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index 9e42611e39d3..1b907c465666 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -12,8 +12,26 @@ #ifdef __KERNEL__ /* Build Configuration Registers */ +#define ARC_REG_DCCMBASE_BCR 0x61 /* DCCM Base Addr */ +#define ARC_REG_CRC_BCR 0x62 +#define ARC_REG_DVFB_BCR 0x64 +#define ARC_REG_EXTARITH_BCR 0x65 #define ARC_REG_VECBASE_BCR 0x68 +#define ARC_REG_PERIBASE_BCR 0x69 +#define ARC_REG_FP_BCR 0x6B /* Single-Precision FPU */ +#define ARC_REG_DPFP_BCR 0x6C /* Dbl Precision FPU */ #define ARC_REG_MMU_BCR 0x6f +#define ARC_REG_DCCM_BCR 0x74 /* DCCM Present + SZ */ +#define ARC_REG_TIMERS_BCR 0x75 +#define ARC_REG_ICCM_BCR 0x78 +#define ARC_REG_XY_MEM_BCR 0x79 +#define ARC_REG_MAC_BCR 0x7a +#define ARC_REG_MUL_BCR 0x7b +#define ARC_REG_SWAP_BCR 0x7c +#define ARC_REG_NORM_BCR 0x7d +#define ARC_REG_MIXMAX_BCR 0x7e +#define ARC_REG_BARREL_BCR 0x7f +#define ARC_REG_D_UNCACH_BCR 0x6A /* status32 Bits Positions */ #define STATUS_H_BIT 0 /* CPU Halted */ @@ -88,16 +106,6 @@ #define TIMER_CTRL_IE (1 << 0) /* Interupt when Count reachs limit */ #define TIMER_CTRL_NH (1 << 1) /* Count only when CPU NOT halted */ -#if defined(CONFIG_ARC_MMU_V1) -#define CONFIG_ARC_MMU_VER 1 -#elif defined(CONFIG_ARC_MMU_V2) -#define CONFIG_ARC_MMU_VER 2 -#elif defined(CONFIG_ARC_MMU_V3) -#define CONFIG_ARC_MMU_VER 3 -#else -#error "Error: MMU ver" -#endif - /* MMU Management regs */ #define ARC_REG_TLBPD0 0x405 #define ARC_REG_TLBPD1 0x406 @@ -277,6 +285,13 @@ struct arc_fpu { *************************************************************** * Build Configuration Registers, with encoded hardware config */ +struct bcr_identity { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int chip_id:16, cpu_id:8, family:8; +#else + unsigned int family:8, cpu_id:8, chip_id:16; +#endif +}; struct bcr_mmu_1_2 { #ifdef CONFIG_CPU_BIG_ENDIAN @@ -296,6 +311,38 @@ struct bcr_mmu_3 { #endif }; +#define EXTN_SWAP_VALID 0x1 +#define EXTN_NORM_VALID 0x2 +#define EXTN_MINMAX_VALID 0x2 +#define EXTN_BARREL_VALID 0x2 + +struct bcr_extn { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad:20, crc:1, ext_arith:2, mul:2, barrel:2, minmax:2, + norm:2, swap:1; +#else + unsigned int swap:1, norm:2, minmax:2, barrel:2, mul:2, ext_arith:2, + crc:1, pad:20; +#endif +}; + +/* DSP Options Ref Manual */ +struct bcr_extn_mac_mul { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad:16, type:8, ver:8; +#else + unsigned int ver:8, type:8, pad:16; +#endif +}; + +struct bcr_extn_xymem { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int ram_org:2, num_banks:4, bank_sz:4, ver:8; +#else + unsigned int ver:8, bank_sz:4, num_banks:4, ram_org:2; +#endif +}; + struct bcr_cache { #ifdef CONFIG_CPU_BIG_ENDIAN unsigned int pad:12, line_len:4, sz:4, config:4, ver:8; @@ -304,6 +351,48 @@ struct bcr_cache { #endif }; +struct bcr_perip { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int start:8, pad2:8, sz:8, pad:8; +#else + unsigned int pad:8, sz:8, pad2:8, start:8; +#endif +}; +struct bcr_iccm { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int base:16, pad:5, sz:3, ver:8; +#else + unsigned int ver:8, sz:3, pad:5, base:16; +#endif +}; + +/* DCCM Base Address Register: ARC_REG_DCCMBASE_BCR */ +struct bcr_dccm_base { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int addr:24, ver:8; +#else + unsigned int ver:8, addr:24; +#endif +}; + +/* DCCM RAM Configuration Register: ARC_REG_DCCM_BCR */ +struct bcr_dccm { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int res:21, sz:3, ver:8; +#else + unsigned int ver:8, sz:3, res:21; +#endif +}; + +/* Both SP and DP FPU BCRs have same format */ +struct bcr_fp { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int fast:1, ver:8; +#else + unsigned int ver:8, fast:1; +#endif +}; + /* ******************************************************************* * Generic structures to hold build configuration used at runtime @@ -317,9 +406,22 @@ struct cpuinfo_arc_cache { unsigned int has_aliasing, sz, line_len, assoc, ver; }; +struct cpuinfo_arc_ccm { + unsigned int base_addr, sz; +}; + struct cpuinfo_arc { struct cpuinfo_arc_cache icache, dcache; struct cpuinfo_arc_mmu mmu; + struct bcr_identity core; + unsigned int timers; + unsigned int vec_base; + unsigned int uncached_base; + struct cpuinfo_arc_ccm iccm, dccm; + struct bcr_extn extn; + struct bcr_extn_xymem extn_xymem; + struct bcr_extn_mac_mul extn_mac_mul; + struct bcr_fp fp, dpfp; }; extern struct cpuinfo_arc cpuinfo_arc700[]; diff --git a/arch/arc/include/asm/defines.h b/arch/arc/include/asm/defines.h new file mode 100644 index 000000000000..6097bb439cc5 --- /dev/null +++ b/arch/arc/include/asm/defines.h @@ -0,0 +1,56 @@ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __ARC_ASM_DEFINES_H__ +#define __ARC_ASM_DEFINES_H__ + +#if defined(CONFIG_ARC_MMU_V1) +#define CONFIG_ARC_MMU_VER 1 +#elif defined(CONFIG_ARC_MMU_V2) +#define CONFIG_ARC_MMU_VER 2 +#elif defined(CONFIG_ARC_MMU_V3) +#define CONFIG_ARC_MMU_VER 3 +#endif + +#ifdef CONFIG_ARC_HAS_LLSC +#define __CONFIG_ARC_HAS_LLSC_VAL 1 +#else +#define __CONFIG_ARC_HAS_LLSC_VAL 0 +#endif + +#ifdef CONFIG_ARC_HAS_SWAPE +#define __CONFIG_ARC_HAS_SWAPE_VAL 1 +#else +#define __CONFIG_ARC_HAS_SWAPE_VAL 0 +#endif + +#ifdef CONFIG_ARC_HAS_RTSC +#define __CONFIG_ARC_HAS_RTSC_VAL 1 +#else +#define __CONFIG_ARC_HAS_RTSC_VAL 0 +#endif + +#ifdef CONFIG_ARC_MMU_SASID +#define __CONFIG_ARC_MMU_SASID_VAL 1 +#else +#define __CONFIG_ARC_MMU_SASID_VAL 0 +#endif + +#ifdef CONFIG_ARC_HAS_ICACHE +#define __CONFIG_ARC_HAS_ICACHE 1 +#else +#define __CONFIG_ARC_HAS_ICACHE 0 +#endif + +#ifdef CONFIG_ARC_HAS_DCACHE +#define __CONFIG_ARC_HAS_DCACHE 1 +#else +#define __CONFIG_ARC_HAS_DCACHE 0 +#endif + +#endif /* __ARC_ASM_DEFINES_H__ */ diff --git a/arch/arc/include/asm/setup.h b/arch/arc/include/asm/setup.h index ab427e6a2cb5..fc97411269fe 100644 --- a/arch/arc/include/asm/setup.h +++ b/arch/arc/include/asm/setup.h @@ -13,6 +13,20 @@ #define COMMAND_LINE_SIZE 256 +/* + * Data structure to map a ID to string + * Used a lot for bootup reporting of hardware diversity + */ +struct id_to_str { + int id; + const char *str; +}; + +struct cpuinfo_data { + struct id_to_str info; + int up_range; +}; + extern int root_mountflags, end_mem; extern int running_on_hw; diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 6e3996cb9df9..e25538e29fe7 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -24,6 +24,7 @@ #include #include #include +#include #define FIX_PTR(x) __asm__ __volatile__(";" : "+r"(x)) @@ -35,10 +36,205 @@ struct task_struct *_current_task[NR_CPUS]; /* For stack switching */ struct cpuinfo_arc cpuinfo_arc700[NR_CPUS]; + void __init read_arc_build_cfg_regs(void) { + struct bcr_perip uncached_space; + struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()]; + FIX_PTR(cpu); + + READ_BCR(AUX_IDENTITY, cpu->core); + + cpu->timers = read_aux_reg(ARC_REG_TIMERS_BCR); + + cpu->vec_base = read_aux_reg(AUX_INTR_VEC_BASE); + if (cpu->vec_base == 0) + cpu->vec_base = (unsigned int)_int_vec_base_lds; + + READ_BCR(ARC_REG_D_UNCACH_BCR, uncached_space); + cpu->uncached_base = uncached_space.start << 24; + + cpu->extn.mul = read_aux_reg(ARC_REG_MUL_BCR); + cpu->extn.swap = read_aux_reg(ARC_REG_SWAP_BCR); + cpu->extn.norm = read_aux_reg(ARC_REG_NORM_BCR); + cpu->extn.minmax = read_aux_reg(ARC_REG_MIXMAX_BCR); + cpu->extn.barrel = read_aux_reg(ARC_REG_BARREL_BCR); + READ_BCR(ARC_REG_MAC_BCR, cpu->extn_mac_mul); + + cpu->extn.ext_arith = read_aux_reg(ARC_REG_EXTARITH_BCR); + cpu->extn.crc = read_aux_reg(ARC_REG_CRC_BCR); + + READ_BCR(ARC_REG_XY_MEM_BCR, cpu->extn_xymem); + read_decode_mmu_bcr(); read_decode_cache_bcr(); + + READ_BCR(ARC_REG_FP_BCR, cpu->fp); + READ_BCR(ARC_REG_DPFP_BCR, cpu->dpfp); +} + +static const struct cpuinfo_data arc_cpu_tbl[] = { + { {0x10, "ARCTangent A5"}, 0x1F}, + { {0x20, "ARC 600" }, 0x2F}, + { {0x30, "ARC 700" }, 0x33}, + { {0x34, "ARC 700 R4.10"}, 0x34}, + { {0x00, NULL } } +}; + +char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) +{ + int n = 0; + struct cpuinfo_arc *cpu = &cpuinfo_arc700[cpu_id]; + struct bcr_identity *core = &cpu->core; + const struct cpuinfo_data *tbl; + int be = 0; +#ifdef CONFIG_CPU_BIG_ENDIAN + be = 1; +#endif + FIX_PTR(cpu); + + n += scnprintf(buf + n, len - n, + "\nARC IDENTITY\t: Family [%#02x]" + " Cpu-id [%#02x] Chip-id [%#4x]\n", + core->family, core->cpu_id, + core->chip_id); + + for (tbl = &arc_cpu_tbl[0]; tbl->info.id != 0; tbl++) { + if ((core->family >= tbl->info.id) && + (core->family <= tbl->up_range)) { + n += scnprintf(buf + n, len - n, + "processor\t: %s %s\n", + tbl->info.str, + be ? "[Big Endian]" : ""); + break; + } + } + + if (tbl->info.id == 0) + n += scnprintf(buf + n, len - n, "UNKNOWN ARC Processor\n"); + + n += scnprintf(buf + n, len - n, "CPU speed\t: %u.%02u Mhz\n", + (unsigned int)(arc_get_core_freq() / 1000000), + (unsigned int)(arc_get_core_freq() / 10000) % 100); + + n += scnprintf(buf + n, len - n, "Timers\t\t: %s %s\n", + (cpu->timers & 0x200) ? "TIMER1" : "", + (cpu->timers & 0x100) ? "TIMER0" : ""); + + n += scnprintf(buf + n, len - n, "Vect Tbl Base\t: %#x\n", + cpu->vec_base); + + n += scnprintf(buf + n, len - n, "UNCACHED Base\t: %#x\n", + cpu->uncached_base); + + return buf; +} + +static const struct id_to_str mul_type_nm[] = { + { 0x0, "N/A"}, + { 0x1, "32x32 (spl Result Reg)" }, + { 0x2, "32x32 (ANY Result Reg)" } +}; + +static const struct id_to_str mac_mul_nm[] = { + {0x0, "N/A"}, + {0x1, "N/A"}, + {0x2, "Dual 16 x 16"}, + {0x3, "N/A"}, + {0x4, "32x16"}, + {0x5, "N/A"}, + {0x6, "Dual 16x16 and 32x16"} +}; + +char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len) +{ + int n = 0; + struct cpuinfo_arc *cpu = &cpuinfo_arc700[cpu_id]; + + FIX_PTR(cpu); +#define IS_AVAIL1(var, str) ((var) ? str : "") +#define IS_AVAIL2(var, str) ((var == 0x2) ? str : "") +#define IS_USED(var) ((var) ? "(in-use)" : "(not used)") + + n += scnprintf(buf + n, len - n, + "Extn [700-Base]\t: %s %s %s %s %s %s\n", + IS_AVAIL2(cpu->extn.norm, "norm,"), + IS_AVAIL2(cpu->extn.barrel, "barrel-shift,"), + IS_AVAIL1(cpu->extn.swap, "swap,"), + IS_AVAIL2(cpu->extn.minmax, "minmax,"), + IS_AVAIL1(cpu->extn.crc, "crc,"), + IS_AVAIL2(cpu->extn.ext_arith, "ext-arith")); + + n += scnprintf(buf + n, len - n, "Extn [700-MPY]\t: %s", + mul_type_nm[cpu->extn.mul].str); + + n += scnprintf(buf + n, len - n, " MAC MPY: %s\n", + mac_mul_nm[cpu->extn_mac_mul.type].str); + + if (cpu->core.family == 0x34) { + n += scnprintf(buf + n, len - n, + "Extn [700-4.10]\t: LLOCK/SCOND %s, SWAPE %s, RTSC %s\n", + IS_USED(__CONFIG_ARC_HAS_LLSC_VAL), + IS_USED(__CONFIG_ARC_HAS_SWAPE_VAL), + IS_USED(__CONFIG_ARC_HAS_RTSC_VAL)); + } + + n += scnprintf(buf + n, len - n, "Extn [CCM]\t: %s", + !(cpu->dccm.sz || cpu->iccm.sz) ? "N/A" : ""); + + if (cpu->dccm.sz) + n += scnprintf(buf + n, len - n, "DCCM: @ %x, %d KB ", + cpu->dccm.base_addr, TO_KB(cpu->dccm.sz)); + + if (cpu->iccm.sz) + n += scnprintf(buf + n, len - n, "ICCM: @ %x, %d KB", + cpu->iccm.base_addr, TO_KB(cpu->iccm.sz)); + + n += scnprintf(buf + n, len - n, "\nExtn [FPU]\t: %s", + !(cpu->fp.ver || cpu->dpfp.ver) ? "N/A" : ""); + + if (cpu->fp.ver) + n += scnprintf(buf + n, len - n, "SP [v%d] %s", + cpu->fp.ver, cpu->fp.fast ? "(fast)" : ""); + + if (cpu->dpfp.ver) + n += scnprintf(buf + n, len - n, "DP [v%d] %s", + cpu->dpfp.ver, cpu->dpfp.fast ? "(fast)" : ""); + + n += scnprintf(buf + n, len - n, "\n"); + +#ifdef _ASM_GENERIC_UNISTD_H + n += scnprintf(buf + n, len - n, + "OS ABI [v2]\t: asm-generic/{unistd,stat,fcntl}\n"); +#endif + + return buf; +} + +/* + * Ensure that FP hardware and kernel config match + * -If hardware contains DPFP, kernel needs to save/restore FPU state + * across context switches + * -If hardware lacks DPFP, but kernel configured to save FPU state then + * kernel trying to access non-existant DPFP regs will crash + * + * We only check for Dbl precision Floating Point, because only DPFP + * hardware has dedicated regs which need to be saved/restored on ctx-sw + * (Single Precision uses core regs), thus kernel is kind of oblivious to it + */ +void __init arc_chk_fpu(void) +{ + struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()]; + + if (cpu->dpfp.ver) { +#ifndef CONFIG_ARC_FPU_SAVE_RESTORE + pr_warn("DPFP support broken in this kernel...\n"); +#endif + } else { +#ifdef CONFIG_ARC_FPU_SAVE_RESTORE + panic("H/w lacks DPFP support, apps won't work\n"); +#endif + } } /* @@ -49,10 +245,25 @@ void __init read_arc_build_cfg_regs(void) void __init setup_processor(void) { + char str[512]; + int cpu_id = smp_processor_id(); + read_arc_build_cfg_regs(); arc_init_IRQ(); + + printk(arc_cpu_mumbojumbo(cpu_id, str, sizeof(str))); + arc_mmu_init(); arc_cache_init(); + + + printk(arc_extn_mumbojumbo(cpu_id, str, sizeof(str))); + +#ifdef CONFIG_SMP + printk(arc_platform_smp_cpuinfo()); +#endif + + arc_chk_fpu(); } void __init __attribute__((weak)) arc_platform_early_init(void) @@ -126,12 +337,22 @@ static int show_cpuinfo(struct seq_file *m, void *v) if (!str) goto done; - seq_printf(m, "ARC700 #%d\n", cpu_id); + seq_printf(m, arc_cpu_mumbojumbo(cpu_id, str, PAGE_SIZE)); seq_printf(m, "Bogo MIPS : \t%lu.%02lu\n", loops_per_jiffy / (500000 / HZ), (loops_per_jiffy / (5000 / HZ)) % 100); + seq_printf(m, arc_mmu_mumbojumbo(cpu_id, str, PAGE_SIZE)); + + seq_printf(m, arc_cache_mumbojumbo(cpu_id, str, PAGE_SIZE)); + + seq_printf(m, arc_extn_mumbojumbo(cpu_id, str, PAGE_SIZE)); + +#ifdef CONFIG_SMP + seq_printf(m, arc_platform_smp_cpuinfo()); +#endif + free_page((unsigned long)str); done: seq_printf(m, "\n\n"); diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c index 670f65bb2ef1..c299b30924ee 100644 --- a/arch/arc/mm/cache_arc700.c +++ b/arch/arc/mm/cache_arc700.c @@ -82,6 +82,28 @@ static void __ic_line_inv_4_alias(unsigned long, int); static void (*___flush_icache_rtn) (unsigned long, int); #endif +char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len) +{ + int n = 0; + unsigned int c = smp_processor_id(); + +#define PR_CACHE(p, enb, str) \ +{ \ + if (!(p)->ver) \ + n += scnprintf(buf + n, len - n, str"\t\t: N/A\n"); \ + else \ + n += scnprintf(buf + n, len - n, \ + str"\t\t: (%uK) VIPT, %dway set-asc, %ub Line %s\n", \ + TO_KB((p)->sz), (p)->assoc, (p)->line_len, \ + enb ? "" : "DISABLED (kernel-build)"); \ +} + + PR_CACHE(&cpuinfo_arc700[c].icache, __CONFIG_ARC_HAS_ICACHE, "I-Cache"); + PR_CACHE(&cpuinfo_arc700[c].dcache, __CONFIG_ARC_HAS_DCACHE, "D-Cache"); + + return buf; +} + /* * Read the Cache Build Confuration Registers, Decode them and save into * the cpuinfo structure for later use. @@ -132,10 +154,29 @@ void __init arc_cache_init(void) struct cpuinfo_arc_cache *dc; #endif int way_pg_ratio = way_pg_ratio; + char str[256]; + + printk(arc_cache_mumbojumbo(0, str, sizeof(str))); #ifdef CONFIG_ARC_HAS_ICACHE ic = &cpuinfo_arc700[cpu].icache; + /* 1. Confirm some of I-cache params which Linux assumes */ + if ((ic->assoc != ARC_ICACHE_WAYS) || + (ic->line_len != ARC_ICACHE_LINE_LEN)) { + panic("Cache H/W doesn't match kernel Config"); + } +#if (CONFIG_ARC_MMU_VER > 2) + if (ic->ver != 3) { + if (running_on_hw) + panic("Cache ver doesn't match MMU ver\n"); + + /* For ISS - suggest the toggles to use */ + pr_err("Use -prop=icache_version=3,-prop=dcache_version=3\n"); + + } +#endif + /* * if Cache way size is <= page size then no aliasing exhibited * otherwise ratio determines num of aliases. @@ -175,6 +216,11 @@ void __init arc_cache_init(void) #ifdef CONFIG_ARC_HAS_DCACHE dc = &cpuinfo_arc700[cpu].dcache; + if ((dc->assoc != ARC_DCACHE_WAYS) || + (dc->line_len != ARC_DCACHE_LINE_LEN)) { + panic("Cache H/W doesn't match kernel Config"); + } + /* check for D-Cache aliasing */ if ((dc->sz / ARC_DCACHE_WAYS) > PAGE_SIZE) panic("D$ aliasing not handled right now\n"); diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index e96030c13b52..9b9ce23f4ec3 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -463,8 +463,46 @@ void __init read_decode_mmu_bcr(void) mmu->num_tlb = mmu->sets * mmu->ways; } +char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len) +{ + int n = 0; + struct cpuinfo_arc_mmu *p_mmu = &cpuinfo_arc700[smp_processor_id()].mmu; + + n += scnprintf(buf + n, len - n, "ARC700 MMU [v%x]\t: %dk PAGE, ", + p_mmu->ver, TO_KB(p_mmu->pg_sz)); + + n += scnprintf(buf + n, len - n, + "J-TLB %d (%dx%d), uDTLB %d, uITLB %d, %s\n", + p_mmu->num_tlb, p_mmu->sets, p_mmu->ways, + p_mmu->u_dtlb, p_mmu->u_itlb, + __CONFIG_ARC_MMU_SASID_VAL ? "SASID" : ""); + + return buf; +} + void __init arc_mmu_init(void) { + char str[256]; + struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; + + printk(arc_mmu_mumbojumbo(0, str, sizeof(str))); + + /* For efficiency sake, kernel is compile time built for a MMU ver + * This must match the hardware it is running on. + * Linux built for MMU V2, if run on MMU V1 will break down because V1 + * hardware doesn't understand cmds such as WriteNI, or IVUTLB + * On the other hand, Linux built for V1 if run on MMU V2 will do + * un-needed workarounds to prevent memcpy thrashing. + * Similarly MMU V3 has new features which won't work on older MMU + */ + if (mmu->ver != CONFIG_ARC_MMU_VER) { + panic("MMU ver %d doesn't match kernel built for %d...\n", + mmu->ver, CONFIG_ARC_MMU_VER); + } + + if (mmu->pg_sz != PAGE_SIZE) + panic("MMU pg size != PAGE_SIZE (%luk)\n", TO_KB(PAGE_SIZE)); + /* * ASID mgmt data structures are compile time init * asid_cache = FIRST_ASID and asid_mm_map[] all zeroes -- cgit v1.2.3