From c0fcaf554ed9b65477576439e8de63578d731070 Mon Sep 17 00:00:00 2001 From: Jonas Bonn Date: Wed, 9 May 2012 23:19:44 +0200 Subject: openrisc: use SPARSE_IRQ The sparse IRQ framework is preferred nowadays so switch over to it. Signed-off-by: Jonas Bonn Signed-off-by: Stafford Horne --- arch/openrisc/Kconfig | 1 + arch/openrisc/include/asm/Kbuild | 1 + 2 files changed, 2 insertions(+) (limited to 'arch/openrisc') diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig index 8d22015fde3e..1e95920b0737 100644 --- a/arch/openrisc/Kconfig +++ b/arch/openrisc/Kconfig @@ -12,6 +12,7 @@ config OPENRISC select HAVE_MEMBLOCK select GPIOLIB select HAVE_ARCH_TRACEHOOK + select SPARSE_IRQ select GENERIC_IRQ_CHIP select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild index 2832f031fb11..6dd177d28c3b 100644 --- a/arch/openrisc/include/asm/Kbuild +++ b/arch/openrisc/include/asm/Kbuild @@ -29,6 +29,7 @@ generic-y += hw_irq.h generic-y += ioctl.h generic-y += ioctls.h generic-y += ipcbuf.h +generic-y += irq.h generic-y += irq_regs.h generic-y += irq_work.h generic-y += kdebug.h -- cgit v1.2.3 From 3e06a16339303016b2c57b350a15afeaa7ba7813 Mon Sep 17 00:00:00 2001 From: Stefan Kristiansson Date: Sat, 27 Apr 2013 21:02:32 +0300 Subject: openrisc: add cache way information to cpuinfo Motivation for this is to be able to print the way information properly in print_cpuinfo(), instead of hardcoding it to one. Signed-off-by: Stefan Kristiansson Signed-off-by: Jonas Bonn [shorne@gmail.com fixed conflict with show_cpuinfo change] Signed-off-by: Stafford Horne --- arch/openrisc/include/asm/cpuinfo.h | 2 ++ arch/openrisc/kernel/setup.c | 67 ++++++++++++++++++++----------------- 2 files changed, 38 insertions(+), 31 deletions(-) (limited to 'arch/openrisc') diff --git a/arch/openrisc/include/asm/cpuinfo.h b/arch/openrisc/include/asm/cpuinfo.h index 917318b6a970..ec10679d6429 100644 --- a/arch/openrisc/include/asm/cpuinfo.h +++ b/arch/openrisc/include/asm/cpuinfo.h @@ -24,9 +24,11 @@ struct cpuinfo { u32 icache_size; u32 icache_block_size; + u32 icache_ways; u32 dcache_size; u32 dcache_block_size; + u32 dcache_ways; }; extern struct cpuinfo cpuinfo; diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c index cb797a3beb47..dbf5ee95a0d5 100644 --- a/arch/openrisc/kernel/setup.c +++ b/arch/openrisc/kernel/setup.c @@ -117,13 +117,15 @@ static void print_cpuinfo(void) if (upr & SPR_UPR_DCP) printk(KERN_INFO "-- dcache: %4d bytes total, %2d bytes/line, %d way(s)\n", - cpuinfo.dcache_size, cpuinfo.dcache_block_size, 1); + cpuinfo.dcache_size, cpuinfo.dcache_block_size, + cpuinfo.dcache_ways); else printk(KERN_INFO "-- dcache disabled\n"); if (upr & SPR_UPR_ICP) printk(KERN_INFO "-- icache: %4d bytes total, %2d bytes/line, %d way(s)\n", - cpuinfo.icache_size, cpuinfo.icache_block_size, 1); + cpuinfo.icache_size, cpuinfo.icache_block_size, + cpuinfo.icache_ways); else printk(KERN_INFO "-- icache disabled\n"); @@ -155,25 +157,25 @@ void __init setup_cpuinfo(void) { struct device_node *cpu; unsigned long iccfgr, dccfgr; - unsigned long cache_set_size, cache_ways; + unsigned long cache_set_size; cpu = of_find_compatible_node(NULL, NULL, "opencores,or1200-rtlsvn481"); if (!cpu) panic("No compatible CPU found in device tree...\n"); iccfgr = mfspr(SPR_ICCFGR); - cache_ways = 1 << (iccfgr & SPR_ICCFGR_NCW); + cpuinfo.icache_ways = 1 << (iccfgr & SPR_ICCFGR_NCW); cache_set_size = 1 << ((iccfgr & SPR_ICCFGR_NCS) >> 3); 
cpuinfo.icache_block_size = 16 << ((iccfgr & SPR_ICCFGR_CBS) >> 7); cpuinfo.icache_size = - cache_set_size * cache_ways * cpuinfo.icache_block_size; + cache_set_size * cpuinfo.icache_ways * cpuinfo.icache_block_size; dccfgr = mfspr(SPR_DCCFGR); - cache_ways = 1 << (dccfgr & SPR_DCCFGR_NCW); + cpuinfo.dcache_ways = 1 << (dccfgr & SPR_DCCFGR_NCW); cache_set_size = 1 << ((dccfgr & SPR_DCCFGR_NCS) >> 3); cpuinfo.dcache_block_size = 16 << ((dccfgr & SPR_DCCFGR_CBS) >> 7); cpuinfo.dcache_size = - cache_set_size * cache_ways * cpuinfo.dcache_block_size; + cache_set_size * cpuinfo.dcache_ways * cpuinfo.dcache_block_size; if (of_property_read_u32(cpu, "clock-frequency", &cpuinfo.clock_frequency)) { @@ -308,30 +310,33 @@ static int show_cpuinfo(struct seq_file *m, void *v) revision = vr & SPR_VR_REV; seq_printf(m, - "cpu\t\t: OpenRISC-%x\n" - "revision\t: %d\n" - "frequency\t: %ld\n" - "dcache size\t: %d bytes\n" - "dcache block size\t: %d bytes\n" - "icache size\t: %d bytes\n" - "icache block size\t: %d bytes\n" - "immu\t\t: %d entries, %lu ways\n" - "dmmu\t\t: %d entries, %lu ways\n" - "bogomips\t: %lu.%02lu\n", - version, - revision, - loops_per_jiffy * HZ, - cpuinfo.dcache_size, - cpuinfo.dcache_block_size, - cpuinfo.icache_size, - cpuinfo.icache_block_size, - 1 << ((mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTS) >> 2), - 1 + (mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTW), - 1 << ((mfspr(SPR_IMMUCFGR) & SPR_IMMUCFGR_NTS) >> 2), - 1 + (mfspr(SPR_IMMUCFGR) & SPR_IMMUCFGR_NTW), - (loops_per_jiffy * HZ) / 500000, - ((loops_per_jiffy * HZ) / 5000) % 100); - + "cpu\t\t: OpenRISC-%x\n" + "revision\t: %d\n" + "frequency\t: %ld\n" + "dcache size\t: %d bytes\n" + "dcache block size\t: %d bytes\n" + "dcache ways\t: %d\n" + "icache size\t: %d bytes\n" + "icache block size\t: %d bytes\n" + "icache ways\t: %d\n" + "immu\t\t: %d entries, %lu ways\n" + "dmmu\t\t: %d entries, %lu ways\n" + "bogomips\t: %lu.%02lu\n", + version, + revision, + loops_per_jiffy * HZ, + cpuinfo.dcache_size, + cpuinfo.dcache_block_size, + cpuinfo.dcache_ways, + cpuinfo.icache_size, + cpuinfo.icache_block_size, + cpuinfo.icache_ways, + 1 << ((mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTS) >> 2), + 1 + (mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTW), + 1 << ((mfspr(SPR_IMMUCFGR) & SPR_IMMUCFGR_NTS) >> 2), + 1 + (mfspr(SPR_IMMUCFGR) & SPR_IMMUCFGR_NTW), + (loops_per_jiffy * HZ) / 500000, + ((loops_per_jiffy * HZ) / 5000) % 100); return 0; } -- cgit v1.2.3 From 742fb582b4540a7b59427dc67be1fe4a5eac7078 Mon Sep 17 00:00:00 2001 From: Stefan Kristiansson Date: Thu, 1 Aug 2013 10:46:10 +0300 Subject: openrisc: tlb miss handler optimizations By slightly reorganizing the code, the number of registers used in the tlb miss handlers can be reduced by two, thus removing the need to save them to memory. Also, some dead and commented out code is removed. No functional change. 
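For orientation, the page-table walk both miss handlers encode corresponds roughly to the C sketch below. This is illustrative only (walk_pte and its helpers are not kernel code), and the physical/virtual translation done by tophys is omitted:

#include <stdint.h>

#define PAGE_SHIFT 13u            /* 8 KiB pages; matches the 0xd shifts in the handlers */
#define PAGE_MASK  0xffffe000u

/* Return the PTE for vaddr, or 0 when the walk must fall through to
 * the page fault path. pgd points at the current page directory. */
static uint32_t walk_pte(const uint32_t *pgd, uint32_t vaddr)
{
	uint32_t pmd = pgd[vaddr >> 24];	/* pgd_index: top 8 bits */

	if (pmd == 0)
		return 0;			/* pmd_none -> the *_pmd_none path */

	const uint32_t *pte_table =
		(const uint32_t *)(uintptr_t)(pmd & PAGE_MASK);
	uint32_t pte = pte_table[(vaddr >> PAGE_SHIFT) & 0x7ff];

	if (!(pte & 0x1))
		return 0;			/* -> the *_pte_not_present path */
	return pte;				/* masked, then written to the TLB */
}
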
Signed-off-by: Stefan Kristiansson Signed-off-by: Jonas Bonn Signed-off-by: Stafford Horne --- arch/openrisc/kernel/head.S | 141 +++++++++++++++----------------------------- 1 file changed, 46 insertions(+), 95 deletions(-) (limited to 'arch/openrisc') diff --git a/arch/openrisc/kernel/head.S b/arch/openrisc/kernel/head.S index f14793306b03..2346c5b39095 100644 --- a/arch/openrisc/kernel/head.S +++ b/arch/openrisc/kernel/head.S @@ -971,8 +971,6 @@ ENTRY(dtlb_miss_handler) EXCEPTION_STORE_GPR2 EXCEPTION_STORE_GPR3 EXCEPTION_STORE_GPR4 - EXCEPTION_STORE_GPR5 - EXCEPTION_STORE_GPR6 /* * get EA of the miss */ @@ -980,91 +978,70 @@ ENTRY(dtlb_miss_handler) /* * pmd = (pmd_t *)(current_pgd + pgd_index(daddr)); */ - GET_CURRENT_PGD(r3,r5) // r3 is current_pgd, r5 is temp + GET_CURRENT_PGD(r3,r4) // r3 is current_pgd, r4 is temp l.srli r4,r2,0x18 // >> PAGE_SHIFT + (PAGE_SHIFT - 2) l.slli r4,r4,0x2 // to get address << 2 - l.add r5,r4,r3 // r4 is pgd_index(daddr) + l.add r3,r4,r3 // r4 is pgd_index(daddr) /* * if (pmd_none(*pmd)) * goto pmd_none: */ - tophys (r4,r5) + tophys (r4,r3) l.lwz r3,0x0(r4) // get *pmd value l.sfne r3,r0 l.bnf d_pmd_none - l.andi r3,r3,~PAGE_MASK //0x1fff // ~PAGE_MASK - /* - * if (pmd_bad(*pmd)) - * pmd_clear(pmd) - * goto pmd_bad: - */ -// l.sfeq r3,r0 // check *pmd value -// l.bf d_pmd_good - l.addi r3,r0,0xffffe000 // PAGE_MASK -// l.j d_pmd_bad -// l.sw 0x0(r4),r0 // clear pmd + l.addi r3,r0,0xffffe000 // PAGE_MASK + d_pmd_good: /* * pte = *pte_offset(pmd, daddr); */ l.lwz r4,0x0(r4) // get **pmd value l.and r4,r4,r3 // & PAGE_MASK - l.srli r5,r2,0xd // >> PAGE_SHIFT, r2 == EEAR - l.andi r3,r5,0x7ff // (1UL << PAGE_SHIFT - 2) - 1 + l.srli r2,r2,0xd // >> PAGE_SHIFT, r2 == EEAR + l.andi r3,r2,0x7ff // (1UL << PAGE_SHIFT - 2) - 1 l.slli r3,r3,0x2 // to get address << 2 l.add r3,r3,r4 - l.lwz r2,0x0(r3) // this is pte at last + l.lwz r3,0x0(r3) // this is pte at last /* * if (!pte_present(pte)) */ - l.andi r4,r2,0x1 + l.andi r4,r3,0x1 l.sfne r4,r0 // is pte present l.bnf d_pte_not_present - l.addi r3,r0,0xffffe3fa // PAGE_MASK | DTLB_UP_CONVERT_MASK + l.addi r4,r0,0xffffe3fa // PAGE_MASK | DTLB_UP_CONVERT_MASK /* * fill DTLB TR register */ - l.and r4,r2,r3 // apply the mask + l.and r4,r3,r4 // apply the mask // Determine number of DMMU sets - l.mfspr r6, r0, SPR_DMMUCFGR - l.andi r6, r6, SPR_DMMUCFGR_NTS - l.srli r6, r6, SPR_DMMUCFGR_NTS_OFF + l.mfspr r2, r0, SPR_DMMUCFGR + l.andi r2, r2, SPR_DMMUCFGR_NTS + l.srli r2, r2, SPR_DMMUCFGR_NTS_OFF l.ori r3, r0, 0x1 - l.sll r3, r3, r6 // r3 = number DMMU sets DMMUCFGR - l.addi r6, r3, -1 // r6 = nsets mask - l.and r5, r5, r6 // calc offset: & (NUM_TLB_ENTRIES-1) + l.sll r3, r3, r2 // r3 = number DMMU sets DMMUCFGR + l.addi r2, r3, -1 // r2 = nsets mask + l.mfspr r3, r0, SPR_EEAR_BASE + l.srli r3, r3, 0xd // >> PAGE_SHIFT + l.and r2, r3, r2 // calc offset: & (NUM_TLB_ENTRIES-1) //NUM_TLB_ENTRIES - l.mtspr r5,r4,SPR_DTLBTR_BASE(0) + l.mtspr r2,r4,SPR_DTLBTR_BASE(0) /* * fill DTLB MR register */ - l.mfspr r2,r0,SPR_EEAR_BASE - l.addi r3,r0,0xffffe000 // PAGE_MASK - l.and r4,r2,r3 // apply PAGE_MASK to EA (__PHX__ do we really need this?) 
- l.ori r4,r4,0x1 // set hardware valid bit: DTBL_MR entry - l.mtspr r5,r4,SPR_DTLBMR_BASE(0) + l.slli r3, r3, 0xd /* << PAGE_SHIFT => EA & PAGE_MASK */ + l.ori r4,r3,0x1 // set hardware valid bit: DTBL_MR entry + l.mtspr r2,r4,SPR_DTLBMR_BASE(0) EXCEPTION_LOAD_GPR2 EXCEPTION_LOAD_GPR3 EXCEPTION_LOAD_GPR4 - EXCEPTION_LOAD_GPR5 - EXCEPTION_LOAD_GPR6 - l.rfe -d_pmd_bad: - l.nop 1 - EXCEPTION_LOAD_GPR2 - EXCEPTION_LOAD_GPR3 - EXCEPTION_LOAD_GPR4 - EXCEPTION_LOAD_GPR5 - EXCEPTION_LOAD_GPR6 l.rfe d_pmd_none: d_pte_not_present: EXCEPTION_LOAD_GPR2 EXCEPTION_LOAD_GPR3 EXCEPTION_LOAD_GPR4 - EXCEPTION_LOAD_GPR5 - EXCEPTION_LOAD_GPR6 EXCEPTION_HANDLE(_dtlb_miss_page_fault_handler) /* ==============================================[ ITLB miss handler ]=== */ @@ -1072,8 +1049,6 @@ ENTRY(itlb_miss_handler) EXCEPTION_STORE_GPR2 EXCEPTION_STORE_GPR3 EXCEPTION_STORE_GPR4 - EXCEPTION_STORE_GPR5 - EXCEPTION_STORE_GPR6 /* * get EA of the miss */ @@ -1083,30 +1058,19 @@ ENTRY(itlb_miss_handler) * pmd = (pmd_t *)(current_pgd + pgd_index(daddr)); * */ - GET_CURRENT_PGD(r3,r5) // r3 is current_pgd, r5 is temp + GET_CURRENT_PGD(r3,r4) // r3 is current_pgd, r5 is temp l.srli r4,r2,0x18 // >> PAGE_SHIFT + (PAGE_SHIFT - 2) l.slli r4,r4,0x2 // to get address << 2 - l.add r5,r4,r3 // r4 is pgd_index(daddr) + l.add r3,r4,r3 // r4 is pgd_index(daddr) /* * if (pmd_none(*pmd)) * goto pmd_none: */ - tophys (r4,r5) + tophys (r4,r3) l.lwz r3,0x0(r4) // get *pmd value l.sfne r3,r0 l.bnf i_pmd_none - l.andi r3,r3,0x1fff // ~PAGE_MASK - /* - * if (pmd_bad(*pmd)) - * pmd_clear(pmd) - * goto pmd_bad: - */ - -// l.sfeq r3,r0 // check *pmd value -// l.bf i_pmd_good - l.addi r3,r0,0xffffe000 // PAGE_MASK -// l.j i_pmd_bad -// l.sw 0x0(r4),r0 // clear pmd + l.addi r3,r0,0xffffe000 // PAGE_MASK i_pmd_good: /* @@ -1115,35 +1079,36 @@ i_pmd_good: */ l.lwz r4,0x0(r4) // get **pmd value l.and r4,r4,r3 // & PAGE_MASK - l.srli r5,r2,0xd // >> PAGE_SHIFT, r2 == EEAR - l.andi r3,r5,0x7ff // (1UL << PAGE_SHIFT - 2) - 1 + l.srli r2,r2,0xd // >> PAGE_SHIFT, r2 == EEAR + l.andi r3,r2,0x7ff // (1UL << PAGE_SHIFT - 2) - 1 l.slli r3,r3,0x2 // to get address << 2 l.add r3,r3,r4 - l.lwz r2,0x0(r3) // this is pte at last + l.lwz r3,0x0(r3) // this is pte at last /* * if (!pte_present(pte)) * */ - l.andi r4,r2,0x1 + l.andi r4,r3,0x1 l.sfne r4,r0 // is pte present l.bnf i_pte_not_present - l.addi r3,r0,0xffffe03a // PAGE_MASK | ITLB_UP_CONVERT_MASK + l.addi r4,r0,0xffffe03a // PAGE_MASK | ITLB_UP_CONVERT_MASK /* * fill ITLB TR register */ - l.and r4,r2,r3 // apply the mask - l.andi r3,r2,0x7c0 // _PAGE_EXEC | _PAGE_SRE | _PAGE_SWE | _PAGE_URE | _PAGE_UWE -// l.andi r3,r2,0x400 // _PAGE_EXEC + l.and r4,r3,r4 // apply the mask + l.andi r3,r3,0x7c0 // _PAGE_EXEC | _PAGE_SRE | _PAGE_SWE | _PAGE_URE | _PAGE_UWE l.sfeq r3,r0 l.bf itlb_tr_fill //_workaround // Determine number of IMMU sets - l.mfspr r6, r0, SPR_IMMUCFGR - l.andi r6, r6, SPR_IMMUCFGR_NTS - l.srli r6, r6, SPR_IMMUCFGR_NTS_OFF + l.mfspr r2, r0, SPR_IMMUCFGR + l.andi r2, r2, SPR_IMMUCFGR_NTS + l.srli r2, r2, SPR_IMMUCFGR_NTS_OFF l.ori r3, r0, 0x1 - l.sll r3, r3, r6 // r3 = number IMMU sets IMMUCFGR - l.addi r6, r3, -1 // r6 = nsets mask - l.and r5, r5, r6 // calc offset: & (NUM_TLB_ENTRIES-1) + l.sll r3, r3, r2 // r3 = number IMMU sets IMMUCFGR + l.addi r2, r3, -1 // r2 = nsets mask + l.mfspr r3, r0, SPR_EEAR_BASE + l.srli r3, r3, 0xd // >> PAGE_SHIFT + l.and r2, r3, r2 // calc offset: & (NUM_TLB_ENTRIES-1) /* * __PHX__ :: fixme @@ -1155,38 +1120,24 @@ i_pmd_good: itlb_tr_fill_workaround: l.ori 
r4,r4,0xc0 // | (SPR_ITLBTR_UXE | ITLBTR_SXE) itlb_tr_fill: - l.mtspr r5,r4,SPR_ITLBTR_BASE(0) + l.mtspr r2,r4,SPR_ITLBTR_BASE(0) /* * fill DTLB MR register */ - l.mfspr r2,r0,SPR_EEAR_BASE - l.addi r3,r0,0xffffe000 // PAGE_MASK - l.and r4,r2,r3 // apply PAGE_MASK to EA (__PHX__ do we really need this?) - l.ori r4,r4,0x1 // set hardware valid bit: DTBL_MR entry - l.mtspr r5,r4,SPR_ITLBMR_BASE(0) + l.slli r3, r3, 0xd /* << PAGE_SHIFT => EA & PAGE_MASK */ + l.ori r4,r3,0x1 // set hardware valid bit: ITBL_MR entry + l.mtspr r2,r4,SPR_ITLBMR_BASE(0) EXCEPTION_LOAD_GPR2 EXCEPTION_LOAD_GPR3 EXCEPTION_LOAD_GPR4 - EXCEPTION_LOAD_GPR5 - EXCEPTION_LOAD_GPR6 l.rfe -i_pmd_bad: - l.nop 1 - EXCEPTION_LOAD_GPR2 - EXCEPTION_LOAD_GPR3 - EXCEPTION_LOAD_GPR4 - EXCEPTION_LOAD_GPR5 - EXCEPTION_LOAD_GPR6 - l.rfe i_pmd_none: i_pte_not_present: EXCEPTION_LOAD_GPR2 EXCEPTION_LOAD_GPR3 EXCEPTION_LOAD_GPR4 - EXCEPTION_LOAD_GPR5 - EXCEPTION_LOAD_GPR6 EXCEPTION_HANDLE(_itlb_miss_page_fault_handler) /* ==============================================[ boot tlb handlers ]=== */ -- cgit v1.2.3 From c2dc72437a5504bf8f4343ed83ae745afa388522 Mon Sep 17 00:00:00 2001 From: Stefan Kristiansson Date: Wed, 8 Jan 2014 13:15:05 +0200 Subject: openrisc: head: use THREAD_SIZE instead of magic constant The stack size was hard-coded to 0x2000; use the standard THREAD_SIZE definition from thread_info.h instead. Signed-off-by: Stefan Kristiansson [shorne@gmail.com: Added body to the commit message] Signed-off-by: Stafford Horne --- arch/openrisc/kernel/head.S | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/openrisc') diff --git a/arch/openrisc/kernel/head.S b/arch/openrisc/kernel/head.S index 2346c5b39095..63ba2d96a860 100644 --- a/arch/openrisc/kernel/head.S +++ b/arch/openrisc/kernel/head.S @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -486,7 +487,8 @@ _start: /* * set up initial ksp and current */ - LOAD_SYMBOL_2_GPR(r1,init_thread_union+0x2000) // setup kernel stack + /* setup kernel stack */ + LOAD_SYMBOL_2_GPR(r1,init_thread_union + THREAD_SIZE) LOAD_SYMBOL_2_GPR(r10,init_thread_union) // setup current tophys (r31,r10) l.sw TI_KSP(r31), r1 -- cgit v1.2.3 From 8c9b7db0de3d64c9a6fcd12622636d4aa6a8c30c Mon Sep 17 00:00:00 2001 From: Stefan Kristiansson Date: Sat, 17 May 2014 19:18:58 +0300 Subject: openrisc: head: refactor out tlb flush into its own function This brings it in line with the other setup operations, like the cache enables _ic_enable and _dc_enable. It will also make it easier to initialize additional CPUs when SMP is introduced.
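In C, the loop being factored out amounts to the sketch below; mtspr() here is an assumed stand-in for the l.mtspr instruction, and the match-register bases are passed in as parameters rather than using the kernel's SPR base macros:

/* Assumed wrapper for the l.mtspr instruction (illustration only). */
extern void mtspr(unsigned long spr, unsigned long value);

/* Invalidate data and instruction TLBs by zeroing each match register. */
static void flush_tlb_sketch(unsigned long dtlbmr_base,
			     unsigned long itlbmr_base)
{
	unsigned int set;

	for (set = 0; set < 128; set++) {	/* 128 = maximum number of sets */
		mtspr(dtlbmr_base + set, 0);
		mtspr(itlbmr_base + set, 0);
	}
}

As a subroutine, _flush_tlb now returns through the link register r9, just as a C function would.
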
Signed-off-by: Stefan Kristiansson [shorne@gmail.com: Added commit body] Signed-off-by: Stafford Horne --- arch/openrisc/kernel/head.S | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) (limited to 'arch/openrisc') diff --git a/arch/openrisc/kernel/head.S b/arch/openrisc/kernel/head.S index 63ba2d96a860..a22f1fc40a6c 100644 --- a/arch/openrisc/kernel/head.S +++ b/arch/openrisc/kernel/head.S @@ -522,22 +522,8 @@ enable_dc: l.nop flush_tlb: - /* - * I N V A L I D A T E T L B e n t r i e s - */ - LOAD_SYMBOL_2_GPR(r5,SPR_DTLBMR_BASE(0)) - LOAD_SYMBOL_2_GPR(r6,SPR_ITLBMR_BASE(0)) - l.addi r7,r0,128 /* Maximum number of sets */ -1: - l.mtspr r5,r0,0x0 - l.mtspr r6,r0,0x0 - - l.addi r5,r5,1 - l.addi r6,r6,1 - l.sfeq r7,r0 - l.bnf 1b - l.addi r7,r7,-1 - + l.jal _flush_tlb + l.nop /* The MMU needs to be enabled before or32_early_setup is called */ @@ -629,6 +615,26 @@ jump_start_kernel: l.jr r30 l.nop +_flush_tlb: + /* + * I N V A L I D A T E T L B e n t r i e s + */ + LOAD_SYMBOL_2_GPR(r5,SPR_DTLBMR_BASE(0)) + LOAD_SYMBOL_2_GPR(r6,SPR_ITLBMR_BASE(0)) + l.addi r7,r0,128 /* Maximum number of sets */ +1: + l.mtspr r5,r0,0x0 + l.mtspr r6,r0,0x0 + + l.addi r5,r5,1 + l.addi r6,r6,1 + l.sfeq r7,r0 + l.bnf 1b + l.addi r7,r7,-1 + + l.jr r9 + l.nop + /* ========================================[ cache ]=== */ /* aligment here so we don't change memory offsets with -- cgit v1.2.3 From 63104c06a9eddf53f88f6d16196bb036c62967b2 Mon Sep 17 00:00:00 2001 From: Stefan Kristiansson Date: Mon, 3 Nov 2014 14:28:14 +0200 Subject: openrisc: add l.lwa/l.swa emulation This adds an emulation layer for implementations that lack the l.lwa and l.swa instructions. It handles these instructions both in kernel space and user space. Signed-off-by: Stefan Kristiansson [shorne@gmail.com: Added delay slot pc adjust logic] Signed-off-by: Stafford Horne --- arch/openrisc/kernel/entry.S | 22 ++++- arch/openrisc/kernel/process.c | 3 + arch/openrisc/kernel/traps.c | 183 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 206 insertions(+), 2 deletions(-) (limited to 'arch/openrisc') diff --git a/arch/openrisc/kernel/entry.S b/arch/openrisc/kernel/entry.S index aac0bde3330c..ba1a361c9a1e 100644 --- a/arch/openrisc/kernel/entry.S +++ b/arch/openrisc/kernel/entry.S @@ -173,6 +173,11 @@ handler: ;\ l.j _ret_from_exception ;\ l.nop +/* clobbers 'reg' */ +#define CLEAR_LWA_FLAG(reg) \ + l.movhi reg,hi(lwa_flag) ;\ + l.ori reg,reg,lo(lwa_flag) ;\ + l.sw 0(reg),r0 /* * NOTE: one should never assume that SPR_EPC, SPR_ESR, SPR_EEAR * contain the same values as when exception we're handling @@ -193,6 +198,7 @@ EXCEPTION_ENTRY(_tng_kernel_start) /* ---[ 0x200: BUS exception ]------------------------------------------- */ EXCEPTION_ENTRY(_bus_fault_handler) + CLEAR_LWA_FLAG(r3) /* r4: EA of fault (set by EXCEPTION_HANDLE) */ l.jal do_bus_fault l.addi r3,r1,0 /* pt_regs */ @@ -202,11 +208,13 @@ EXCEPTION_ENTRY(_bus_fault_handler) /* ---[ 0x300: Data Page Fault exception ]------------------------------- */ EXCEPTION_ENTRY(_dtlb_miss_page_fault_handler) + CLEAR_LWA_FLAG(r3) l.and r5,r5,r0 l.j 1f l.nop EXCEPTION_ENTRY(_data_page_fault_handler) + CLEAR_LWA_FLAG(r3) /* set up parameters for do_page_fault */ l.ori r5,r0,0x300 // exception vector 1: @@ -282,11 +290,13 @@ EXCEPTION_ENTRY(_data_page_fault_handler) /* ---[ 0x400: Insn Page Fault exception ]------------------------------- */ EXCEPTION_ENTRY(_itlb_miss_page_fault_handler) + CLEAR_LWA_FLAG(r3) l.and r5,r5,r0 l.j 1f l.nop 
EXCEPTION_ENTRY(_insn_page_fault_handler) + CLEAR_LWA_FLAG(r3) /* set up parameters for do_page_fault */ l.ori r5,r0,0x400 // exception vector 1: @@ -304,6 +314,7 @@ EXCEPTION_ENTRY(_insn_page_fault_handler) /* ---[ 0x500: Timer exception ]----------------------------------------- */ EXCEPTION_ENTRY(_timer_handler) + CLEAR_LWA_FLAG(r3) l.jal timer_interrupt l.addi r3,r1,0 /* pt_regs */ @@ -313,6 +324,7 @@ EXCEPTION_ENTRY(_timer_handler) /* ---[ 0x600: Aligment exception ]-------------------------------------- */ EXCEPTION_ENTRY(_alignment_handler) + CLEAR_LWA_FLAG(r3) /* r4: EA of fault (set by EXCEPTION_HANDLE) */ l.jal do_unaligned_access l.addi r3,r1,0 /* pt_regs */ @@ -509,6 +521,7 @@ EXCEPTION_ENTRY(_external_irq_handler) // l.sw PT_SR(r1),r4 1: #endif + CLEAR_LWA_FLAG(r3) l.addi r3,r1,0 l.movhi r8,hi(do_IRQ) l.ori r8,r8,lo(do_IRQ) @@ -556,8 +569,12 @@ ENTRY(_sys_call_handler) * they should be clobbered, otherwise */ l.sw PT_GPR3(r1),r3 - /* r4 already saved */ - /* r4 holds the EEAR address of the fault, load the original r4 */ + /* + * r4 already saved + * r4 holds the EEAR address of the fault, use it as screatch reg and + * then load the original r4 + */ + CLEAR_LWA_FLAG(r4) l.lwz r4,PT_GPR4(r1) l.sw PT_GPR5(r1),r5 l.sw PT_GPR6(r1),r6 @@ -776,6 +793,7 @@ UNHANDLED_EXCEPTION(_vector_0xd00,0xd00) /* ---[ 0xe00: Trap exception ]------------------------------------------ */ EXCEPTION_ENTRY(_trap_handler) + CLEAR_LWA_FLAG(r3) /* r4: EA of fault (set by EXCEPTION_HANDLE) */ l.jal do_trap l.addi r3,r1,0 /* pt_regs */ diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c index d7990df9025a..c49350b200e1 100644 --- a/arch/openrisc/kernel/process.c +++ b/arch/openrisc/kernel/process.c @@ -226,6 +226,7 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t * fpu) extern struct thread_info *_switch(struct thread_info *old_ti, struct thread_info *new_ti); +extern int lwa_flag; struct task_struct *__switch_to(struct task_struct *old, struct task_struct *new) @@ -243,6 +244,8 @@ struct task_struct *__switch_to(struct task_struct *old, new_ti = new->stack; old_ti = old->stack; + lwa_flag = 0; + current_thread_info_set[smp_processor_id()] = new_ti; last = (_switch(old_ti, new_ti))->task; diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c index a4574cb4b0fb..7907b6cf5d8a 100644 --- a/arch/openrisc/kernel/traps.c +++ b/arch/openrisc/kernel/traps.c @@ -40,6 +40,8 @@ extern char _etext, _stext; int kstack_depth_to_print = 0x180; +int lwa_flag; +unsigned long __user *lwa_addr; static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) { @@ -334,10 +336,191 @@ asmlinkage void do_bus_fault(struct pt_regs *regs, unsigned long address) } } +static inline int in_delay_slot(struct pt_regs *regs) +{ +#ifdef CONFIG_OPENRISC_NO_SPR_SR_DSX + /* No delay slot flag, do the old way */ + unsigned int op, insn; + + insn = *((unsigned int *)regs->pc); + op = insn >> 26; + switch (op) { + case 0x00: /* l.j */ + case 0x01: /* l.jal */ + case 0x03: /* l.bnf */ + case 0x04: /* l.bf */ + case 0x11: /* l.jr */ + case 0x12: /* l.jalr */ + return 1; + default: + return 0; + } +#else + return regs->sr & SPR_SR_DSX; +#endif +} + +static inline void adjust_pc(struct pt_regs *regs, unsigned long address) +{ + int displacement; + unsigned int rb, op, jmp; + + if (unlikely(in_delay_slot(regs))) { + /* In delay slot, instruction at pc is a branch, simulate it */ + jmp = *((unsigned int *)regs->pc); + + displacement = sign_extend32(((jmp) & 0x3ffffff) << 2, 27); + rb = (jmp & 
0x0000ffff) >> 11; + op = jmp >> 26; + + switch (op) { + case 0x00: /* l.j */ + regs->pc += displacement; + return; + case 0x01: /* l.jal */ + regs->pc += displacement; + regs->gpr[9] = regs->pc + 8; + return; + case 0x03: /* l.bnf */ + if (regs->sr & SPR_SR_F) + regs->pc += 8; + else + regs->pc += displacement; + return; + case 0x04: /* l.bf */ + if (regs->sr & SPR_SR_F) + regs->pc += displacement; + else + regs->pc += 8; + return; + case 0x11: /* l.jr */ + regs->pc = regs->gpr[rb]; + return; + case 0x12: /* l.jalr */ + regs->pc = regs->gpr[rb]; + regs->gpr[9] = regs->pc + 8; + return; + default: + break; + } + } else { + regs->pc += 4; + } +} + +static inline void simulate_lwa(struct pt_regs *regs, unsigned long address, + unsigned int insn) +{ + unsigned int ra, rd; + unsigned long value; + unsigned long orig_pc; + long imm; + + const struct exception_table_entry *entry; + + orig_pc = regs->pc; + adjust_pc(regs, address); + + ra = (insn >> 16) & 0x1f; + rd = (insn >> 21) & 0x1f; + imm = (short)insn; + lwa_addr = (unsigned long __user *)(regs->gpr[ra] + imm); + + if ((unsigned long)lwa_addr & 0x3) { + do_unaligned_access(regs, address); + return; + } + + if (get_user(value, lwa_addr)) { + if (user_mode(regs)) { + force_sig(SIGSEGV, current); + return; + } + + if ((entry = search_exception_tables(orig_pc))) { + regs->pc = entry->fixup; + return; + } + + /* kernel access in kernel space, load it directly */ + value = *((unsigned long *)lwa_addr); + } + + lwa_flag = 1; + regs->gpr[rd] = value; +} + +static inline void simulate_swa(struct pt_regs *regs, unsigned long address, + unsigned int insn) +{ + unsigned long __user *vaddr; + unsigned long orig_pc; + unsigned int ra, rb; + long imm; + + const struct exception_table_entry *entry; + + orig_pc = regs->pc; + adjust_pc(regs, address); + + ra = (insn >> 16) & 0x1f; + rb = (insn >> 11) & 0x1f; + imm = (short)(((insn & 0x2200000) >> 10) | (insn & 0x7ff)); + vaddr = (unsigned long __user *)(regs->gpr[ra] + imm); + + if (!lwa_flag || vaddr != lwa_addr) { + regs->sr &= ~SPR_SR_F; + return; + } + + if ((unsigned long)vaddr & 0x3) { + do_unaligned_access(regs, address); + return; + } + + if (put_user(regs->gpr[rb], vaddr)) { + if (user_mode(regs)) { + force_sig(SIGSEGV, current); + return; + } + + if ((entry = search_exception_tables(orig_pc))) { + regs->pc = entry->fixup; + return; + } + + /* kernel access in kernel space, store it directly */ + *((unsigned long *)vaddr) = regs->gpr[rb]; + } + + lwa_flag = 0; + regs->sr |= SPR_SR_F; +} + +#define INSN_LWA 0x1b +#define INSN_SWA 0x33 + asmlinkage void do_illegal_instruction(struct pt_regs *regs, unsigned long address) { siginfo_t info; + unsigned int op; + unsigned int insn = *((unsigned int *)address); + + op = insn >> 26; + + switch (op) { + case INSN_LWA: + simulate_lwa(regs, address, insn); + return; + + case INSN_SWA: + simulate_swa(regs, address, insn); + return; + + default: + break; + } if (user_mode(regs)) { /* Send a SIGILL */ -- cgit v1.2.3 From 0e9f9fd20ce931cc2cf0519c56ae33fc74d656ad Mon Sep 17 00:00:00 2001 From: Stefan Kristiansson Date: Mon, 12 May 2014 14:08:26 +0300 Subject: openrisc: add atomic bitops This utilize the load-link/store-conditional l.lwa and l.swa instructions to implement the atomic bitops. When those instructions are not available emulation is provided. 
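The pattern in each routine is the same: l.lwa loads the word and places a reservation on its address, l.swa stores only if the reservation is still intact (setting the flag on success), and l.bnf loops back to retry when the store failed. What the set_bit() loop achieves can be expressed with the GCC __atomic builtins, as in this sketch (set_bit_sketch is illustrative, not the kernel implementation):

#include <limits.h>

#define BIT_MASK(nr)	(1UL << ((nr) % (sizeof(long) * CHAR_BIT)))
#define BIT_WORD(nr)	((nr) / (sizeof(long) * CHAR_BIT))

static inline void set_bit_sketch(int nr, volatile unsigned long *addr)
{
	volatile unsigned long *p = addr + BIT_WORD(nr);

	/* Non-value-returning bitops impose no memory ordering: relaxed. */
	__atomic_fetch_or(p, BIT_MASK(nr), __ATOMIC_RELAXED);
}

The test_and_* variants differ only in also returning the old word so the caller can test the affected bit.
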
Acked-by: Peter Zijlstra (Intel) Cc: Peter Zijlstra Signed-off-by: Stefan Kristiansson [shorne@gmail.com: remove OPENRISC_HAVE_INST_LWA_SWA config suggesed by Alan Cox https://lkml.org/lkml/2014/7/23/666, implement test_and_change_bit] Signed-off-by: Stafford Horne --- arch/openrisc/include/asm/bitops.h | 2 +- arch/openrisc/include/asm/bitops/atomic.h | 123 ++++++++++++++++++++++++++++++ 2 files changed, 124 insertions(+), 1 deletion(-) create mode 100644 arch/openrisc/include/asm/bitops/atomic.h (limited to 'arch/openrisc') diff --git a/arch/openrisc/include/asm/bitops.h b/arch/openrisc/include/asm/bitops.h index 3003cdad561b..689f56819d53 100644 --- a/arch/openrisc/include/asm/bitops.h +++ b/arch/openrisc/include/asm/bitops.h @@ -45,7 +45,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/openrisc/include/asm/bitops/atomic.h b/arch/openrisc/include/asm/bitops/atomic.h new file mode 100644 index 000000000000..35fb85f61b4a --- /dev/null +++ b/arch/openrisc/include/asm/bitops/atomic.h @@ -0,0 +1,123 @@ +/* + * Copyright (C) 2014 Stefan Kristiansson + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#ifndef __ASM_OPENRISC_BITOPS_ATOMIC_H +#define __ASM_OPENRISC_BITOPS_ATOMIC_H + +static inline void set_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + unsigned long tmp; + + __asm__ __volatile__( + "1: l.lwa %0,0(%1) \n" + " l.or %0,%0,%2 \n" + " l.swa 0(%1),%0 \n" + " l.bnf 1b \n" + " l.nop \n" + : "=&r"(tmp) + : "r"(p), "r"(mask) + : "cc", "memory"); +} + +static inline void clear_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + unsigned long tmp; + + __asm__ __volatile__( + "1: l.lwa %0,0(%1) \n" + " l.and %0,%0,%2 \n" + " l.swa 0(%1),%0 \n" + " l.bnf 1b \n" + " l.nop \n" + : "=&r"(tmp) + : "r"(p), "r"(~mask) + : "cc", "memory"); +} + +static inline void change_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + unsigned long tmp; + + __asm__ __volatile__( + "1: l.lwa %0,0(%1) \n" + " l.xor %0,%0,%2 \n" + " l.swa 0(%1),%0 \n" + " l.bnf 1b \n" + " l.nop \n" + : "=&r"(tmp) + : "r"(p), "r"(mask) + : "cc", "memory"); +} + +static inline int test_and_set_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + unsigned long old; + unsigned long tmp; + + __asm__ __volatile__( + "1: l.lwa %0,0(%2) \n" + " l.or %1,%0,%3 \n" + " l.swa 0(%2),%1 \n" + " l.bnf 1b \n" + " l.nop \n" + : "=&r"(old), "=&r"(tmp) + : "r"(p), "r"(mask) + : "cc", "memory"); + + return (old & mask) != 0; +} + +static inline int test_and_clear_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + unsigned long old; + unsigned long tmp; + + __asm__ __volatile__( + "1: l.lwa %0,0(%2) \n" + " l.and %1,%0,%3 \n" + " l.swa 0(%2),%1 \n" + " l.bnf 1b \n" + " l.nop \n" + : "=&r"(old), "=&r"(tmp) + : "r"(p), "r"(~mask) + : "cc", "memory"); + + return (old & mask) != 0; +} + +static inline int test_and_change_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + 
unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + unsigned long old; + unsigned long tmp; + + __asm__ __volatile__( + "1: l.lwa %0,0(%2) \n" + " l.xor %1,%0,%3 \n" + " l.swa 0(%2),%1 \n" + " l.bnf 1b \n" + " l.nop \n" + : "=&r"(old), "=&r"(tmp) + : "r"(p), "r"(mask) + : "cc", "memory"); + + return (old & mask) != 0; +} + +#endif /* __ASM_OPENRISC_BITOPS_ATOMIC_H */ -- cgit v1.2.3 From 11595172537788f0007bfc16590aab18f2b9c40f Mon Sep 17 00:00:00 2001 From: Stefan Kristiansson Date: Tue, 13 May 2014 18:01:21 +0300 Subject: openrisc: add cmpxchg and xchg implementations Optimized version that make use of the l.lwa and l.swa atomic instruction pair. Most openrisc cores provide these instructions now, if not available emulation is provided. Acked-by: Peter Zijlstra (Intel) Cc: Peter Zijlstra Signed-off-by: Stefan Kristiansson [shorne@gmail.com: remove OPENRISC_HAVE_INST_LWA_SWA config suggesed by Alan Cox https://lkml.org/lkml/2014/7/23/666] [shorne@gmail.com: fixed unused calculated value compiler warning in define cmpxchg] Signed-off-by: Stafford Horne --- arch/openrisc/include/asm/Kbuild | 2 - arch/openrisc/include/asm/cmpxchg.h | 83 +++++++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+), 2 deletions(-) create mode 100644 arch/openrisc/include/asm/cmpxchg.h (limited to 'arch/openrisc') diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild index 6dd177d28c3b..15e6ed526453 100644 --- a/arch/openrisc/include/asm/Kbuild +++ b/arch/openrisc/include/asm/Kbuild @@ -10,8 +10,6 @@ generic-y += bugs.h generic-y += cacheflush.h generic-y += checksum.h generic-y += clkdev.h -generic-y += cmpxchg-local.h -generic-y += cmpxchg.h generic-y += cputime.h generic-y += current.h generic-y += device.h diff --git a/arch/openrisc/include/asm/cmpxchg.h b/arch/openrisc/include/asm/cmpxchg.h new file mode 100644 index 000000000000..5fcb9ac72693 --- /dev/null +++ b/arch/openrisc/include/asm/cmpxchg.h @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2014 Stefan Kristiansson + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#ifndef __ASM_OPENRISC_CMPXCHG_H +#define __ASM_OPENRISC_CMPXCHG_H + +#include + +/* + * This function doesn't exist, so you'll get a linker error + * if something tries to do an invalid cmpxchg(). + */ +extern void __cmpxchg_called_with_bad_pointer(void); + +#define __HAVE_ARCH_CMPXCHG 1 + +static inline unsigned long +__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size) +{ + if (size != 4) { + __cmpxchg_called_with_bad_pointer(); + return old; + } + + __asm__ __volatile__( + "1: l.lwa %0, 0(%1) \n" + " l.sfeq %0, %2 \n" + " l.bnf 2f \n" + " l.nop \n" + " l.swa 0(%1), %3 \n" + " l.bnf 1b \n" + " l.nop \n" + "2: \n" + : "=&r"(old) + : "r"(ptr), "r"(old), "r"(new) + : "cc", "memory"); + + return old; +} + +#define cmpxchg(ptr, o, n) \ + ({ \ + (__typeof__(*(ptr))) __cmpxchg((ptr), \ + (unsigned long)(o), \ + (unsigned long)(n), \ + sizeof(*(ptr))); \ + }) + +/* + * This function doesn't exist, so you'll get a linker error if + * something tries to do an invalidly-sized xchg(). 
+ */ +extern void __xchg_called_with_bad_pointer(void); + +static inline unsigned long __xchg(unsigned long val, volatile void *ptr, + int size) +{ + if (size != 4) { + __xchg_called_with_bad_pointer(); + return val; + } + + __asm__ __volatile__( + "1: l.lwa %0, 0(%1) \n" + " l.swa 0(%1), %2 \n" + " l.bnf 1b \n" + " l.nop \n" + : "=&r"(val) + : "r"(ptr), "r"(val) + : "cc", "memory"); + + return val; +} + +#define xchg(ptr, with) \ + ((typeof(*(ptr)))__xchg((unsigned long)(with), (ptr), sizeof(*(ptr)))) + +#endif /* __ASM_OPENRISC_CMPXCHG_H */ -- cgit v1.2.3 From bc19598f1dde267e5214e386b97bb647973275db Mon Sep 17 00:00:00 2001 From: Stefan Kristiansson Date: Tue, 13 May 2014 22:30:56 +0300 Subject: openrisc: add optimized atomic operations Using the l.lwa and l.swa atomic instruction pair. Most openrisc processor cores provide these instructions now. If the instructions are not available emulation is provided. Acked-by: Peter Zijlstra (Intel) Cc: Peter Zijlstra Signed-off-by: Stefan Kristiansson [shorne@gmail.com: remove OPENRISC_HAVE_INST_LWA_SWA config suggesed by Alan Cox https://lkml.org/lkml/2014/7/23/666] [shorne@gmail.com: expand to implement all ops suggested by Peter Zijlstra https://lkml.org/lkml/2017/2/20/317] Signed-off-by: Stafford Horne --- arch/openrisc/include/asm/Kbuild | 1 - arch/openrisc/include/asm/atomic.h | 126 +++++++++++++++++++++++++++++++++++++ include/asm-generic/atomic.h | 2 + 3 files changed, 128 insertions(+), 1 deletion(-) create mode 100644 arch/openrisc/include/asm/atomic.h (limited to 'arch/openrisc') diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild index 15e6ed526453..1cedd6309fa6 100644 --- a/arch/openrisc/include/asm/Kbuild +++ b/arch/openrisc/include/asm/Kbuild @@ -1,7 +1,6 @@ header-y += ucontext.h -generic-y += atomic.h generic-y += auxvec.h generic-y += barrier.h generic-y += bitsperlong.h diff --git a/arch/openrisc/include/asm/atomic.h b/arch/openrisc/include/asm/atomic.h new file mode 100644 index 000000000000..146e1660f00e --- /dev/null +++ b/arch/openrisc/include/asm/atomic.h @@ -0,0 +1,126 @@ +/* + * Copyright (C) 2014 Stefan Kristiansson + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#ifndef __ASM_OPENRISC_ATOMIC_H +#define __ASM_OPENRISC_ATOMIC_H + +#include + +/* Atomically perform op with v->counter and i */ +#define ATOMIC_OP(op) \ +static inline void atomic_##op(int i, atomic_t *v) \ +{ \ + int tmp; \ + \ + __asm__ __volatile__( \ + "1: l.lwa %0,0(%1) \n" \ + " l." #op " %0,%0,%2 \n" \ + " l.swa 0(%1),%0 \n" \ + " l.bnf 1b \n" \ + " l.nop \n" \ + : "=&r"(tmp) \ + : "r"(&v->counter), "r"(i) \ + : "cc", "memory"); \ +} + +/* Atomically perform op with v->counter and i, return the result */ +#define ATOMIC_OP_RETURN(op) \ +static inline int atomic_##op##_return(int i, atomic_t *v) \ +{ \ + int tmp; \ + \ + __asm__ __volatile__( \ + "1: l.lwa %0,0(%1) \n" \ + " l." #op " %0,%0,%2 \n" \ + " l.swa 0(%1),%0 \n" \ + " l.bnf 1b \n" \ + " l.nop \n" \ + : "=&r"(tmp) \ + : "r"(&v->counter), "r"(i) \ + : "cc", "memory"); \ + \ + return tmp; \ +} + +/* Atomically perform op with v->counter and i, return orig v->counter */ +#define ATOMIC_FETCH_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int tmp, old; \ + \ + __asm__ __volatile__( \ + "1: l.lwa %0,0(%2) \n" \ + " l." 
#op " %1,%0,%3 \n" \ + " l.swa 0(%2),%1 \n" \ + " l.bnf 1b \n" \ + " l.nop \n" \ + : "=&r"(old), "=&r"(tmp) \ + : "r"(&v->counter), "r"(i) \ + : "cc", "memory"); \ + \ + return old; \ +} + +ATOMIC_OP_RETURN(add) +ATOMIC_OP_RETURN(sub) + +ATOMIC_FETCH_OP(add) +ATOMIC_FETCH_OP(sub) +ATOMIC_FETCH_OP(and) +ATOMIC_FETCH_OP(or) +ATOMIC_FETCH_OP(xor) + +ATOMIC_OP(and) +ATOMIC_OP(or) +ATOMIC_OP(xor) + +#undef ATOMIC_FETCH_OP +#undef ATOMIC_OP_RETURN +#undef ATOMIC_OP + +#define atomic_add_return atomic_add_return +#define atomic_sub_return atomic_sub_return +#define atomic_fetch_add atomic_fetch_add +#define atomic_fetch_sub atomic_fetch_sub +#define atomic_fetch_and atomic_fetch_and +#define atomic_fetch_or atomic_fetch_or +#define atomic_fetch_xor atomic_fetch_xor +#define atomic_and atomic_and +#define atomic_or atomic_or +#define atomic_xor atomic_xor + +/* + * Atomically add a to v->counter as long as v is not already u. + * Returns the original value at v->counter. + * + * This is often used through atomic_inc_not_zero() + */ +static inline int __atomic_add_unless(atomic_t *v, int a, int u) +{ + int old, tmp; + + __asm__ __volatile__( + "1: l.lwa %0, 0(%2) \n" + " l.sfeq %0, %4 \n" + " l.bf 2f \n" + " l.add %1, %0, %3 \n" + " l.swa 0(%2), %1 \n" + " l.bnf 1b \n" + " l.nop \n" + "2: \n" + : "=&r"(old), "=&r" (tmp) + : "r"(&v->counter), "r"(a), "r"(u) + : "cc", "memory"); + + return old; +} +#define __atomic_add_unless __atomic_add_unless + +#include + +#endif /* __ASM_OPENRISC_ATOMIC_H */ diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h index 9ed8b987185b..3f38eb03649c 100644 --- a/include/asm-generic/atomic.h +++ b/include/asm-generic/atomic.h @@ -223,6 +223,7 @@ static inline void atomic_dec(atomic_t *v) #define atomic_xchg(ptr, v) (xchg(&(ptr)->counter, (v))) #define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), (old), (new))) +#ifndef __atomic_add_unless static inline int __atomic_add_unless(atomic_t *v, int a, int u) { int c, old; @@ -231,5 +232,6 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u) c = old; return c; } +#endif #endif /* __ASM_GENERIC_ATOMIC_H */ -- cgit v1.2.3 From 4ac46db1aaf16a009a8318cdba3acf03de85bcea Mon Sep 17 00:00:00 2001 From: Stefan Kristiansson Date: Tue, 15 Jul 2014 18:00:05 +0300 Subject: openrisc: add futex_atomic_* implementations Support for the futex_atomic_* operations by using the load-link/store-conditional l.lwa/l.swa instructions. Most openrisc cores provide these instructions now if not available, emulation is provided. 
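Stripped of the user-access fault handling that the .fixup and __ex_table sections supply in the real assembly, __futex_atomic_op behaves like the following C sketch of the FUTEX_OP_ADD case (futex_add_sketch is an illustrative name, not a kernel API):

/* Atomically *uaddr += oparg, returning the old value via *oldval. */
static int futex_add_sketch(int *uaddr, int oparg, int *oldval)
{
	int old = __atomic_load_n(uaddr, __ATOMIC_RELAXED);	/* l.lwa */
	int new;

	do {
		new = old + oparg;			/* the "insn" slot */
	} while (!__atomic_compare_exchange_n(uaddr, &old, new, 1,
					      __ATOMIC_RELAXED,
					      __ATOMIC_RELAXED)); /* l.swa + l.bnf retry */

	*oldval = old;
	return 0;		/* the asm instead returns -EFAULT via .fixup */
}
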
Acked-by: Peter Zijlstra (Intel) Cc: Peter Zijlstra Signed-off-by: Stefan Kristiansson [shorne@gmail.com: remove OPENRISC_HAVE_INST_LWA_SWA config suggesed by Alan Cox https://lkml.org/lkml/2014/7/23/666] Signed-off-by: Stafford Horne --- arch/openrisc/include/asm/Kbuild | 1 - arch/openrisc/include/asm/futex.h | 135 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 135 insertions(+), 1 deletion(-) create mode 100644 arch/openrisc/include/asm/futex.h (limited to 'arch/openrisc') diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild index 1cedd6309fa6..a669c141436e 100644 --- a/arch/openrisc/include/asm/Kbuild +++ b/arch/openrisc/include/asm/Kbuild @@ -20,7 +20,6 @@ generic-y += exec.h generic-y += fb.h generic-y += fcntl.h generic-y += ftrace.h -generic-y += futex.h generic-y += hardirq.h generic-y += hw_irq.h generic-y += ioctl.h diff --git a/arch/openrisc/include/asm/futex.h b/arch/openrisc/include/asm/futex.h new file mode 100644 index 000000000000..778087341977 --- /dev/null +++ b/arch/openrisc/include/asm/futex.h @@ -0,0 +1,135 @@ +#ifndef __ASM_OPENRISC_FUTEX_H +#define __ASM_OPENRISC_FUTEX_H + +#ifdef __KERNEL__ + +#include +#include +#include + +#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \ +({ \ + __asm__ __volatile__ ( \ + "1: l.lwa %0, %2 \n" \ + insn "\n" \ + "2: l.swa %2, %1 \n" \ + " l.bnf 1b \n" \ + " l.ori %1, r0, 0 \n" \ + "3: \n" \ + ".section .fixup,\"ax\" \n" \ + "4: l.j 3b \n" \ + " l.addi %1, r0, %3 \n" \ + ".previous \n" \ + ".section __ex_table,\"a\" \n" \ + ".word 1b,4b,2b,4b \n" \ + ".previous \n" \ + : "=&r" (oldval), "=&r" (ret), "+m" (*uaddr) \ + : "i" (-EFAULT), "r" (oparg) \ + : "cc", "memory" \ + ); \ +}) + +static inline int +futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) +{ + int op = (encoded_op >> 28) & 7; + int cmp = (encoded_op >> 24) & 15; + int oparg = (encoded_op << 8) >> 20; + int cmparg = (encoded_op << 20) >> 20; + int oldval = 0, ret; + + if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) + oparg = 1 << oparg; + + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) + return -EFAULT; + + pagefault_disable(); + + switch (op) { + case FUTEX_OP_SET: + __futex_atomic_op("l.or %1,%4,%4", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_ADD: + __futex_atomic_op("l.add %1,%0,%4", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_OR: + __futex_atomic_op("l.or %1,%0,%4", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_ANDN: + __futex_atomic_op("l.and %1,%0,%4", ret, oldval, uaddr, ~oparg); + break; + case FUTEX_OP_XOR: + __futex_atomic_op("l.xor %1,%0,%4", ret, oldval, uaddr, oparg); + break; + default: + ret = -ENOSYS; + } + + pagefault_enable(); + + if (!ret) { + switch (cmp) { + case FUTEX_OP_CMP_EQ: + ret = (oldval == cmparg); + break; + case FUTEX_OP_CMP_NE: + ret = (oldval != cmparg); + break; + case FUTEX_OP_CMP_LT: + ret = (oldval < cmparg); + break; + case FUTEX_OP_CMP_GE: + ret = (oldval >= cmparg); + break; + case FUTEX_OP_CMP_LE: + ret = (oldval <= cmparg); + break; + case FUTEX_OP_CMP_GT: + ret = (oldval > cmparg); + break; + default: + ret = -ENOSYS; + } + } + return ret; +} + +static inline int +futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) +{ + int ret = 0; + u32 prev; + + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) + return -EFAULT; + + __asm__ __volatile__ ( \ + "1: l.lwa %1, %2 \n" \ + " l.sfeq %1, %3 \n" \ + " l.bnf 3f \n" \ + " l.nop \n" \ + "2: l.swa %2, %4 \n" \ + " l.bnf 1b \n" \ + " l.nop \n" \ + "3: \n" \ + ".section 
.fixup,\"ax\" \n" \ "4: l.j 3b \n" \ " l.addi %0, r0, %5 \n" \ ".previous \n" \ ".section __ex_table,\"a\" \n" \ ".word 1b,4b,2b,4b \n" \ ".previous \n" \ : "+r" (ret), "=&r" (prev), "+m" (*uaddr) \ : "r" (oldval), "r" (newval), "i" (-EFAULT) \ : "cc", "memory" \ ); + + *uval = prev; + return ret; +} + +#endif /* __KERNEL__ */ + +#endif /* __ASM_OPENRISC_FUTEX_H */ -- cgit v1.2.3 From 89c94fdb9718d8ff3243faf8133a2176fd91f207 Mon Sep 17 00:00:00 2001 From: Stefan Kristiansson Date: Sun, 20 Jul 2014 01:00:31 +0300 Subject: openrisc: remove unnecessary stddef.h include This causes the build to fail when building with the or1k-musl-linux- toolchain, and it is not needed. Signed-off-by: Stafford Horne --- arch/openrisc/kernel/ptrace.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/openrisc') diff --git a/arch/openrisc/kernel/ptrace.c b/arch/openrisc/kernel/ptrace.c index 4f59fa4e34e5..228288887d74 100644 --- a/arch/openrisc/kernel/ptrace.c +++ b/arch/openrisc/kernel/ptrace.c @@ -16,7 +16,6 @@ * 2 of the License, or (at your option) any later version. */ -#include #include #include #include -- cgit v1.2.3 From 79f8a4d022f2d11d4fc90f785f575ac331e2ef71 Mon Sep 17 00:00:00 2001 From: Sebastian Macke Date: Sun, 20 Jul 2014 15:13:30 +0000 Subject: openrisc: Fix the bitmask for the unit present register The bits were swapped: per the spec and the processor implementations, the power management present bit is bit 9 and the PIC present bit is bit 8. This patch brings the definitions in line with the spec. Signed-off-by: Sebastian Macke [shorne@gmail.com: Added commit body] Signed-off-by: Stafford Horne --- arch/openrisc/include/asm/spr_defs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/openrisc') diff --git a/arch/openrisc/include/asm/spr_defs.h b/arch/openrisc/include/asm/spr_defs.h index 5dbc668865c4..367dac70326a 100644 --- a/arch/openrisc/include/asm/spr_defs.h +++ b/arch/openrisc/include/asm/spr_defs.h @@ -152,8 +152,8 @@ #define SPR_UPR_MP 0x00000020 /* MAC present */ #define SPR_UPR_DUP 0x00000040 /* Debug unit present */ #define SPR_UPR_PCUP 0x00000080 /* Performance counters unit present */ -#define SPR_UPR_PMP 0x00000100 /* Power management present */ -#define SPR_UPR_PICP 0x00000200 /* PIC present */ +#define SPR_UPR_PICP 0x00000100 /* PIC present */ +#define SPR_UPR_PMP 0x00000200 /* Power management present */ #define SPR_UPR_TTP 0x00000400 /* Tick timer present */ #define SPR_UPR_RES 0x00fe0000 /* Reserved */ #define SPR_UPR_CUP 0xff000000 /* Context units present */ -- cgit v1.2.3 From e29d11c6991d078bf2011f25dc8c19a2f38c4c8e Mon Sep 17 00:00:00 2001 From: Sebastian Macke Date: Sun, 20 Jul 2014 15:13:31 +0000 Subject: openrisc: Initial support for the idle state This patch adds basic support for the idle state of the cpu. The patch overrides the regular idle function, enables the interrupts, checks for the power management unit and enables the cpu doze mode if available. Signed-off-by: Sebastian Macke [shorne@gmail.com: Fixed checkpatch, blank line after declarations] Signed-off-by: Stafford Horne --- arch/openrisc/kernel/process.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch/openrisc') diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c index c49350b200e1..6e9d1cb519f2 100644 --- a/arch/openrisc/kernel/process.c +++ b/arch/openrisc/kernel/process.c @@ -75,6 +75,17 @@ void machine_power_off(void) __asm__("l.nop 1"); } +/* + * Send the doze signal to the cpu if available.
+ * Make sure, that all interrupts are enabled + */ +void arch_cpu_idle(void) +{ + local_irq_enable(); + if (mfspr(SPR_UPR) & SPR_UPR_PMP) + mtspr(SPR_PMR, mfspr(SPR_PMR) | SPR_PMR_DME); +} + void (*pm_power_off) (void) = machine_power_off; /* -- cgit v1.2.3 From d857a1e253498feb231173218df26f5562c70f09 Mon Sep 17 00:00:00 2001 From: Olof Kindgren Date: Fri, 6 Feb 2015 13:41:51 +0100 Subject: openrisc: Add optimized memset This adds a hand-optimized assembler version of memset and sets __HAVE_ARCH_MEMSET to use this version instead of the generic C routine Signed-off-by: Olof Kindgren Signed-off-by: Stafford Horne --- arch/openrisc/include/asm/string.h | 7 +++ arch/openrisc/kernel/or32_ksyms.c | 1 + arch/openrisc/lib/Makefile | 2 +- arch/openrisc/lib/memset.S | 98 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 107 insertions(+), 1 deletion(-) create mode 100644 arch/openrisc/include/asm/string.h create mode 100644 arch/openrisc/lib/memset.S (limited to 'arch/openrisc') diff --git a/arch/openrisc/include/asm/string.h b/arch/openrisc/include/asm/string.h new file mode 100644 index 000000000000..33470d4d6948 --- /dev/null +++ b/arch/openrisc/include/asm/string.h @@ -0,0 +1,7 @@ +#ifndef __ASM_OPENRISC_STRING_H +#define __ASM_OPENRISC_STRING_H + +#define __HAVE_ARCH_MEMSET +extern void *memset(void *s, int c, __kernel_size_t n); + +#endif /* __ASM_OPENRISC_STRING_H */ diff --git a/arch/openrisc/kernel/or32_ksyms.c b/arch/openrisc/kernel/or32_ksyms.c index 86e31cf1de1d..5c4695d13542 100644 --- a/arch/openrisc/kernel/or32_ksyms.c +++ b/arch/openrisc/kernel/or32_ksyms.c @@ -44,3 +44,4 @@ DECLARE_EXPORT(__ashldi3); DECLARE_EXPORT(__lshrdi3); EXPORT_SYMBOL(__copy_tofrom_user); +EXPORT_SYMBOL(memset); diff --git a/arch/openrisc/lib/Makefile b/arch/openrisc/lib/Makefile index 966f65dbc6f0..67c583e0617f 100644 --- a/arch/openrisc/lib/Makefile +++ b/arch/openrisc/lib/Makefile @@ -2,4 +2,4 @@ # Makefile for or32 specific library files.. # -obj-y = string.o delay.o +obj-y = memset.o string.o delay.o diff --git a/arch/openrisc/lib/memset.S b/arch/openrisc/lib/memset.S new file mode 100644 index 000000000000..92cc2eac26fa --- /dev/null +++ b/arch/openrisc/lib/memset.S @@ -0,0 +1,98 @@ +/* + * OpenRISC memset.S + * + * Hand-optimized assembler version of memset for OpenRISC. + * Algorithm inspired by several other arch-specific memset routines + * in the kernel tree + * + * Copyright (C) 2015 Olof Kindgren + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + + .global memset + .type memset, @function +memset: + /* arguments: + * r3 = *s + * r4 = c + * r5 = n + * r13, r15, r17, r19 used as temp regs + */ + + /* Exit if n == 0 */ + l.sfeqi r5, 0 + l.bf 4f + + /* Truncate c to char */ + l.andi r13, r4, 0xff + + /* Skip word extension if c is 0 */ + l.sfeqi r13, 0 + l.bf 1f + /* Check for at least two whole words (8 bytes) */ + l.sfleui r5, 7 + + /* Extend char c to 32-bit word cccc in r13 */ + l.slli r15, r13, 16 // r13 = 000c, r15 = 0c00 + l.or r13, r13, r15 // r13 = 0c0c, r15 = 0c00 + l.slli r15, r13, 8 // r13 = 0c0c, r15 = c0c0 + l.or r13, r13, r15 // r13 = cccc, r15 = c0c0 + +1: l.addi r19, r3, 0 // Set r19 = src + /* Jump to byte copy loop if less than two words */ + l.bf 3f + l.or r17, r5, r0 // Set r17 = n + + /* Mask out two LSBs to check alignment */ + l.andi r15, r3, 0x3 + + /* lsb == 00, jump to word copy loop */ + l.sfeqi r15, 0 + l.bf 2f + l.addi r19, r3, 0 // Set r19 = src + + /* lsb == 01,10 or 11 */ + l.sb 0(r3), r13 // *src = c + l.addi r17, r17, -1 // Decrease n + + l.sfeqi r15, 3 + l.bf 2f + l.addi r19, r3, 1 // src += 1 + + /* lsb == 01 or 10 */ + l.sb 1(r3), r13 // *(src+1) = c + l.addi r17, r17, -1 // Decrease n + + l.sfeqi r15, 2 + l.bf 2f + l.addi r19, r3, 2 // src += 2 + + /* lsb == 01 */ + l.sb 2(r3), r13 // *(src+2) = c + l.addi r17, r17, -1 // Decrease n + l.addi r19, r3, 3 // src += 3 + + /* Word copy loop */ +2: l.sw 0(r19), r13 // *src = cccc + l.addi r17, r17, -4 // Decrease n + l.sfgeui r17, 4 + l.bf 2b + l.addi r19, r19, 4 // Increase src + + /* When n > 0, copy the remaining bytes, otherwise jump to exit */ + l.sfeqi r17, 0 + l.bf 4f + + /* Byte copy loop */ +3: l.addi r17, r17, -1 // Decrease n + l.sb 0(r19), r13 // *src = cccc + l.sfnei r17, 0 + l.bf 3b + l.addi r19, r19, 1 // Increase src + +4: l.jr r9 + l.ori r11, r3, 0 -- cgit v1.2.3 From f5d45dc9116b17ee830d3425ece1e9485c9bab88 Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Mon, 21 Mar 2016 16:16:46 +0900 Subject: openrisc: Add optimized memcpy routine The generic memcpy routine provided in kernel does only byte copies. Using word copies we can lower boot time and cycles spend in memcpy quite significantly. Booting on my de0 nano I see boot times go from 7.2 to 5.6 seconds. The avg cycles in memcpy during boot go from 6467 to 1887. I tested several algorithms (see code in previous patch mails) The implementations I tested and avg cycles: - Word Copies + Loop Unrolls + Non Aligned 1882 - Word Copies + Loop Unrolls 1887 - Word Copies 2441 - Byte Copies + Loop Unrolls 6467 - Byte Copies 7600 In the end I ended up going with Word Copies + Loop Unrolls as it provides best tradeoff between simplicity and boot speedups. Signed-off-by: Stafford Horne --- arch/openrisc/TODO.openrisc | 1 - arch/openrisc/include/asm/string.h | 3 + arch/openrisc/lib/Makefile | 2 +- arch/openrisc/lib/memcpy.c | 124 +++++++++++++++++++++++++++++++++++++ 4 files changed, 128 insertions(+), 2 deletions(-) create mode 100644 arch/openrisc/lib/memcpy.c (limited to 'arch/openrisc') diff --git a/arch/openrisc/TODO.openrisc b/arch/openrisc/TODO.openrisc index 0eb04c8240f9..c43d4e1d14eb 100644 --- a/arch/openrisc/TODO.openrisc +++ b/arch/openrisc/TODO.openrisc @@ -10,4 +10,3 @@ that are due for investigation shortly, i.e. our TODO list: or1k and this change is slowly trickling through the stack. For the time being, or32 is equivalent to or1k. 
--- Implement optimized version of memcpy and memset diff --git a/arch/openrisc/include/asm/string.h b/arch/openrisc/include/asm/string.h index 33470d4d6948..64939ccd7531 100644 --- a/arch/openrisc/include/asm/string.h +++ b/arch/openrisc/include/asm/string.h @@ -4,4 +4,7 @@ #define __HAVE_ARCH_MEMSET extern void *memset(void *s, int c, __kernel_size_t n); +#define __HAVE_ARCH_MEMCPY +extern void *memcpy(void *dest, __const void *src, __kernel_size_t n); + #endif /* __ASM_OPENRISC_STRING_H */ diff --git a/arch/openrisc/lib/Makefile b/arch/openrisc/lib/Makefile index 67c583e0617f..17d9d37f32d2 100644 --- a/arch/openrisc/lib/Makefile +++ b/arch/openrisc/lib/Makefile @@ -2,4 +2,4 @@ # Makefile for or32 specific library files.. # -obj-y = memset.o string.o delay.o +obj-y := delay.o string.o memset.o memcpy.o diff --git a/arch/openrisc/lib/memcpy.c b/arch/openrisc/lib/memcpy.c new file mode 100644 index 000000000000..4706f01a199a --- /dev/null +++ b/arch/openrisc/lib/memcpy.c @@ -0,0 +1,124 @@ +/* + * arch/openrisc/lib/memcpy.c + * + * Optimized memory copy routines for openrisc. These are mostly copied + * from ohter sources but slightly entended based on ideas discuassed in + * #openrisc. + * + * The word unroll implementation is an extension to the arm byte + * unrolled implementation, but using word copies (if things are + * properly aligned) + * + * The great arm loop unroll algorithm can be found at: + * arch/arm/boot/compressed/string.c + */ + +#include + +#include + +#ifdef CONFIG_OR1200 +/* + * Do memcpy with word copies and loop unrolling. This gives the + * best performance on the OR1200 and MOR1KX archirectures + */ +void *memcpy(void *dest, __const void *src, __kernel_size_t n) +{ + int i = 0; + unsigned char *d, *s; + uint32_t *dest_w = (uint32_t *)dest, *src_w = (uint32_t *)src; + + /* If both source and dest are word aligned copy words */ + if (!((unsigned int)dest_w & 3) && !((unsigned int)src_w & 3)) { + /* Copy 32 bytes per loop */ + for (i = n >> 5; i > 0; i--) { + *dest_w++ = *src_w++; + *dest_w++ = *src_w++; + *dest_w++ = *src_w++; + *dest_w++ = *src_w++; + *dest_w++ = *src_w++; + *dest_w++ = *src_w++; + *dest_w++ = *src_w++; + *dest_w++ = *src_w++; + } + + if (n & 1 << 4) { + *dest_w++ = *src_w++; + *dest_w++ = *src_w++; + *dest_w++ = *src_w++; + *dest_w++ = *src_w++; + } + + if (n & 1 << 3) { + *dest_w++ = *src_w++; + *dest_w++ = *src_w++; + } + + if (n & 1 << 2) + *dest_w++ = *src_w++; + + d = (unsigned char *)dest_w; + s = (unsigned char *)src_w; + + } else { + d = (unsigned char *)dest_w; + s = (unsigned char *)src_w; + + for (i = n >> 3; i > 0; i--) { + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + } + + if (n & 1 << 2) { + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + } + } + + if (n & 1 << 1) { + *d++ = *s++; + *d++ = *s++; + } + + if (n & 1) + *d++ = *s++; + + return dest; +} +#else +/* + * Use word copies but no loop unrolling as we cannot assume there + * will be benefits on the archirecture + */ +void *memcpy(void *dest, __const void *src, __kernel_size_t n) +{ + unsigned char *d = (unsigned char *)dest, *s = (unsigned char *)src; + uint32_t *dest_w = (uint32_t *)dest, *src_w = (uint32_t *)src; + + /* If both source and dest are word aligned copy words */ + if (!((unsigned int)dest_w & 3) && !((unsigned int)src_w & 3)) { + for (; n >= 4; n -= 4) + *dest_w++ = *src_w++; + } + + d = (unsigned char *)dest_w; + s = (unsigned char *)src_w; + + /* For remaining or if not aligned, 
copy bytes */ + for (; n >= 1; n -= 1) + *d++ = *s++; + + return dest; + +} +#endif + +EXPORT_SYMBOL(memcpy); -- cgit v1.2.3 From f52092831b0713e1e2b934fe08460f1348f31888 Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Sun, 18 Dec 2016 01:13:34 +0900 Subject: openrisc: Add .gitignore This helps to suppress the vmlinux.lds file. Signed-off-by: Stafford Horne --- arch/openrisc/kernel/.gitignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 arch/openrisc/kernel/.gitignore (limited to 'arch/openrisc') diff --git a/arch/openrisc/kernel/.gitignore b/arch/openrisc/kernel/.gitignore new file mode 100644 index 000000000000..c5f676c3c224 --- /dev/null +++ b/arch/openrisc/kernel/.gitignore @@ -0,0 +1 @@ +vmlinux.lds -- cgit v1.2.3 From 2ead7aba44e973b313fde0e39371363459cc1128 Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Tue, 10 Jan 2017 23:13:45 +0900 Subject: openrisc: entry: Whitespace and comment cleanups Cleanups to whitespace and add some comments. Reading through the delay slot logic I noticed some things: - Delay slot instructions were not indented - Some comments are not lined up - Use tabs and spaces consistent with other code No functional change Signed-off-by: Stafford Horne --- arch/openrisc/kernel/entry.S | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) (limited to 'arch/openrisc') diff --git a/arch/openrisc/kernel/entry.S b/arch/openrisc/kernel/entry.S index ba1a361c9a1e..daae2a423c3a 100644 --- a/arch/openrisc/kernel/entry.S +++ b/arch/openrisc/kernel/entry.S @@ -228,7 +228,7 @@ EXCEPTION_ENTRY(_data_page_fault_handler) * DTLB miss handler in the CONFIG_GUARD_PROTECTED_CORE part */ #ifdef CONFIG_OPENRISC_NO_SPR_SR_DSX - l.lwz r6,PT_PC(r3) // address of an offending insn + l.lwz r6,PT_PC(r3) // address of an offending insn l.lwz r6,0(r6) // instruction that caused pf l.srli r6,r6,26 // check opcode for jump insn @@ -244,49 +244,47 @@ EXCEPTION_ENTRY(_data_page_fault_handler) l.bf 8f l.sfeqi r6,0x12 // l.jalr l.bf 8f - - l.nop + l.nop l.j 9f - l.nop -8: + l.nop - l.lwz r6,PT_PC(r3) // address of an offending insn +8: // offending insn is in delay slot + l.lwz r6,PT_PC(r3) // address of an offending insn l.addi r6,r6,4 l.lwz r6,0(r6) // instruction that caused pf l.srli r6,r6,26 // get opcode -9: +9: // offending instruction opcode loaded in r6 #else - l.mfspr r6,r0,SPR_SR // SR -// l.lwz r6,PT_SR(r3) // ESR - l.andi r6,r6,SPR_SR_DSX // check for delay slot exception - l.sfeqi r6,0x1 // exception happened in delay slot - l.bnf 7f - l.lwz r6,PT_PC(r3) // address of an offending insn + l.mfspr r6,r0,SPR_SR // SR + l.andi r6,r6,SPR_SR_DSX // check for delay slot exception + l.sfeqi r6,0x1 // exception happened in delay slot + l.bnf 7f + l.lwz r6,PT_PC(r3) // address of an offending insn - l.addi r6,r6,4 // offending insn is in delay slot + l.addi r6,r6,4 // offending insn is in delay slot 7: l.lwz r6,0(r6) // instruction that caused pf l.srli r6,r6,26 // check opcode for write access #endif - l.sfgeui r6,0x33 // check opcode for write access + l.sfgeui r6,0x33 // check opcode for write access l.bnf 1f l.sfleui r6,0x37 l.bnf 1f l.ori r6,r0,0x1 // write access l.j 2f - l.nop + l.nop 1: l.ori r6,r0,0x0 // !write access 2: /* call fault.c handler in or32/mm/fault.c */ l.jal do_page_fault - l.nop + l.nop l.j _ret_from_exception - l.nop + l.nop /* ---[ 0x400: Insn Page Fault exception ]------------------------------- */ EXCEPTION_ENTRY(_itlb_miss_page_fault_handler) @@ -306,9 +304,9 @@ EXCEPTION_ENTRY(_insn_page_fault_handler) /* 
From e6d20c55a4d94eca419f80f996133f523ecedfe0 Mon Sep 17 00:00:00 2001
From: Stafford Horne
Date: Tue, 10 Jan 2017 23:30:23 +0900
Subject: openrisc: entry: Fix delay slot detection

Use the exception SR stored in pt_regs for detection; the current SR is
not correct here, since the handler runs after returning from the
exception. Also, the code that checks for a delay slot masks out a flag
bit and then wants to know whether the result is non-zero, but the test
it implemented was wrong. Correct it by comparing the result against
zero with l.sfne.

Signed-off-by: Stafford Horne
---
 arch/openrisc/kernel/entry.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
(limited to 'arch/openrisc')

diff --git a/arch/openrisc/kernel/entry.S b/arch/openrisc/kernel/entry.S
index daae2a423c3a..bc6500860f4d 100644
--- a/arch/openrisc/kernel/entry.S
+++ b/arch/openrisc/kernel/entry.S
@@ -258,9 +258,9 @@ EXCEPTION_ENTRY(_data_page_fault_handler)
 
 #else
 
-	l.mfspr r6,r0,SPR_SR		   // SR
+	l.lwz	r6,PT_SR(r3)		   // SR
 	l.andi	r6,r6,SPR_SR_DSX	   // check for delay slot exception
-	l.sfeqi	r6,0x1			   // exception happened in delay slot
+	l.sfne	r6,r0			   // exception happened in delay slot
 	l.bnf	7f
 	 l.lwz	r6,PT_PC(r3)		   // address of an offending insn
-- cgit v1.2.3
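
The masking bug fixed here is easy to model in C: SPR_SR_DSX is a
single flag bit well above bit 0 (bit 13 of the OpenRISC SR is assumed
below), so the masked value is either 0 or the mask itself, never 1,
which is why the old l.sfeqi r6,0x1 test could never fire. A minimal
sketch:

	#include <assert.h>
	#include <stdint.h>

	#define SPR_SR_DSX (1u << 13)	/* delay slot exception bit (assumed) */

	int main(void)
	{
		uint32_t sr = SPR_SR_DSX;	/* exception hit a delay slot */

		assert((sr & SPR_SR_DSX) != 1);	/* old test: never true */
		assert((sr & SPR_SR_DSX) != 0);	/* new test: works */
		return 0;
	}
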
\0" - -_string_ok_booting: - .string "Ok, booting the kernel.\n\r\0" - _string_unhandled_exception: .string "\n\rRunarunaround: Unhandled exception 0x\0" @@ -1546,11 +1540,6 @@ _string_epc_prefix: _string_nl: .string "\n\r\0" - .global _string_esr_irq_bug -_string_esr_irq_bug: - .string "\n\rESR external interrupt bug, for details look into entry.S\n\r\0" - - /* ========================================[ page aligned structures ]=== */ -- cgit v1.2.3 From e34f671d1cf8a032a8cd248a12eef217f1789c4c Mon Sep 17 00:00:00 2001 From: Valentin Rothberg Date: Tue, 17 Jan 2017 09:12:48 +0100 Subject: arch/openrisc/lib/memcpy.c: use correct OR1200 option The Kconfig option for OR12000 is OR1K_1200. Signed-off-by: Valentin Rothberg Signed-off-by: Stafford Horne --- arch/openrisc/lib/memcpy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/openrisc') diff --git a/arch/openrisc/lib/memcpy.c b/arch/openrisc/lib/memcpy.c index 4706f01a199a..669887a60e27 100644 --- a/arch/openrisc/lib/memcpy.c +++ b/arch/openrisc/lib/memcpy.c @@ -17,7 +17,7 @@ #include -#ifdef CONFIG_OR1200 +#ifdef CONFIG_OR1K_1200 /* * Do memcpy with word copies and loop unrolling. This gives the * best performance on the OR1200 and MOR1KX archirectures -- cgit v1.2.3 From a0eba4f7ebcdf81da0a4480d8fc514af4f76579e Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Sun, 5 Feb 2017 15:32:26 +0900 Subject: openrisc: Export ioremap symbols used by modules Noticed this when building with allyesconfig. Got build failures due to iounmap and __ioremap symbols missing. This patch exports them so modules can use them. This is inline with other architectures. Signed-off-by: Stafford Horne --- arch/openrisc/mm/ioremap.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/openrisc') diff --git a/arch/openrisc/mm/ioremap.c b/arch/openrisc/mm/ioremap.c index 8705a46218f9..2175e4bfd9fc 100644 --- a/arch/openrisc/mm/ioremap.c +++ b/arch/openrisc/mm/ioremap.c @@ -80,6 +80,7 @@ __ioremap(phys_addr_t addr, unsigned long size, pgprot_t prot) return (void __iomem *)(offset + (char *)v); } +EXPORT_SYMBOL(__ioremap); void iounmap(void *addr) { @@ -106,6 +107,7 @@ void iounmap(void *addr) return vfree((void *)(PAGE_MASK & (unsigned long)addr)); } +EXPORT_SYMBOL(iounmap); /** * OK, this one's a bit tricky... ioremap can get called before memory is -- cgit v1.2.3 From a4d4426635804379d618dd28e29f574a2bc11184 Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Sun, 5 Feb 2017 18:55:40 +0900 Subject: openrisc: head: Init r0 to 0 on start Originally openrisc spec 0 specified that r0 would be wired to ground. This is no longer the case. r0 is not guaranteed to be 0 at init, so we need to initialize it to 0 before using it. Also, if we are clearing r0 we cant use r0 to clear itself. Change the the CLEAR_GPR macro to use movhi for clearing. 
From a4d4426635804379d618dd28e29f574a2bc11184 Mon Sep 17 00:00:00 2001
From: Stafford Horne
Date: Sun, 5 Feb 2017 18:55:40 +0900
Subject: openrisc: head: Init r0 to 0 on start

Originally, revision 0 of the OpenRISC spec specified that r0 would be
wired to ground. This is no longer the case: r0 is not guaranteed to be
0 at init, so we need to initialize it to 0 before using it. Also, if
we are clearing r0 we can't use r0 to clear itself. Change the
CLEAR_GPR macro to use movhi for clearing.

Reported-by: Jakob Viketoft
Signed-off-by: Stafford Horne
---
 arch/openrisc/kernel/head.S | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)
(limited to 'arch/openrisc')

diff --git a/arch/openrisc/kernel/head.S b/arch/openrisc/kernel/head.S
index fe55156983e0..d01b82eace3e 100644
--- a/arch/openrisc/kernel/head.S
+++ b/arch/openrisc/kernel/head.S
@@ -35,7 +35,7 @@
 	l.add	rd,rd,rs
 
 #define CLEAR_GPR(gpr)				\
-	l.or	gpr,r0,r0
+	l.movhi	gpr,0x0
 
 #define LOAD_SYMBOL_2_GPR(gpr,symbol)		\
 	l.movhi	gpr,hi(symbol)			;\
@@ -443,6 +443,9 @@ _dispatch_do_ipage_fault:
 	__HEAD
 	.global _start
 _start:
+	/* Init r0 to zero as per spec */
+	CLEAR_GPR(r0)
+
 	/* save kernel parameters */
 	l.or	r25,r0,r3	/* pointer to fdt */
-- cgit v1.2.3
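
A C model of why the macro changed (the helper names are invented):
l.or gpr,r0,r0 computes r0 | r0, which is only zero if r0 already held
zero, while l.movhi gpr,0x0 loads the immediate 0 into the upper 16
bits and clears the lower 16, reading no source register at all:

	#include <stdint.h>

	/* old CLEAR_GPR: l.or gpr,r0,r0 */
	static uint32_t clear_via_or(uint32_t r0)
	{
		return r0 | r0;		/* just copies r0, garbage and all */
	}

	/* new CLEAR_GPR: l.movhi gpr,0x0 */
	static uint32_t clear_via_movhi(void)
	{
		return (uint32_t)0x0 << 16;	/* always 0, r0 not read */
	}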