Diffstat (limited to 'arch/powerpc/kernel')
27 files changed, 797 insertions, 540 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 2da380fcc34c..fe4c075bcf50 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -42,12 +42,11 @@ obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o -obj64-$(CONFIG_RELOCATABLE) += reloc_64.o obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o obj-$(CONFIG_PPC64) += vdso64/ obj-$(CONFIG_ALTIVEC) += vecemu.o obj-$(CONFIG_PPC_970_NAP) += idle_power4.o -obj-$(CONFIG_PPC_P7_NAP) += idle_power7.o +obj-$(CONFIG_PPC_P7_NAP) += idle_book3s.o procfs-y := proc_powerpc.o obj-$(CONFIG_PROC_FS) += $(procfs-y) rtaspci-$(CONFIG_PPC64)-$(CONFIG_PCI) := rtas_pci.o @@ -87,7 +86,7 @@ extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o extra-$(CONFIG_8xx) := head_8xx.o extra-y += vmlinux.lds -obj-$(CONFIG_RELOCATABLE_PPC32) += reloc_32.o +obj-$(CONFIG_RELOCATABLE) += reloc_$(CONFIG_WORD_SIZE).o obj-$(CONFIG_PPC32) += entry_32.o setup_32.o obj-$(CONFIG_PPC64) += dma-iommu.o iommu.o diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index ec8a228df2f6..52ff3f025437 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -99,6 +99,7 @@ _GLOBAL(__setup_cpu_power9) mtspr SPRN_LPID,r0 mfspr r3,SPRN_LPCR ori r3, r3, LPCR_PECEDH + ori r3, r3, LPCR_HVICE bl __init_LPCR bl __init_HFSCR bl __init_tlb_power9 @@ -118,6 +119,7 @@ _GLOBAL(__restore_cpu_power9) mtspr SPRN_LPID,r0 mfspr r3,SPRN_LPCR ori r3, r3, LPCR_PECEDH + ori r3, r3, LPCR_HVICE bl __init_LPCR bl __init_HFSCR bl __init_tlb_power9 diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index eeeacf6235a3..d81f826d1029 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -137,7 +137,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER4 (gp)", .cpu_features = CPU_FTRS_POWER4, .cpu_user_features = COMMON_USER_POWER4, - .mmu_features = MMU_FTRS_POWER4, + .mmu_features = MMU_FTRS_POWER4 | MMU_FTR_TLBIE_CROP_VA, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 8, @@ -152,7 +152,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER4+ (gq)", .cpu_features = CPU_FTRS_POWER4, .cpu_user_features = COMMON_USER_POWER4, - .mmu_features = MMU_FTRS_POWER4, + .mmu_features = MMU_FTRS_POWER4 | MMU_FTR_TLBIE_CROP_VA, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 8, diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 888bdf198c3e..47b63de81f9b 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -351,7 +351,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs) old_handler = __debugger_fault_handler; __debugger_fault_handler = handle_fault; crash_shutdown_cpu = smp_processor_id(); - for (i = 0; crash_shutdown_handles[i] && i < CRASH_HANDLER_MAX; i++) { + for (i = 0; i < CRASH_HANDLER_MAX && crash_shutdown_handles[i]; i++) { if (setjmp(crash_shutdown_buf) == 0) { /* * Insert syncs and delay to ensure diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c index ddbcfab7efdf..d4cc26618809 100644 --- a/arch/powerpc/kernel/eeh_cache.c +++ b/arch/powerpc/kernel/eeh_cache.c @@ -114,9 +114,9 @@ static void eeh_addr_cache_print(struct pci_io_addr_cache *cache) while (n) { struct pci_io_addr_range *piar; piar = rb_entry(n, struct pci_io_addr_range, 
rb_node); - pr_debug("PCI: %s addr range %d [%lx-%lx]: %s\n", + pr_debug("PCI: %s addr range %d [%pap-%pap]: %s\n", (piar->flags & IORESOURCE_IO) ? "i/o" : "mem", cnt, - piar->addr_lo, piar->addr_hi, pci_name(piar->pcidev)); + &piar->addr_lo, &piar->addr_hi, pci_name(piar->pcidev)); cnt++; n = rb_next(n); } @@ -159,8 +159,8 @@ eeh_addr_cache_insert(struct pci_dev *dev, resource_size_t alo, piar->flags = flags; #ifdef DEBUG - pr_debug("PIAR: insert range=[%lx:%lx] dev=%s\n", - alo, ahi, pci_name(dev)); + pr_debug("PIAR: insert range=[%pap:%pap] dev=%s\n", + &alo, &ahi, pci_name(dev)); #endif rb_link_node(&piar->rb_node, parent, p); diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 389b0d3988dc..5f36e8a70daa 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -642,13 +642,12 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, if (pe->type & EEH_PE_VF) { eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); } else { - eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); pci_lock_rescan_remove(); pci_hp_remove_devices(bus); pci_unlock_rescan_remove(); } } else if (frozen_bus) { - eeh_pe_dev_traverse(pe, eeh_rmv_device, &rmv_data); + eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data); } /* @@ -692,10 +691,12 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, */ edev = list_first_entry(&pe->edevs, struct eeh_dev, list); eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL); - if (pe->type & EEH_PE_VF) + if (pe->type & EEH_PE_VF) { eeh_add_virt_device(edev, NULL); - else + } else { + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); pci_hp_add_devices(bus); + } } else if (frozen_bus && rmv_data->removed) { pr_info("EEH: Sleep 5s ahead of partial hotplug\n"); ssleep(5); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 4c9440629128..6200e4925d26 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -107,25 +107,9 @@ BEGIN_FTR_SECTION beq 9f cmpwi cr3,r13,2 - - /* - * Check if last bit of HSPGR0 is set. This indicates whether we are - * waking up from winkle. - */ GET_PACA(r13) - clrldi r5,r13,63 - clrrdi r13,r13,1 - cmpwi cr4,r5,1 - mtspr SPRN_HSPRG0,r13 - - lbz r0,PACA_THREAD_IDLE_STATE(r13) - cmpwi cr2,r0,PNV_THREAD_NAP - bgt cr2,8f /* Either sleep or Winkle */ - - /* Waking up from nap should not cause hypervisor state loss */ - bgt cr3,. + bl pnv_restore_hyp_resource - /* Waking up from nap */ li r0,PNV_THREAD_RUNNING stb r0,PACA_THREAD_IDLE_STATE(r13) /* Clear thread state */ @@ -143,13 +127,9 @@ BEGIN_FTR_SECTION /* Return SRR1 from power7_nap() */ mfspr r3,SPRN_SRR1 - beq cr3,2f - b power7_wakeup_noloss -2: b power7_wakeup_loss - - /* Fast Sleep wakeup on PowerNV */ -8: GET_PACA(r13) - b power7_wakeup_tb_loss + blt cr3,2f + b pnv_wakeup_loss +2: b pnv_wakeup_noloss 9: END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) @@ -351,6 +331,12 @@ hv_doorbell_trampoline: EXCEPTION_PROLOG_0(PACA_EXGEN) b h_doorbell_hv + . = 0xea0 +hv_virt_irq_trampoline: + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) + b h_virt_irq_hv + /* We need to deal with the Altivec unavailable exception * here which is at 0xf20, thus in the middle of the * prolog code of the PerformanceMonitor one. 
A little @@ -601,6 +587,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) MASKABLE_EXCEPTION_HV_OOL(0xe82, h_doorbell) KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe82) + MASKABLE_EXCEPTION_HV_OOL(0xea2, h_virt_irq) + KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xea2) + /* moved from 0xf00 */ STD_EXCEPTION_PSERIES_OOL(0xf00, performance_monitor) KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xf00) @@ -680,6 +669,8 @@ _GLOBAL(__replay_interrupt) BEGIN_FTR_SECTION cmpwi r3,0xe80 beq h_doorbell_common + cmpwi r3,0xea0 + beq h_virt_irq_common FTR_SECTION_ELSE cmpwi r3,0xa00 beq doorbell_super_common @@ -754,6 +745,7 @@ kvmppc_skip_Hinterrupt: #else STD_EXCEPTION_COMMON_ASYNC(0xe80, h_doorbell, unknown_exception) #endif + STD_EXCEPTION_COMMON_ASYNC(0xea0, h_virt_irq, do_IRQ) STD_EXCEPTION_COMMON_ASYNC(0xf00, performance_monitor, performance_monitor_exception) STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, instruction_breakpoint_exception) STD_EXCEPTION_COMMON(0x1502, denorm, unknown_exception) @@ -762,11 +754,6 @@ kvmppc_skip_Hinterrupt: #else STD_EXCEPTION_COMMON(0x1700, altivec_assist, unknown_exception) #endif -#ifdef CONFIG_CBE_RAS - STD_EXCEPTION_COMMON(0x1200, cbe_system_error, cbe_system_error_exception) - STD_EXCEPTION_COMMON(0x1600, cbe_maintenance, cbe_maintenance_exception) - STD_EXCEPTION_COMMON(0x1800, cbe_thermal, cbe_thermal_exception) -#endif /* CONFIG_CBE_RAS */ /* * Relocation-on interrupts: A subset of the interrupts can be delivered @@ -877,6 +864,12 @@ h_doorbell_relon_trampoline: EXCEPTION_PROLOG_0(PACA_EXGEN) b h_doorbell_relon_hv + . = 0x4ea0 +h_virt_irq_relon_trampoline: + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) + b h_virt_irq_relon_hv + . = 0x4f00 performance_monitor_relon_pseries_trampoline: SET_SCRATCH0(r13) @@ -1131,12 +1124,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) bl vsx_unavailable_exception b ret_from_except - STD_EXCEPTION_COMMON(0xf60, facility_unavailable, facility_unavailable_exception) - STD_EXCEPTION_COMMON(0xf80, hv_facility_unavailable, facility_unavailable_exception) - /* Equivalents to the above handlers for relocation-on interrupt vectors */ STD_RELON_EXCEPTION_HV_OOL(0xe40, emulation_assist) MASKABLE_RELON_EXCEPTION_HV_OOL(0xe80, h_doorbell) + MASKABLE_RELON_EXCEPTION_HV_OOL(0xea0, h_virt_irq) STD_RELON_EXCEPTION_PSERIES_OOL(0xf00, performance_monitor) STD_RELON_EXCEPTION_PSERIES_OOL(0xf20, altivec_unavailable) @@ -1170,6 +1161,15 @@ fwnmi_data_area: . = 0x8000 #endif /* defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */ + STD_EXCEPTION_COMMON(0xf60, facility_unavailable, facility_unavailable_exception) + STD_EXCEPTION_COMMON(0xf80, hv_facility_unavailable, facility_unavailable_exception) + +#ifdef CONFIG_CBE_RAS + STD_EXCEPTION_COMMON(0x1200, cbe_system_error, cbe_system_error_exception) + STD_EXCEPTION_COMMON(0x1600, cbe_maintenance, cbe_maintenance_exception) + STD_EXCEPTION_COMMON(0x1800, cbe_thermal, cbe_thermal_exception) +#endif /* CONFIG_CBE_RAS */ + .globl hmi_exception_early hmi_exception_early: EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0xe60) @@ -1289,7 +1289,7 @@ machine_check_handle_early: GET_PACA(r13) ld r1,PACAR1(r13) li r3,PNV_THREAD_NAP - b power7_enter_nap_mode + b pnv_enter_arch207_idle_mode 4: #endif /* @@ -1399,11 +1399,12 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_RADIX) lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ mtlr r10 -BEGIN_MMU_FTR_SECTION - b 2f -END_MMU_FTR_SECTION_IFSET(MMU_FTR_RADIX) andi. 
r10,r12,MSR_RI /* check for unrecoverable exception */ +BEGIN_MMU_FTR_SECTION beq- 2f +FTR_SECTION_ELSE + b 2f +ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_RADIX) .machine push .machine "power4" diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index 7af6c4de044b..cc52d9795f88 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -144,6 +144,21 @@ __ftrace_make_nop(struct module *mod, return -EINVAL; } +#ifdef CC_USING_MPROFILE_KERNEL + /* When using -mkernel_profile there is no load to jump over */ + pop = PPC_INST_NOP; + + if (probe_kernel_read(&op, (void *)(ip - 4), 4)) { + pr_err("Fetching instruction at %lx failed.\n", ip - 4); + return -EFAULT; + } + + /* We expect either a mflr r0, or a std r0, LRSAVE(r1) */ + if (op != PPC_INST_MFLR && op != PPC_INST_STD_LR) { + pr_err("Unexpected instruction %08x around bl _mcount\n", op); + return -EINVAL; + } +#else /* * Our original call site looks like: * @@ -170,24 +185,10 @@ __ftrace_make_nop(struct module *mod, } if (op != PPC_INST_LD_TOC) { - unsigned int inst; - - if (probe_kernel_read(&inst, (void *)(ip - 4), 4)) { - pr_err("Fetching instruction at %lx failed.\n", ip - 4); - return -EFAULT; - } - - /* We expect either a mlfr r0, or a std r0, LRSAVE(r1) */ - if (inst != PPC_INST_MFLR && inst != PPC_INST_STD_LR) { - pr_err("Unexpected instructions around bl _mcount\n" - "when enabling dynamic ftrace!\t" - "(%08x,bl,%08x)\n", inst, op); - return -EINVAL; - } - - /* When using -mkernel_profile there is no load to jump over */ - pop = PPC_INST_NOP; + pr_err("Expected %08x found %08x\n", PPC_INST_LD_TOC, op); + return -EINVAL; } +#endif /* CC_USING_MPROFILE_KERNEL */ if (patch_instruction((unsigned int *)ip, pop)) { pr_err("Patching NOP failed.\n"); diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 064cd9397836..f765b0434731 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -941,7 +941,7 @@ start_here_multiplatform: mtspr SPRN_SRR1,r4 RFI b . /* prevent speculative execution */ - + /* This is where all platforms converge execution */ start_here_common: @@ -951,9 +951,6 @@ start_here_common: /* Load the TOC (virtual address) */ ld r2,PACATOC(r13) - /* Do more system initializations in virtual mode */ - bl setup_system - /* Mark interrupts soft and hard disabled (they might be enabled * in the PACA when doing hotplug) */ diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_book3s.S index 470ceebd2d23..335eb6cedae5 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -1,5 +1,6 @@ /* - * This file contains the power_save function for Power7 CPUs. + * This file contains idle entry/exit functions for POWER7, + * POWER8 and POWER9 CPUs. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -20,6 +21,7 @@ #include <asm/opal.h> #include <asm/cpuidle.h> #include <asm/book3s/64/mmu-hash.h> +#include <asm/mmu.h> #undef DEBUG @@ -36,6 +38,11 @@ #define _AMOR GPR9 #define _WORT GPR10 #define _WORC GPR11 +#define _PTCR GPR12 + +#define PSSCR_HV_TEMPLATE PSSCR_ESL | PSSCR_EC | \ + PSSCR_PSLL_MASK | PSSCR_TR_MASK | \ + PSSCR_MTL_MASK /* Idle state entry routines */ @@ -52,6 +59,45 @@ .text /* + * Used by threads before entering deep idle states. 
Saves SPRs + * in interrupt stack frame + */ +save_sprs_to_stack: + /* + * Note all register i.e per-core, per-subcore or per-thread is saved + * here since any thread in the core might wake up first + */ +BEGIN_FTR_SECTION + mfspr r3,SPRN_PTCR + std r3,_PTCR(r1) + /* + * Note - SDR1 is dropped in Power ISA v3. Hence not restoring + * SDR1 here + */ +FTR_SECTION_ELSE + mfspr r3,SPRN_SDR1 + std r3,_SDR1(r1) +ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300) + mfspr r3,SPRN_RPR + std r3,_RPR(r1) + mfspr r3,SPRN_SPURR + std r3,_SPURR(r1) + mfspr r3,SPRN_PURR + std r3,_PURR(r1) + mfspr r3,SPRN_TSCR + std r3,_TSCR(r1) + mfspr r3,SPRN_DSCR + std r3,_DSCR(r1) + mfspr r3,SPRN_AMOR + std r3,_AMOR(r1) + mfspr r3,SPRN_WORT + std r3,_WORT(r1) + mfspr r3,SPRN_WORC + std r3,_WORC(r1) + + blr + +/* * Used by threads when the lock bit of core_idle_state is set. * Threads will spin in HMT_LOW until the lock bit is cleared. * r14 - pointer to core_idle_state @@ -69,13 +115,16 @@ core_idle_lock_held: /* * Pass requested state in r3: - * r3 - PNV_THREAD_NAP/SLEEP/WINKLE + * r3 - PNV_THREAD_NAP/SLEEP/WINKLE in POWER8 + * - Requested STOP state in POWER9 * * To check IRQ_HAPPENED in r4 * 0 - don't check * 1 - check + * + * Address to 'rfid' to in r5 */ -_GLOBAL(power7_powersave_common) +_GLOBAL(pnv_powersave_common) /* Use r3 to pass state nap/sleep/winkle */ /* NAP is a state loss, we create a regs frame on the * stack, fill it up with the state we care about and @@ -126,28 +175,28 @@ _GLOBAL(power7_powersave_common) std r9,_MSR(r1) std r1,PACAR1(r13) +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + /* Tell KVM we're entering idle */ + li r4,KVM_HWTHREAD_IN_IDLE + stb r4,HSTATE_HWTHREAD_STATE(r13) +#endif + /* * Go to real mode to do the nap, as required by the architecture. * Also, we need to be in real mode before setting hwthread_state, * because as soon as we do that, another thread can switch * the MMU context to the guest. */ - LOAD_REG_IMMEDIATE(r5, MSR_IDLE) + LOAD_REG_IMMEDIATE(r7, MSR_IDLE) li r6, MSR_RI andc r6, r9, r6 - LOAD_REG_ADDR(r7, power7_enter_nap_mode) mtmsrd r6, 1 /* clear RI before setting SRR0/1 */ - mtspr SPRN_SRR0, r7 - mtspr SPRN_SRR1, r5 + mtspr SPRN_SRR0, r5 + mtspr SPRN_SRR1, r7 rfid - .globl power7_enter_nap_mode -power7_enter_nap_mode: -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - /* Tell KVM we're napping */ - li r4,KVM_HWTHREAD_IN_NAP - stb r4,HSTATE_HWTHREAD_STATE(r13) -#endif + .globl pnv_enter_arch207_idle_mode +pnv_enter_arch207_idle_mode: stb r3,PACA_THREAD_IDLE_STATE(r13) cmpwi cr3,r3,PNV_THREAD_SLEEP bge cr3,2f @@ -196,8 +245,7 @@ fastsleep_workaround_at_entry: /* Fast sleep workaround */ li r3,1 li r4,1 - li r0,OPAL_CONFIG_CPU_IDLE_STATE - bl opal_call_realmode + bl opal_rm_config_cpu_idle_state /* Clear Lock bit */ li r0,0 @@ -206,30 +254,45 @@ fastsleep_workaround_at_entry: b common_enter enter_winkle: - /* - * Note all register i.e per-core, per-subcore or per-thread is saved - * here since any thread in the core might wake up first - */ - mfspr r3,SPRN_SDR1 - std r3,_SDR1(r1) - mfspr r3,SPRN_RPR - std r3,_RPR(r1) - mfspr r3,SPRN_SPURR - std r3,_SPURR(r1) - mfspr r3,SPRN_PURR - std r3,_PURR(r1) - mfspr r3,SPRN_TSCR - std r3,_TSCR(r1) - mfspr r3,SPRN_DSCR - std r3,_DSCR(r1) - mfspr r3,SPRN_AMOR - std r3,_AMOR(r1) - mfspr r3,SPRN_WORT - std r3,_WORT(r1) - mfspr r3,SPRN_WORC - std r3,_WORC(r1) + bl save_sprs_to_stack + IDLE_STATE_ENTER_SEQ(PPC_WINKLE) +/* + * r3 - requested stop state + */ +power_enter_stop: +/* + * Check if the requested state is a deep idle state. 
+ */ + LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state) + ld r4,ADDROFF(pnv_first_deep_stop_state)(r5) + cmpd r3,r4 + bge 2f + IDLE_STATE_ENTER_SEQ(PPC_STOP) +2: +/* + * Entering deep idle state. + * Clear thread bit in PACA_CORE_IDLE_STATE, save SPRs to + * stack and enter stop + */ + lbz r7,PACA_THREAD_MASK(r13) + ld r14,PACA_CORE_IDLE_STATE_PTR(r13) + +lwarx_loop_stop: + lwarx r15,0,r14 + andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT + bnel core_idle_lock_held + andc r15,r15,r7 /* Clear thread bit */ + + stwcx. r15,0,r14 + bne- lwarx_loop_stop + isync + + bl save_sprs_to_stack + + IDLE_STATE_ENTER_SEQ(PPC_STOP) + _GLOBAL(power7_idle) /* Now check if user or arch enabled NAP mode */ LOAD_REG_ADDRBASE(r3,powersave_nap) @@ -242,19 +305,22 @@ _GLOBAL(power7_idle) _GLOBAL(power7_nap) mr r4,r3 li r3,PNV_THREAD_NAP - b power7_powersave_common + LOAD_REG_ADDR(r5, pnv_enter_arch207_idle_mode) + b pnv_powersave_common /* No return */ _GLOBAL(power7_sleep) li r3,PNV_THREAD_SLEEP li r4,1 - b power7_powersave_common + LOAD_REG_ADDR(r5, pnv_enter_arch207_idle_mode) + b pnv_powersave_common /* No return */ _GLOBAL(power7_winkle) - li r3,3 + li r3,PNV_THREAD_WINKLE li r4,1 - b power7_powersave_common + LOAD_REG_ADDR(r5, pnv_enter_arch207_idle_mode) + b pnv_powersave_common /* No return */ #define CHECK_HMI_INTERRUPT \ @@ -270,25 +336,104 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \ ld r2,PACATOC(r13); \ ld r1,PACAR1(r13); \ std r3,ORIG_GPR3(r1); /* Save original r3 */ \ - li r0,OPAL_HANDLE_HMI; /* Pass opal token argument*/ \ - bl opal_call_realmode; \ + bl opal_rm_handle_hmi; \ ld r3,ORIG_GPR3(r1); /* Restore original r3 */ \ 20: nop; -_GLOBAL(power7_wakeup_tb_loss) +/* + * r3 - requested stop state + */ +_GLOBAL(power9_idle_stop) + LOAD_REG_IMMEDIATE(r4, PSSCR_HV_TEMPLATE) + or r4,r4,r3 + mtspr SPRN_PSSCR, r4 + li r4, 1 + LOAD_REG_ADDR(r5,power_enter_stop) + b pnv_powersave_common + /* No return */ +/* + * Called from reset vector. Check whether we have woken up with + * hypervisor state loss. If yes, restore hypervisor state and return + * back to reset vector. + * + * r13 - Contents of HSPRG0 + * cr3 - set to gt if waking up with partial/complete hypervisor state loss + */ +_GLOBAL(pnv_restore_hyp_resource) ld r2,PACATOC(r13); +BEGIN_FTR_SECTION + /* + * POWER ISA 3. Use PSSCR to determine if we + * are waking up from deep idle state + */ + LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state) + ld r4,ADDROFF(pnv_first_deep_stop_state)(r5) + + mfspr r5,SPRN_PSSCR + /* + * 0-3 bits correspond to Power-Saving Level Status + * which indicates the idle state we are waking up from + */ + rldicl r5,r5,4,60 + cmpd cr4,r5,r4 + bge cr4,pnv_wakeup_tb_loss + /* + * Waking up without hypervisor state loss. Return to + * reset vector + */ + blr + +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) + + /* + * POWER ISA 2.07 or less. + * Check if last bit of HSPGR0 is set. This indicates whether we are + * waking up from winkle. + */ + clrldi r5,r13,63 + clrrdi r13,r13,1 + cmpwi cr4,r5,1 + mtspr SPRN_HSPRG0,r13 + + lbz r0,PACA_THREAD_IDLE_STATE(r13) + cmpwi cr2,r0,PNV_THREAD_NAP + bgt cr2,pnv_wakeup_tb_loss /* Either sleep or Winkle */ + + /* + * We fall through here if PACA_THREAD_IDLE_STATE shows we are waking + * up from nap. At this stage CR3 shouldn't contains 'gt' since that + * indicates we are waking with hypervisor state loss from nap. + */ + bgt cr3,. 
+ + blr /* Return back to System Reset vector from where + pnv_restore_hyp_resource was invoked */ + +/* + * Called if waking up from idle state which can cause either partial or + * complete hyp state loss. + * In POWER8, called if waking up from fastsleep or winkle + * In POWER9, called if waking up from stop state >= pnv_first_deep_stop_state + * + * r13 - PACA + * cr3 - gt if waking up with partial/complete hypervisor state loss + * cr4 - eq if waking up from complete hypervisor state loss. + */ +_GLOBAL(pnv_wakeup_tb_loss) ld r1,PACAR1(r13) /* * Before entering any idle state, the NVGPRs are saved in the stack * and they are restored before switching to the process context. Hence * until they are restored, they are free to be used. * - * Save SRR1 in a NVGPR as it might be clobbered in opal_call_realmode - * (called in CHECK_HMI_INTERRUPT). SRR1 is required to determine the - * wakeup reason if we branch to kvm_start_guest. + * Save SRR1 and LR in NVGPRs as they might be clobbered in + * opal_call() (called in CHECK_HMI_INTERRUPT). SRR1 is required + * to determine the wakeup reason if we branch to kvm_start_guest. LR + * is required to return back to reset vector after hypervisor state + * restore is complete. */ - + mflr r17 mfspr r16,SPRN_SRR1 BEGIN_FTR_SECTION CHECK_HMI_INTERRUPT @@ -310,35 +455,35 @@ lwarx_loop2: bnel core_idle_lock_held cmpwi cr2,r15,0 - lbz r4,PACA_SUBCORE_SIBLING_MASK(r13) - and r4,r4,r15 - cmpwi cr1,r4,0 /* Check if first in subcore */ /* * At this stage - * cr1 - 0b0100 if first thread to wakeup in subcore - * cr2 - 0b0100 if first thread to wakeup in core - * cr3- 0b0010 if waking up from sleep or winkle - * cr4 - 0b0100 if waking up from winkle + * cr2 - eq if first thread to wakeup in core + * cr3- gt if waking up with partial/complete hypervisor state loss + * cr4 - eq if waking up from complete hypervisor state loss. */ - or r15,r15,r7 /* Set thread bit */ - - beq cr1,first_thread_in_subcore - - /* Not first thread in subcore to wake up */ - stwcx. r15,0,r14 - bne- lwarx_loop2 - isync - b common_exit - -first_thread_in_subcore: - /* First thread in subcore to wakeup */ ori r15,r15,PNV_CORE_IDLE_LOCK_BIT stwcx. r15,0,r14 bne- lwarx_loop2 isync +BEGIN_FTR_SECTION + lbz r4,PACA_SUBCORE_SIBLING_MASK(r13) + and r4,r4,r15 + cmpwi r4,0 /* Check if first in subcore */ + + or r15,r15,r7 /* Set thread bit */ + beq first_thread_in_subcore +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) + + or r15,r15,r7 /* Set thread bit */ + beq cr2,first_thread_in_core + + /* Not first thread in core or subcore to wake up */ + b clear_lock + +first_thread_in_subcore: /* * If waking up from sleep, subcore state is not lost. Hence * skip subcore state restore @@ -348,6 +493,7 @@ first_thread_in_subcore: /* Restore per-subcore state */ ld r4,_SDR1(r1) mtspr SPRN_SDR1,r4 + ld r4,_RPR(r1) mtspr SPRN_RPR,r4 ld r4,_AMOR(r1) @@ -363,32 +509,44 @@ subcore_state_restored: first_thread_in_core: /* - * First thread in the core waking up from fastsleep. It needs to + * First thread in the core waking up from any state which can cause + * partial or complete hypervisor state loss. It needs to * call the fastsleep workaround code if the platform requires it. * Call it unconditionally here. The below branch instruction will - * be patched out when the idle states are discovered if platform - * does not require workaround. + * be patched out if the platform does not have fastsleep or does not + * require the workaround. Patching will be performed during the + * discovery of idle-states. 
*/ .global pnv_fastsleep_workaround_at_exit pnv_fastsleep_workaround_at_exit: b fastsleep_workaround_at_exit timebase_resync: - /* Do timebase resync if we are waking up from sleep. Use cr3 value - * set in exceptions-64s.S */ + /* + * Use cr3 which indicates that we are waking up with atleast partial + * hypervisor state loss to determine if TIMEBASE RESYNC is needed. + */ ble cr3,clear_lock /* Time base re-sync */ - li r0,OPAL_RESYNC_TIMEBASE - bl opal_call_realmode; - /* TODO: Check r3 for failure */ - + bl opal_rm_resync_timebase; /* * If waking up from sleep, per core state is not lost, skip to * clear_lock. */ bne cr4,clear_lock - /* Restore per core state */ + /* + * First thread in the core to wake up and its waking up with + * complete hypervisor state loss. Restore per core hypervisor + * state. + */ +BEGIN_FTR_SECTION + ld r4,_PTCR(r1) + mtspr SPRN_PTCR,r4 + ld r4,_RPR(r1) + mtspr SPRN_RPR,r4 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) + ld r4,_TSCR(r1) mtspr SPRN_TSCR,r4 ld r4,_WORC(r1) @@ -410,9 +568,9 @@ common_exit: /* Waking up from winkle */ - /* Restore per thread state */ - bl __restore_cpu_power8 - +BEGIN_MMU_FTR_SECTION + b no_segments +END_MMU_FTR_SECTION_IFSET(MMU_FTR_RADIX) /* Restore SLB from PACA */ ld r8,PACA_SLBSHADOWPTR(r13) @@ -426,6 +584,9 @@ common_exit: slbmte r6,r5 1: addi r8,r8,16 .endr +no_segments: + + /* Restore per thread state */ ld r4,_SPURR(r1) mtspr SPRN_SPURR,r4 @@ -436,48 +597,34 @@ common_exit: ld r4,_WORT(r1) mtspr SPRN_WORT,r4 -hypervisor_state_restored: + /* Call cur_cpu_spec->cpu_restore() */ + LOAD_REG_ADDR(r4, cur_cpu_spec) + ld r4,0(r4) + ld r12,CPU_SPEC_RESTORE(r4) +#ifdef PPC64_ELF_ABI_v1 + ld r12,0(r12) +#endif + mtctr r12 + bctrl - li r5,PNV_THREAD_RUNNING - stb r5,PACA_THREAD_IDLE_STATE(r13) +hypervisor_state_restored: mtspr SPRN_SRR1,r16 -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - li r0,KVM_HWTHREAD_IN_KERNEL - stb r0,HSTATE_HWTHREAD_STATE(r13) - /* Order setting hwthread_state vs. testing hwthread_req */ - sync - lbz r0,HSTATE_HWTHREAD_REQ(r13) - cmpwi r0,0 - beq 6f - b kvm_start_guest -6: -#endif - - REST_NVGPRS(r1) - REST_GPR(2, r1) - ld r3,_CCR(r1) - ld r4,_MSR(r1) - ld r5,_NIP(r1) - addi r1,r1,INT_FRAME_SIZE - mtcr r3 - mfspr r3,SPRN_SRR1 /* Return SRR1 */ - mtspr SPRN_SRR1,r4 - mtspr SPRN_SRR0,r5 - rfid + mtlr r17 + blr /* Return back to System Reset vector from where + pnv_restore_hyp_resource was invoked */ fastsleep_workaround_at_exit: li r3,1 li r4,0 - li r0,OPAL_CONFIG_CPU_IDLE_STATE - bl opal_call_realmode + bl opal_rm_config_cpu_idle_state b timebase_resync /* * R3 here contains the value that will be returned to the caller * of power7_nap. */ -_GLOBAL(power7_wakeup_loss) +_GLOBAL(pnv_wakeup_loss) ld r1,PACAR1(r13) BEGIN_FTR_SECTION CHECK_HMI_INTERRUPT @@ -497,10 +644,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) * R3 here contains the value that will be returned to the caller * of power7_nap. 
*/ -_GLOBAL(power7_wakeup_noloss) +_GLOBAL(pnv_wakeup_noloss) lbz r0,PACA_NAPSTATELOST(r13) cmpwi r0,0 - bne power7_wakeup_loss + bne pnv_wakeup_loss BEGIN_FTR_SECTION CHECK_HMI_INTERRUPT END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 3cb46a3b1de7..ac910d9982df 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -250,7 +250,7 @@ notrace void arch_local_irq_restore(unsigned long en) if (WARN_ON(mfmsr() & MSR_EE)) __hard_irq_disable(); } -#endif /* CONFIG_TRACE_IRQFLAG */ +#endif /* CONFIG_TRACE_IRQFLAGS */ set_soft_enabled(0); @@ -342,6 +342,21 @@ bool prep_irq_for_idle(void) return true; } +/* + * Force a replay of the external interrupt handler on this CPU. + */ +void force_external_irq_replay(void) +{ + /* + * This must only be called with interrupts soft-disabled, + * the replay will happen when re-enabling. + */ + WARN_ON(!arch_irqs_disabled()); + + /* Indicate in the PACA that we have an interrupt to replay */ + local_paca->irq_happened |= PACA_IRQ_EE; +} + #endif /* CONFIG_PPC64 */ int arch_show_interrupts(struct seq_file *p, int prec) diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index 50bf55135ef8..4c780a342282 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -55,7 +55,7 @@ int default_machine_kexec_prepare(struct kimage *image) const unsigned long *basep; const unsigned int *sizep; - if (!ppc_md.hpte_clear_all) + if (!mmu_hash_ops.hpte_clear_all) return -ENOENT; /* @@ -380,7 +380,12 @@ void default_machine_kexec(struct kimage *image) */ kexec_sequence(&kexec_stack, image->start, image, page_address(image->control_code_page), - ppc_md.hpte_clear_all); +#ifdef CONFIG_PPC_STD_MMU + mmu_hash_ops.hpte_clear_all +#else + NULL +#endif + ); /* NOTREACHED */ } diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 7a8519052b14..cb195157b318 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -667,7 +667,7 @@ _GLOBAL(kexec_sequence) mr r12,r27 #endif mtctr r12 - bctrl /* ppc_md.hpte_clear_all(void); */ + bctrl /* mmu_hash_ops.hpte_clear_all(void); */ #endif /* !CONFIG_PPC_BOOK3E */ /* diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index f703f343358e..183368e008cf 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -494,9 +494,10 @@ static bool is_early_mcount_callsite(u32 *instruction) restore r2. 
*/ static int restore_r2(u32 *instruction, struct module *me) { + if (is_early_mcount_callsite(instruction - 1)) + return 1; + if (*instruction != PPC_INST_NOP) { - if (is_early_mcount_callsite(instruction - 1)) - return 1; pr_err("%s: Expect noop after relocate, got %08x\n", me->name, *instruction); return 0; diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index c6ac4f01dd56..f93942b4b6a6 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1418,8 +1418,10 @@ void __init pcibios_resource_survey(void) /* Allocate and assign resources */ list_for_each_entry(b, &pci_root_buses, node) pcibios_allocate_bus_resources(b); - pcibios_allocate_resources(0); - pcibios_allocate_resources(1); + if (!pci_has_flag(PCI_REASSIGN_ALL_RSRC)) { + pcibios_allocate_resources(0); + pcibios_allocate_resources(1); + } /* Before we start assigning unassigned resource, we try to reserve * the low IO area and the VGA memory area if they intersect the diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index f71b79a8992b..ed5e9ff61a68 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -47,7 +47,6 @@ static int __init pcibios_init(void) printk(KERN_INFO "PCI: Probing PCI hardware\n"); - pci_io_base = ISA_IO_BASE; /* For now, override phys_mem_access_prot. If we need it,g * later, we may move that initialization to each ppc_md */ diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index ddceeb96e8fb..a8cca88e972f 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1527,6 +1527,16 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) current->thread.regs = regs - 1; } +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* + * Clear any transactional state, we're exec()ing. The cause is + * not important as there will never be a recheckpoint so it's not + * user visible. 
+ */ + if (MSR_TM_SUSPENDED(mfmsr())) + tm_reclaim_current(0); +#endif + memset(regs->gpr, 0, sizeof(regs->gpr)); regs->ctr = 0; regs->link = 0; diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 946e34ffeae9..bae3db791150 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -56,6 +56,8 @@ #include <asm/opal.h> #include <asm/fadump.h> #include <asm/debug.h> +#include <asm/epapr_hcalls.h> +#include <asm/firmware.h> #include <mm/mmu_decl.h> @@ -645,6 +647,14 @@ static void __init early_reserve_mem(void) #endif } +static bool disable_radix; +static int __init parse_disable_radix(char *p) +{ + disable_radix = true; + return 0; +} +early_param("disable_radix", parse_disable_radix); + void __init early_init_devtree(void *params) { phys_addr_t limit; @@ -734,11 +744,26 @@ void __init early_init_devtree(void *params) */ spinning_secondaries = boot_cpu_count - 1; #endif + /* + * now fixup radix MMU mode based on kernel command line + */ + if (disable_radix) + cur_cpu_spec->mmu_features &= ~MMU_FTR_RADIX; #ifdef CONFIG_PPC_POWERNV /* Scan and build the list of machine check recoverable ranges */ of_scan_flat_dt(early_init_dt_scan_recoverable_ranges, NULL); #endif + epapr_paravirt_early_init(); + + /* Now try to figure out if we are running on LPAR and so on */ + pseries_probe_fw_features(); + +#ifdef CONFIG_PPC_PS3 + /* Identify PS3 firmware */ + if (of_flat_dt_is_compatible(of_get_flat_dt_root(), "sony,ps3")) + powerpc_firmware_features |= FW_FEATURE_PS3_POSSIBLE; +#endif DBG(" <- early_init_devtree()\n"); } diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 8da209fdf480..6a3e5de544ce 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -685,7 +685,7 @@ int rtas_set_indicator_fast(int indicator, int index, int new_value) return rc; } -void rtas_restart(char *cmd) +void __noreturn rtas_restart(char *cmd) { if (rtas_flash_term_hook) rtas_flash_term_hook(SYS_RESTART); @@ -704,7 +704,7 @@ void rtas_power_off(void) for (;;); } -void rtas_halt(void) +void __noreturn rtas_halt(void) { if (rtas_flash_term_hook) rtas_flash_term_hook(SYS_HALT); @@ -1174,7 +1174,7 @@ void __init rtas_initialize(void) * the stop-self token if any */ #ifdef CONFIG_PPC64 - if (machine_is(pseries) && firmware_has_feature(FW_FEATURE_LPAR)) { + if (firmware_has_feature(FW_FEATURE_LPAR)) { rtas_region = min(ppc64_rma_size, RTAS_INSTANTIATE_MAX); ibm_suspend_me_token = rtas_token("ibm,suspend-me"); } diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index e864b7c5884e..a26a02006576 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -526,10 +526,8 @@ void rtas_cancel_event_scan(void) } EXPORT_SYMBOL_GPL(rtas_cancel_event_scan); -static int __init rtas_init(void) +static int __init rtas_event_scan_init(void) { - struct proc_dir_entry *entry; - if (!machine_is(pseries) && !machine_is(chrp)) return 0; @@ -562,13 +560,27 @@ static int __init rtas_init(void) return -ENOMEM; } + start_event_scan(); + + return 0; +} +arch_initcall(rtas_event_scan_init); + +static int __init rtas_init(void) +{ + struct proc_dir_entry *entry; + + if (!machine_is(pseries) && !machine_is(chrp)) + return 0; + + if (!rtas_log_buf) + return -ENODEV; + entry = proc_create("powerpc/rtas/error_log", S_IRUSR, NULL, &proc_rtas_log_operations); if (!entry) printk(KERN_ERR "Failed to create error_log proc entry\n"); - start_event_scan(); - return 0; } __initcall(rtas_init); diff --git a/arch/powerpc/kernel/setup-common.c 
b/arch/powerpc/kernel/setup-common.c index 8ca79b7503d8..714b4ba7ab86 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -35,6 +35,7 @@ #include <linux/percpu.h> #include <linux/memblock.h> #include <linux/of_platform.h> +#include <linux/hugetlb.h> #include <asm/io.h> #include <asm/paca.h> #include <asm/prom.h> @@ -61,6 +62,12 @@ #include <asm/cputhreads.h> #include <mm/mmu_decl.h> #include <asm/fadump.h> +#include <asm/udbg.h> +#include <asm/hugetlb.h> +#include <asm/livepatch.h> +#include <asm/mmu_context.h> + +#include "setup.h" #ifdef DEBUG #include <asm/udbg.h> @@ -494,7 +501,7 @@ void __init smp_setup_cpu_maps(void) * On pSeries LPAR, we need to know how many cpus * could possibly be added to this partition. */ - if (machine_is(pseries) && firmware_has_feature(FW_FEATURE_LPAR) && + if (firmware_has_feature(FW_FEATURE_LPAR) && (dn = of_find_node_by_path("/rtas"))) { int num_addr_cell, num_size_cell, maxcpus; const __be32 *ireg; @@ -575,6 +582,7 @@ void probe_machine(void) { extern struct machdep_calls __machine_desc_start; extern struct machdep_calls __machine_desc_end; + unsigned int i; /* * Iterate all ppc_md structures until we find the proper @@ -582,6 +590,17 @@ void probe_machine(void) */ DBG("Probing machine type ...\n"); + /* + * Check ppc_md is empty, if not we have a bug, ie, we setup an + * entry before probe_machine() which will be overwritten + */ + for (i = 0; i < (sizeof(ppc_md) / sizeof(void *)); i++) { + if (((void **)&ppc_md)[i]) { + printk(KERN_ERR "Entry %d in ppc_md non empty before" + " machine probe !\n", i); + } + } + for (machine_id = &__machine_desc_start; machine_id < &__machine_desc_end; machine_id++) { @@ -676,6 +695,8 @@ static struct notifier_block ppc_panic_block = { void __init setup_panic(void) { + if (!ppc_md.panic) + return; atomic_notifier_chain_register(&panic_notifier_list, &ppc_panic_block); } @@ -744,3 +765,169 @@ void arch_setup_pdev_archdata(struct platform_device *pdev) pdev->dev.dma_mask = &pdev->archdata.dma_mask; set_dma_ops(&pdev->dev, &dma_direct_ops); } + +static __init void print_system_info(void) +{ + pr_info("-----------------------------------------------------\n"); +#ifdef CONFIG_PPC_STD_MMU_64 + pr_info("ppc64_pft_size = 0x%llx\n", ppc64_pft_size); +#endif +#ifdef CONFIG_PPC_STD_MMU_32 + pr_info("Hash_size = 0x%lx\n", Hash_size); +#endif + pr_info("phys_mem_size = 0x%llx\n", + (unsigned long long)memblock_phys_mem_size()); + + pr_info("dcache_bsize = 0x%x\n", dcache_bsize); + pr_info("icache_bsize = 0x%x\n", icache_bsize); + if (ucache_bsize != 0) + pr_info("ucache_bsize = 0x%x\n", ucache_bsize); + + pr_info("cpu_features = 0x%016lx\n", cur_cpu_spec->cpu_features); + pr_info(" possible = 0x%016lx\n", + (unsigned long)CPU_FTRS_POSSIBLE); + pr_info(" always = 0x%016lx\n", + (unsigned long)CPU_FTRS_ALWAYS); + pr_info("cpu_user_features = 0x%08x 0x%08x\n", + cur_cpu_spec->cpu_user_features, + cur_cpu_spec->cpu_user_features2); + pr_info("mmu_features = 0x%08x\n", cur_cpu_spec->mmu_features); +#ifdef CONFIG_PPC64 + pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features); +#endif + +#ifdef CONFIG_PPC_STD_MMU_64 + if (htab_address) + pr_info("htab_address = 0x%p\n", htab_address); + if (htab_hash_mask) + pr_info("htab_hash_mask = 0x%lx\n", htab_hash_mask); +#endif +#ifdef CONFIG_PPC_STD_MMU_32 + if (Hash) + pr_info("Hash = 0x%p\n", Hash); + if (Hash_mask) + pr_info("Hash_mask = 0x%lx\n", Hash_mask); +#endif + + if (PHYSICAL_START > 0) + pr_info("physical_start = 0x%llx\n", + 
(unsigned long long)PHYSICAL_START); + pr_info("-----------------------------------------------------\n"); +} + +/* + * Called into from start_kernel this initializes memblock, which is used + * to manage page allocation until mem_init is called. + */ +void __init setup_arch(char **cmdline_p) +{ + *cmdline_p = boot_command_line; + + /* Set a half-reasonable default so udelay does something sensible */ + loops_per_jiffy = 500000000 / HZ; + + /* Unflatten the device-tree passed by prom_init or kexec */ + unflatten_device_tree(); + + /* + * Initialize cache line/block info from device-tree (on ppc64) or + * just cputable (on ppc32). + */ + initialize_cache_info(); + + /* Initialize RTAS if available. */ + rtas_initialize(); + + /* Check if we have an initrd provided via the device-tree. */ + check_for_initrd(); + + /* Probe the machine type, establish ppc_md. */ + probe_machine(); + + /* Setup panic notifier if requested by the platform. */ + setup_panic(); + + /* + * Configure ppc_md.power_save (ppc32 only, 64-bit machines do + * it from their respective probe() function. + */ + setup_power_save(); + + /* Discover standard serial ports. */ + find_legacy_serial_ports(); + + /* Register early console with the printk subsystem. */ + register_early_udbg_console(); + + /* Setup the various CPU maps based on the device-tree. */ + smp_setup_cpu_maps(); + + /* Initialize xmon. */ + xmon_setup(); + + /* Check the SMT related command line arguments (ppc64). */ + check_smt_enabled(); + + /* On BookE, setup per-core TLB data structures. */ + setup_tlb_core_data(); + + /* + * Release secondary cpus out of their spinloops at 0x60 now that + * we can map physical -> logical CPU ids. + * + * Freescale Book3e parts spin in a loop provided by firmware, + * so smp_release_cpus() does nothing for them. + */ +#ifdef CONFIG_SMP + smp_release_cpus(); +#endif + + /* Print various info about the machine that has been gathered so far. */ + print_system_info(); + + /* Reserve large chunks of memory for use by CMA for KVM. */ + kvm_cma_reserve(); + + /* + * Reserve any gigantic pages requested on the command line. + * memblock needs to have been initialized by the time this is + * called since this will reserve memory. + */ + reserve_hugetlb_gpages(); + + klp_init_thread_info(&init_thread_info); + + init_mm.start_code = (unsigned long)_stext; + init_mm.end_code = (unsigned long) _etext; + init_mm.end_data = (unsigned long) _edata; + init_mm.brk = klimit; +#ifdef CONFIG_PPC_64K_PAGES + init_mm.context.pte_frag = NULL; +#endif +#ifdef CONFIG_SPAPR_TCE_IOMMU + mm_iommu_init(&init_mm.context); +#endif + irqstack_early_init(); + exc_lvl_early_init(); + emergency_stack_init(); + + initmem_init(); + +#ifdef CONFIG_DUMMY_CONSOLE + conswitchp = &dummy_con; +#endif + if (ppc_md.setup_arch) + ppc_md.setup_arch(); + + paging_init(); + + /* Initialize the MMU context management stuff. */ + mmu_context_init(); + +#ifdef CONFIG_PPC64 + /* Interrupt code needs to be 64K-aligned. */ + if ((unsigned long)_stext & 0xffff) + panic("Kernelbase not 64K-aligned (0x%lx)!\n", + (unsigned long)_stext); +#endif +} diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h new file mode 100644 index 000000000000..cfba134b3024 --- /dev/null +++ b/arch/powerpc/kernel/setup.h @@ -0,0 +1,58 @@ +/* + * Prototypes for functions that are shared between setup_(32|64|common).c + * + * Copyright 2016 Michael Ellerman, IBM Corporation. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef __ARCH_POWERPC_KERNEL_SETUP_H +#define __ARCH_POWERPC_KERNEL_SETUP_H + +void initialize_cache_info(void); +void irqstack_early_init(void); + +#ifdef CONFIG_PPC32 +void setup_power_save(void); +#else +static inline void setup_power_save(void) { }; +#endif + +#if defined(CONFIG_PPC64) && defined(CONFIG_SMP) +void check_smt_enabled(void); +#else +static inline void check_smt_enabled(void) { }; +#endif + +#if defined(CONFIG_PPC_BOOK3E) && defined(CONFIG_SMP) +void setup_tlb_core_data(void); +#else +static inline void setup_tlb_core_data(void) { }; +#endif + +#if defined(CONFIG_PPC_BOOK3E) || defined(CONFIG_BOOKE) || defined(CONFIG_40x) +void exc_lvl_early_init(void); +#else +static inline void exc_lvl_early_init(void) { }; +#endif + +#ifdef CONFIG_PPC64 +void emergency_stack_init(void); +#else +static inline void emergency_stack_init(void) { }; +#endif + +/* + * Having this in kvm_ppc.h makes include dependencies too + * tricky to solve for setup-common.c so have it here. + */ +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +void kvm_cma_reserve(void); +#else +static inline void kvm_cma_reserve(void) { }; +#endif + +#endif /* __ARCH_POWERPC_KERNEL_SETUP_H */ diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index d544fa311757..00f57754407e 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -36,8 +36,6 @@ #include <asm/time.h> #include <asm/serial.h> #include <asm/udbg.h> -#include <asm/mmu_context.h> -#include <asm/epapr_hcalls.h> #include <asm/code-patching.h> #define DBG(fmt...) @@ -62,9 +60,7 @@ int icache_bsize; int ucache_bsize; /* - * We're called here very early in the boot. We determine the machine - * type and call the appropriate low-level setup functions. - * -- Cort <cort@fsmlabs.com> + * We're called here very early in the boot. * * Note that the kernel may be running at an address which is different * from the address that it was linked at, so we must use RELOC/PTRRELOC @@ -73,7 +69,6 @@ int ucache_bsize; notrace unsigned long __init early_init(unsigned long dt_ptr) { unsigned long offset = reloc_offset(); - struct cpu_spec *spec; /* First zero the BSS -- use memset_io, some platforms don't have * caches on yet */ @@ -84,27 +79,19 @@ notrace unsigned long __init early_init(unsigned long dt_ptr) * Identify the CPU type and fix up code sections * that depend on which cpu we have. */ - spec = identify_cpu(offset, mfspr(SPRN_PVR)); + identify_cpu(offset, mfspr(SPRN_PVR)); - do_feature_fixups(spec->cpu_features, - PTRRELOC(&__start___ftr_fixup), - PTRRELOC(&__stop___ftr_fixup)); - - do_feature_fixups(spec->mmu_features, - PTRRELOC(&__start___mmu_ftr_fixup), - PTRRELOC(&__stop___mmu_ftr_fixup)); - - do_lwsync_fixups(spec->cpu_features, - PTRRELOC(&__start___lwsync_fixup), - PTRRELOC(&__stop___lwsync_fixup)); - - do_final_fixups(); + apply_feature_fixups(); return KERNELBASE + offset; } /* + * This is run before start_kernel(), the kernel has been relocated + * and we are running with enough of the MMU enabled to have our + * proper kernel virtual addresses + * * Find out what kind of machine we're on and save any data we need * from the early boot process (devtree is copied on pmac by prom_init()). 
* This is called very early on the boot process, after a minimal @@ -123,27 +110,9 @@ notrace void __init machine_init(u64 dt_ptr) /* Do some early initialization based on the flat device tree */ early_init_devtree(__va(dt_ptr)); - epapr_paravirt_early_init(); - early_init_mmu(); - probe_machine(); - setup_kdump_trampoline(); - -#ifdef CONFIG_6xx - if (cpu_has_feature(CPU_FTR_CAN_DOZE) || - cpu_has_feature(CPU_FTR_CAN_NAP)) - ppc_md.power_save = ppc6xx_idle; -#endif - -#ifdef CONFIG_E500 - if (cpu_has_feature(CPU_FTR_CAN_DOZE) || - cpu_has_feature(CPU_FTR_CAN_NAP)) - ppc_md.power_save = e500_idle; -#endif - if (ppc_md.progress) - ppc_md.progress("id mach(): done", 0x200); } /* Checks "l2cr=xxxx" command-line option */ @@ -221,7 +190,7 @@ int __init ppc_init(void) arch_initcall(ppc_init); -static void __init irqstack_early_init(void) +void __init irqstack_early_init(void) { unsigned int i; @@ -236,7 +205,7 @@ static void __init irqstack_early_init(void) } #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) -static void __init exc_lvl_early_init(void) +void __init exc_lvl_early_init(void) { unsigned int i, hw_cpu; @@ -259,33 +228,25 @@ static void __init exc_lvl_early_init(void) #endif } } -#else -#define exc_lvl_early_init() #endif -/* Warning, IO base is not yet inited */ -void __init setup_arch(char **cmdline_p) +void __init setup_power_save(void) { - *cmdline_p = boot_command_line; - - /* so udelay does something sensible, assume <= 1000 bogomips */ - loops_per_jiffy = 500000000 / HZ; - - unflatten_device_tree(); - check_for_initrd(); - - if (ppc_md.init_early) - ppc_md.init_early(); - - find_legacy_serial_ports(); - - smp_setup_cpu_maps(); - - /* Register early console */ - register_early_udbg_console(); +#ifdef CONFIG_6xx + if (cpu_has_feature(CPU_FTR_CAN_DOZE) || + cpu_has_feature(CPU_FTR_CAN_NAP)) + ppc_md.power_save = ppc6xx_idle; +#endif - xmon_setup(); +#ifdef CONFIG_E500 + if (cpu_has_feature(CPU_FTR_CAN_DOZE) || + cpu_has_feature(CPU_FTR_CAN_NAP)) + ppc_md.power_save = e500_idle; +#endif +} +__init void initialize_cache_info(void) +{ /* * Set cache line size based on type of cpu as a default. 
* Systems with OF can look in the properties on the cpu node(s) @@ -296,32 +257,4 @@ void __init setup_arch(char **cmdline_p) ucache_bsize = 0; if (cpu_has_feature(CPU_FTR_UNIFIED_ID_CACHE)) ucache_bsize = icache_bsize = dcache_bsize; - - if (ppc_md.panic) - setup_panic(); - - init_mm.start_code = (unsigned long)_stext; - init_mm.end_code = (unsigned long) _etext; - init_mm.end_data = (unsigned long) _edata; - init_mm.brk = klimit; - - exc_lvl_early_init(); - - irqstack_early_init(); - - initmem_init(); - if ( ppc_md.progress ) ppc_md.progress("setup_arch: initmem", 0x3eab); - -#ifdef CONFIG_DUMMY_CONSOLE - conswitchp = &dummy_con; -#endif - - if (ppc_md.setup_arch) - ppc_md.setup_arch(); - if ( ppc_md.progress ) ppc_md.progress("arch: exit", 0x3eab); - - paging_init(); - - /* Initialize the MMU context management stuff */ - mmu_context_init(); } diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 5530bb55a78b..d8216aed22b7 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -35,7 +35,6 @@ #include <linux/pci.h> #include <linux/lockdep.h> #include <linux/memblock.h> -#include <linux/hugetlb.h> #include <linux/memory.h> #include <linux/nmi.h> @@ -64,12 +63,10 @@ #include <asm/xmon.h> #include <asm/udbg.h> #include <asm/kexec.h> -#include <asm/mmu_context.h> #include <asm/code-patching.h> -#include <asm/kvm_ppc.h> -#include <asm/hugetlb.h> -#include <asm/epapr_hcalls.h> #include <asm/livepatch.h> +#include <asm/opal.h> +#include <asm/cputhreads.h> #ifdef DEBUG #define DBG(fmt...) udbg_printf(fmt) @@ -100,7 +97,7 @@ int icache_bsize; int ucache_bsize; #if defined(CONFIG_PPC_BOOK3E) && defined(CONFIG_SMP) -static void setup_tlb_core_data(void) +void __init setup_tlb_core_data(void) { int cpu; @@ -133,10 +130,6 @@ static void setup_tlb_core_data(void) } } } -#else -static void setup_tlb_core_data(void) -{ -} #endif #ifdef CONFIG_SMP @@ -144,7 +137,7 @@ static void setup_tlb_core_data(void) static char *smt_enabled_cmdline; /* Look for ibm,smt-enabled OF option */ -static void check_smt_enabled(void) +void __init check_smt_enabled(void) { struct device_node *dn; const char *smt_option; @@ -193,12 +186,10 @@ static int __init early_smt_enabled(char *p) } early_param("smt-enabled", early_smt_enabled); -#else -#define check_smt_enabled() #endif /* CONFIG_SMP */ /** Fix up paca fields required for the boot cpu */ -static void fixup_boot_paca(void) +static void __init fixup_boot_paca(void) { /* The boot cpu is started */ get_paca()->cpu_start = 1; @@ -206,23 +197,50 @@ static void fixup_boot_paca(void) get_paca()->data_offset = 0; } -static void cpu_ready_for_interrupts(void) +static void __init configure_exceptions(void) { - /* Set IR and DR in PACA MSR */ - get_paca()->kernel_msr = MSR_KERNEL; - /* - * Enable AIL if supported, and we are in hypervisor mode. If we are - * not in hypervisor mode, we enable relocation-on interrupts later - * in pSeries_setup_arch() using the H_SET_MODE hcall. + * Setup the trampolines from the lowmem exception vectors + * to the kdump kernel when not using a relocatable kernel. 
*/ - if (cpu_has_feature(CPU_FTR_HVMODE) && - cpu_has_feature(CPU_FTR_ARCH_207S)) { - unsigned long lpcr = mfspr(SPRN_LPCR); - mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3); + setup_kdump_trampoline(); + + /* Under a PAPR hypervisor, we need hypercalls */ + if (firmware_has_feature(FW_FEATURE_SET_MODE)) { + /* Enable AIL if possible */ + pseries_enable_reloc_on_exc(); + + /* + * Tell the hypervisor that we want our exceptions to + * be taken in little endian mode. + * + * We don't call this for big endian as our calling convention + * makes us always enter in BE, and the call may fail under + * some circumstances with kdump. + */ +#ifdef __LITTLE_ENDIAN__ + pseries_little_endian_exceptions(); +#endif + } else { + /* Set endian mode using OPAL */ + if (firmware_has_feature(FW_FEATURE_OPAL)) + opal_configure_cores(); + + /* Enable AIL if supported, and we are in hypervisor mode */ + if (cpu_has_feature(CPU_FTR_HVMODE) && + cpu_has_feature(CPU_FTR_ARCH_207S)) { + unsigned long lpcr = mfspr(SPRN_LPCR); + mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3); + } } } +static void cpu_ready_for_interrupts(void) +{ + /* Set IR and DR in PACA MSR */ + get_paca()->kernel_msr = MSR_KERNEL; +} + /* * Early initialization entry point. This is called by head.S * with MMU translation disabled. We rely on the "feature" of @@ -270,22 +288,22 @@ void __init early_setup(unsigned long dt_ptr) */ early_init_devtree(__va(dt_ptr)); - epapr_paravirt_early_init(); - /* Now we know the logical id of our boot cpu, setup the paca. */ setup_paca(&paca[boot_cpuid]); fixup_boot_paca(); - /* Probe the machine type */ - probe_machine(); - - setup_kdump_trampoline(); - - DBG("Found, Initializing memory management...\n"); + /* + * Configure exception handlers. This include setting up trampolines + * if needed, setting exception endian mode, etc... + */ + configure_exceptions(); /* Initialize the hash table or TLB handling */ early_init_mmu(); + /* Apply all the dynamic patching */ + apply_feature_fixups(); + /* * At this point, we can let interrupts switch to virtual mode * (the MMU has been setup), so adjust the MSR in the PACA to @@ -293,16 +311,6 @@ void __init early_setup(unsigned long dt_ptr) */ cpu_ready_for_interrupts(); - /* Reserve large chunks of memory for use by CMA for KVM */ - kvm_cma_reserve(); - - /* - * Reserve any gigantic pages requested on the command line. - * memblock needs to have been initialized by the time this is - * called since this will reserve memory. - */ - reserve_hugetlb_gpages(); - DBG(" <- early_setup()\n"); #ifdef CONFIG_PPC_EARLY_DEBUG_BOOTX @@ -391,7 +399,7 @@ void smp_release_cpus(void) * cache informations about the CPU that will be used by cache flush * routines and/or provided to userland */ -static void __init initialize_cache_info(void) +void __init initialize_cache_info(void) { struct device_node *np; unsigned long num_cpus = 0; @@ -456,127 +464,11 @@ static void __init initialize_cache_info(void) } } - DBG(" <- initialize_cache_info()\n"); -} - - -/* - * Do some initial setup of the system. The parameters are those which - * were passed in from the bootloader. 
- */ -void __init setup_system(void) -{ - DBG(" -> setup_system()\n"); - - /* Apply the CPUs-specific and firmware specific fixups to kernel - * text (nop out sections not relevant to this CPU or this firmware) - */ - do_feature_fixups(cur_cpu_spec->cpu_features, - &__start___ftr_fixup, &__stop___ftr_fixup); - do_feature_fixups(cur_cpu_spec->mmu_features, - &__start___mmu_ftr_fixup, &__stop___mmu_ftr_fixup); - do_feature_fixups(powerpc_firmware_features, - &__start___fw_ftr_fixup, &__stop___fw_ftr_fixup); - do_lwsync_fixups(cur_cpu_spec->cpu_features, - &__start___lwsync_fixup, &__stop___lwsync_fixup); - do_final_fixups(); - - /* - * Unflatten the device-tree passed by prom_init or kexec - */ - unflatten_device_tree(); - - /* - * Fill the ppc64_caches & systemcfg structures with informations - * retrieved from the device-tree. - */ - initialize_cache_info(); - -#ifdef CONFIG_PPC_RTAS - /* - * Initialize RTAS if available - */ - rtas_initialize(); -#endif /* CONFIG_PPC_RTAS */ - - /* - * Check if we have an initrd provided via the device-tree - */ - check_for_initrd(); - - /* - * Do some platform specific early initializations, that includes - * setting up the hash table pointers. It also sets up some interrupt-mapping - * related options that will be used by finish_device_tree() - */ - if (ppc_md.init_early) - ppc_md.init_early(); - - /* - * We can discover serial ports now since the above did setup the - * hash table management for us, thus ioremap works. We do that early - * so that further code can be debugged - */ - find_legacy_serial_ports(); - - /* - * Register early console - */ - register_early_udbg_console(); - - /* - * Initialize xmon - */ - xmon_setup(); - - smp_setup_cpu_maps(); - check_smt_enabled(); - setup_tlb_core_data(); - - /* - * Freescale Book3e parts spin in a loop provided by firmware, - * so smp_release_cpus() does nothing for them - */ -#if defined(CONFIG_SMP) - /* Release secondary cpus out of their spinloops at 0x60 now that - * we can map physical -> logical CPU ids - */ - smp_release_cpus(); -#endif - - pr_info("Starting Linux %s %s\n", init_utsname()->machine, - init_utsname()->version); - - pr_info("-----------------------------------------------------\n"); - pr_info("ppc64_pft_size = 0x%llx\n", ppc64_pft_size); - pr_info("phys_mem_size = 0x%llx\n", memblock_phys_mem_size()); - - if (ppc64_caches.dline_size != 0x80) - pr_info("dcache_line_size = 0x%x\n", ppc64_caches.dline_size); - if (ppc64_caches.iline_size != 0x80) - pr_info("icache_line_size = 0x%x\n", ppc64_caches.iline_size); - - pr_info("cpu_features = 0x%016lx\n", cur_cpu_spec->cpu_features); - pr_info(" possible = 0x%016lx\n", CPU_FTRS_POSSIBLE); - pr_info(" always = 0x%016lx\n", CPU_FTRS_ALWAYS); - pr_info("cpu_user_features = 0x%08x 0x%08x\n", cur_cpu_spec->cpu_user_features, - cur_cpu_spec->cpu_user_features2); - pr_info("mmu_features = 0x%08x\n", cur_cpu_spec->mmu_features); - pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features); - -#ifdef CONFIG_PPC_STD_MMU_64 - if (htab_address) - pr_info("htab_address = 0x%p\n", htab_address); - - pr_info("htab_hash_mask = 0x%lx\n", htab_hash_mask); -#endif - - if (PHYSICAL_START > 0) - pr_info("physical_start = 0x%llx\n", - (unsigned long long)PHYSICAL_START); - pr_info("-----------------------------------------------------\n"); + /* For use by binfmt_elf */ + dcache_bsize = ppc64_caches.dline_size; + icache_bsize = ppc64_caches.iline_size; - DBG(" <- setup_system()\n"); + DBG(" <- initialize_cache_info()\n"); } /* This returns the limit below 
@@ -584,7 +476,7 @@ void __init setup_system(void)
  * used to allocate interrupt or emergency stacks for which our
  * exception entry path doesn't deal with being interrupted.
  */
-static u64 safe_stack_limit(void)
+static __init u64 safe_stack_limit(void)
 {
 #ifdef CONFIG_PPC_BOOK3E
 	/* Freescale BookE bolts the entire linear mapping */
@@ -600,7 +492,7 @@ static u64 safe_stack_limit(void)
 #endif
 }
 
-static void __init irqstack_early_init(void)
+void __init irqstack_early_init(void)
 {
 	u64 limit = safe_stack_limit();
 	unsigned int i;
@@ -620,7 +512,7 @@ static void __init irqstack_early_init(void)
 }
 
 #ifdef CONFIG_PPC_BOOK3E
-static void __init exc_lvl_early_init(void)
+void __init exc_lvl_early_init(void)
 {
 	unsigned int i;
 	unsigned long sp;
@@ -642,8 +534,6 @@ static void __init exc_lvl_early_init(void)
 	if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC))
 		patch_exception(0x040, exc_debug_debug_book3e);
 }
-#else
-#define exc_lvl_early_init()
 #endif
 
 /*
@@ -651,7 +541,7 @@ static void __init exc_lvl_early_init(void)
  * early in SMP boots before relocation is enabled. Exclusive emergency
  * stack for machine checks.
  */
-static void __init emergency_stack_init(void)
+void __init emergency_stack_init(void)
 {
 	u64 limit;
 	unsigned int i;
@@ -682,61 +572,6 @@ static void __init emergency_stack_init(void)
 	}
 }
 
-/*
- * Called into from start_kernel this initializes memblock, which is used
- * to manage page allocation until mem_init is called.
- */
-void __init setup_arch(char **cmdline_p)
-{
-	*cmdline_p = boot_command_line;
-
-	/*
-	 * Set cache line size based on type of cpu as a default.
-	 * Systems with OF can look in the properties on the cpu node(s)
-	 * for a possibly more accurate value.
-	 */
-	dcache_bsize = ppc64_caches.dline_size;
-	icache_bsize = ppc64_caches.iline_size;
-
-	if (ppc_md.panic)
-		setup_panic();
-
-	klp_init_thread_info(&init_thread_info);
-
-	init_mm.start_code = (unsigned long)_stext;
-	init_mm.end_code = (unsigned long) _etext;
-	init_mm.end_data = (unsigned long) _edata;
-	init_mm.brk = klimit;
-#ifdef CONFIG_PPC_64K_PAGES
-	init_mm.context.pte_frag = NULL;
-#endif
-#ifdef CONFIG_SPAPR_TCE_IOMMU
-	mm_iommu_init(&init_mm.context);
-#endif
-	irqstack_early_init();
-	exc_lvl_early_init();
-	emergency_stack_init();
-
-	initmem_init();
-
-#ifdef CONFIG_DUMMY_CONSOLE
-	conswitchp = &dummy_con;
-#endif
-
-	if (ppc_md.setup_arch)
-		ppc_md.setup_arch();
-
-	paging_init();
-
-	/* Initialize the MMU context management stuff */
-	mmu_context_init();
-
-	/* Interrupt code needs to be 64K-aligned */
-	if ((unsigned long)_stext & 0xffff)
-		panic("Kernelbase not 64K-aligned (0x%lx)!\n",
-		      (unsigned long)_stext);
-}
-
 #ifdef CONFIG_SMP
 #define PCPU_DYN_SIZE		()
 
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index bf8f34a58670..298afcf3bf2a 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -110,17 +110,11 @@ _GLOBAL(tm_reclaim)
 	std	r3, STK_PARAM(R3)(r1)
 	SAVE_NVGPRS(r1)
 
-	/* We need to setup MSR for VSX register save instructions. Here we
-	 * also clear the MSR RI since when we do the treclaim, we won't have a
-	 * valid kernel pointer for a while. We clear RI here as it avoids
-	 * adding another mtmsr closer to the treclaim. This makes the region
-	 * maked as non-recoverable wider than it needs to be but it saves on
-	 * inserting another mtmsrd later.
-	 */
+	/* We need to setup MSR for VSX register save instructions. */
 	mfmsr	r14
 	mr	r15, r14
 	ori	r15, r15, MSR_FP
-	li	r16, MSR_RI
+	li	r16, 0
 	ori	r16, r16, MSR_EE /* IRQs hard off */
 	andc	r15, r15, r16
 	oris	r15, r15, MSR_VEC@h
@@ -176,7 +170,17 @@ dont_backup_fp:
 1:	tdeqi	r6, 0
 	EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0
 
-	/* The moment we treclaim, ALL of our GPRs will switch
+	/* Clear MSR RI since we are about to change r1, EE is already off. */
+	li	r4, 0
+	mtmsrd	r4, 1
+
+	/*
+	 * BE CAREFUL HERE:
+	 * At this point we can't take an SLB miss since we have MSR_RI
+	 * off. Load only to/from the stack/paca which are in SLB bolted regions
+	 * until we turn MSR RI back on.
+	 *
+	 * The moment we treclaim, ALL of our GPRs will switch
 	 * to user register state. (FPRs, CCR etc. also!)
 	 * Use an sprg and a tm_scratch in the PACA to shuffle.
 	 */
@@ -197,6 +201,11 @@ dont_backup_fp:
 
 	/* Store the PPR in r11 and reset to decent value */
 	std	r11, GPR11(r1)			/* Temporary stash */
+
+	/* Reset MSR RI so we can take SLB faults again */
+	li	r11, MSR_RI
+	mtmsrd	r11, 1
+
 	mfspr	r11, SPRN_PPR
 	HMT_MEDIUM
 
@@ -329,8 +338,6 @@ _GLOBAL(__tm_recheckpoint)
 	 */
 	subi	r7, r7, STACK_FRAME_OVERHEAD
 
-	SET_SCRATCH0(r1)
-
 	mfmsr	r6
 	/* R4 = original MSR to indicate whether thread used FP/Vector etc. */
 
@@ -397,11 +404,6 @@ restore_gprs:
 	ld	r5, THREAD_TM_DSCR(r3)
 	ld	r6, THREAD_TM_PPR(r3)
 
-	/* Clear the MSR RI since we are about to change R1. EE is already off
-	 */
-	li	r4, 0
-	mtmsrd	r4, 1
-
 	REST_GPR(0, r7)				/* GPR0 */
 	REST_2GPRS(2, r7)			/* GPR2-3 */
 	REST_GPR(4, r7)				/* GPR4 */
@@ -439,10 +441,34 @@ restore_gprs:
 
 	ld	r6, _CCR(r7)
 	mtcr	r6
-	REST_GPR(1, r7)				/* GPR1 */
-	REST_GPR(5, r7)				/* GPR5-7 */
 	REST_GPR(6, r7)
-	ld	r7, GPR7(r7)
+
+	/*
+	 * Store r1 and r5 on the stack so that we can access them
+	 * after we clear MSR RI.
+	 */
+
+	REST_GPR(5, r7)
+	std	r5, -8(r1)
+	ld	r5, GPR1(r7)
+	std	r5, -16(r1)
+
+	REST_GPR(7, r7)
+
+	/* Clear MSR RI since we are about to change r1. EE is already off */
+	li	r5, 0
+	mtmsrd	r5, 1
+
+	/*
+	 * BE CAREFUL HERE:
+	 * At this point we can't take an SLB miss since we have MSR_RI
+	 * off. Load only to/from the stack/paca which are in SLB bolted regions
+	 * until we turn MSR RI back on.
+	 */
+
+	SET_SCRATCH0(r1)
+	ld	r5, -8(r1)
+	ld	r1, -16(r1)
 
 	/* Commit register state as checkpointed state: */
 	TRECHKPT
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 1c2e7a343bf5..616a6d854638 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -70,10 +70,11 @@ _GLOBAL(load_up_altivec)
 	MTMSRD(r5)			/* enable use of AltiVec now */
 	isync
 
-	/* Hack: if we get an altivec unavailable trap with VRSAVE
-	 * set to all zeros, we assume this is a broken application
-	 * that fails to set it properly, and thus we switch it to
-	 * all 1's
+	/*
+	 * While userspace in general ignores VRSAVE, glibc uses it as a boolean
+	 * to optimise userspace context save/restore. Whenever we take an
+	 * altivec unavailable exception we must set VRSAVE to something non
+	 * zero. Set it to all 1s. See also the programming note in the ISA.
 	 */
 	mfspr	r4,SPRN_VRSAVE
 	cmpwi	0,r4,0
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 2dd91f79de05..b5fba689fca6 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -165,7 +165,7 @@ SECTIONS
 	. = ALIGN(8);
 	.dynsym : AT(ADDR(.dynsym) - LOAD_OFFSET)
 	{
-#ifdef CONFIG_RELOCATABLE_PPC32
+#ifdef CONFIG_PPC32
 		__dynamic_symtab = .;
 #endif
 		*(.dynsym)