diff options
Diffstat (limited to 'arch/powerpc/kernel')
74 files changed, 3857 insertions, 3629 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 1925341dbb9c..811f441a125f 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -15,7 +15,7 @@ CFLAGS_btext.o += -fPIC endif CFLAGS_cputable.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) -CFLAGS_init.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) +CFLAGS_prom_init.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) CFLAGS_prom.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) @@ -58,8 +58,6 @@ obj-$(CONFIG_PPC_RTAS) += rtas.o rtas-rtc.o $(rtaspci-y-y) obj-$(CONFIG_PPC_RTAS_DAEMON) += rtasd.o obj-$(CONFIG_RTAS_FLASH) += rtas_flash.o obj-$(CONFIG_RTAS_PROC) += rtas-proc.o -obj-$(CONFIG_IBMVIO) += vio.o -obj-$(CONFIG_IBMEBUS) += ibmebus.o obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \ eeh_driver.o eeh_event.o eeh_sysfs.o obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o @@ -98,6 +96,7 @@ obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_BOOTX_TEXT) += btext.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_KPROBES) += kprobes.o +obj-$(CONFIG_OPTPROBES) += optprobes.o optprobes_head.o obj-$(CONFIG_UPROBES) += uprobes.o obj-$(CONFIG_PPC_UDBG_16550) += legacy_serial.o udbg_16550.o obj-$(CONFIG_STACKTRACE) += stacktrace.o @@ -107,8 +106,13 @@ pci64-$(CONFIG_PPC64) += pci_dn.o pci-hotplug.o isa-bridge.o obj-$(CONFIG_PCI) += pci_$(BITS).o $(pci64-y) \ pci-common.o pci_of_scan.o obj-$(CONFIG_PCI_MSI) += msi.o -obj-$(CONFIG_KEXEC) += machine_kexec.o crash.o \ +obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o crash.o \ machine_kexec_$(BITS).o +obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file_$(BITS).o kexec_elf_$(BITS).o +ifeq ($(CONFIG_HAVE_IMA_KEXEC)$(CONFIG_IMA),yy) +obj-y += ima_kexec.o +endif + obj-$(CONFIG_AUDIT) += audit.o obj64-$(CONFIG_AUDIT) += compat_audit.o @@ -128,7 +132,7 @@ obj64-$(CONFIG_PPC_TRANSACTIONAL_MEM) += tm.o obj-$(CONFIG_PPC64) += $(obj64-y) obj-$(CONFIG_PPC32) += $(obj32-y) -ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC),) +ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC_CORE),) obj-y += ppc_save_regs.o endif diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index 033f3385fa49..ec7a8b099dd9 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -20,7 +20,7 @@ #include <linux/kernel.h> #include <linux/mm.h> #include <asm/processor.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/cache.h> #include <asm/cputable.h> #include <asm/emulated_ops.h> @@ -204,7 +204,7 @@ static int emulate_dcbz(struct pt_regs *regs, unsigned char __user *addr) int i, size; #ifdef __powerpc64__ - size = ppc64_caches.dline_size; + size = ppc64_caches.l1d.block_size; #else size = L1_CACHE_BYTES; #endif @@ -807,14 +807,25 @@ int fix_alignment(struct pt_regs *regs) nb = aligninfo[instr].len; flags = aligninfo[instr].flags; - /* ldbrx/stdbrx overlap lfs/stfs in the DSISR unfortunately */ - if (IS_XFORM(instruction) && ((instruction >> 1) & 0x3ff) == 532) { - nb = 8; - flags = LD+SW; - } else if (IS_XFORM(instruction) && - ((instruction >> 1) & 0x3ff) == 660) { - nb = 8; - flags = ST+SW; + /* + * Handle some cases which give overlaps in the DSISR values. + */ + if (IS_XFORM(instruction)) { + switch (get_xop(instruction)) { + case 532: /* ldbrx */ + nb = 8; + flags = LD+SW; + break; + case 660: /* stdbrx */ + nb = 8; + flags = ST+SW; + break; + case 20: /* lwarx */ + case 84: /* ldarx */ + case 116: /* lharx */ + case 276: /* lqarx */ + return 0; /* not emulated ever */ + } } /* Byteswap little endian loads and stores */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index caec7bf3b99a..4367e7df51a1 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -72,205 +72,190 @@ #include <asm/fixmap.h> #endif +#define STACK_PT_REGS_OFFSET(sym, val) \ + DEFINE(sym, STACK_FRAME_OVERHEAD + offsetof(struct pt_regs, val)) + int main(void) { - DEFINE(THREAD, offsetof(struct task_struct, thread)); - DEFINE(MM, offsetof(struct task_struct, mm)); - DEFINE(MMCONTEXTID, offsetof(struct mm_struct, context.id)); + OFFSET(THREAD, task_struct, thread); + OFFSET(MM, task_struct, mm); + OFFSET(MMCONTEXTID, mm_struct, context.id); #ifdef CONFIG_PPC64 DEFINE(SIGSEGV, SIGSEGV); DEFINE(NMI_MASK, NMI_MASK); - DEFINE(TASKTHREADPPR, offsetof(struct task_struct, thread.ppr)); + OFFSET(TASKTHREADPPR, task_struct, thread.ppr); #else - DEFINE(THREAD_INFO, offsetof(struct task_struct, stack)); + OFFSET(THREAD_INFO, task_struct, stack); DEFINE(THREAD_INFO_GAP, _ALIGN_UP(sizeof(struct thread_info), 16)); - DEFINE(KSP_LIMIT, offsetof(struct thread_struct, ksp_limit)); + OFFSET(KSP_LIMIT, thread_struct, ksp_limit); #endif /* CONFIG_PPC64 */ #ifdef CONFIG_LIVEPATCH - DEFINE(TI_livepatch_sp, offsetof(struct thread_info, livepatch_sp)); + OFFSET(TI_livepatch_sp, thread_info, livepatch_sp); #endif - DEFINE(KSP, offsetof(struct thread_struct, ksp)); - DEFINE(PT_REGS, offsetof(struct thread_struct, regs)); + OFFSET(KSP, thread_struct, ksp); + OFFSET(PT_REGS, thread_struct, regs); #ifdef CONFIG_BOOKE - DEFINE(THREAD_NORMSAVES, offsetof(struct thread_struct, normsave[0])); + OFFSET(THREAD_NORMSAVES, thread_struct, normsave[0]); #endif - DEFINE(THREAD_FPEXC_MODE, offsetof(struct thread_struct, fpexc_mode)); - DEFINE(THREAD_FPSTATE, offsetof(struct thread_struct, fp_state)); - DEFINE(THREAD_FPSAVEAREA, offsetof(struct thread_struct, fp_save_area)); - DEFINE(FPSTATE_FPSCR, offsetof(struct thread_fp_state, fpscr)); - DEFINE(THREAD_LOAD_FP, offsetof(struct thread_struct, load_fp)); + OFFSET(THREAD_FPEXC_MODE, thread_struct, fpexc_mode); + OFFSET(THREAD_FPSTATE, thread_struct, fp_state); + OFFSET(THREAD_FPSAVEAREA, thread_struct, fp_save_area); + OFFSET(FPSTATE_FPSCR, thread_fp_state, fpscr); + OFFSET(THREAD_LOAD_FP, thread_struct, load_fp); #ifdef CONFIG_ALTIVEC - DEFINE(THREAD_VRSTATE, offsetof(struct thread_struct, vr_state)); - DEFINE(THREAD_VRSAVEAREA, offsetof(struct thread_struct, vr_save_area)); - DEFINE(THREAD_VRSAVE, offsetof(struct thread_struct, vrsave)); - DEFINE(THREAD_USED_VR, offsetof(struct thread_struct, used_vr)); - DEFINE(VRSTATE_VSCR, offsetof(struct thread_vr_state, vscr)); - DEFINE(THREAD_LOAD_VEC, offsetof(struct thread_struct, load_vec)); + OFFSET(THREAD_VRSTATE, thread_struct, vr_state); + OFFSET(THREAD_VRSAVEAREA, thread_struct, vr_save_area); + OFFSET(THREAD_VRSAVE, thread_struct, vrsave); + OFFSET(THREAD_USED_VR, thread_struct, used_vr); + OFFSET(VRSTATE_VSCR, thread_vr_state, vscr); + OFFSET(THREAD_LOAD_VEC, thread_struct, load_vec); #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX - DEFINE(THREAD_USED_VSR, offsetof(struct thread_struct, used_vsr)); + OFFSET(THREAD_USED_VSR, thread_struct, used_vsr); #endif /* CONFIG_VSX */ #ifdef CONFIG_PPC64 - DEFINE(KSP_VSID, offsetof(struct thread_struct, ksp_vsid)); + OFFSET(KSP_VSID, thread_struct, ksp_vsid); #else /* CONFIG_PPC64 */ - DEFINE(PGDIR, offsetof(struct thread_struct, pgdir)); + OFFSET(PGDIR, thread_struct, pgdir); #ifdef CONFIG_SPE - DEFINE(THREAD_EVR0, offsetof(struct thread_struct, evr[0])); - DEFINE(THREAD_ACC, offsetof(struct thread_struct, acc)); - DEFINE(THREAD_SPEFSCR, offsetof(struct thread_struct, spefscr)); - DEFINE(THREAD_USED_SPE, offsetof(struct thread_struct, used_spe)); + OFFSET(THREAD_EVR0, thread_struct, evr[0]); + OFFSET(THREAD_ACC, thread_struct, acc); + OFFSET(THREAD_SPEFSCR, thread_struct, spefscr); + OFFSET(THREAD_USED_SPE, thread_struct, used_spe); #endif /* CONFIG_SPE */ #endif /* CONFIG_PPC64 */ #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) - DEFINE(THREAD_DBCR0, offsetof(struct thread_struct, debug.dbcr0)); + OFFSET(THREAD_DBCR0, thread_struct, debug.dbcr0); #endif #ifdef CONFIG_KVM_BOOK3S_32_HANDLER - DEFINE(THREAD_KVM_SVCPU, offsetof(struct thread_struct, kvm_shadow_vcpu)); + OFFSET(THREAD_KVM_SVCPU, thread_struct, kvm_shadow_vcpu); #endif #if defined(CONFIG_KVM) && defined(CONFIG_BOOKE) - DEFINE(THREAD_KVM_VCPU, offsetof(struct thread_struct, kvm_vcpu)); + OFFSET(THREAD_KVM_VCPU, thread_struct, kvm_vcpu); #endif #ifdef CONFIG_PPC_TRANSACTIONAL_MEM - DEFINE(PACATMSCRATCH, offsetof(struct paca_struct, tm_scratch)); - DEFINE(THREAD_TM_TFHAR, offsetof(struct thread_struct, tm_tfhar)); - DEFINE(THREAD_TM_TEXASR, offsetof(struct thread_struct, tm_texasr)); - DEFINE(THREAD_TM_TFIAR, offsetof(struct thread_struct, tm_tfiar)); - DEFINE(THREAD_TM_TAR, offsetof(struct thread_struct, tm_tar)); - DEFINE(THREAD_TM_PPR, offsetof(struct thread_struct, tm_ppr)); - DEFINE(THREAD_TM_DSCR, offsetof(struct thread_struct, tm_dscr)); - DEFINE(PT_CKPT_REGS, offsetof(struct thread_struct, ckpt_regs)); - DEFINE(THREAD_CKVRSTATE, offsetof(struct thread_struct, - ckvr_state)); - DEFINE(THREAD_CKVRSAVE, offsetof(struct thread_struct, - ckvrsave)); - DEFINE(THREAD_CKFPSTATE, offsetof(struct thread_struct, - ckfp_state)); + OFFSET(PACATMSCRATCH, paca_struct, tm_scratch); + OFFSET(THREAD_TM_TFHAR, thread_struct, tm_tfhar); + OFFSET(THREAD_TM_TEXASR, thread_struct, tm_texasr); + OFFSET(THREAD_TM_TFIAR, thread_struct, tm_tfiar); + OFFSET(THREAD_TM_TAR, thread_struct, tm_tar); + OFFSET(THREAD_TM_PPR, thread_struct, tm_ppr); + OFFSET(THREAD_TM_DSCR, thread_struct, tm_dscr); + OFFSET(PT_CKPT_REGS, thread_struct, ckpt_regs); + OFFSET(THREAD_CKVRSTATE, thread_struct, ckvr_state); + OFFSET(THREAD_CKVRSAVE, thread_struct, ckvrsave); + OFFSET(THREAD_CKFPSTATE, thread_struct, ckfp_state); /* Local pt_regs on stack for Transactional Memory funcs. */ DEFINE(TM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ - DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); - DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags)); - DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); - DEFINE(TI_TASK, offsetof(struct thread_info, task)); - DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); + OFFSET(TI_FLAGS, thread_info, flags); + OFFSET(TI_LOCAL_FLAGS, thread_info, local_flags); + OFFSET(TI_PREEMPT, thread_info, preempt_count); + OFFSET(TI_TASK, thread_info, task); + OFFSET(TI_CPU, thread_info, cpu); #ifdef CONFIG_PPC64 - DEFINE(DCACHEL1LINESIZE, offsetof(struct ppc64_caches, dline_size)); - DEFINE(DCACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_dline_size)); - DEFINE(DCACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, dlines_per_page)); - DEFINE(ICACHEL1LINESIZE, offsetof(struct ppc64_caches, iline_size)); - DEFINE(ICACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_iline_size)); - DEFINE(ICACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, ilines_per_page)); + OFFSET(DCACHEL1BLOCKSIZE, ppc64_caches, l1d.block_size); + OFFSET(DCACHEL1LOGBLOCKSIZE, ppc64_caches, l1d.log_block_size); + OFFSET(DCACHEL1BLOCKSPERPAGE, ppc64_caches, l1d.blocks_per_page); + OFFSET(ICACHEL1BLOCKSIZE, ppc64_caches, l1i.block_size); + OFFSET(ICACHEL1LOGBLOCKSIZE, ppc64_caches, l1i.log_block_size); + OFFSET(ICACHEL1BLOCKSPERPAGE, ppc64_caches, l1i.blocks_per_page); /* paca */ DEFINE(PACA_SIZE, sizeof(struct paca_struct)); - DEFINE(PACAPACAINDEX, offsetof(struct paca_struct, paca_index)); - DEFINE(PACAPROCSTART, offsetof(struct paca_struct, cpu_start)); - DEFINE(PACAKSAVE, offsetof(struct paca_struct, kstack)); - DEFINE(PACACURRENT, offsetof(struct paca_struct, __current)); - DEFINE(PACASAVEDMSR, offsetof(struct paca_struct, saved_msr)); - DEFINE(PACASTABRR, offsetof(struct paca_struct, stab_rr)); - DEFINE(PACAR1, offsetof(struct paca_struct, saved_r1)); - DEFINE(PACATOC, offsetof(struct paca_struct, kernel_toc)); - DEFINE(PACAKBASE, offsetof(struct paca_struct, kernelbase)); - DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr)); - DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled)); - DEFINE(PACAIRQHAPPENED, offsetof(struct paca_struct, irq_happened)); + OFFSET(PACAPACAINDEX, paca_struct, paca_index); + OFFSET(PACAPROCSTART, paca_struct, cpu_start); + OFFSET(PACAKSAVE, paca_struct, kstack); + OFFSET(PACACURRENT, paca_struct, __current); + OFFSET(PACASAVEDMSR, paca_struct, saved_msr); + OFFSET(PACASTABRR, paca_struct, stab_rr); + OFFSET(PACAR1, paca_struct, saved_r1); + OFFSET(PACATOC, paca_struct, kernel_toc); + OFFSET(PACAKBASE, paca_struct, kernelbase); + OFFSET(PACAKMSR, paca_struct, kernel_msr); + OFFSET(PACASOFTIRQEN, paca_struct, soft_enabled); + OFFSET(PACAIRQHAPPENED, paca_struct, irq_happened); #ifdef CONFIG_PPC_BOOK3S - DEFINE(PACACONTEXTID, offsetof(struct paca_struct, mm_ctx_id)); + OFFSET(PACACONTEXTID, paca_struct, mm_ctx_id); #ifdef CONFIG_PPC_MM_SLICES - DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct, - mm_ctx_low_slices_psize)); - DEFINE(PACAHIGHSLICEPSIZE, offsetof(struct paca_struct, - mm_ctx_high_slices_psize)); + OFFSET(PACALOWSLICESPSIZE, paca_struct, mm_ctx_low_slices_psize); + OFFSET(PACAHIGHSLICEPSIZE, paca_struct, mm_ctx_high_slices_psize); DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def)); #endif /* CONFIG_PPC_MM_SLICES */ #endif #ifdef CONFIG_PPC_BOOK3E - DEFINE(PACAPGD, offsetof(struct paca_struct, pgd)); - DEFINE(PACA_KERNELPGD, offsetof(struct paca_struct, kernel_pgd)); - DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen)); - DEFINE(PACA_EXTLB, offsetof(struct paca_struct, extlb)); - DEFINE(PACA_EXMC, offsetof(struct paca_struct, exmc)); - DEFINE(PACA_EXCRIT, offsetof(struct paca_struct, excrit)); - DEFINE(PACA_EXDBG, offsetof(struct paca_struct, exdbg)); - DEFINE(PACA_MC_STACK, offsetof(struct paca_struct, mc_kstack)); - DEFINE(PACA_CRIT_STACK, offsetof(struct paca_struct, crit_kstack)); - DEFINE(PACA_DBG_STACK, offsetof(struct paca_struct, dbg_kstack)); - DEFINE(PACA_TCD_PTR, offsetof(struct paca_struct, tcd_ptr)); - - DEFINE(TCD_ESEL_NEXT, - offsetof(struct tlb_core_data, esel_next)); - DEFINE(TCD_ESEL_MAX, - offsetof(struct tlb_core_data, esel_max)); - DEFINE(TCD_ESEL_FIRST, - offsetof(struct tlb_core_data, esel_first)); + OFFSET(PACAPGD, paca_struct, pgd); + OFFSET(PACA_KERNELPGD, paca_struct, kernel_pgd); + OFFSET(PACA_EXGEN, paca_struct, exgen); + OFFSET(PACA_EXTLB, paca_struct, extlb); + OFFSET(PACA_EXMC, paca_struct, exmc); + OFFSET(PACA_EXCRIT, paca_struct, excrit); + OFFSET(PACA_EXDBG, paca_struct, exdbg); + OFFSET(PACA_MC_STACK, paca_struct, mc_kstack); + OFFSET(PACA_CRIT_STACK, paca_struct, crit_kstack); + OFFSET(PACA_DBG_STACK, paca_struct, dbg_kstack); + OFFSET(PACA_TCD_PTR, paca_struct, tcd_ptr); + + OFFSET(TCD_ESEL_NEXT, tlb_core_data, esel_next); + OFFSET(TCD_ESEL_MAX, tlb_core_data, esel_max); + OFFSET(TCD_ESEL_FIRST, tlb_core_data, esel_first); #endif /* CONFIG_PPC_BOOK3E */ #ifdef CONFIG_PPC_STD_MMU_64 - DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache)); - DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr)); - DEFINE(PACAVMALLOCSLLP, offsetof(struct paca_struct, vmalloc_sllp)); + OFFSET(PACASLBCACHE, paca_struct, slb_cache); + OFFSET(PACASLBCACHEPTR, paca_struct, slb_cache_ptr); + OFFSET(PACAVMALLOCSLLP, paca_struct, vmalloc_sllp); #ifdef CONFIG_PPC_MM_SLICES - DEFINE(MMUPSIZESLLP, offsetof(struct mmu_psize_def, sllp)); + OFFSET(MMUPSIZESLLP, mmu_psize_def, sllp); #else - DEFINE(PACACONTEXTSLLP, offsetof(struct paca_struct, mm_ctx_sllp)); + OFFSET(PACACONTEXTSLLP, paca_struct, mm_ctx_sllp); #endif /* CONFIG_PPC_MM_SLICES */ - DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen)); - DEFINE(PACA_EXMC, offsetof(struct paca_struct, exmc)); - DEFINE(PACA_EXSLB, offsetof(struct paca_struct, exslb)); - DEFINE(PACALPPACAPTR, offsetof(struct paca_struct, lppaca_ptr)); - DEFINE(PACA_SLBSHADOWPTR, offsetof(struct paca_struct, slb_shadow_ptr)); - DEFINE(SLBSHADOW_STACKVSID, - offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid)); - DEFINE(SLBSHADOW_STACKESID, - offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid)); - DEFINE(SLBSHADOW_SAVEAREA, offsetof(struct slb_shadow, save_area)); - DEFINE(LPPACA_PMCINUSE, offsetof(struct lppaca, pmcregs_in_use)); - DEFINE(LPPACA_DTLIDX, offsetof(struct lppaca, dtl_idx)); - DEFINE(LPPACA_YIELDCOUNT, offsetof(struct lppaca, yield_count)); - DEFINE(PACA_DTL_RIDX, offsetof(struct paca_struct, dtl_ridx)); + OFFSET(PACA_EXGEN, paca_struct, exgen); + OFFSET(PACA_EXMC, paca_struct, exmc); + OFFSET(PACA_EXSLB, paca_struct, exslb); + OFFSET(PACALPPACAPTR, paca_struct, lppaca_ptr); + OFFSET(PACA_SLBSHADOWPTR, paca_struct, slb_shadow_ptr); + OFFSET(SLBSHADOW_STACKVSID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid); + OFFSET(SLBSHADOW_STACKESID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid); + OFFSET(SLBSHADOW_SAVEAREA, slb_shadow, save_area); + OFFSET(LPPACA_PMCINUSE, lppaca, pmcregs_in_use); + OFFSET(LPPACA_DTLIDX, lppaca, dtl_idx); + OFFSET(LPPACA_YIELDCOUNT, lppaca, yield_count); + OFFSET(PACA_DTL_RIDX, paca_struct, dtl_ridx); #endif /* CONFIG_PPC_STD_MMU_64 */ - DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp)); + OFFSET(PACAEMERGSP, paca_struct, emergency_sp); #ifdef CONFIG_PPC_BOOK3S_64 - DEFINE(PACAMCEMERGSP, offsetof(struct paca_struct, mc_emergency_sp)); - DEFINE(PACA_IN_MCE, offsetof(struct paca_struct, in_mce)); -#endif - DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id)); - DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state)); - DEFINE(PACA_DSCR_DEFAULT, offsetof(struct paca_struct, dscr_default)); - DEFINE(ACCOUNT_STARTTIME, - offsetof(struct paca_struct, accounting.starttime)); - DEFINE(ACCOUNT_STARTTIME_USER, - offsetof(struct paca_struct, accounting.starttime_user)); - DEFINE(ACCOUNT_USER_TIME, - offsetof(struct paca_struct, accounting.user_time)); - DEFINE(ACCOUNT_SYSTEM_TIME, - offsetof(struct paca_struct, accounting.system_time)); - DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save)); - DEFINE(PACA_NAPSTATELOST, offsetof(struct paca_struct, nap_state_lost)); - DEFINE(PACA_SPRG_VDSO, offsetof(struct paca_struct, sprg_vdso)); + OFFSET(PACAMCEMERGSP, paca_struct, mc_emergency_sp); + OFFSET(PACA_IN_MCE, paca_struct, in_mce); +#endif + OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id); + OFFSET(PACAKEXECSTATE, paca_struct, kexec_state); + OFFSET(PACA_DSCR_DEFAULT, paca_struct, dscr_default); + OFFSET(ACCOUNT_STARTTIME, paca_struct, accounting.starttime); + OFFSET(ACCOUNT_STARTTIME_USER, paca_struct, accounting.starttime_user); + OFFSET(ACCOUNT_USER_TIME, paca_struct, accounting.utime); + OFFSET(ACCOUNT_SYSTEM_TIME, paca_struct, accounting.stime); + OFFSET(PACA_TRAP_SAVE, paca_struct, trap_save); + OFFSET(PACA_NAPSTATELOST, paca_struct, nap_state_lost); + OFFSET(PACA_SPRG_VDSO, paca_struct, sprg_vdso); #else /* CONFIG_PPC64 */ #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - DEFINE(ACCOUNT_STARTTIME, - offsetof(struct thread_info, accounting.starttime)); - DEFINE(ACCOUNT_STARTTIME_USER, - offsetof(struct thread_info, accounting.starttime_user)); - DEFINE(ACCOUNT_USER_TIME, - offsetof(struct thread_info, accounting.user_time)); - DEFINE(ACCOUNT_SYSTEM_TIME, - offsetof(struct thread_info, accounting.system_time)); + OFFSET(ACCOUNT_STARTTIME, thread_info, accounting.starttime); + OFFSET(ACCOUNT_STARTTIME_USER, thread_info, accounting.starttime_user); + OFFSET(ACCOUNT_USER_TIME, thread_info, accounting.utime); + OFFSET(ACCOUNT_SYSTEM_TIME, thread_info, accounting.stime); #endif #endif /* CONFIG_PPC64 */ /* RTAS */ - DEFINE(RTASBASE, offsetof(struct rtas_t, base)); - DEFINE(RTASENTRY, offsetof(struct rtas_t, entry)); + OFFSET(RTASBASE, rtas_t, base); + OFFSET(RTASENTRY, rtas_t, entry); /* Interrupt register frame */ DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE); @@ -280,38 +265,38 @@ int main(void) DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); #endif /* CONFIG_PPC64 */ - DEFINE(GPR0, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[0])); - DEFINE(GPR1, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[1])); - DEFINE(GPR2, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[2])); - DEFINE(GPR3, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[3])); - DEFINE(GPR4, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[4])); - DEFINE(GPR5, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[5])); - DEFINE(GPR6, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[6])); - DEFINE(GPR7, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[7])); - DEFINE(GPR8, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[8])); - DEFINE(GPR9, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[9])); - DEFINE(GPR10, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[10])); - DEFINE(GPR11, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[11])); - DEFINE(GPR12, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[12])); - DEFINE(GPR13, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[13])); + STACK_PT_REGS_OFFSET(GPR0, gpr[0]); + STACK_PT_REGS_OFFSET(GPR1, gpr[1]); + STACK_PT_REGS_OFFSET(GPR2, gpr[2]); + STACK_PT_REGS_OFFSET(GPR3, gpr[3]); + STACK_PT_REGS_OFFSET(GPR4, gpr[4]); + STACK_PT_REGS_OFFSET(GPR5, gpr[5]); + STACK_PT_REGS_OFFSET(GPR6, gpr[6]); + STACK_PT_REGS_OFFSET(GPR7, gpr[7]); + STACK_PT_REGS_OFFSET(GPR8, gpr[8]); + STACK_PT_REGS_OFFSET(GPR9, gpr[9]); + STACK_PT_REGS_OFFSET(GPR10, gpr[10]); + STACK_PT_REGS_OFFSET(GPR11, gpr[11]); + STACK_PT_REGS_OFFSET(GPR12, gpr[12]); + STACK_PT_REGS_OFFSET(GPR13, gpr[13]); #ifndef CONFIG_PPC64 - DEFINE(GPR14, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[14])); + STACK_PT_REGS_OFFSET(GPR14, gpr[14]); #endif /* CONFIG_PPC64 */ /* * Note: these symbols include _ because they overlap with special * register names */ - DEFINE(_NIP, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, nip)); - DEFINE(_MSR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, msr)); - DEFINE(_CTR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, ctr)); - DEFINE(_LINK, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, link)); - DEFINE(_CCR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, ccr)); - DEFINE(_XER, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, xer)); - DEFINE(_DAR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dar)); - DEFINE(_DSISR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dsisr)); - DEFINE(ORIG_GPR3, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, orig_gpr3)); - DEFINE(RESULT, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, result)); - DEFINE(_TRAP, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, trap)); + STACK_PT_REGS_OFFSET(_NIP, nip); + STACK_PT_REGS_OFFSET(_MSR, msr); + STACK_PT_REGS_OFFSET(_CTR, ctr); + STACK_PT_REGS_OFFSET(_LINK, link); + STACK_PT_REGS_OFFSET(_CCR, ccr); + STACK_PT_REGS_OFFSET(_XER, xer); + STACK_PT_REGS_OFFSET(_DAR, dar); + STACK_PT_REGS_OFFSET(_DSISR, dsisr); + STACK_PT_REGS_OFFSET(ORIG_GPR3, orig_gpr3); + STACK_PT_REGS_OFFSET(RESULT, result); + STACK_PT_REGS_OFFSET(_TRAP, trap); #ifndef CONFIG_PPC64 /* * The PowerPC 400-class & Book-E processors have neither the DAR @@ -319,10 +304,10 @@ int main(void) * DEAR and ESR SPRs for such processors. For critical interrupts * we use them to hold SRR0 and SRR1. */ - DEFINE(_DEAR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dar)); - DEFINE(_ESR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dsisr)); + STACK_PT_REGS_OFFSET(_DEAR, dar); + STACK_PT_REGS_OFFSET(_ESR, dsisr); #else /* CONFIG_PPC64 */ - DEFINE(SOFTE, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, softe)); + STACK_PT_REGS_OFFSET(SOFTE, softe); /* These _only_ to be used with {PROM,RTAS}_FRAME_SIZE!!! */ DEFINE(_SRR0, STACK_FRAME_OVERHEAD+sizeof(struct pt_regs)); @@ -351,17 +336,17 @@ int main(void) #endif #ifndef CONFIG_PPC64 - DEFINE(MM_PGD, offsetof(struct mm_struct, pgd)); + OFFSET(MM_PGD, mm_struct, pgd); #endif /* ! CONFIG_PPC64 */ /* About the CPU features table */ - DEFINE(CPU_SPEC_FEATURES, offsetof(struct cpu_spec, cpu_features)); - DEFINE(CPU_SPEC_SETUP, offsetof(struct cpu_spec, cpu_setup)); - DEFINE(CPU_SPEC_RESTORE, offsetof(struct cpu_spec, cpu_restore)); + OFFSET(CPU_SPEC_FEATURES, cpu_spec, cpu_features); + OFFSET(CPU_SPEC_SETUP, cpu_spec, cpu_setup); + OFFSET(CPU_SPEC_RESTORE, cpu_spec, cpu_restore); - DEFINE(pbe_address, offsetof(struct pbe, address)); - DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address)); - DEFINE(pbe_next, offsetof(struct pbe, next)); + OFFSET(pbe_address, pbe, address); + OFFSET(pbe_orig_address, pbe, orig_address); + OFFSET(pbe_next, pbe, next); #ifndef CONFIG_PPC64 DEFINE(TASK_SIZE, TASK_SIZE); @@ -369,40 +354,40 @@ int main(void) #endif /* ! CONFIG_PPC64 */ /* datapage offsets for use by vdso */ - DEFINE(CFG_TB_ORIG_STAMP, offsetof(struct vdso_data, tb_orig_stamp)); - DEFINE(CFG_TB_TICKS_PER_SEC, offsetof(struct vdso_data, tb_ticks_per_sec)); - DEFINE(CFG_TB_TO_XS, offsetof(struct vdso_data, tb_to_xs)); - DEFINE(CFG_TB_UPDATE_COUNT, offsetof(struct vdso_data, tb_update_count)); - DEFINE(CFG_TZ_MINUTEWEST, offsetof(struct vdso_data, tz_minuteswest)); - DEFINE(CFG_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime)); - DEFINE(CFG_SYSCALL_MAP32, offsetof(struct vdso_data, syscall_map_32)); - DEFINE(WTOM_CLOCK_SEC, offsetof(struct vdso_data, wtom_clock_sec)); - DEFINE(WTOM_CLOCK_NSEC, offsetof(struct vdso_data, wtom_clock_nsec)); - DEFINE(STAMP_XTIME, offsetof(struct vdso_data, stamp_xtime)); - DEFINE(STAMP_SEC_FRAC, offsetof(struct vdso_data, stamp_sec_fraction)); - DEFINE(CFG_ICACHE_BLOCKSZ, offsetof(struct vdso_data, icache_block_size)); - DEFINE(CFG_DCACHE_BLOCKSZ, offsetof(struct vdso_data, dcache_block_size)); - DEFINE(CFG_ICACHE_LOGBLOCKSZ, offsetof(struct vdso_data, icache_log_block_size)); - DEFINE(CFG_DCACHE_LOGBLOCKSZ, offsetof(struct vdso_data, dcache_log_block_size)); + OFFSET(CFG_TB_ORIG_STAMP, vdso_data, tb_orig_stamp); + OFFSET(CFG_TB_TICKS_PER_SEC, vdso_data, tb_ticks_per_sec); + OFFSET(CFG_TB_TO_XS, vdso_data, tb_to_xs); + OFFSET(CFG_TB_UPDATE_COUNT, vdso_data, tb_update_count); + OFFSET(CFG_TZ_MINUTEWEST, vdso_data, tz_minuteswest); + OFFSET(CFG_TZ_DSTTIME, vdso_data, tz_dsttime); + OFFSET(CFG_SYSCALL_MAP32, vdso_data, syscall_map_32); + OFFSET(WTOM_CLOCK_SEC, vdso_data, wtom_clock_sec); + OFFSET(WTOM_CLOCK_NSEC, vdso_data, wtom_clock_nsec); + OFFSET(STAMP_XTIME, vdso_data, stamp_xtime); + OFFSET(STAMP_SEC_FRAC, vdso_data, stamp_sec_fraction); + OFFSET(CFG_ICACHE_BLOCKSZ, vdso_data, icache_block_size); + OFFSET(CFG_DCACHE_BLOCKSZ, vdso_data, dcache_block_size); + OFFSET(CFG_ICACHE_LOGBLOCKSZ, vdso_data, icache_log_block_size); + OFFSET(CFG_DCACHE_LOGBLOCKSZ, vdso_data, dcache_log_block_size); #ifdef CONFIG_PPC64 - DEFINE(CFG_SYSCALL_MAP64, offsetof(struct vdso_data, syscall_map_64)); - DEFINE(TVAL64_TV_SEC, offsetof(struct timeval, tv_sec)); - DEFINE(TVAL64_TV_USEC, offsetof(struct timeval, tv_usec)); - DEFINE(TVAL32_TV_SEC, offsetof(struct compat_timeval, tv_sec)); - DEFINE(TVAL32_TV_USEC, offsetof(struct compat_timeval, tv_usec)); - DEFINE(TSPC64_TV_SEC, offsetof(struct timespec, tv_sec)); - DEFINE(TSPC64_TV_NSEC, offsetof(struct timespec, tv_nsec)); - DEFINE(TSPC32_TV_SEC, offsetof(struct compat_timespec, tv_sec)); - DEFINE(TSPC32_TV_NSEC, offsetof(struct compat_timespec, tv_nsec)); + OFFSET(CFG_SYSCALL_MAP64, vdso_data, syscall_map_64); + OFFSET(TVAL64_TV_SEC, timeval, tv_sec); + OFFSET(TVAL64_TV_USEC, timeval, tv_usec); + OFFSET(TVAL32_TV_SEC, compat_timeval, tv_sec); + OFFSET(TVAL32_TV_USEC, compat_timeval, tv_usec); + OFFSET(TSPC64_TV_SEC, timespec, tv_sec); + OFFSET(TSPC64_TV_NSEC, timespec, tv_nsec); + OFFSET(TSPC32_TV_SEC, compat_timespec, tv_sec); + OFFSET(TSPC32_TV_NSEC, compat_timespec, tv_nsec); #else - DEFINE(TVAL32_TV_SEC, offsetof(struct timeval, tv_sec)); - DEFINE(TVAL32_TV_USEC, offsetof(struct timeval, tv_usec)); - DEFINE(TSPC32_TV_SEC, offsetof(struct timespec, tv_sec)); - DEFINE(TSPC32_TV_NSEC, offsetof(struct timespec, tv_nsec)); + OFFSET(TVAL32_TV_SEC, timeval, tv_sec); + OFFSET(TVAL32_TV_USEC, timeval, tv_usec); + OFFSET(TSPC32_TV_SEC, timespec, tv_sec); + OFFSET(TSPC32_TV_NSEC, timespec, tv_nsec); #endif /* timeval/timezone offsets for use by vdso */ - DEFINE(TZONE_TZ_MINWEST, offsetof(struct timezone, tz_minuteswest)); - DEFINE(TZONE_TZ_DSTTIME, offsetof(struct timezone, tz_dsttime)); + OFFSET(TZONE_TZ_MINWEST, timezone, tz_minuteswest); + OFFSET(TZONE_TZ_DSTTIME, timezone, tz_dsttime); /* Other bits used by the vdso */ DEFINE(CLOCK_REALTIME, CLOCK_REALTIME); @@ -422,164 +407,170 @@ int main(void) DEFINE(PTE_SIZE, sizeof(pte_t)); #ifdef CONFIG_KVM - DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); - DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); - DEFINE(VCPU_GUEST_PID, offsetof(struct kvm_vcpu, arch.pid)); - DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); - DEFINE(VCPU_VRSAVE, offsetof(struct kvm_vcpu, arch.vrsave)); - DEFINE(VCPU_FPRS, offsetof(struct kvm_vcpu, arch.fp.fpr)); + OFFSET(VCPU_HOST_STACK, kvm_vcpu, arch.host_stack); + OFFSET(VCPU_HOST_PID, kvm_vcpu, arch.host_pid); + OFFSET(VCPU_GUEST_PID, kvm_vcpu, arch.pid); + OFFSET(VCPU_GPRS, kvm_vcpu, arch.gpr); + OFFSET(VCPU_VRSAVE, kvm_vcpu, arch.vrsave); + OFFSET(VCPU_FPRS, kvm_vcpu, arch.fp.fpr); #ifdef CONFIG_ALTIVEC - DEFINE(VCPU_VRS, offsetof(struct kvm_vcpu, arch.vr.vr)); + OFFSET(VCPU_VRS, kvm_vcpu, arch.vr.vr); #endif - DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer)); - DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr)); - DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); + OFFSET(VCPU_XER, kvm_vcpu, arch.xer); + OFFSET(VCPU_CTR, kvm_vcpu, arch.ctr); + OFFSET(VCPU_LR, kvm_vcpu, arch.lr); #ifdef CONFIG_PPC_BOOK3S - DEFINE(VCPU_TAR, offsetof(struct kvm_vcpu, arch.tar)); + OFFSET(VCPU_TAR, kvm_vcpu, arch.tar); #endif - DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); - DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc)); + OFFSET(VCPU_CR, kvm_vcpu, arch.cr); + OFFSET(VCPU_PC, kvm_vcpu, arch.pc); #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.shregs.msr)); - DEFINE(VCPU_SRR0, offsetof(struct kvm_vcpu, arch.shregs.srr0)); - DEFINE(VCPU_SRR1, offsetof(struct kvm_vcpu, arch.shregs.srr1)); - DEFINE(VCPU_SPRG0, offsetof(struct kvm_vcpu, arch.shregs.sprg0)); - DEFINE(VCPU_SPRG1, offsetof(struct kvm_vcpu, arch.shregs.sprg1)); - DEFINE(VCPU_SPRG2, offsetof(struct kvm_vcpu, arch.shregs.sprg2)); - DEFINE(VCPU_SPRG3, offsetof(struct kvm_vcpu, arch.shregs.sprg3)); + OFFSET(VCPU_MSR, kvm_vcpu, arch.shregs.msr); + OFFSET(VCPU_SRR0, kvm_vcpu, arch.shregs.srr0); + OFFSET(VCPU_SRR1, kvm_vcpu, arch.shregs.srr1); + OFFSET(VCPU_SPRG0, kvm_vcpu, arch.shregs.sprg0); + OFFSET(VCPU_SPRG1, kvm_vcpu, arch.shregs.sprg1); + OFFSET(VCPU_SPRG2, kvm_vcpu, arch.shregs.sprg2); + OFFSET(VCPU_SPRG3, kvm_vcpu, arch.shregs.sprg3); #endif #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING - DEFINE(VCPU_TB_RMENTRY, offsetof(struct kvm_vcpu, arch.rm_entry)); - DEFINE(VCPU_TB_RMINTR, offsetof(struct kvm_vcpu, arch.rm_intr)); - DEFINE(VCPU_TB_RMEXIT, offsetof(struct kvm_vcpu, arch.rm_exit)); - DEFINE(VCPU_TB_GUEST, offsetof(struct kvm_vcpu, arch.guest_time)); - DEFINE(VCPU_TB_CEDE, offsetof(struct kvm_vcpu, arch.cede_time)); - DEFINE(VCPU_CUR_ACTIVITY, offsetof(struct kvm_vcpu, arch.cur_activity)); - DEFINE(VCPU_ACTIVITY_START, offsetof(struct kvm_vcpu, arch.cur_tb_start)); - DEFINE(TAS_SEQCOUNT, offsetof(struct kvmhv_tb_accumulator, seqcount)); - DEFINE(TAS_TOTAL, offsetof(struct kvmhv_tb_accumulator, tb_total)); - DEFINE(TAS_MIN, offsetof(struct kvmhv_tb_accumulator, tb_min)); - DEFINE(TAS_MAX, offsetof(struct kvmhv_tb_accumulator, tb_max)); -#endif - DEFINE(VCPU_SHARED_SPRG3, offsetof(struct kvm_vcpu_arch_shared, sprg3)); - DEFINE(VCPU_SHARED_SPRG4, offsetof(struct kvm_vcpu_arch_shared, sprg4)); - DEFINE(VCPU_SHARED_SPRG5, offsetof(struct kvm_vcpu_arch_shared, sprg5)); - DEFINE(VCPU_SHARED_SPRG6, offsetof(struct kvm_vcpu_arch_shared, sprg6)); - DEFINE(VCPU_SHARED_SPRG7, offsetof(struct kvm_vcpu_arch_shared, sprg7)); - DEFINE(VCPU_SHADOW_PID, offsetof(struct kvm_vcpu, arch.shadow_pid)); - DEFINE(VCPU_SHADOW_PID1, offsetof(struct kvm_vcpu, arch.shadow_pid1)); - DEFINE(VCPU_SHARED, offsetof(struct kvm_vcpu, arch.shared)); - DEFINE(VCPU_SHARED_MSR, offsetof(struct kvm_vcpu_arch_shared, msr)); - DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr)); + OFFSET(VCPU_TB_RMENTRY, kvm_vcpu, arch.rm_entry); + OFFSET(VCPU_TB_RMINTR, kvm_vcpu, arch.rm_intr); + OFFSET(VCPU_TB_RMEXIT, kvm_vcpu, arch.rm_exit); + OFFSET(VCPU_TB_GUEST, kvm_vcpu, arch.guest_time); + OFFSET(VCPU_TB_CEDE, kvm_vcpu, arch.cede_time); + OFFSET(VCPU_CUR_ACTIVITY, kvm_vcpu, arch.cur_activity); + OFFSET(VCPU_ACTIVITY_START, kvm_vcpu, arch.cur_tb_start); + OFFSET(TAS_SEQCOUNT, kvmhv_tb_accumulator, seqcount); + OFFSET(TAS_TOTAL, kvmhv_tb_accumulator, tb_total); + OFFSET(TAS_MIN, kvmhv_tb_accumulator, tb_min); + OFFSET(TAS_MAX, kvmhv_tb_accumulator, tb_max); +#endif + OFFSET(VCPU_SHARED_SPRG3, kvm_vcpu_arch_shared, sprg3); + OFFSET(VCPU_SHARED_SPRG4, kvm_vcpu_arch_shared, sprg4); + OFFSET(VCPU_SHARED_SPRG5, kvm_vcpu_arch_shared, sprg5); + OFFSET(VCPU_SHARED_SPRG6, kvm_vcpu_arch_shared, sprg6); + OFFSET(VCPU_SHARED_SPRG7, kvm_vcpu_arch_shared, sprg7); + OFFSET(VCPU_SHADOW_PID, kvm_vcpu, arch.shadow_pid); + OFFSET(VCPU_SHADOW_PID1, kvm_vcpu, arch.shadow_pid1); + OFFSET(VCPU_SHARED, kvm_vcpu, arch.shared); + OFFSET(VCPU_SHARED_MSR, kvm_vcpu_arch_shared, msr); + OFFSET(VCPU_SHADOW_MSR, kvm_vcpu, arch.shadow_msr); #if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_PR_POSSIBLE) - DEFINE(VCPU_SHAREDBE, offsetof(struct kvm_vcpu, arch.shared_big_endian)); + OFFSET(VCPU_SHAREDBE, kvm_vcpu, arch.shared_big_endian); #endif - DEFINE(VCPU_SHARED_MAS0, offsetof(struct kvm_vcpu_arch_shared, mas0)); - DEFINE(VCPU_SHARED_MAS1, offsetof(struct kvm_vcpu_arch_shared, mas1)); - DEFINE(VCPU_SHARED_MAS2, offsetof(struct kvm_vcpu_arch_shared, mas2)); - DEFINE(VCPU_SHARED_MAS7_3, offsetof(struct kvm_vcpu_arch_shared, mas7_3)); - DEFINE(VCPU_SHARED_MAS4, offsetof(struct kvm_vcpu_arch_shared, mas4)); - DEFINE(VCPU_SHARED_MAS6, offsetof(struct kvm_vcpu_arch_shared, mas6)); + OFFSET(VCPU_SHARED_MAS0, kvm_vcpu_arch_shared, mas0); + OFFSET(VCPU_SHARED_MAS1, kvm_vcpu_arch_shared, mas1); + OFFSET(VCPU_SHARED_MAS2, kvm_vcpu_arch_shared, mas2); + OFFSET(VCPU_SHARED_MAS7_3, kvm_vcpu_arch_shared, mas7_3); + OFFSET(VCPU_SHARED_MAS4, kvm_vcpu_arch_shared, mas4); + OFFSET(VCPU_SHARED_MAS6, kvm_vcpu_arch_shared, mas6); - DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm)); - DEFINE(KVM_LPID, offsetof(struct kvm, arch.lpid)); + OFFSET(VCPU_KVM, kvm_vcpu, kvm); + OFFSET(KVM_LPID, kvm, arch.lpid); /* book3s */ #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - DEFINE(KVM_SDR1, offsetof(struct kvm, arch.sdr1)); - DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid)); - DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr)); - DEFINE(KVM_HOST_SDR1, offsetof(struct kvm, arch.host_sdr1)); - DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits)); - DEFINE(KVM_ENABLED_HCALLS, offsetof(struct kvm, arch.enabled_hcalls)); - DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v)); - DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr)); - DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar)); - DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr)); - DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty)); - DEFINE(VCPU_HEIR, offsetof(struct kvm_vcpu, arch.emul_inst)); - DEFINE(VCPU_CPU, offsetof(struct kvm_vcpu, cpu)); - DEFINE(VCPU_THREAD_CPU, offsetof(struct kvm_vcpu, arch.thread_cpu)); + OFFSET(KVM_TLB_SETS, kvm, arch.tlb_sets); + OFFSET(KVM_SDR1, kvm, arch.sdr1); + OFFSET(KVM_HOST_LPID, kvm, arch.host_lpid); + OFFSET(KVM_HOST_LPCR, kvm, arch.host_lpcr); + OFFSET(KVM_HOST_SDR1, kvm, arch.host_sdr1); + OFFSET(KVM_NEED_FLUSH, kvm, arch.need_tlb_flush.bits); + OFFSET(KVM_ENABLED_HCALLS, kvm, arch.enabled_hcalls); + OFFSET(KVM_VRMA_SLB_V, kvm, arch.vrma_slb_v); + OFFSET(KVM_RADIX, kvm, arch.radix); + OFFSET(VCPU_DSISR, kvm_vcpu, arch.shregs.dsisr); + OFFSET(VCPU_DAR, kvm_vcpu, arch.shregs.dar); + OFFSET(VCPU_VPA, kvm_vcpu, arch.vpa.pinned_addr); + OFFSET(VCPU_VPA_DIRTY, kvm_vcpu, arch.vpa.dirty); + OFFSET(VCPU_HEIR, kvm_vcpu, arch.emul_inst); + OFFSET(VCPU_CPU, kvm_vcpu, cpu); + OFFSET(VCPU_THREAD_CPU, kvm_vcpu, arch.thread_cpu); #endif #ifdef CONFIG_PPC_BOOK3S - DEFINE(VCPU_PURR, offsetof(struct kvm_vcpu, arch.purr)); - DEFINE(VCPU_SPURR, offsetof(struct kvm_vcpu, arch.spurr)); - DEFINE(VCPU_IC, offsetof(struct kvm_vcpu, arch.ic)); - DEFINE(VCPU_DSCR, offsetof(struct kvm_vcpu, arch.dscr)); - DEFINE(VCPU_AMR, offsetof(struct kvm_vcpu, arch.amr)); - DEFINE(VCPU_UAMOR, offsetof(struct kvm_vcpu, arch.uamor)); - DEFINE(VCPU_IAMR, offsetof(struct kvm_vcpu, arch.iamr)); - DEFINE(VCPU_CTRL, offsetof(struct kvm_vcpu, arch.ctrl)); - DEFINE(VCPU_DABR, offsetof(struct kvm_vcpu, arch.dabr)); - DEFINE(VCPU_DABRX, offsetof(struct kvm_vcpu, arch.dabrx)); - DEFINE(VCPU_DAWR, offsetof(struct kvm_vcpu, arch.dawr)); - DEFINE(VCPU_DAWRX, offsetof(struct kvm_vcpu, arch.dawrx)); - DEFINE(VCPU_CIABR, offsetof(struct kvm_vcpu, arch.ciabr)); - DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags)); - DEFINE(VCPU_DEC, offsetof(struct kvm_vcpu, arch.dec)); - DEFINE(VCPU_DEC_EXPIRES, offsetof(struct kvm_vcpu, arch.dec_expires)); - DEFINE(VCPU_PENDING_EXC, offsetof(struct kvm_vcpu, arch.pending_exceptions)); - DEFINE(VCPU_CEDED, offsetof(struct kvm_vcpu, arch.ceded)); - DEFINE(VCPU_PRODDED, offsetof(struct kvm_vcpu, arch.prodded)); - DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr)); - DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc)); - DEFINE(VCPU_SPMC, offsetof(struct kvm_vcpu, arch.spmc)); - DEFINE(VCPU_SIAR, offsetof(struct kvm_vcpu, arch.siar)); - DEFINE(VCPU_SDAR, offsetof(struct kvm_vcpu, arch.sdar)); - DEFINE(VCPU_SIER, offsetof(struct kvm_vcpu, arch.sier)); - DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb)); - DEFINE(VCPU_SLB_MAX, offsetof(struct kvm_vcpu, arch.slb_max)); - DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr)); - DEFINE(VCPU_FAULT_DSISR, offsetof(struct kvm_vcpu, arch.fault_dsisr)); - DEFINE(VCPU_FAULT_DAR, offsetof(struct kvm_vcpu, arch.fault_dar)); - DEFINE(VCPU_INTR_MSR, offsetof(struct kvm_vcpu, arch.intr_msr)); - DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); - DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap)); - DEFINE(VCPU_CFAR, offsetof(struct kvm_vcpu, arch.cfar)); - DEFINE(VCPU_PPR, offsetof(struct kvm_vcpu, arch.ppr)); - DEFINE(VCPU_FSCR, offsetof(struct kvm_vcpu, arch.fscr)); - DEFINE(VCPU_PSPB, offsetof(struct kvm_vcpu, arch.pspb)); - DEFINE(VCPU_EBBHR, offsetof(struct kvm_vcpu, arch.ebbhr)); - DEFINE(VCPU_EBBRR, offsetof(struct kvm_vcpu, arch.ebbrr)); - DEFINE(VCPU_BESCR, offsetof(struct kvm_vcpu, arch.bescr)); - DEFINE(VCPU_CSIGR, offsetof(struct kvm_vcpu, arch.csigr)); - DEFINE(VCPU_TACR, offsetof(struct kvm_vcpu, arch.tacr)); - DEFINE(VCPU_TCSCR, offsetof(struct kvm_vcpu, arch.tcscr)); - DEFINE(VCPU_ACOP, offsetof(struct kvm_vcpu, arch.acop)); - DEFINE(VCPU_WORT, offsetof(struct kvm_vcpu, arch.wort)); - DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_map)); - DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest)); - DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads)); - DEFINE(VCORE_KVM, offsetof(struct kvmppc_vcore, kvm)); - DEFINE(VCORE_TB_OFFSET, offsetof(struct kvmppc_vcore, tb_offset)); - DEFINE(VCORE_LPCR, offsetof(struct kvmppc_vcore, lpcr)); - DEFINE(VCORE_PCR, offsetof(struct kvmppc_vcore, pcr)); - DEFINE(VCORE_DPDES, offsetof(struct kvmppc_vcore, dpdes)); - DEFINE(VCORE_VTB, offsetof(struct kvmppc_vcore, vtb)); - DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige)); - DEFINE(VCPU_SLB_V, offsetof(struct kvmppc_slb, origv)); + OFFSET(VCPU_PURR, kvm_vcpu, arch.purr); + OFFSET(VCPU_SPURR, kvm_vcpu, arch.spurr); + OFFSET(VCPU_IC, kvm_vcpu, arch.ic); + OFFSET(VCPU_DSCR, kvm_vcpu, arch.dscr); + OFFSET(VCPU_AMR, kvm_vcpu, arch.amr); + OFFSET(VCPU_UAMOR, kvm_vcpu, arch.uamor); + OFFSET(VCPU_IAMR, kvm_vcpu, arch.iamr); + OFFSET(VCPU_CTRL, kvm_vcpu, arch.ctrl); + OFFSET(VCPU_DABR, kvm_vcpu, arch.dabr); + OFFSET(VCPU_DABRX, kvm_vcpu, arch.dabrx); + OFFSET(VCPU_DAWR, kvm_vcpu, arch.dawr); + OFFSET(VCPU_DAWRX, kvm_vcpu, arch.dawrx); + OFFSET(VCPU_CIABR, kvm_vcpu, arch.ciabr); + OFFSET(VCPU_HFLAGS, kvm_vcpu, arch.hflags); + OFFSET(VCPU_DEC, kvm_vcpu, arch.dec); + OFFSET(VCPU_DEC_EXPIRES, kvm_vcpu, arch.dec_expires); + OFFSET(VCPU_PENDING_EXC, kvm_vcpu, arch.pending_exceptions); + OFFSET(VCPU_CEDED, kvm_vcpu, arch.ceded); + OFFSET(VCPU_PRODDED, kvm_vcpu, arch.prodded); + OFFSET(VCPU_MMCR, kvm_vcpu, arch.mmcr); + OFFSET(VCPU_PMC, kvm_vcpu, arch.pmc); + OFFSET(VCPU_SPMC, kvm_vcpu, arch.spmc); + OFFSET(VCPU_SIAR, kvm_vcpu, arch.siar); + OFFSET(VCPU_SDAR, kvm_vcpu, arch.sdar); + OFFSET(VCPU_SIER, kvm_vcpu, arch.sier); + OFFSET(VCPU_SLB, kvm_vcpu, arch.slb); + OFFSET(VCPU_SLB_MAX, kvm_vcpu, arch.slb_max); + OFFSET(VCPU_SLB_NR, kvm_vcpu, arch.slb_nr); + OFFSET(VCPU_FAULT_DSISR, kvm_vcpu, arch.fault_dsisr); + OFFSET(VCPU_FAULT_DAR, kvm_vcpu, arch.fault_dar); + OFFSET(VCPU_FAULT_GPA, kvm_vcpu, arch.fault_gpa); + OFFSET(VCPU_INTR_MSR, kvm_vcpu, arch.intr_msr); + OFFSET(VCPU_LAST_INST, kvm_vcpu, arch.last_inst); + OFFSET(VCPU_TRAP, kvm_vcpu, arch.trap); + OFFSET(VCPU_CFAR, kvm_vcpu, arch.cfar); + OFFSET(VCPU_PPR, kvm_vcpu, arch.ppr); + OFFSET(VCPU_FSCR, kvm_vcpu, arch.fscr); + OFFSET(VCPU_PSPB, kvm_vcpu, arch.pspb); + OFFSET(VCPU_EBBHR, kvm_vcpu, arch.ebbhr); + OFFSET(VCPU_EBBRR, kvm_vcpu, arch.ebbrr); + OFFSET(VCPU_BESCR, kvm_vcpu, arch.bescr); + OFFSET(VCPU_CSIGR, kvm_vcpu, arch.csigr); + OFFSET(VCPU_TACR, kvm_vcpu, arch.tacr); + OFFSET(VCPU_TCSCR, kvm_vcpu, arch.tcscr); + OFFSET(VCPU_ACOP, kvm_vcpu, arch.acop); + OFFSET(VCPU_WORT, kvm_vcpu, arch.wort); + OFFSET(VCPU_TID, kvm_vcpu, arch.tid); + OFFSET(VCPU_PSSCR, kvm_vcpu, arch.psscr); + OFFSET(VCORE_ENTRY_EXIT, kvmppc_vcore, entry_exit_map); + OFFSET(VCORE_IN_GUEST, kvmppc_vcore, in_guest); + OFFSET(VCORE_NAPPING_THREADS, kvmppc_vcore, napping_threads); + OFFSET(VCORE_KVM, kvmppc_vcore, kvm); + OFFSET(VCORE_TB_OFFSET, kvmppc_vcore, tb_offset); + OFFSET(VCORE_LPCR, kvmppc_vcore, lpcr); + OFFSET(VCORE_PCR, kvmppc_vcore, pcr); + OFFSET(VCORE_DPDES, kvmppc_vcore, dpdes); + OFFSET(VCORE_VTB, kvmppc_vcore, vtb); + OFFSET(VCPU_SLB_E, kvmppc_slb, orige); + OFFSET(VCPU_SLB_V, kvmppc_slb, origv); DEFINE(VCPU_SLB_SIZE, sizeof(struct kvmppc_slb)); #ifdef CONFIG_PPC_TRANSACTIONAL_MEM - DEFINE(VCPU_TFHAR, offsetof(struct kvm_vcpu, arch.tfhar)); - DEFINE(VCPU_TFIAR, offsetof(struct kvm_vcpu, arch.tfiar)); - DEFINE(VCPU_TEXASR, offsetof(struct kvm_vcpu, arch.texasr)); - DEFINE(VCPU_GPR_TM, offsetof(struct kvm_vcpu, arch.gpr_tm)); - DEFINE(VCPU_FPRS_TM, offsetof(struct kvm_vcpu, arch.fp_tm.fpr)); - DEFINE(VCPU_VRS_TM, offsetof(struct kvm_vcpu, arch.vr_tm.vr)); - DEFINE(VCPU_VRSAVE_TM, offsetof(struct kvm_vcpu, arch.vrsave_tm)); - DEFINE(VCPU_CR_TM, offsetof(struct kvm_vcpu, arch.cr_tm)); - DEFINE(VCPU_LR_TM, offsetof(struct kvm_vcpu, arch.lr_tm)); - DEFINE(VCPU_CTR_TM, offsetof(struct kvm_vcpu, arch.ctr_tm)); - DEFINE(VCPU_AMR_TM, offsetof(struct kvm_vcpu, arch.amr_tm)); - DEFINE(VCPU_PPR_TM, offsetof(struct kvm_vcpu, arch.ppr_tm)); - DEFINE(VCPU_DSCR_TM, offsetof(struct kvm_vcpu, arch.dscr_tm)); - DEFINE(VCPU_TAR_TM, offsetof(struct kvm_vcpu, arch.tar_tm)); + OFFSET(VCPU_TFHAR, kvm_vcpu, arch.tfhar); + OFFSET(VCPU_TFIAR, kvm_vcpu, arch.tfiar); + OFFSET(VCPU_TEXASR, kvm_vcpu, arch.texasr); + OFFSET(VCPU_GPR_TM, kvm_vcpu, arch.gpr_tm); + OFFSET(VCPU_FPRS_TM, kvm_vcpu, arch.fp_tm.fpr); + OFFSET(VCPU_VRS_TM, kvm_vcpu, arch.vr_tm.vr); + OFFSET(VCPU_VRSAVE_TM, kvm_vcpu, arch.vrsave_tm); + OFFSET(VCPU_CR_TM, kvm_vcpu, arch.cr_tm); + OFFSET(VCPU_XER_TM, kvm_vcpu, arch.xer_tm); + OFFSET(VCPU_LR_TM, kvm_vcpu, arch.lr_tm); + OFFSET(VCPU_CTR_TM, kvm_vcpu, arch.ctr_tm); + OFFSET(VCPU_AMR_TM, kvm_vcpu, arch.amr_tm); + OFFSET(VCPU_PPR_TM, kvm_vcpu, arch.ppr_tm); + OFFSET(VCPU_DSCR_TM, kvm_vcpu, arch.dscr_tm); + OFFSET(VCPU_TAR_TM, kvm_vcpu, arch.tar_tm); #endif #ifdef CONFIG_PPC_BOOK3S_64 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE - DEFINE(PACA_SVCPU, offsetof(struct paca_struct, shadow_vcpu)); + OFFSET(PACA_SVCPU, paca_struct, shadow_vcpu); # define SVCPU_FIELD(x, f) DEFINE(x, offsetof(struct paca_struct, shadow_vcpu.f)) #else # define SVCPU_FIELD(x, f) @@ -662,11 +653,11 @@ int main(void) HSTATE_FIELD(HSTATE_DECEXP, dec_expires); HSTATE_FIELD(HSTATE_SPLIT_MODE, kvm_split_mode); DEFINE(IPI_PRIORITY, IPI_PRIORITY); - DEFINE(KVM_SPLIT_RPR, offsetof(struct kvm_split_mode, rpr)); - DEFINE(KVM_SPLIT_PMMAR, offsetof(struct kvm_split_mode, pmmar)); - DEFINE(KVM_SPLIT_LDBAR, offsetof(struct kvm_split_mode, ldbar)); - DEFINE(KVM_SPLIT_DO_NAP, offsetof(struct kvm_split_mode, do_nap)); - DEFINE(KVM_SPLIT_NAPPED, offsetof(struct kvm_split_mode, napped)); + OFFSET(KVM_SPLIT_RPR, kvm_split_mode, rpr); + OFFSET(KVM_SPLIT_PMMAR, kvm_split_mode, pmmar); + OFFSET(KVM_SPLIT_LDBAR, kvm_split_mode, ldbar); + OFFSET(KVM_SPLIT_DO_NAP, kvm_split_mode, do_nap); + OFFSET(KVM_SPLIT_NAPPED, kvm_split_mode, napped); #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ #ifdef CONFIG_PPC_BOOK3S_64 @@ -676,32 +667,27 @@ int main(void) #endif /* CONFIG_PPC_BOOK3S_64 */ #else /* CONFIG_PPC_BOOK3S */ - DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); - DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer)); - DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); - DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr)); - DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc)); - DEFINE(VCPU_SPRG9, offsetof(struct kvm_vcpu, arch.sprg9)); - DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); - DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear)); - DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr)); - DEFINE(VCPU_CRIT_SAVE, offsetof(struct kvm_vcpu, arch.crit_save)); + OFFSET(VCPU_CR, kvm_vcpu, arch.cr); + OFFSET(VCPU_XER, kvm_vcpu, arch.xer); + OFFSET(VCPU_LR, kvm_vcpu, arch.lr); + OFFSET(VCPU_CTR, kvm_vcpu, arch.ctr); + OFFSET(VCPU_PC, kvm_vcpu, arch.pc); + OFFSET(VCPU_SPRG9, kvm_vcpu, arch.sprg9); + OFFSET(VCPU_LAST_INST, kvm_vcpu, arch.last_inst); + OFFSET(VCPU_FAULT_DEAR, kvm_vcpu, arch.fault_dear); + OFFSET(VCPU_FAULT_ESR, kvm_vcpu, arch.fault_esr); + OFFSET(VCPU_CRIT_SAVE, kvm_vcpu, arch.crit_save); #endif /* CONFIG_PPC_BOOK3S */ #endif /* CONFIG_KVM */ #ifdef CONFIG_KVM_GUEST - DEFINE(KVM_MAGIC_SCRATCH1, offsetof(struct kvm_vcpu_arch_shared, - scratch1)); - DEFINE(KVM_MAGIC_SCRATCH2, offsetof(struct kvm_vcpu_arch_shared, - scratch2)); - DEFINE(KVM_MAGIC_SCRATCH3, offsetof(struct kvm_vcpu_arch_shared, - scratch3)); - DEFINE(KVM_MAGIC_INT, offsetof(struct kvm_vcpu_arch_shared, - int_pending)); - DEFINE(KVM_MAGIC_MSR, offsetof(struct kvm_vcpu_arch_shared, msr)); - DEFINE(KVM_MAGIC_CRITICAL, offsetof(struct kvm_vcpu_arch_shared, - critical)); - DEFINE(KVM_MAGIC_SR, offsetof(struct kvm_vcpu_arch_shared, sr)); + OFFSET(KVM_MAGIC_SCRATCH1, kvm_vcpu_arch_shared, scratch1); + OFFSET(KVM_MAGIC_SCRATCH2, kvm_vcpu_arch_shared, scratch2); + OFFSET(KVM_MAGIC_SCRATCH3, kvm_vcpu_arch_shared, scratch3); + OFFSET(KVM_MAGIC_INT, kvm_vcpu_arch_shared, int_pending); + OFFSET(KVM_MAGIC_MSR, kvm_vcpu_arch_shared, msr); + OFFSET(KVM_MAGIC_CRITICAL, kvm_vcpu_arch_shared, critical); + OFFSET(KVM_MAGIC_SR, kvm_vcpu_arch_shared, sr); #endif #ifdef CONFIG_44x @@ -710,45 +696,37 @@ int main(void) #endif #ifdef CONFIG_PPC_FSL_BOOK3E DEFINE(TLBCAM_SIZE, sizeof(struct tlbcam)); - DEFINE(TLBCAM_MAS0, offsetof(struct tlbcam, MAS0)); - DEFINE(TLBCAM_MAS1, offsetof(struct tlbcam, MAS1)); - DEFINE(TLBCAM_MAS2, offsetof(struct tlbcam, MAS2)); - DEFINE(TLBCAM_MAS3, offsetof(struct tlbcam, MAS3)); - DEFINE(TLBCAM_MAS7, offsetof(struct tlbcam, MAS7)); + OFFSET(TLBCAM_MAS0, tlbcam, MAS0); + OFFSET(TLBCAM_MAS1, tlbcam, MAS1); + OFFSET(TLBCAM_MAS2, tlbcam, MAS2); + OFFSET(TLBCAM_MAS3, tlbcam, MAS3); + OFFSET(TLBCAM_MAS7, tlbcam, MAS7); #endif #if defined(CONFIG_KVM) && defined(CONFIG_SPE) - DEFINE(VCPU_EVR, offsetof(struct kvm_vcpu, arch.evr[0])); - DEFINE(VCPU_ACC, offsetof(struct kvm_vcpu, arch.acc)); - DEFINE(VCPU_SPEFSCR, offsetof(struct kvm_vcpu, arch.spefscr)); - DEFINE(VCPU_HOST_SPEFSCR, offsetof(struct kvm_vcpu, arch.host_spefscr)); + OFFSET(VCPU_EVR, kvm_vcpu, arch.evr[0]); + OFFSET(VCPU_ACC, kvm_vcpu, arch.acc); + OFFSET(VCPU_SPEFSCR, kvm_vcpu, arch.spefscr); + OFFSET(VCPU_HOST_SPEFSCR, kvm_vcpu, arch.host_spefscr); #endif #ifdef CONFIG_KVM_BOOKE_HV - DEFINE(VCPU_HOST_MAS4, offsetof(struct kvm_vcpu, arch.host_mas4)); - DEFINE(VCPU_HOST_MAS6, offsetof(struct kvm_vcpu, arch.host_mas6)); + OFFSET(VCPU_HOST_MAS4, kvm_vcpu, arch.host_mas4); + OFFSET(VCPU_HOST_MAS6, kvm_vcpu, arch.host_mas6); #endif #ifdef CONFIG_KVM_EXIT_TIMING - DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu, - arch.timing_exit.tv32.tbu)); - DEFINE(VCPU_TIMING_EXIT_TBL, offsetof(struct kvm_vcpu, - arch.timing_exit.tv32.tbl)); - DEFINE(VCPU_TIMING_LAST_ENTER_TBU, offsetof(struct kvm_vcpu, - arch.timing_last_enter.tv32.tbu)); - DEFINE(VCPU_TIMING_LAST_ENTER_TBL, offsetof(struct kvm_vcpu, - arch.timing_last_enter.tv32.tbl)); + OFFSET(VCPU_TIMING_EXIT_TBU, kvm_vcpu, arch.timing_exit.tv32.tbu); + OFFSET(VCPU_TIMING_EXIT_TBL, kvm_vcpu, arch.timing_exit.tv32.tbl); + OFFSET(VCPU_TIMING_LAST_ENTER_TBU, kvm_vcpu, arch.timing_last_enter.tv32.tbu); + OFFSET(VCPU_TIMING_LAST_ENTER_TBL, kvm_vcpu, arch.timing_last_enter.tv32.tbl); #endif #ifdef CONFIG_PPC_POWERNV - DEFINE(PACA_CORE_IDLE_STATE_PTR, - offsetof(struct paca_struct, core_idle_state_ptr)); - DEFINE(PACA_THREAD_IDLE_STATE, - offsetof(struct paca_struct, thread_idle_state)); - DEFINE(PACA_THREAD_MASK, - offsetof(struct paca_struct, thread_mask)); - DEFINE(PACA_SUBCORE_SIBLING_MASK, - offsetof(struct paca_struct, subcore_sibling_mask)); + OFFSET(PACA_CORE_IDLE_STATE_PTR, paca_struct, core_idle_state_ptr); + OFFSET(PACA_THREAD_IDLE_STATE, paca_struct, thread_idle_state); + OFFSET(PACA_THREAD_MASK, paca_struct, thread_mask); + OFFSET(PACA_SUBCORE_SIBLING_MASK, paca_struct, subcore_sibling_mask); #endif DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER); diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index 37c027ca83b2..7fe8c79e6937 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -96,10 +96,13 @@ _GLOBAL(__setup_cpu_power9) mtlr r11 beqlr li r0,0 + mtspr SPRN_PSSCR,r0 mtspr SPRN_LPID,r0 mfspr r3,SPRN_LPCR LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE) or r3, r3, r4 + LOAD_REG_IMMEDIATE(r4, LPCR_UPRT | LPCR_HR) + andc r3, r3, r4 bl __init_LPCR bl __init_HFSCR bl __init_tlb_power9 @@ -116,10 +119,13 @@ _GLOBAL(__restore_cpu_power9) mtlr r11 beqlr li r0,0 + mtspr SPRN_PSSCR,r0 mtspr SPRN_LPID,r0 mfspr r3,SPRN_LPCR LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE) or r3, r3, r4 + LOAD_REG_IMMEDIATE(r4, LPCR_UPRT | LPCR_HR) + andc r3, r3, r4 bl __init_LPCR bl __init_HFSCR bl __init_tlb_power9 @@ -174,7 +180,7 @@ __init_FSCR: __init_HFSCR: mfspr r3,SPRN_HFSCR ori r3,r3,HFSCR_TAR|HFSCR_TM|HFSCR_BHRB|HFSCR_PM|\ - HFSCR_DSCR|HFSCR_VECVSX|HFSCR_FP|HFSCR_EBB + HFSCR_DSCR|HFSCR_VECVSX|HFSCR_FP|HFSCR_EBB|HFSCR_MSGP mtspr SPRN_HFSCR,r3 blr diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 6a82ef039c50..e79b9daa873c 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -77,6 +77,7 @@ extern void __flush_tlb_power8(unsigned int action); extern void __flush_tlb_power9(unsigned int action); extern long __machine_check_early_realmode_p7(struct pt_regs *regs); extern long __machine_check_early_realmode_p8(struct pt_regs *regs); +extern long __machine_check_early_realmode_p9(struct pt_regs *regs); #endif /* CONFIG_PPC64 */ #if defined(CONFIG_E500) extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec); @@ -386,6 +387,23 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check_early = __machine_check_early_realmode_p8, .platform = "power8", }, + { /* 3.00-compliant processor, i.e. Power9 "architected" mode */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x0f000005, + .cpu_name = "POWER9 (architected)", + .cpu_features = CPU_FTRS_POWER9, + .cpu_user_features = COMMON_USER_POWER9, + .cpu_user_features2 = COMMON_USER2_POWER9, + .mmu_features = MMU_FTRS_POWER9, + .icache_bsize = 128, + .dcache_bsize = 128, + .oprofile_type = PPC_OPROFILE_INVALID, + .oprofile_cpu_type = "ppc64/ibm-compat-v1", + .cpu_setup = __setup_cpu_power9, + .cpu_restore = __restore_cpu_power9, + .flush_tlb = __flush_tlb_power9, + .platform = "power9", + }, { /* Power7 */ .pvr_mask = 0xffff0000, .pvr_value = 0x003f0000, @@ -523,6 +541,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_power9, .cpu_restore = __restore_cpu_power9, .flush_tlb = __flush_tlb_power9, + .machine_check_early = __machine_check_early_realmode_p9, .platform = "power9", }, { /* Power9 */ @@ -542,6 +561,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_power9, .cpu_restore = __restore_cpu_power9, .flush_tlb = __flush_tlb_power9, + .machine_check_early = __machine_check_early_realmode_p9, .platform = "power9", }, { /* Cell Broadband Engine */ diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index cfa0f81a5bb0..d10ad258d41a 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -18,7 +18,7 @@ #include <asm/kdump.h> #include <asm/prom.h> #include <asm/firmware.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/rtas.h> #ifdef DEBUG diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index c6689f658b50..d0ea7860e02b 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -46,7 +46,7 @@ static u64 swiotlb_powerpc_get_required(struct device *dev) * map_page, and unmap_page on highmem, use normal dma_ops * for everything else. */ -struct dma_map_ops swiotlb_dma_ops = { +const struct dma_map_ops swiotlb_dma_ops = { .alloc = __dma_direct_alloc_coherent, .free = __dma_direct_free_coherent, .mmap = dma_direct_mmap_coherent, diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index e64a6016fba7..41c749586bd2 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -33,7 +33,7 @@ static u64 __maybe_unused get_pfn_limit(struct device *dev) struct dev_archdata __maybe_unused *sd = &dev->archdata; #ifdef CONFIG_SWIOTLB - if (sd->max_direct_dma_addr && sd->dma_ops == &swiotlb_dma_ops) + if (sd->max_direct_dma_addr && dev->dma_ops == &swiotlb_dma_ops) pfn = min_t(u64, pfn, sd->max_direct_dma_addr >> PAGE_SHIFT); #endif @@ -203,6 +203,10 @@ static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, for_each_sg(sgl, sg, nents, i) { sg->dma_address = sg_phys(sg) + get_dma_offset(dev); sg->dma_length = sg->length; + + if (attrs & DMA_ATTR_SKIP_CPU_SYNC) + continue; + __dma_sync_page(sg_page(sg), sg->offset, sg->length, direction); } @@ -235,7 +239,10 @@ static inline dma_addr_t dma_direct_map_page(struct device *dev, unsigned long attrs) { BUG_ON(dir == DMA_NONE); - __dma_sync_page(page, offset, size, dir); + + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + __dma_sync_page(page, offset, size, dir); + return page_to_phys(page) + offset + get_dma_offset(dev); } @@ -267,7 +274,7 @@ static inline void dma_direct_sync_single(struct device *dev, } #endif -struct dma_map_ops dma_direct_ops = { +const struct dma_map_ops dma_direct_ops = { .alloc = dma_direct_alloc_coherent, .free = dma_direct_free_coherent, .mmap = dma_direct_mmap_coherent, @@ -309,7 +316,7 @@ EXPORT_SYMBOL(dma_set_coherent_mask); int __dma_set_mask(struct device *dev, u64 dma_mask) { - struct dma_map_ops *dma_ops = get_dma_ops(dev); + const struct dma_map_ops *dma_ops = get_dma_ops(dev); if ((dma_ops != NULL) && (dma_ops->set_dma_mask != NULL)) return dma_ops->set_dma_mask(dev, dma_mask); @@ -337,7 +344,7 @@ EXPORT_SYMBOL(dma_set_mask); u64 __dma_get_required_mask(struct device *dev) { - struct dma_map_ops *dma_ops = get_dma_ops(dev); + const struct dma_map_ops *dma_ops = get_dma_ops(dev); if (unlikely(dma_ops == NULL)) return 0; diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index f25731627d7f..9de7f79e702b 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -298,9 +298,17 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity) * * For pHyp, we have to enable IO for log retrieval. Otherwise, * 0xFF's is always returned from PCI config space. + * + * When the @severity is EEH_LOG_PERM, the PE is going to be + * removed. Prior to that, the drivers for devices included in + * the PE will be closed. The drivers rely on working IO path + * to bring the devices to quiet state. Otherwise, PCI traffic + * from those devices after they are removed is like to cause + * another unexpected EEH error. */ if (!(pe->type & EEH_PE_PHB)) { - if (eeh_has_flag(EEH_ENABLE_IO_FOR_LOG)) + if (eeh_has_flag(EEH_ENABLE_IO_FOR_LOG) || + severity == EEH_LOG_PERM) eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); /* @@ -372,7 +380,7 @@ static int eeh_phb_check_failure(struct eeh_pe *pe) /* Find the PHB PE */ phb_pe = eeh_phb_pe_get(pe->phb); if (!phb_pe) { - pr_warn("%s Can't find PE for PHB#%d\n", + pr_warn("%s Can't find PE for PHB#%x\n", __func__, pe->phb->global_number); return -EEXIST; } @@ -664,7 +672,7 @@ int eeh_pci_enable(struct eeh_pe *pe, int function) rc = eeh_ops->set_option(pe, function); if (rc) pr_warn("%s: Unexpected state change %d on " - "PHB#%d-PE#%x, err=%d\n", + "PHB#%x-PE#%x, err=%d\n", __func__, function, pe->phb->global_number, pe->addr, rc); @@ -808,76 +816,67 @@ static void *eeh_set_dev_freset(void *data, void *flag) } /** - * eeh_reset_pe_once - Assert the pci #RST line for 1/4 second + * eeh_pe_reset_full - Complete a full reset process on the indicated PE * @pe: EEH PE * - * Assert the PCI #RST line for 1/4 second. + * This function executes a full reset procedure on a PE, including setting + * the appropriate flags, performing a fundamental or hot reset, and then + * deactivating the reset status. It is designed to be used within the EEH + * subsystem, as opposed to eeh_pe_reset which is exported to drivers and + * only performs a single operation at a time. + * + * This function will attempt to reset a PE three times before failing. */ -static void eeh_reset_pe_once(struct eeh_pe *pe) +int eeh_pe_reset_full(struct eeh_pe *pe) { + int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); + int reset_state = (EEH_PE_RESET | EEH_PE_CFG_BLOCKED); + int type = EEH_RESET_HOT; unsigned int freset = 0; + int i, state, ret; - /* Determine type of EEH reset required for - * Partitionable Endpoint, a hot-reset (1) - * or a fundamental reset (3). - * A fundamental reset required by any device under - * Partitionable Endpoint trumps hot-reset. + /* + * Determine the type of reset to perform - hot or fundamental. + * Hot reset is the default operation, unless any device under the + * PE requires a fundamental reset. */ eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset); if (freset) - eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL); - else - eeh_ops->reset(pe, EEH_RESET_HOT); + type = EEH_RESET_FUNDAMENTAL; - eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); -} + /* Mark the PE as in reset state and block config space accesses */ + eeh_pe_state_mark(pe, reset_state); -/** - * eeh_reset_pe - Reset the indicated PE - * @pe: EEH PE - * - * This routine should be called to reset indicated device, including - * PE. A PE might include multiple PCI devices and sometimes PCI bridges - * might be involved as well. - */ -int eeh_reset_pe(struct eeh_pe *pe) -{ - int flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); - int i, state, ret; - - /* Mark as reset and block config space */ - eeh_pe_state_mark(pe, EEH_PE_RESET | EEH_PE_CFG_BLOCKED); - - /* Take three shots at resetting the bus */ + /* Make three attempts at resetting the bus */ for (i = 0; i < 3; i++) { - eeh_reset_pe_once(pe); + ret = eeh_pe_reset(pe, type); + if (ret) + break; - /* - * EEH_PE_ISOLATED is expected to be removed after - * BAR restore. - */ + ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE); + if (ret) + break; + + /* Wait until the PE is in a functioning state */ state = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); - if ((state & flags) == flags) { - ret = 0; - goto out; - } + if ((state & active_flags) == active_flags) + break; if (state < 0) { - pr_warn("%s: Unrecoverable slot failure on PHB#%d-PE#%x", + pr_warn("%s: Unrecoverable slot failure on PHB#%x-PE#%x", __func__, pe->phb->global_number, pe->addr); ret = -ENOTRECOVERABLE; - goto out; + break; } - /* We might run out of credits */ + /* Set error in case this is our last attempt */ ret = -EIO; pr_warn("%s: Failure %d resetting PHB#%x-PE#%x\n (%d)\n", __func__, state, pe->phb->global_number, pe->addr, (i + 1)); } -out: - eeh_pe_state_clear(pe, EEH_PE_RESET | EEH_PE_CFG_BLOCKED); + eeh_pe_state_clear(pe, reset_state); return ret; } @@ -1601,6 +1600,7 @@ static int eeh_pe_reenable_devices(struct eeh_pe *pe) return eeh_unfreeze_pe(pe, true); } + /** * eeh_pe_reset - Issue PE reset according to specified type * @pe: EEH PE diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 5c31369435f2..b94887165a10 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -545,7 +545,7 @@ static void *eeh_pe_detach_dev(void *data, void *userdata) static void *__eeh_clear_pe_frozen_state(void *data, void *flag) { struct eeh_pe *pe = (struct eeh_pe *)data; - bool *clear_sw_state = flag; + bool clear_sw_state = *(bool *)flag; int i, rc = 1; for (i = 0; rc && i < 3; i++) @@ -588,7 +588,7 @@ int eeh_pe_reset_and_recover(struct eeh_pe *pe) eeh_pe_dev_traverse(pe, eeh_dev_save_state, NULL); /* Issue reset */ - ret = eeh_reset_pe(pe); + ret = eeh_pe_reset_full(pe); if (ret) { eeh_pe_state_clear(pe, EEH_PE_RECOVERING); return ret; @@ -659,7 +659,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, * config accesses. So we prefer to block them. However, controlled * PCI config accesses initiated from EEH itself are allowed. */ - rc = eeh_reset_pe(pe); + rc = eeh_pe_reset_full(pe); if (rc) return rc; @@ -734,7 +734,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) frozen_bus = eeh_pe_bus_get(pe); if (!frozen_bus) { - pr_err("%s: Cannot find PCI bus for PHB#%d-PE#%x\n", + pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n", __func__, pe->phb->global_number, pe->addr); return; } @@ -878,7 +878,7 @@ excess_failures: * are due to poorly seated PCI cards. Only 10% or so are * due to actual, failed cards. */ - pr_err("EEH: PHB#%d-PE#%x has failed %d times in the\n" + pr_err("EEH: PHB#%x-PE#%x has failed %d times in the\n" "last hour and has been permanently disabled.\n" "Please try reseating or replacing it.\n", pe->phb->global_number, pe->addr, @@ -886,7 +886,7 @@ excess_failures: goto perm_error; hard_fail: - pr_err("EEH: Unable to recover from failure from PHB#%d-PE#%x.\n" + pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n" "Please try reseating or replacing it\n", pe->phb->global_number, pe->addr); @@ -1000,7 +1000,7 @@ static void eeh_handle_special_event(void) bus = eeh_pe_bus_get(phb_pe); if (!bus) { pr_err("%s: Cannot find PCI bus for " - "PHB#%d-PE#%x\n", + "PHB#%x-PE#%x\n", __func__, pe->phb->global_number, pe->addr); diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c index 82e7327e3cd0..accbf8b5fd46 100644 --- a/arch/powerpc/kernel/eeh_event.c +++ b/arch/powerpc/kernel/eeh_event.c @@ -75,11 +75,11 @@ static int eeh_event_handler(void * dummy) if (pe) { eeh_pe_state_mark(pe, EEH_PE_RECOVERING); if (pe->type & EEH_PE_PHB) - pr_info("EEH: Detected error on PHB#%d\n", + pr_info("EEH: Detected error on PHB#%x\n", pe->phb->global_number); else pr_info("EEH: Detected PCI bus error on " - "PHB#%d-PE#%x\n", + "PHB#%x-PE#%x\n", pe->phb->global_number, pe->addr); eeh_handle_event(pe); eeh_pe_state_clear(pe, EEH_PE_RECOVERING); diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index de7d091c4c31..cc4b206f77e4 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -104,7 +104,7 @@ int eeh_phb_pe_create(struct pci_controller *phb) /* Put it into the list */ list_add_tail(&pe->child, &eeh_phb_pe); - pr_debug("EEH: Add PE for PHB#%d\n", phb->global_number); + pr_debug("EEH: Add PE for PHB#%x\n", phb->global_number); return 0; } @@ -333,7 +333,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) /* Check if the PE number is valid */ if (!eeh_has_flag(EEH_VALID_PE_ZERO) && !edev->pe_config_addr) { - pr_err("%s: Invalid PE#0 for edev 0x%x on PHB#%d\n", + pr_err("%s: Invalid PE#0 for edev 0x%x on PHB#%x\n", __func__, edev->config_addr, edev->phb->global_number); return -EINVAL; } diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 3841d749a430..a38600949f3a 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -205,6 +205,9 @@ transfer_to_handler_cont: mflr r9 lwz r11,0(r9) /* virtual address of handler */ lwz r9,4(r9) /* where to go when done */ +#ifdef CONFIG_PPC_8xx_PERF_EVENT + mtspr SPRN_NRI, r0 +#endif #ifdef CONFIG_TRACE_IRQFLAGS lis r12,reenable_mmu@h ori r12,r12,reenable_mmu@l @@ -292,7 +295,9 @@ stack_ovf: lis r9,StackOverflow@ha addi r9,r9,StackOverflow@l LOAD_MSR_KERNEL(r10,MSR_KERNEL) - FIX_SRR1(r10,r12) +#ifdef CONFIG_PPC_8xx_PERF_EVENT + mtspr SPRN_NRI, r0 +#endif mtspr SPRN_SRR0,r9 mtspr SPRN_SRR1,r10 SYNC @@ -417,9 +422,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) mtlr r4 mtcr r5 lwz r7,_NIP(r1) - FIX_SRR1(r8, r0) lwz r2,GPR2(r1) lwz r1,GPR1(r1) +#ifdef CONFIG_PPC_8xx_PERF_EVENT + mtspr SPRN_NRI, r0 +#endif mtspr SPRN_SRR0,r7 mtspr SPRN_SRR1,r8 SYNC @@ -699,6 +706,9 @@ fast_exception_return: lwz r10,_LINK(r11) mtlr r10 REST_GPR(10, r11) +#ifdef CONFIG_PPC_8xx_PERF_EVENT + mtspr SPRN_NRI, r0 +#endif mtspr SPRN_SRR1,r9 mtspr SPRN_SRR0,r12 REST_GPR(9, r11) @@ -947,7 +957,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) .globl exc_exit_restart exc_exit_restart: lwz r12,_NIP(r1) - FIX_SRR1(r9,r10) +#ifdef CONFIG_PPC_8xx_PERF_EVENT + mtspr SPRN_NRI, r0 +#endif mtspr SPRN_SRR0,r12 mtspr SPRN_SRR1,r9 REST_4GPRS(9, r1) @@ -1290,7 +1302,6 @@ _GLOBAL(enter_rtas) 1: tophys(r9,r1) lwz r8,INT_FRAME_SIZE+4(r9) /* get return address */ lwz r9,8(r9) /* original msr value */ - FIX_SRR1(r9,r0) addi r1,r1,INT_FRAME_SIZE li r0,0 mtspr SPRN_SPRG_RTAS,r0 diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 6432d4bf08c8..767ef6d68c9e 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -689,7 +689,7 @@ resume_kernel: addi r8,r1,INT_FRAME_SIZE /* Get the kprobed function entry */ - lwz r3,GPR1(r1) + ld r3,GPR1(r1) subi r3,r3,INT_FRAME_SIZE /* dst: Allocate a trampoline exception frame */ mr r4,r1 /* src: current exception frame */ mr r1,r3 /* Reroute the trampoline frame to r1 */ @@ -703,8 +703,8 @@ resume_kernel: addi r6,r6,8 bdnz 2b - /* Do real store operation to complete stwu */ - lwz r5,GPR1(r1) + /* Do real store operation to complete stdu */ + ld r5,GPR1(r1) std r8,0(r5) /* Clear _TIF_EMULATE_STACK_STORE flag */ diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 38a1f96430e1..45b453e4d0c8 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -923,10 +923,10 @@ kernel_dbg_exc: PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x340) addi r3,r1,STACK_FRAME_OVERHEAD - bl .save_nvgprs + bl save_nvgprs INTS_RESTORE_HARD - bl .unknown_exception - b .ret_from_except + bl unknown_exception + b ret_from_except /* * An interrupt came in while soft-disabled; We mark paca->irq_happened diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 1ba82ea90230..6353019966e6 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -93,7 +93,7 @@ USE_FIXED_SECTION(real_vectors) __start_interrupts: /* No virt vectors corresponding with 0x0..0x100 */ -EXC_VIRT_NONE(0x4000, 0x4100) +EXC_VIRT_NONE(0x4000, 0x100) #ifdef CONFIG_PPC_P7_NAP @@ -114,15 +114,15 @@ EXC_VIRT_NONE(0x4000, 0x4100) #define IDLETEST NOTEST #endif -EXC_REAL_BEGIN(system_reset, 0x100, 0x200) +EXC_REAL_BEGIN(system_reset, 0x100, 0x100) SET_SCRATCH0(r13) GET_PACA(r13) clrrdi r13,r13,1 /* Last bit of HSPRG0 is set if waking from winkle */ EXCEPTION_PROLOG_PSERIES_PACA(PACA_EXGEN, system_reset_common, EXC_STD, IDLETEST, 0x100) -EXC_REAL_END(system_reset, 0x100, 0x200) -EXC_VIRT_NONE(0x4100, 0x4200) +EXC_REAL_END(system_reset, 0x100, 0x100) +EXC_VIRT_NONE(0x4100, 0x100) #ifdef CONFIG_PPC_P7_NAP EXC_COMMON_BEGIN(system_reset_idle_common) @@ -142,7 +142,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) lbz r0,HSTATE_HWTHREAD_REQ(r13) cmpwi r0,0 beq 1f - b kvm_start_guest + BRANCH_TO_KVM(r10, kvm_start_guest) 1: #endif @@ -166,7 +166,7 @@ TRAMP_REAL_BEGIN(system_reset_fwnmi) #endif /* CONFIG_PPC_PSERIES */ -EXC_REAL_BEGIN(machine_check, 0x200, 0x300) +EXC_REAL_BEGIN(machine_check, 0x200, 0x100) /* This is moved out of line as it can be patched by FW, but * some code path might still want to branch into the original * vector @@ -186,8 +186,8 @@ BEGIN_FTR_SECTION FTR_SECTION_ELSE b machine_check_pSeries_0 ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) -EXC_REAL_END(machine_check, 0x200, 0x300) -EXC_VIRT_NONE(0x4200, 0x4300) +EXC_REAL_END(machine_check, 0x200, 0x100) +EXC_VIRT_NONE(0x4200, 0x100) TRAMP_REAL_BEGIN(machine_check_powernv_early) BEGIN_FTR_SECTION EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200) @@ -381,12 +381,12 @@ EXC_COMMON_BEGIN(machine_check_handle_early) lbz r3,PACA_THREAD_IDLE_STATE(r13) cmpwi r3,PNV_THREAD_NAP bgt 10f - IDLE_STATE_ENTER_SEQ(PPC_NAP) + IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP) /* No return */ 10: cmpwi r3,PNV_THREAD_SLEEP bgt 2f - IDLE_STATE_ENTER_SEQ(PPC_SLEEP) + IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP) /* No return */ 2: @@ -400,7 +400,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early) */ ori r13,r13,1 SET_PACA(r13) - IDLE_STATE_ENTER_SEQ(PPC_WINKLE) + IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE) /* No return */ 4: #endif @@ -483,8 +483,8 @@ EXC_COMMON_BEGIN(unrecover_mce) b 1b -EXC_REAL(data_access, 0x300, 0x380) -EXC_VIRT(data_access, 0x4300, 0x4380, 0x300) +EXC_REAL(data_access, 0x300, 0x80) +EXC_VIRT(data_access, 0x4300, 0x80, 0x300) TRAMP_KVM_SKIP(PACA_EXGEN, 0x300) EXC_COMMON_BEGIN(data_access_common) @@ -512,7 +512,7 @@ MMU_FTR_SECTION_ELSE ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) -EXC_REAL_BEGIN(data_access_slb, 0x380, 0x400) +EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80) SET_SCRATCH0(r13) EXCEPTION_PROLOG_0(PACA_EXSLB) EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x380) @@ -533,9 +533,9 @@ EXC_REAL_BEGIN(data_access_slb, 0x380, 0x400) mtctr r10 bctr #endif -EXC_REAL_END(data_access_slb, 0x380, 0x400) +EXC_REAL_END(data_access_slb, 0x380, 0x80) -EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x4400) +EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80) SET_SCRATCH0(r13) EXCEPTION_PROLOG_0(PACA_EXSLB) EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x380) @@ -556,12 +556,12 @@ EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x4400) mtctr r10 bctr #endif -EXC_VIRT_END(data_access_slb, 0x4380, 0x4400) +EXC_VIRT_END(data_access_slb, 0x4380, 0x80) TRAMP_KVM_SKIP(PACA_EXSLB, 0x380) -EXC_REAL(instruction_access, 0x400, 0x480) -EXC_VIRT(instruction_access, 0x4400, 0x4480, 0x400) +EXC_REAL(instruction_access, 0x400, 0x80) +EXC_VIRT(instruction_access, 0x4400, 0x80, 0x400) TRAMP_KVM(PACA_EXGEN, 0x400) EXC_COMMON_BEGIN(instruction_access_common) @@ -580,7 +580,7 @@ MMU_FTR_SECTION_ELSE ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) -EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x500) +EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x80) SET_SCRATCH0(r13) EXCEPTION_PROLOG_0(PACA_EXSLB) EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x480) @@ -596,9 +596,9 @@ EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x500) mtctr r10 bctr #endif -EXC_REAL_END(instruction_access_slb, 0x480, 0x500) +EXC_REAL_END(instruction_access_slb, 0x480, 0x80) -EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x4500) +EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80) SET_SCRATCH0(r13) EXCEPTION_PROLOG_0(PACA_EXSLB) EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x480) @@ -614,7 +614,7 @@ EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x4500) mtctr r10 bctr #endif -EXC_VIRT_END(instruction_access_slb, 0x4480, 0x4500) +EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80) TRAMP_KVM(PACA_EXSLB, 0x480) @@ -711,23 +711,19 @@ EXC_COMMON_BEGIN(bad_addr_slb) bl slb_miss_bad_addr b ret_from_except -EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x600) +EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100) .globl hardware_interrupt_hv; hardware_interrupt_hv: BEGIN_FTR_SECTION _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, EXC_HV, SOFTEN_TEST_HV) -do_kvm_H0x500: - KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x502) FTR_SECTION_ELSE _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, EXC_STD, SOFTEN_TEST_PR) -do_kvm_0x500: - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x500) ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) -EXC_REAL_END(hardware_interrupt, 0x500, 0x600) +EXC_REAL_END(hardware_interrupt, 0x500, 0x100) -EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x4600) +EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100) .globl hardware_interrupt_relon_hv; hardware_interrupt_relon_hv: BEGIN_FTR_SECTION @@ -735,13 +731,15 @@ hardware_interrupt_relon_hv: FTR_SECTION_ELSE _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, EXC_STD, SOFTEN_TEST_PR) ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) -EXC_VIRT_END(hardware_interrupt, 0x4500, 0x4600) +EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100) +TRAMP_KVM(PACA_EXGEN, 0x500) +TRAMP_KVM_HV(PACA_EXGEN, 0x500) EXC_COMMON_ASYNC(hardware_interrupt_common, 0x500, do_IRQ) -EXC_REAL(alignment, 0x600, 0x700) -EXC_VIRT(alignment, 0x4600, 0x4700, 0x600) +EXC_REAL(alignment, 0x600, 0x100) +EXC_VIRT(alignment, 0x4600, 0x100, 0x600) TRAMP_KVM(PACA_EXGEN, 0x600) EXC_COMMON_BEGIN(alignment_common) mfspr r10,SPRN_DAR @@ -760,8 +758,8 @@ EXC_COMMON_BEGIN(alignment_common) b ret_from_except -EXC_REAL(program_check, 0x700, 0x800) -EXC_VIRT(program_check, 0x4700, 0x4800, 0x700) +EXC_REAL(program_check, 0x700, 0x100) +EXC_VIRT(program_check, 0x4700, 0x100, 0x700) TRAMP_KVM(PACA_EXGEN, 0x700) EXC_COMMON_BEGIN(program_check_common) EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN) @@ -772,8 +770,8 @@ EXC_COMMON_BEGIN(program_check_common) b ret_from_except -EXC_REAL(fp_unavailable, 0x800, 0x900) -EXC_VIRT(fp_unavailable, 0x4800, 0x4900, 0x800) +EXC_REAL(fp_unavailable, 0x800, 0x100) +EXC_VIRT(fp_unavailable, 0x4800, 0x100, 0x800) TRAMP_KVM(PACA_EXGEN, 0x800) EXC_COMMON_BEGIN(fp_unavailable_common) EXCEPTION_PROLOG_COMMON(0x800, PACA_EXGEN) @@ -805,20 +803,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) #endif -EXC_REAL_MASKABLE(decrementer, 0x900, 0x980) -EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x4980, 0x900) +EXC_REAL_MASKABLE(decrementer, 0x900, 0x80) +EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x80, 0x900) TRAMP_KVM(PACA_EXGEN, 0x900) EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt) -EXC_REAL_HV(hdecrementer, 0x980, 0xa00) -EXC_VIRT_HV(hdecrementer, 0x4980, 0x4a00, 0x980) +EXC_REAL_HV(hdecrementer, 0x980, 0x80) +EXC_VIRT_HV(hdecrementer, 0x4980, 0x80, 0x980) TRAMP_KVM_HV(PACA_EXGEN, 0x980) EXC_COMMON(hdecrementer_common, 0x980, hdec_interrupt) -EXC_REAL_MASKABLE(doorbell_super, 0xa00, 0xb00) -EXC_VIRT_MASKABLE(doorbell_super, 0x4a00, 0x4b00, 0xa00) +EXC_REAL_MASKABLE(doorbell_super, 0xa00, 0x100) +EXC_VIRT_MASKABLE(doorbell_super, 0x4a00, 0x100, 0xa00) TRAMP_KVM(PACA_EXGEN, 0xa00) #ifdef CONFIG_PPC_DOORBELL EXC_COMMON_ASYNC(doorbell_super_common, 0xa00, doorbell_exception) @@ -827,11 +825,36 @@ EXC_COMMON_ASYNC(doorbell_super_common, 0xa00, unknown_exception) #endif -EXC_REAL(trap_0b, 0xb00, 0xc00) -EXC_VIRT(trap_0b, 0x4b00, 0x4c00, 0xb00) +EXC_REAL(trap_0b, 0xb00, 0x100) +EXC_VIRT(trap_0b, 0x4b00, 0x100, 0xb00) TRAMP_KVM(PACA_EXGEN, 0xb00) EXC_COMMON(trap_0b_common, 0xb00, unknown_exception) +#ifdef CONFIG_KVM_BOOK3S_64_HANDLER + /* + * If CONFIG_KVM_BOOK3S_64_HANDLER is set, save the PPR (on systems + * that support it) before changing to HMT_MEDIUM. That allows the KVM + * code to save that value into the guest state (it is the guest's PPR + * value). Otherwise just change to HMT_MEDIUM as userspace has + * already saved the PPR. + */ +#define SYSCALL_KVMTEST \ + SET_SCRATCH0(r13); \ + GET_PACA(r13); \ + std r9,PACA_EXGEN+EX_R9(r13); \ + OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR); \ + HMT_MEDIUM; \ + std r10,PACA_EXGEN+EX_R10(r13); \ + OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r9, CPU_FTR_HAS_PPR); \ + mfcr r9; \ + KVMTEST_PR(0xc00); \ + GET_SCRATCH0(r13) + +#else +#define SYSCALL_KVMTEST \ + HMT_MEDIUM +#endif + #define LOAD_SYSCALL_HANDLER(reg) \ __LOAD_HANDLER(reg, system_call_common) @@ -884,50 +907,30 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \ b system_call_common ; #endif -EXC_REAL_BEGIN(system_call, 0xc00, 0xd00) - /* - * If CONFIG_KVM_BOOK3S_64_HANDLER is set, save the PPR (on systems - * that support it) before changing to HMT_MEDIUM. That allows the KVM - * code to save that value into the guest state (it is the guest's PPR - * value). Otherwise just change to HMT_MEDIUM as userspace has - * already saved the PPR. - */ -#ifdef CONFIG_KVM_BOOK3S_64_HANDLER - SET_SCRATCH0(r13) - GET_PACA(r13) - std r9,PACA_EXGEN+EX_R9(r13) - OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR); - HMT_MEDIUM; - std r10,PACA_EXGEN+EX_R10(r13) - OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r9, CPU_FTR_HAS_PPR); - mfcr r9 - KVMTEST_PR(0xc00) - GET_SCRATCH0(r13) -#else - HMT_MEDIUM; -#endif +EXC_REAL_BEGIN(system_call, 0xc00, 0x100) + SYSCALL_KVMTEST SYSCALL_PSERIES_1 SYSCALL_PSERIES_2_RFID SYSCALL_PSERIES_3 -EXC_REAL_END(system_call, 0xc00, 0xd00) +EXC_REAL_END(system_call, 0xc00, 0x100) -EXC_VIRT_BEGIN(system_call, 0x4c00, 0x4d00) - HMT_MEDIUM +EXC_VIRT_BEGIN(system_call, 0x4c00, 0x100) + SYSCALL_KVMTEST SYSCALL_PSERIES_1 SYSCALL_PSERIES_2_DIRECT SYSCALL_PSERIES_3 -EXC_VIRT_END(system_call, 0x4c00, 0x4d00) +EXC_VIRT_END(system_call, 0x4c00, 0x100) TRAMP_KVM(PACA_EXGEN, 0xc00) -EXC_REAL(single_step, 0xd00, 0xe00) -EXC_VIRT(single_step, 0x4d00, 0x4e00, 0xd00) +EXC_REAL(single_step, 0xd00, 0x100) +EXC_VIRT(single_step, 0x4d00, 0x100, 0xd00) TRAMP_KVM(PACA_EXGEN, 0xd00) EXC_COMMON(single_step_common, 0xd00, single_step_exception) -EXC_REAL_OOL_HV(h_data_storage, 0xe00, 0xe20) -EXC_VIRT_NONE(0x4e00, 0x4e20) +EXC_REAL_OOL_HV(h_data_storage, 0xe00, 0x20) +EXC_VIRT_OOL_HV(h_data_storage, 0x4e00, 0x20, 0xe00) TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0xe00) EXC_COMMON_BEGIN(h_data_storage_common) mfspr r10,SPRN_HDAR @@ -942,14 +945,14 @@ EXC_COMMON_BEGIN(h_data_storage_common) b ret_from_except -EXC_REAL_OOL_HV(h_instr_storage, 0xe20, 0xe40) -EXC_VIRT_NONE(0x4e20, 0x4e40) +EXC_REAL_OOL_HV(h_instr_storage, 0xe20, 0x20) +EXC_VIRT_OOL_HV(h_instr_storage, 0x4e20, 0x20, 0xe20) TRAMP_KVM_HV(PACA_EXGEN, 0xe20) EXC_COMMON(h_instr_storage_common, 0xe20, unknown_exception) -EXC_REAL_OOL_HV(emulation_assist, 0xe40, 0xe60) -EXC_VIRT_OOL_HV(emulation_assist, 0x4e40, 0x4e60, 0xe40) +EXC_REAL_OOL_HV(emulation_assist, 0xe40, 0x20) +EXC_VIRT_OOL_HV(emulation_assist, 0x4e40, 0x20, 0xe40) TRAMP_KVM_HV(PACA_EXGEN, 0xe40) EXC_COMMON(emulation_assist_common, 0xe40, emulation_assist_interrupt) @@ -959,9 +962,9 @@ EXC_COMMON(emulation_assist_common, 0xe40, emulation_assist_interrupt) * first, and then eventaully from there to the trampoline to get into virtual * mode. */ -__EXC_REAL_OOL_HV_DIRECT(hmi_exception, 0xe60, 0xe80, hmi_exception_early) -__TRAMP_REAL_REAL_OOL_MASKABLE_HV(hmi_exception, 0xe60) -EXC_VIRT_NONE(0x4e60, 0x4e80) +__EXC_REAL_OOL_HV_DIRECT(hmi_exception, 0xe60, 0x20, hmi_exception_early) +__TRAMP_REAL_OOL_MASKABLE_HV(hmi_exception, 0xe60) +EXC_VIRT_NONE(0x4e60, 0x20) TRAMP_KVM_HV(PACA_EXGEN, 0xe60) TRAMP_REAL_BEGIN(hmi_exception_early) EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_HV, 0xe60) @@ -979,7 +982,7 @@ TRAMP_REAL_BEGIN(hmi_exception_early) EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN) EXCEPTION_PROLOG_COMMON_3(0xe60) addi r3,r1,STACK_FRAME_OVERHEAD - bl hmi_exception_realmode + BRANCH_LINK_TO_FAR(hmi_exception_realmode) /* Function call ABI */ /* Windup the stack. */ /* Move original HSRR0 and HSRR1 into the respective regs */ ld r9,_MSR(r1) @@ -1015,8 +1018,8 @@ hmi_exception_after_realmode: EXC_COMMON_ASYNC(hmi_exception_common, 0xe60, handle_hmi_exception) -EXC_REAL_OOL_MASKABLE_HV(h_doorbell, 0xe80, 0xea0) -EXC_VIRT_OOL_MASKABLE_HV(h_doorbell, 0x4e80, 0x4ea0, 0xe80) +EXC_REAL_OOL_MASKABLE_HV(h_doorbell, 0xe80, 0x20) +EXC_VIRT_OOL_MASKABLE_HV(h_doorbell, 0x4e80, 0x20, 0xe80) TRAMP_KVM_HV(PACA_EXGEN, 0xe80) #ifdef CONFIG_PPC_DOORBELL EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, doorbell_exception) @@ -1025,24 +1028,26 @@ EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, unknown_exception) #endif -EXC_REAL_OOL_MASKABLE_HV(h_virt_irq, 0xea0, 0xec0) -EXC_VIRT_OOL_MASKABLE_HV(h_virt_irq, 0x4ea0, 0x4ec0, 0xea0) +EXC_REAL_OOL_MASKABLE_HV(h_virt_irq, 0xea0, 0x20) +EXC_VIRT_OOL_MASKABLE_HV(h_virt_irq, 0x4ea0, 0x20, 0xea0) TRAMP_KVM_HV(PACA_EXGEN, 0xea0) EXC_COMMON_ASYNC(h_virt_irq_common, 0xea0, do_IRQ) -EXC_REAL_NONE(0xec0, 0xf00) -EXC_VIRT_NONE(0x4ec0, 0x4f00) +EXC_REAL_NONE(0xec0, 0x20) +EXC_VIRT_NONE(0x4ec0, 0x20) +EXC_REAL_NONE(0xee0, 0x20) +EXC_VIRT_NONE(0x4ee0, 0x20) -EXC_REAL_OOL(performance_monitor, 0xf00, 0xf20) -EXC_VIRT_OOL(performance_monitor, 0x4f00, 0x4f20, 0xf00) +EXC_REAL_OOL(performance_monitor, 0xf00, 0x20) +EXC_VIRT_OOL(performance_monitor, 0x4f00, 0x20, 0xf00) TRAMP_KVM(PACA_EXGEN, 0xf00) EXC_COMMON_ASYNC(performance_monitor_common, 0xf00, performance_monitor_exception) -EXC_REAL_OOL(altivec_unavailable, 0xf20, 0xf40) -EXC_VIRT_OOL(altivec_unavailable, 0x4f20, 0x4f40, 0xf20) +EXC_REAL_OOL(altivec_unavailable, 0xf20, 0x20) +EXC_VIRT_OOL(altivec_unavailable, 0x4f20, 0x20, 0xf20) TRAMP_KVM(PACA_EXGEN, 0xf20) EXC_COMMON_BEGIN(altivec_unavailable_common) EXCEPTION_PROLOG_COMMON(0xf20, PACA_EXGEN) @@ -1078,8 +1083,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) b ret_from_except -EXC_REAL_OOL(vsx_unavailable, 0xf40, 0xf60) -EXC_VIRT_OOL(vsx_unavailable, 0x4f40, 0x4f60, 0xf40) +EXC_REAL_OOL(vsx_unavailable, 0xf40, 0x20) +EXC_VIRT_OOL(vsx_unavailable, 0x4f40, 0x20, 0xf40) TRAMP_KVM(PACA_EXGEN, 0xf40) EXC_COMMON_BEGIN(vsx_unavailable_common) EXCEPTION_PROLOG_COMMON(0xf40, PACA_EXGEN) @@ -1114,41 +1119,50 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) b ret_from_except -EXC_REAL_OOL(facility_unavailable, 0xf60, 0xf80) -EXC_VIRT_OOL(facility_unavailable, 0x4f60, 0x4f80, 0xf60) +EXC_REAL_OOL(facility_unavailable, 0xf60, 0x20) +EXC_VIRT_OOL(facility_unavailable, 0x4f60, 0x20, 0xf60) TRAMP_KVM(PACA_EXGEN, 0xf60) EXC_COMMON(facility_unavailable_common, 0xf60, facility_unavailable_exception) -EXC_REAL_OOL_HV(h_facility_unavailable, 0xf80, 0xfa0) -EXC_VIRT_OOL_HV(h_facility_unavailable, 0x4f80, 0x4fa0, 0xf80) +EXC_REAL_OOL_HV(h_facility_unavailable, 0xf80, 0x20) +EXC_VIRT_OOL_HV(h_facility_unavailable, 0x4f80, 0x20, 0xf80) TRAMP_KVM_HV(PACA_EXGEN, 0xf80) EXC_COMMON(h_facility_unavailable_common, 0xf80, facility_unavailable_exception) -EXC_REAL_NONE(0xfa0, 0x1200) -EXC_VIRT_NONE(0x4fa0, 0x5200) +EXC_REAL_NONE(0xfa0, 0x20) +EXC_VIRT_NONE(0x4fa0, 0x20) +EXC_REAL_NONE(0xfc0, 0x20) +EXC_VIRT_NONE(0x4fc0, 0x20) +EXC_REAL_NONE(0xfe0, 0x20) +EXC_VIRT_NONE(0x4fe0, 0x20) + +EXC_REAL_NONE(0x1000, 0x100) +EXC_VIRT_NONE(0x5000, 0x100) +EXC_REAL_NONE(0x1100, 0x100) +EXC_VIRT_NONE(0x5100, 0x100) #ifdef CONFIG_CBE_RAS -EXC_REAL_HV(cbe_system_error, 0x1200, 0x1300) -EXC_VIRT_NONE(0x5200, 0x5300) +EXC_REAL_HV(cbe_system_error, 0x1200, 0x100) +EXC_VIRT_NONE(0x5200, 0x100) TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0x1200) EXC_COMMON(cbe_system_error_common, 0x1200, cbe_system_error_exception) #else /* CONFIG_CBE_RAS */ -EXC_REAL_NONE(0x1200, 0x1300) -EXC_VIRT_NONE(0x5200, 0x5300) +EXC_REAL_NONE(0x1200, 0x100) +EXC_VIRT_NONE(0x5200, 0x100) #endif -EXC_REAL(instruction_breakpoint, 0x1300, 0x1400) -EXC_VIRT(instruction_breakpoint, 0x5300, 0x5400, 0x1300) +EXC_REAL(instruction_breakpoint, 0x1300, 0x100) +EXC_VIRT(instruction_breakpoint, 0x5300, 0x100, 0x1300) TRAMP_KVM_SKIP(PACA_EXGEN, 0x1300) EXC_COMMON(instruction_breakpoint_common, 0x1300, instruction_breakpoint_exception) -EXC_REAL_NONE(0x1400, 0x1500) -EXC_VIRT_NONE(0x5400, 0x5500) +EXC_REAL_NONE(0x1400, 0x100) +EXC_VIRT_NONE(0x5400, 0x100) -EXC_REAL_BEGIN(denorm_exception_hv, 0x1500, 0x1600) +EXC_REAL_BEGIN(denorm_exception_hv, 0x1500, 0x100) mtspr SPRN_SPRG_HSCRATCH0,r13 EXCEPTION_PROLOG_0(PACA_EXGEN) EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0x1500) @@ -1163,14 +1177,14 @@ EXC_REAL_BEGIN(denorm_exception_hv, 0x1500, 0x1600) KVMTEST_PR(0x1500) EXCEPTION_PROLOG_PSERIES_1(denorm_common, EXC_HV) -EXC_REAL_END(denorm_exception_hv, 0x1500, 0x1600) +EXC_REAL_END(denorm_exception_hv, 0x1500, 0x100) #ifdef CONFIG_PPC_DENORMALISATION -EXC_VIRT_BEGIN(denorm_exception, 0x5500, 0x5600) +EXC_VIRT_BEGIN(denorm_exception, 0x5500, 0x100) b exc_real_0x1500_denorm_exception_hv -EXC_VIRT_END(denorm_exception, 0x5500, 0x5600) +EXC_VIRT_END(denorm_exception, 0x5500, 0x100) #else -EXC_VIRT_NONE(0x5500, 0x5600) +EXC_VIRT_NONE(0x5500, 0x100) #endif TRAMP_KVM_SKIP(PACA_EXGEN, 0x1500) @@ -1243,18 +1257,18 @@ EXC_COMMON_HV(denorm_common, 0x1500, unknown_exception) #ifdef CONFIG_CBE_RAS -EXC_REAL_HV(cbe_maintenance, 0x1600, 0x1700) -EXC_VIRT_NONE(0x5600, 0x5700) +EXC_REAL_HV(cbe_maintenance, 0x1600, 0x100) +EXC_VIRT_NONE(0x5600, 0x100) TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0x1600) EXC_COMMON(cbe_maintenance_common, 0x1600, cbe_maintenance_exception) #else /* CONFIG_CBE_RAS */ -EXC_REAL_NONE(0x1600, 0x1700) -EXC_VIRT_NONE(0x5600, 0x5700) +EXC_REAL_NONE(0x1600, 0x100) +EXC_VIRT_NONE(0x5600, 0x100) #endif -EXC_REAL(altivec_assist, 0x1700, 0x1800) -EXC_VIRT(altivec_assist, 0x5700, 0x5800, 0x1700) +EXC_REAL(altivec_assist, 0x1700, 0x100) +EXC_VIRT(altivec_assist, 0x5700, 0x100, 0x1700) TRAMP_KVM(PACA_EXGEN, 0x1700) #ifdef CONFIG_ALTIVEC EXC_COMMON(altivec_assist_common, 0x1700, altivec_assist_exception) @@ -1264,13 +1278,13 @@ EXC_COMMON(altivec_assist_common, 0x1700, unknown_exception) #ifdef CONFIG_CBE_RAS -EXC_REAL_HV(cbe_thermal, 0x1800, 0x1900) -EXC_VIRT_NONE(0x5800, 0x5900) +EXC_REAL_HV(cbe_thermal, 0x1800, 0x100) +EXC_VIRT_NONE(0x5800, 0x100) TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0x1800) EXC_COMMON(cbe_thermal_common, 0x1800, cbe_thermal_exception) #else /* CONFIG_CBE_RAS */ -EXC_REAL_NONE(0x1800, 0x1900) -EXC_VIRT_NONE(0x5800, 0x5900) +EXC_REAL_NONE(0x1800, 0x100) +EXC_VIRT_NONE(0x5800, 0x100) #endif @@ -1408,7 +1422,7 @@ USE_TEXT_SECTION() /* * Hash table stuff */ - .align 7 + .balign IFETCH_ALIGN_BYTES do_hash_page: #ifdef CONFIG_PPC_STD_MMU_64 andis. r0,r4,0xa410 /* weird error? */ diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 8f0c7c5d93f2..8ff0dd4e77a7 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -406,12 +406,35 @@ static void register_fw_dump(struct fadump_mem_struct *fdm) void crash_fadump(struct pt_regs *regs, const char *str) { struct fadump_crash_info_header *fdh = NULL; + int old_cpu, this_cpu; if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr) return; + /* + * old_cpu == -1 means this is the first CPU which has come here, + * go ahead and trigger fadump. + * + * old_cpu != -1 means some other CPU has already on it's way + * to trigger fadump, just keep looping here. + */ + this_cpu = smp_processor_id(); + old_cpu = cmpxchg(&crashing_cpu, -1, this_cpu); + + if (old_cpu != -1) { + /* + * We can't loop here indefinitely. Wait as long as fadump + * is in force. If we race with fadump un-registration this + * loop will break and then we go down to normal panic path + * and reboot. If fadump is in force the first crashing + * cpu will definitely trigger fadump. + */ + while (fw_dump.dump_registered) + cpu_relax(); + return; + } + fdh = __va(fw_dump.fadumphdr_addr); - crashing_cpu = smp_processor_id(); fdh->crashing_cpu = crashing_cpu; crash_save_vmcoreinfo(); diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index a95639b8d4ac..5c9f50c1aa99 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -47,13 +47,11 @@ ftrace_modify_code(unsigned long ip, unsigned int old, unsigned int new) unsigned int replaced; /* - * Note: Due to modules and __init, code can - * disappear and change, we need to protect against faulting - * as well as code changing. We do this by using the - * probe_kernel_* functions. - * - * No real locking needed, this code is run through - * kstop_machine, or before SMP starts. + * Note: + * We are paranoid about modifying text, as if a bug was to happen, it + * could cause us to read or write to someplace that could cause harm. + * Carefully read and modify the code with probe_kernel_*(), and make + * sure what we read is what we expected it to be before modifying it. */ /* read the text we want to modify */ diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 9d963547d243..1607be7c0ef2 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -869,7 +869,6 @@ __secondary_start: /* enable MMU and jump to start_secondary */ li r4,MSR_KERNEL - FIX_SRR1(r4,r5) lis r3,start_secondary@h ori r3,r3,start_secondary@l mtspr SPRN_SRR0,r3 @@ -977,7 +976,6 @@ start_here: ori r4,r4,2f@l tophys(r4,r4) li r3,MSR_KERNEL & ~(MSR_IR|MSR_DR) - FIX_SRR1(r3,r5) mtspr SPRN_SRR0,r4 mtspr SPRN_SRR1,r3 SYNC @@ -1001,7 +999,6 @@ start_here: /* Now turn on the MMU for real! */ li r4,MSR_KERNEL - FIX_SRR1(r4,r5) lis r3,start_kernel@h ori r3,r3,start_kernel@l mtspr SPRN_SRR0,r3 diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 04c546e20cc0..1dc5eae2ced3 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -107,12 +107,19 @@ __secondary_hold_acknowledge: * crash_kernel region. The loader is responsible for * observing the alignment requirement. */ + +#ifdef CONFIG_RELOCATABLE_TEST +#define RUN_AT_LOAD_DEFAULT 1 /* Test relocation, do not copy to 0 */ +#else +#define RUN_AT_LOAD_DEFAULT 0x72756e30 /* "run0" -- relocate to 0 by default */ +#endif + /* Do not move this variable as kexec-tools knows about it. */ . = 0x5c .globl __run_at_load __run_at_load: DEFINE_FIXED_SYMBOL(__run_at_load) - .long 0x72756e30 /* "run0" -- relocate to 0 by default */ + .long RUN_AT_LOAD_DEFAULT #endif . = 0x60 @@ -153,7 +160,7 @@ __secondary_hold: cmpdi 0,r12,0 beq 100b -#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC) +#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE) #ifdef CONFIG_PPC_BOOK3E tovirt(r12,r12) #endif @@ -214,9 +221,9 @@ booting_thread_hwid: */ _GLOBAL(book3e_start_thread) LOAD_REG_IMMEDIATE(r5, MSR_KERNEL) - cmpi 0, r3, 0 + cmpwi r3, 0 beq 10f - cmpi 0, r3, 1 + cmpwi r3, 1 beq 11f /* If the thread id is invalid, just exit. */ b 13f @@ -241,9 +248,9 @@ _GLOBAL(book3e_start_thread) * r3 = the thread physical id */ _GLOBAL(book3e_stop_thread) - cmpi 0, r3, 0 + cmpwi r3, 0 beq 10f - cmpi 0, r3, 1 + cmpwi r3, 1 beq 10f /* If the thread id is invalid, just exit. */ b 13f diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index fb133a163263..c032fe8c2d26 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -73,6 +73,9 @@ #define RPN_PATTERN 0x00f0 #endif +#define PAGE_SHIFT_512K 19 +#define PAGE_SHIFT_8M 23 + __HEAD _ENTRY(_stext); _ENTRY(_start); @@ -322,20 +325,28 @@ SystemCall: #endif InstructionTLBMiss: -#if defined(CONFIG_8xx_CPU6) || defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) +#if defined(CONFIG_8xx_CPU6) || defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) || defined (CONFIG_HUGETLB_PAGE) mtspr SPRN_SPRG_SCRATCH2, r3 #endif EXCEPTION_PROLOG_0 +#ifdef CONFIG_PPC_8xx_PERF_EVENT + lis r10, (itlb_miss_counter - PAGE_OFFSET)@ha + lwz r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10) + addi r11, r11, 1 + stw r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10) +#endif /* If we are faulting a kernel address, we have to use the * kernel page tables. */ mfspr r10, SPRN_SRR0 /* Get effective address of fault */ INVALIDATE_ADJACENT_PAGES_CPU15(r11, r10) -#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) /* Only modules will cause ITLB Misses as we always * pin the first 8MB of kernel memory */ +#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) || defined (CONFIG_HUGETLB_PAGE) mfcr r3 +#endif +#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) IS_KERNEL(r11, r10) #endif mfspr r11, SPRN_M_TW /* Get level 1 table */ @@ -343,7 +354,6 @@ InstructionTLBMiss: BRANCH_UNLESS_KERNEL(3f) lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha 3: - mtcr r3 #endif /* Insert level 1 index */ rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 @@ -351,14 +361,25 @@ InstructionTLBMiss: /* Extract level 2 index */ rlwinm r10, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 +#ifdef CONFIG_HUGETLB_PAGE + mtcr r11 + bt- 28, 10f /* bit 28 = Large page (8M) */ + bt- 29, 20f /* bit 29 = Large page (8M or 512k) */ +#endif rlwimi r10, r11, 0, 0, 32 - PAGE_SHIFT - 1 /* Add level 2 base */ lwz r10, 0(r10) /* Get the pte */ - +4: +#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) || defined (CONFIG_HUGETLB_PAGE) + mtcr r3 +#endif /* Insert the APG into the TWC from the Linux PTE. */ rlwimi r11, r10, 0, 25, 26 /* Load the MI_TWC with the attributes for this "segment." */ MTSPR_CPU6(SPRN_MI_TWC, r11, r3) /* Set segment attributes */ +#if defined (CONFIG_HUGETLB_PAGE) && defined (CONFIG_PPC_4K_PAGES) + rlwimi r10, r11, 1, MI_SPS16K +#endif #ifdef CONFIG_SWAP rlwinm r11, r10, 32-5, _PAGE_PRESENT and r11, r11, r10 @@ -371,20 +392,55 @@ InstructionTLBMiss: * set. All other Linux PTE bits control the behavior * of the MMU. */ +#if defined (CONFIG_HUGETLB_PAGE) && defined (CONFIG_PPC_4K_PAGES) + rlwimi r10, r11, 0, 0x0ff0 /* Set 24-27, clear 20-23 */ +#else rlwimi r10, r11, 0, 0x0ff8 /* Set 24-27, clear 20-23,28 */ +#endif MTSPR_CPU6(SPRN_MI_RPN, r10, r3) /* Update TLB entry */ /* Restore registers */ -#if defined(CONFIG_8xx_CPU6) || defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) +#if defined(CONFIG_8xx_CPU6) || defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) || defined (CONFIG_HUGETLB_PAGE) mfspr r3, SPRN_SPRG_SCRATCH2 #endif EXCEPTION_EPILOG_0 rfi +#ifdef CONFIG_HUGETLB_PAGE +10: /* 8M pages */ +#ifdef CONFIG_PPC_16K_PAGES + /* Extract level 2 index */ + rlwinm r10, r10, 32 - (PAGE_SHIFT_8M - PAGE_SHIFT), 32 + PAGE_SHIFT_8M - (PAGE_SHIFT << 1), 29 + /* Add level 2 base */ + rlwimi r10, r11, 0, 0, 32 + PAGE_SHIFT_8M - (PAGE_SHIFT << 1) - 1 +#else + /* Level 2 base */ + rlwinm r10, r11, 0, ~HUGEPD_SHIFT_MASK +#endif + lwz r10, 0(r10) /* Get the pte */ + rlwinm r11, r11, 0, 0xf + b 4b + +20: /* 512k pages */ + /* Extract level 2 index */ + rlwinm r10, r10, 32 - (PAGE_SHIFT_512K - PAGE_SHIFT), 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1), 29 + /* Add level 2 base */ + rlwimi r10, r11, 0, 0, 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1) - 1 + lwz r10, 0(r10) /* Get the pte */ + rlwinm r11, r11, 0, 0xf + b 4b +#endif + . = 0x1200 DataStoreTLBMiss: mtspr SPRN_SPRG_SCRATCH2, r3 EXCEPTION_PROLOG_0 +#ifdef CONFIG_PPC_8xx_PERF_EVENT + lis r10, (dtlb_miss_counter - PAGE_OFFSET)@ha + lwz r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10) + addi r11, r11, 1 + stw r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10) +#endif mfcr r3 /* If we are faulting a kernel address, we have to use the @@ -407,7 +463,6 @@ _ENTRY(DTLBMiss_jmp) #endif blt cr7, DTLBMissLinear 3: - mtcr r3 mfspr r10, SPRN_MD_EPN /* Insert level 1 index */ @@ -418,8 +473,15 @@ _ENTRY(DTLBMiss_jmp) */ /* Extract level 2 index */ rlwinm r10, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 +#ifdef CONFIG_HUGETLB_PAGE + mtcr r11 + bt- 28, 10f /* bit 28 = Large page (8M) */ + bt- 29, 20f /* bit 29 = Large page (8M or 512k) */ +#endif rlwimi r10, r11, 0, 0, 32 - PAGE_SHIFT - 1 /* Add level 2 base */ lwz r10, 0(r10) /* Get the pte */ +4: + mtcr r3 /* Insert the Guarded flag and APG into the TWC from the Linux PTE. * It is bit 26-27 of both the Linux PTE and the TWC (at least @@ -434,6 +496,11 @@ _ENTRY(DTLBMiss_jmp) rlwimi r11, r10, 32-5, 30, 30 MTSPR_CPU6(SPRN_MD_TWC, r11, r3) + /* In 4k pages mode, SPS (bit 28) in RPN must match PS[1] (bit 29) + * In 16k pages mode, SPS is always 1 */ +#if defined (CONFIG_HUGETLB_PAGE) && defined (CONFIG_PPC_4K_PAGES) + rlwimi r10, r11, 1, MD_SPS16K +#endif /* Both _PAGE_ACCESSED and _PAGE_PRESENT has to be set. * We also need to know if the insn is a load/store, so: * Clear _PAGE_PRESENT and load that which will @@ -455,7 +522,11 @@ _ENTRY(DTLBMiss_jmp) * of the MMU. */ li r11, RPN_PATTERN +#if defined (CONFIG_HUGETLB_PAGE) && defined (CONFIG_PPC_4K_PAGES) + rlwimi r10, r11, 0, 24, 27 /* Set 24-27 */ +#else rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */ +#endif rlwimi r10, r11, 0, 20, 20 /* clear 20 */ MTSPR_CPU6(SPRN_MD_RPN, r10, r3) /* Update TLB entry */ @@ -465,6 +536,30 @@ _ENTRY(DTLBMiss_jmp) EXCEPTION_EPILOG_0 rfi +#ifdef CONFIG_HUGETLB_PAGE +10: /* 8M pages */ + /* Extract level 2 index */ +#ifdef CONFIG_PPC_16K_PAGES + rlwinm r10, r10, 32 - (PAGE_SHIFT_8M - PAGE_SHIFT), 32 + PAGE_SHIFT_8M - (PAGE_SHIFT << 1), 29 + /* Add level 2 base */ + rlwimi r10, r11, 0, 0, 32 + PAGE_SHIFT_8M - (PAGE_SHIFT << 1) - 1 +#else + /* Level 2 base */ + rlwinm r10, r11, 0, ~HUGEPD_SHIFT_MASK +#endif + lwz r10, 0(r10) /* Get the pte */ + rlwinm r11, r11, 0, 0xf + b 4b + +20: /* 512k pages */ + /* Extract level 2 index */ + rlwinm r10, r10, 32 - (PAGE_SHIFT_512K - PAGE_SHIFT), 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1), 29 + /* Add level 2 base */ + rlwimi r10, r11, 0, 0, 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1) - 1 + lwz r10, 0(r10) /* Get the pte */ + rlwinm r11, r11, 0, 0xf + b 4b +#endif /* This is an instruction TLB error on the MPC8xx. This could be due * to many reasons, such as executing guarded memory or illegal instruction @@ -478,6 +573,7 @@ InstructionTLBError: andis. r10,r5,0x4000 beq+ 1f tlbie r4 +itlbie: /* 0x400 is InstructionAccess exception, needed by bad_page_fault() */ 1: EXC_XFER_LITE(0x400, handle_page_fault) @@ -502,6 +598,7 @@ DARFixed:/* Return from dcbx instruction bug workaround */ andis. r10,r5,0x4000 beq+ 1f tlbie r4 +dtlbie: 1: li r10,RPN_PATTERN mtspr SPRN_DAR,r10 /* Tag DAR, to be used in DTLB Error */ /* 0x300 is DataAccess exception, needed by bad_page_fault() */ @@ -519,8 +616,43 @@ DARFixed:/* Return from dcbx instruction bug workaround */ * support of breakpoints and such. Someday I will get around to * using them. */ - EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_EE) + . = 0x1c00 +DataBreakpoint: + EXCEPTION_PROLOG_0 + mfcr r10 + mfspr r11, SPRN_SRR0 + cmplwi cr0, r11, (dtlbie - PAGE_OFFSET)@l + cmplwi cr7, r11, (itlbie - PAGE_OFFSET)@l + beq- cr0, 11f + beq- cr7, 11f + EXCEPTION_PROLOG_1 + EXCEPTION_PROLOG_2 + addi r3,r1,STACK_FRAME_OVERHEAD + mfspr r4,SPRN_BAR + stw r4,_DAR(r11) + mfspr r5,SPRN_DSISR + EXC_XFER_EE(0x1c00, do_break) +11: + mtcr r10 + EXCEPTION_EPILOG_0 + rfi + +#ifdef CONFIG_PPC_8xx_PERF_EVENT + . = 0x1d00 +InstructionBreakpoint: + EXCEPTION_PROLOG_0 + lis r10, (instruction_counter - PAGE_OFFSET)@ha + lwz r11, (instruction_counter - PAGE_OFFSET)@l(r10) + addi r11, r11, -1 + stw r11, (instruction_counter - PAGE_OFFSET)@l(r10) + lis r10, 0xffff + ori r10, r10, 0x01 + mtspr SPRN_COUNTA, r10 + EXCEPTION_EPILOG_0 + rfi +#else EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_EE) +#endif EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_EE) EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_EE) @@ -586,6 +718,9 @@ _ENTRY(FixupDAR_cmp) /* Insert level 1 index */ 3: rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11) /* Get the level 1 entry */ + mtcr r11 + bt 28,200f /* bit 28 = Large page (8M) */ + bt 29,202f /* bit 29 = Large page (8M or 512K) */ rlwinm r11, r11,0,0,19 /* Extract page descriptor page address */ /* Insert level 2 index */ rlwimi r11, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 @@ -611,6 +746,27 @@ _ENTRY(FixupDAR_cmp) 141: mfspr r10,SPRN_SPRG_SCRATCH2 b DARFixed /* Nope, go back to normal TLB processing */ + /* concat physical page address(r11) and page offset(r10) */ +200: +#ifdef CONFIG_PPC_16K_PAGES + rlwinm r11, r11, 0, 0, 32 + PAGE_SHIFT_8M - (PAGE_SHIFT << 1) - 1 + rlwimi r11, r10, 32 - (PAGE_SHIFT_8M - 2), 32 + PAGE_SHIFT_8M - (PAGE_SHIFT << 1), 29 +#else + rlwinm r11, r10, 0, ~HUGEPD_SHIFT_MASK +#endif + lwz r11, 0(r11) /* Get the pte */ + /* concat physical page address(r11) and page offset(r10) */ + rlwimi r11, r10, 0, 32 - PAGE_SHIFT_8M, 31 + b 201b + +202: + rlwinm r11, r11, 0, 0, 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1) - 1 + rlwimi r11, r10, 32 - (PAGE_SHIFT_512K - 2), 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1), 29 + lwz r11, 0(r11) /* Get the pte */ + /* concat physical page address(r11) and page offset(r10) */ + rlwimi r11, r10, 0, 32 - PAGE_SHIFT_512K, 31 + b 201b + 144: mfspr r10, SPRN_DSISR rlwinm r10, r10,0,7,5 /* Clear store bit for buggy dcbst insn */ mtspr SPRN_DSISR, r10 @@ -870,6 +1026,14 @@ initial_mmu: lis r8, IDC_ENABLE@h mtspr SPRN_DC_CST, r8 #endif + /* Disable debug mode entry on breakpoints */ + mfspr r8, SPRN_DER +#ifdef CONFIG_PPC_8xx_PERF_EVENT + rlwinm r8, r8, 0, ~0xc +#else + rlwinm r8, r8, 0, ~0x8 +#endif + mtspr SPRN_DER, r8 blr @@ -903,3 +1067,16 @@ cpu6_errata_word: .space 16 #endif +#ifdef CONFIG_PPC_8xx_PERF_EVENT + .globl itlb_miss_counter +itlb_miss_counter: + .space 4 + + .globl dtlb_miss_counter +dtlb_miss_counter: + .space 4 + + .globl instruction_counter +instruction_counter: + .space 4 +#endif diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 03d089b3ed72..53b9c1dfd7d9 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -33,7 +33,7 @@ #include <asm/hw_breakpoint.h> #include <asm/processor.h> #include <asm/sstep.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> /* * Stores the breakpoints currently in use on each breakpoint address @@ -211,9 +211,11 @@ int hw_breakpoint_handler(struct die_args *args) int rc = NOTIFY_STOP; struct perf_event *bp; struct pt_regs *regs = args->regs; +#ifndef CONFIG_PPC_8xx int stepped = 1; - struct arch_hw_breakpoint *info; unsigned int instr; +#endif + struct arch_hw_breakpoint *info; unsigned long dar = regs->dar; /* Disable breakpoints during exception handling */ @@ -228,8 +230,10 @@ int hw_breakpoint_handler(struct die_args *args) rcu_read_lock(); bp = __this_cpu_read(bp_per_reg); - if (!bp) + if (!bp) { + rc = NOTIFY_DONE; goto out; + } info = counter_arch_bp(bp); /* @@ -255,6 +259,7 @@ int hw_breakpoint_handler(struct die_args *args) (dar - bp->attr.bp_addr < bp->attr.bp_len))) info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; +#ifndef CONFIG_PPC_8xx /* Do not emulate user-space instructions, instead single-step them */ if (user_mode(regs)) { current->thread.last_hit_ubp = bp; @@ -278,6 +283,7 @@ int hw_breakpoint_handler(struct die_args *args) perf_event_disable_inatomic(bp); goto out; } +#endif /* * As a policy, the callback is invoked in a 'trigger-after-execute' * fashion diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c deleted file mode 100644 index 6ca9a2ffaac7..000000000000 --- a/arch/powerpc/kernel/ibmebus.c +++ /dev/null @@ -1,759 +0,0 @@ -/* - * IBM PowerPC IBM eBus Infrastructure Support. - * - * Copyright (c) 2005 IBM Corporation - * Joachim Fenkes <fenkes@de.ibm.com> - * Heiko J Schick <schickhj@de.ibm.com> - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include <linux/init.h> -#include <linux/export.h> -#include <linux/console.h> -#include <linux/kobject.h> -#include <linux/dma-mapping.h> -#include <linux/interrupt.h> -#include <linux/of.h> -#include <linux/slab.h> -#include <linux/stat.h> -#include <linux/of_platform.h> -#include <asm/ibmebus.h> - -static struct device ibmebus_bus_device = { /* fake "parent" device */ - .init_name = "ibmebus", -}; - -struct bus_type ibmebus_bus_type; - -/* These devices will automatically be added to the bus during init */ -static const struct of_device_id ibmebus_matches[] __initconst = { - { .compatible = "IBM,lhca" }, - { .compatible = "IBM,lhea" }, - {}, -}; - -static void *ibmebus_alloc_coherent(struct device *dev, - size_t size, - dma_addr_t *dma_handle, - gfp_t flag, - unsigned long attrs) -{ - void *mem; - - mem = kmalloc(size, flag); - *dma_handle = (dma_addr_t)mem; - - return mem; -} - -static void ibmebus_free_coherent(struct device *dev, - size_t size, void *vaddr, - dma_addr_t dma_handle, - unsigned long attrs) -{ - kfree(vaddr); -} - -static dma_addr_t ibmebus_map_page(struct device *dev, - struct page *page, - unsigned long offset, - size_t size, - enum dma_data_direction direction, - unsigned long attrs) -{ - return (dma_addr_t)(page_address(page) + offset); -} - -static void ibmebus_unmap_page(struct device *dev, - dma_addr_t dma_addr, - size_t size, - enum dma_data_direction direction, - unsigned long attrs) -{ - return; -} - -static int ibmebus_map_sg(struct device *dev, - struct scatterlist *sgl, - int nents, enum dma_data_direction direction, - unsigned long attrs) -{ - struct scatterlist *sg; - int i; - - for_each_sg(sgl, sg, nents, i) { - sg->dma_address = (dma_addr_t) sg_virt(sg); - sg->dma_length = sg->length; - } - - return nents; -} - -static void ibmebus_unmap_sg(struct device *dev, - struct scatterlist *sg, - int nents, enum dma_data_direction direction, - unsigned long attrs) -{ - return; -} - -static int ibmebus_dma_supported(struct device *dev, u64 mask) -{ - return mask == DMA_BIT_MASK(64); -} - -static u64 ibmebus_dma_get_required_mask(struct device *dev) -{ - return DMA_BIT_MASK(64); -} - -static struct dma_map_ops ibmebus_dma_ops = { - .alloc = ibmebus_alloc_coherent, - .free = ibmebus_free_coherent, - .map_sg = ibmebus_map_sg, - .unmap_sg = ibmebus_unmap_sg, - .dma_supported = ibmebus_dma_supported, - .get_required_mask = ibmebus_dma_get_required_mask, - .map_page = ibmebus_map_page, - .unmap_page = ibmebus_unmap_page, -}; - -static int ibmebus_match_path(struct device *dev, void *data) -{ - struct device_node *dn = to_platform_device(dev)->dev.of_node; - return (dn->full_name && - (strcasecmp((char *)data, dn->full_name) == 0)); -} - -static int ibmebus_match_node(struct device *dev, void *data) -{ - return to_platform_device(dev)->dev.of_node == data; -} - -static int ibmebus_create_device(struct device_node *dn) -{ - struct platform_device *dev; - int ret; - - dev = of_device_alloc(dn, NULL, &ibmebus_bus_device); - if (!dev) - return -ENOMEM; - - dev->dev.bus = &ibmebus_bus_type; - dev->dev.archdata.dma_ops = &ibmebus_dma_ops; - - ret = of_device_add(dev); - if (ret) - platform_device_put(dev); - return ret; -} - -static int ibmebus_create_devices(const struct of_device_id *matches) -{ - struct device_node *root, *child; - int ret = 0; - - root = of_find_node_by_path("/"); - - for_each_child_of_node(root, child) { - if (!of_match_node(matches, child)) - continue; - - if (bus_find_device(&ibmebus_bus_type, NULL, child, - ibmebus_match_node)) - continue; - - ret = ibmebus_create_device(child); - if (ret) { - printk(KERN_ERR "%s: failed to create device (%i)", - __func__, ret); - of_node_put(child); - break; - } - } - - of_node_put(root); - return ret; -} - -int ibmebus_register_driver(struct platform_driver *drv) -{ - /* If the driver uses devices that ibmebus doesn't know, add them */ - ibmebus_create_devices(drv->driver.of_match_table); - - drv->driver.bus = &ibmebus_bus_type; - return driver_register(&drv->driver); -} -EXPORT_SYMBOL(ibmebus_register_driver); - -void ibmebus_unregister_driver(struct platform_driver *drv) -{ - driver_unregister(&drv->driver); -} -EXPORT_SYMBOL(ibmebus_unregister_driver); - -int ibmebus_request_irq(u32 ist, irq_handler_t handler, - unsigned long irq_flags, const char *devname, - void *dev_id) -{ - unsigned int irq = irq_create_mapping(NULL, ist); - - if (!irq) - return -EINVAL; - - return request_irq(irq, handler, irq_flags, devname, dev_id); -} -EXPORT_SYMBOL(ibmebus_request_irq); - -void ibmebus_free_irq(u32 ist, void *dev_id) -{ - unsigned int irq = irq_find_mapping(NULL, ist); - - free_irq(irq, dev_id); - irq_dispose_mapping(irq); -} -EXPORT_SYMBOL(ibmebus_free_irq); - -static char *ibmebus_chomp(const char *in, size_t count) -{ - char *out = kmalloc(count + 1, GFP_KERNEL); - - if (!out) - return NULL; - - memcpy(out, in, count); - out[count] = '\0'; - if (out[count - 1] == '\n') - out[count - 1] = '\0'; - - return out; -} - -static ssize_t ibmebus_store_probe(struct bus_type *bus, - const char *buf, size_t count) -{ - struct device_node *dn = NULL; - char *path; - ssize_t rc = 0; - - path = ibmebus_chomp(buf, count); - if (!path) - return -ENOMEM; - - if (bus_find_device(&ibmebus_bus_type, NULL, path, - ibmebus_match_path)) { - printk(KERN_WARNING "%s: %s has already been probed\n", - __func__, path); - rc = -EEXIST; - goto out; - } - - if ((dn = of_find_node_by_path(path))) { - rc = ibmebus_create_device(dn); - of_node_put(dn); - } else { - printk(KERN_WARNING "%s: no such device node: %s\n", - __func__, path); - rc = -ENODEV; - } - -out: - kfree(path); - if (rc) - return rc; - return count; -} -static BUS_ATTR(probe, S_IWUSR, NULL, ibmebus_store_probe); - -static ssize_t ibmebus_store_remove(struct bus_type *bus, - const char *buf, size_t count) -{ - struct device *dev; - char *path; - - path = ibmebus_chomp(buf, count); - if (!path) - return -ENOMEM; - - if ((dev = bus_find_device(&ibmebus_bus_type, NULL, path, - ibmebus_match_path))) { - of_device_unregister(to_platform_device(dev)); - - kfree(path); - return count; - } else { - printk(KERN_WARNING "%s: %s not on the bus\n", - __func__, path); - - kfree(path); - return -ENODEV; - } -} -static BUS_ATTR(remove, S_IWUSR, NULL, ibmebus_store_remove); - -static struct attribute *ibmbus_bus_attrs[] = { - &bus_attr_probe.attr, - &bus_attr_remove.attr, - NULL, -}; -ATTRIBUTE_GROUPS(ibmbus_bus); - -static int ibmebus_bus_bus_match(struct device *dev, struct device_driver *drv) -{ - const struct of_device_id *matches = drv->of_match_table; - - if (!matches) - return 0; - - return of_match_device(matches, dev) != NULL; -} - -static int ibmebus_bus_device_probe(struct device *dev) -{ - int error = -ENODEV; - struct platform_driver *drv; - struct platform_device *of_dev; - - drv = to_platform_driver(dev->driver); - of_dev = to_platform_device(dev); - - if (!drv->probe) - return error; - - of_dev_get(of_dev); - - if (of_driver_match_device(dev, dev->driver)) - error = drv->probe(of_dev); - if (error) - of_dev_put(of_dev); - - return error; -} - -static int ibmebus_bus_device_remove(struct device *dev) -{ - struct platform_device *of_dev = to_platform_device(dev); - struct platform_driver *drv = to_platform_driver(dev->driver); - - if (dev->driver && drv->remove) - drv->remove(of_dev); - return 0; -} - -static void ibmebus_bus_device_shutdown(struct device *dev) -{ - struct platform_device *of_dev = to_platform_device(dev); - struct platform_driver *drv = to_platform_driver(dev->driver); - - if (dev->driver && drv->shutdown) - drv->shutdown(of_dev); -} - -/* - * ibmebus_bus_device_attrs - */ -static ssize_t devspec_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct platform_device *ofdev; - - ofdev = to_platform_device(dev); - return sprintf(buf, "%s\n", ofdev->dev.of_node->full_name); -} - -static ssize_t name_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct platform_device *ofdev; - - ofdev = to_platform_device(dev); - return sprintf(buf, "%s\n", ofdev->dev.of_node->name); -} - -static ssize_t modalias_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - ssize_t len = of_device_get_modalias(dev, buf, PAGE_SIZE - 2); - buf[len] = '\n'; - buf[len+1] = 0; - return len+1; -} - -static struct device_attribute ibmebus_bus_device_attrs[] = { - __ATTR_RO(devspec), - __ATTR_RO(name), - __ATTR_RO(modalias), - __ATTR_NULL -}; - -#ifdef CONFIG_PM_SLEEP -static int ibmebus_bus_legacy_suspend(struct device *dev, pm_message_t mesg) -{ - struct platform_device *of_dev = to_platform_device(dev); - struct platform_driver *drv = to_platform_driver(dev->driver); - int ret = 0; - - if (dev->driver && drv->suspend) - ret = drv->suspend(of_dev, mesg); - return ret; -} - -static int ibmebus_bus_legacy_resume(struct device *dev) -{ - struct platform_device *of_dev = to_platform_device(dev); - struct platform_driver *drv = to_platform_driver(dev->driver); - int ret = 0; - - if (dev->driver && drv->resume) - ret = drv->resume(of_dev); - return ret; -} - -static int ibmebus_bus_pm_prepare(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (drv && drv->pm && drv->pm->prepare) - ret = drv->pm->prepare(dev); - - return ret; -} - -static void ibmebus_bus_pm_complete(struct device *dev) -{ - struct device_driver *drv = dev->driver; - - if (drv && drv->pm && drv->pm->complete) - drv->pm->complete(dev); -} - -#ifdef CONFIG_SUSPEND - -static int ibmebus_bus_pm_suspend(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->suspend) - ret = drv->pm->suspend(dev); - } else { - ret = ibmebus_bus_legacy_suspend(dev, PMSG_SUSPEND); - } - - return ret; -} - -static int ibmebus_bus_pm_suspend_noirq(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->suspend_noirq) - ret = drv->pm->suspend_noirq(dev); - } - - return ret; -} - -static int ibmebus_bus_pm_resume(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->resume) - ret = drv->pm->resume(dev); - } else { - ret = ibmebus_bus_legacy_resume(dev); - } - - return ret; -} - -static int ibmebus_bus_pm_resume_noirq(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->resume_noirq) - ret = drv->pm->resume_noirq(dev); - } - - return ret; -} - -#else /* !CONFIG_SUSPEND */ - -#define ibmebus_bus_pm_suspend NULL -#define ibmebus_bus_pm_resume NULL -#define ibmebus_bus_pm_suspend_noirq NULL -#define ibmebus_bus_pm_resume_noirq NULL - -#endif /* !CONFIG_SUSPEND */ - -#ifdef CONFIG_HIBERNATE_CALLBACKS - -static int ibmebus_bus_pm_freeze(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->freeze) - ret = drv->pm->freeze(dev); - } else { - ret = ibmebus_bus_legacy_suspend(dev, PMSG_FREEZE); - } - - return ret; -} - -static int ibmebus_bus_pm_freeze_noirq(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->freeze_noirq) - ret = drv->pm->freeze_noirq(dev); - } - - return ret; -} - -static int ibmebus_bus_pm_thaw(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->thaw) - ret = drv->pm->thaw(dev); - } else { - ret = ibmebus_bus_legacy_resume(dev); - } - - return ret; -} - -static int ibmebus_bus_pm_thaw_noirq(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->thaw_noirq) - ret = drv->pm->thaw_noirq(dev); - } - - return ret; -} - -static int ibmebus_bus_pm_poweroff(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->poweroff) - ret = drv->pm->poweroff(dev); - } else { - ret = ibmebus_bus_legacy_suspend(dev, PMSG_HIBERNATE); - } - - return ret; -} - -static int ibmebus_bus_pm_poweroff_noirq(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->poweroff_noirq) - ret = drv->pm->poweroff_noirq(dev); - } - - return ret; -} - -static int ibmebus_bus_pm_restore(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->restore) - ret = drv->pm->restore(dev); - } else { - ret = ibmebus_bus_legacy_resume(dev); - } - - return ret; -} - -static int ibmebus_bus_pm_restore_noirq(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->restore_noirq) - ret = drv->pm->restore_noirq(dev); - } - - return ret; -} - -#else /* !CONFIG_HIBERNATE_CALLBACKS */ - -#define ibmebus_bus_pm_freeze NULL -#define ibmebus_bus_pm_thaw NULL -#define ibmebus_bus_pm_poweroff NULL -#define ibmebus_bus_pm_restore NULL -#define ibmebus_bus_pm_freeze_noirq NULL -#define ibmebus_bus_pm_thaw_noirq NULL -#define ibmebus_bus_pm_poweroff_noirq NULL -#define ibmebus_bus_pm_restore_noirq NULL - -#endif /* !CONFIG_HIBERNATE_CALLBACKS */ - -static struct dev_pm_ops ibmebus_bus_dev_pm_ops = { - .prepare = ibmebus_bus_pm_prepare, - .complete = ibmebus_bus_pm_complete, - .suspend = ibmebus_bus_pm_suspend, - .resume = ibmebus_bus_pm_resume, - .freeze = ibmebus_bus_pm_freeze, - .thaw = ibmebus_bus_pm_thaw, - .poweroff = ibmebus_bus_pm_poweroff, - .restore = ibmebus_bus_pm_restore, - .suspend_noirq = ibmebus_bus_pm_suspend_noirq, - .resume_noirq = ibmebus_bus_pm_resume_noirq, - .freeze_noirq = ibmebus_bus_pm_freeze_noirq, - .thaw_noirq = ibmebus_bus_pm_thaw_noirq, - .poweroff_noirq = ibmebus_bus_pm_poweroff_noirq, - .restore_noirq = ibmebus_bus_pm_restore_noirq, -}; - -#define IBMEBUS_BUS_PM_OPS_PTR (&ibmebus_bus_dev_pm_ops) - -#else /* !CONFIG_PM_SLEEP */ - -#define IBMEBUS_BUS_PM_OPS_PTR NULL - -#endif /* !CONFIG_PM_SLEEP */ - -struct bus_type ibmebus_bus_type = { - .name = "ibmebus", - .uevent = of_device_uevent_modalias, - .bus_groups = ibmbus_bus_groups, - .match = ibmebus_bus_bus_match, - .probe = ibmebus_bus_device_probe, - .remove = ibmebus_bus_device_remove, - .shutdown = ibmebus_bus_device_shutdown, - .dev_attrs = ibmebus_bus_device_attrs, - .pm = IBMEBUS_BUS_PM_OPS_PTR, -}; -EXPORT_SYMBOL(ibmebus_bus_type); - -static int __init ibmebus_bus_init(void) -{ - int err; - - printk(KERN_INFO "IBM eBus Device Driver\n"); - - err = bus_register(&ibmebus_bus_type); - if (err) { - printk(KERN_ERR "%s: failed to register IBM eBus.\n", - __func__); - return err; - } - - err = device_register(&ibmebus_bus_device); - if (err) { - printk(KERN_WARNING "%s: device_register returned %i\n", - __func__, err); - bus_unregister(&ibmebus_bus_type); - - return err; - } - - err = ibmebus_create_devices(ibmebus_matches); - if (err) { - device_unregister(&ibmebus_bus_device); - bus_unregister(&ibmebus_bus_type); - return err; - } - - return 0; -} -postcore_initcall(ibmebus_bus_init); diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 72dac0b58061..6fd08219248d 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -40,9 +40,7 @@ #define _WORC GPR11 #define _PTCR GPR12 -#define PSSCR_HV_TEMPLATE PSSCR_ESL | PSSCR_EC | \ - PSSCR_PSLL_MASK | PSSCR_TR_MASK | \ - PSSCR_MTL_MASK +#define PSSCR_EC_ESL_MASK_SHIFTED (PSSCR_EC | PSSCR_ESL) >> 16 .text @@ -205,7 +203,7 @@ pnv_enter_arch207_idle_mode: stb r3,PACA_THREAD_IDLE_STATE(r13) cmpwi cr3,r3,PNV_THREAD_SLEEP bge cr3,2f - IDLE_STATE_ENTER_SEQ(PPC_NAP) + IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP) /* No return */ 2: /* Sleep or winkle */ @@ -239,7 +237,7 @@ pnv_fastsleep_workaround_at_entry: common_enter: /* common code for all the threads entering sleep or winkle */ bgt cr3,enter_winkle - IDLE_STATE_ENTER_SEQ(PPC_SLEEP) + IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP) fastsleep_workaround_at_entry: ori r15,r15,PNV_CORE_IDLE_LOCK_BIT @@ -250,7 +248,7 @@ fastsleep_workaround_at_entry: /* Fast sleep workaround */ li r3,1 li r4,1 - bl opal_rm_config_cpu_idle_state + bl opal_config_cpu_idle_state /* Clear Lock bit */ li r0,0 @@ -261,10 +259,10 @@ fastsleep_workaround_at_entry: enter_winkle: bl save_sprs_to_stack - IDLE_STATE_ENTER_SEQ(PPC_WINKLE) + IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE) /* - * r3 - requested stop state + * r3 - PSSCR value corresponding to the requested stop state. */ power_enter_stop: #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE @@ -274,14 +272,25 @@ power_enter_stop: stb r4,HSTATE_HWTHREAD_STATE(r13) #endif /* + * Check if we are executing the lite variant with ESL=EC=0 + */ + andis. r4,r3,PSSCR_EC_ESL_MASK_SHIFTED + clrldi r3,r3,60 /* r3 = Bits[60:63] = Requested Level (RL) */ + bne .Lhandle_esl_ec_set + IDLE_STATE_ENTER_SEQ(PPC_STOP) + li r3,0 /* Since we didn't lose state, return 0 */ + b pnv_wakeup_noloss + +.Lhandle_esl_ec_set: +/* * Check if the requested state is a deep idle state. */ LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state) ld r4,ADDROFF(pnv_first_deep_stop_state)(r5) cmpd r3,r4 - bge 2f - IDLE_STATE_ENTER_SEQ(PPC_STOP) -2: + bge .Lhandle_deep_stop + IDLE_STATE_ENTER_SEQ_NORET(PPC_STOP) +.Lhandle_deep_stop: /* * Entering deep idle state. * Clear thread bit in PACA_CORE_IDLE_STATE, save SPRs to @@ -302,7 +311,7 @@ lwarx_loop_stop: bl save_sprs_to_stack - IDLE_STATE_ENTER_SEQ(PPC_STOP) + IDLE_STATE_ENTER_SEQ_NORET(PPC_STOP) _GLOBAL(power7_idle) /* Now check if user or arch enabled NAP mode */ @@ -353,16 +362,17 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \ ld r3,ORIG_GPR3(r1); /* Restore original r3 */ \ 20: nop; - /* - * r3 - requested stop state + * r3 - The PSSCR value corresponding to the stop state. + * r4 - The PSSCR mask corrresonding to the stop state. */ _GLOBAL(power9_idle_stop) - LOAD_REG_IMMEDIATE(r4, PSSCR_HV_TEMPLATE) - or r4,r4,r3 - mtspr SPRN_PSSCR, r4 - li r4, 1 + mfspr r5,SPRN_PSSCR + andc r5,r5,r4 + or r3,r3,r5 + mtspr SPRN_PSSCR,r3 LOAD_REG_ADDR(r5,power_enter_stop) + li r4,1 b pnv_powersave_common /* No return */ /* @@ -439,9 +449,23 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) _GLOBAL(pnv_wakeup_tb_loss) ld r1,PACAR1(r13) /* - * Before entering any idle state, the NVGPRs are saved in the stack - * and they are restored before switching to the process context. Hence - * until they are restored, they are free to be used. + * Before entering any idle state, the NVGPRs are saved in the stack. + * If there was a state loss, or PACA_NAPSTATELOST was set, then the + * NVGPRs are restored. If we are here, it is likely that state is lost, + * but not guaranteed -- neither ISA207 nor ISA300 tests to reach + * here are the same as the test to restore NVGPRS: + * PACA_THREAD_IDLE_STATE test for ISA207, PSSCR test for ISA300, + * and SRR1 test for restoring NVGPRs. + * + * We are about to clobber NVGPRs now, so set NAPSTATELOST to + * guarantee they will always be restored. This might be tightened + * with careful reading of specs (particularly for ISA300) but this + * is already a slow wakeup path and it's simpler to be safe. + */ + li r0,1 + stb r0,PACA_NAPSTATELOST(r13) + + /* * * Save SRR1 and LR in NVGPRs as they might be clobbered in * opal_call() (called in CHECK_HMI_INTERRUPT). SRR1 is required @@ -544,7 +568,7 @@ timebase_resync: */ ble cr3,clear_lock /* Time base re-sync */ - bl opal_rm_resync_timebase; + bl opal_resync_timebase; /* * If waking up from sleep, per core state is not lost, skip to * clear_lock. @@ -633,7 +657,7 @@ hypervisor_state_restored: fastsleep_workaround_at_exit: li r3,1 li r4,0 - bl opal_rm_config_cpu_idle_state + bl opal_config_cpu_idle_state b timebase_resync /* diff --git a/arch/powerpc/kernel/ima_kexec.c b/arch/powerpc/kernel/ima_kexec.c new file mode 100644 index 000000000000..5ea42c937ca9 --- /dev/null +++ b/arch/powerpc/kernel/ima_kexec.c @@ -0,0 +1,223 @@ +/* + * Copyright (C) 2016 IBM Corporation + * + * Authors: + * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/slab.h> +#include <linux/kexec.h> +#include <linux/of.h> +#include <linux/memblock.h> +#include <linux/libfdt.h> + +static int get_addr_size_cells(int *addr_cells, int *size_cells) +{ + struct device_node *root; + + root = of_find_node_by_path("/"); + if (!root) + return -EINVAL; + + *addr_cells = of_n_addr_cells(root); + *size_cells = of_n_size_cells(root); + + of_node_put(root); + + return 0; +} + +static int do_get_kexec_buffer(const void *prop, int len, unsigned long *addr, + size_t *size) +{ + int ret, addr_cells, size_cells; + + ret = get_addr_size_cells(&addr_cells, &size_cells); + if (ret) + return ret; + + if (len < 4 * (addr_cells + size_cells)) + return -ENOENT; + + *addr = of_read_number(prop, addr_cells); + *size = of_read_number(prop + 4 * addr_cells, size_cells); + + return 0; +} + +/** + * ima_get_kexec_buffer - get IMA buffer from the previous kernel + * @addr: On successful return, set to point to the buffer contents. + * @size: On successful return, set to the buffer size. + * + * Return: 0 on success, negative errno on error. + */ +int ima_get_kexec_buffer(void **addr, size_t *size) +{ + int ret, len; + unsigned long tmp_addr; + size_t tmp_size; + const void *prop; + + prop = of_get_property(of_chosen, "linux,ima-kexec-buffer", &len); + if (!prop) + return -ENOENT; + + ret = do_get_kexec_buffer(prop, len, &tmp_addr, &tmp_size); + if (ret) + return ret; + + *addr = __va(tmp_addr); + *size = tmp_size; + + return 0; +} + +/** + * ima_free_kexec_buffer - free memory used by the IMA buffer + */ +int ima_free_kexec_buffer(void) +{ + int ret; + unsigned long addr; + size_t size; + struct property *prop; + + prop = of_find_property(of_chosen, "linux,ima-kexec-buffer", NULL); + if (!prop) + return -ENOENT; + + ret = do_get_kexec_buffer(prop->value, prop->length, &addr, &size); + if (ret) + return ret; + + ret = of_remove_property(of_chosen, prop); + if (ret) + return ret; + + return memblock_free(addr, size); + +} + +/** + * remove_ima_buffer - remove the IMA buffer property and reservation from @fdt + * + * The IMA measurement buffer is of no use to a subsequent kernel, so we always + * remove it from the device tree. + */ +void remove_ima_buffer(void *fdt, int chosen_node) +{ + int ret, len; + unsigned long addr; + size_t size; + const void *prop; + + prop = fdt_getprop(fdt, chosen_node, "linux,ima-kexec-buffer", &len); + if (!prop) + return; + + ret = do_get_kexec_buffer(prop, len, &addr, &size); + fdt_delprop(fdt, chosen_node, "linux,ima-kexec-buffer"); + if (ret) + return; + + ret = delete_fdt_mem_rsv(fdt, addr, size); + if (!ret) + pr_debug("Removed old IMA buffer reservation.\n"); +} + +#ifdef CONFIG_IMA_KEXEC +/** + * arch_ima_add_kexec_buffer - do arch-specific steps to add the IMA buffer + * + * Architectures should use this function to pass on the IMA buffer + * information to the next kernel. + * + * Return: 0 on success, negative errno on error. + */ +int arch_ima_add_kexec_buffer(struct kimage *image, unsigned long load_addr, + size_t size) +{ + image->arch.ima_buffer_addr = load_addr; + image->arch.ima_buffer_size = size; + + return 0; +} + +static int write_number(void *p, u64 value, int cells) +{ + if (cells == 1) { + u32 tmp; + + if (value > U32_MAX) + return -EINVAL; + + tmp = cpu_to_be32(value); + memcpy(p, &tmp, sizeof(tmp)); + } else if (cells == 2) { + u64 tmp; + + tmp = cpu_to_be64(value); + memcpy(p, &tmp, sizeof(tmp)); + } else + return -EINVAL; + + return 0; +} + +/** + * setup_ima_buffer - add IMA buffer information to the fdt + * @image: kexec image being loaded. + * @fdt: Flattened device tree for the next kernel. + * @chosen_node: Offset to the chosen node. + * + * Return: 0 on success, or negative errno on error. + */ +int setup_ima_buffer(const struct kimage *image, void *fdt, int chosen_node) +{ + int ret, addr_cells, size_cells, entry_size; + u8 value[16]; + + remove_ima_buffer(fdt, chosen_node); + if (!image->arch.ima_buffer_size) + return 0; + + ret = get_addr_size_cells(&addr_cells, &size_cells); + if (ret) + return ret; + + entry_size = 4 * (addr_cells + size_cells); + + if (entry_size > sizeof(value)) + return -EINVAL; + + ret = write_number(value, image->arch.ima_buffer_addr, addr_cells); + if (ret) + return ret; + + ret = write_number(value + 4 * addr_cells, image->arch.ima_buffer_size, + size_cells); + if (ret) + return ret; + + ret = fdt_setprop(fdt, chosen_node, "linux,ima-kexec-buffer", value, + entry_size); + if (ret < 0) + return -EINVAL; + + ret = fdt_add_mem_rsv(fdt, image->arch.ima_buffer_addr, + image->arch.ima_buffer_size); + if (ret) + return -EINVAL; + + pr_debug("IMA buffer at 0x%llx, size = 0x%zx\n", + image->arch.ima_buffer_addr, image->arch.ima_buffer_size); + + return 0; +} +#endif /* CONFIG_IMA_KEXEC */ diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c index 5f8613ceb97f..a582e0d42525 100644 --- a/arch/powerpc/kernel/io-workarounds.c +++ b/arch/powerpc/kernel/io-workarounds.c @@ -12,7 +12,7 @@ #undef DEBUG #include <linux/kernel.h> -#include <linux/sched.h> /* for init_mm */ +#include <linux/sched/mm.h> /* for init_mm */ #include <asm/io.h> #include <asm/machdep.h> diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c index 3963f0b68d52..a1854d1ded8b 100644 --- a/arch/powerpc/kernel/iomap.c +++ b/arch/powerpc/kernel/iomap.c @@ -8,6 +8,7 @@ #include <linux/export.h> #include <asm/io.h> #include <asm/pci-bridge.h> +#include <asm/isa-bridge.h> /* * Here comes the ppc64 implementation of the IOMAP diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 3c05c311e35e..a018f5cae899 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -55,7 +55,7 @@ #include <linux/of.h> #include <linux/of_irq.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/io.h> #include <asm/pgtable.h> #include <asm/irq.h> diff --git a/arch/powerpc/kernel/isa-bridge.c b/arch/powerpc/kernel/isa-bridge.c index ae1316106e2b..bb6f8993412e 100644 --- a/arch/powerpc/kernel/isa-bridge.c +++ b/arch/powerpc/kernel/isa-bridge.c @@ -29,6 +29,7 @@ #include <asm/pci-bridge.h> #include <asm/machdep.h> #include <asm/ppc-pci.h> +#include <asm/isa-bridge.h> unsigned long isa_io_base; /* NULL if no ISA bus */ EXPORT_SYMBOL(isa_io_base); @@ -167,6 +168,97 @@ void __init isa_bridge_find_early(struct pci_controller *hose) } /** + * isa_bridge_find_early - Find and map the ISA IO space early before + * main PCI discovery. This is optionally called by + * the arch code when adding PCI PHBs to get early + * access to ISA IO ports + */ +void __init isa_bridge_init_non_pci(struct device_node *np) +{ + const __be32 *ranges, *pbasep = NULL; + int rlen, i, rs; + u32 na, ns, pna; + u64 cbase, pbase, size = 0; + + /* If we already have an ISA bridge, bail off */ + if (isa_bridge_devnode != NULL) + return; + + pna = of_n_addr_cells(np); + if (of_property_read_u32(np, "#address-cells", &na) || + of_property_read_u32(np, "#size-cells", &ns)) { + pr_warn("ISA: Non-PCI bridge %s is missing address format\n", + np->full_name); + return; + } + + /* Check it's a supported address format */ + if (na != 2 || ns != 1) { + pr_warn("ISA: Non-PCI bridge %s has unsupported address format\n", + np->full_name); + return; + } + rs = na + ns + pna; + + /* Grab the ranges property */ + ranges = of_get_property(np, "ranges", &rlen); + if (ranges == NULL || rlen < rs) { + pr_warn("ISA: Non-PCI bridge %s has absent or invalid ranges\n", + np->full_name); + return; + } + + /* Parse it. We are only looking for IO space */ + for (i = 0; (i + rs - 1) < rlen; i += rs) { + if (be32_to_cpup(ranges + i) != 1) + continue; + cbase = be32_to_cpup(ranges + i + 1); + size = of_read_number(ranges + i + na + pna, ns); + pbasep = ranges + i + na; + break; + } + + /* Got something ? */ + if (!size || !pbasep) { + pr_warn("ISA: Non-PCI bridge %s has no usable IO range\n", + np->full_name); + return; + } + + /* Align size and make sure it's cropped to 64K */ + size = PAGE_ALIGN(size); + if (size > 0x10000) + size = 0x10000; + + /* Map pbase */ + pbase = of_translate_address(np, pbasep); + if (pbase == OF_BAD_ADDR) { + pr_warn("ISA: Non-PCI bridge %s failed to translate IO base\n", + np->full_name); + return; + } + + /* We need page alignment */ + if ((cbase & ~PAGE_MASK) || (pbase & ~PAGE_MASK)) { + pr_warn("ISA: Non-PCI bridge %s has non aligned IO range\n", + np->full_name); + return; + } + + /* Got it */ + isa_bridge_devnode = np; + + /* Set the global ISA io base to indicate we have an ISA bridge + * and map it + */ + isa_io_base = ISA_IO_BASE; + __ioremap_at(pbase, (void *)ISA_IO_BASE, + size, pgprot_val(pgprot_noncached(__pgprot(0)))); + + pr_debug("ISA: Non-PCI bridge is %s\n", np->full_name); +} + +/** * isa_bridge_find_late - Find and map the ISA IO space upon discovery of * a new ISA bridge */ diff --git a/arch/powerpc/kernel/kexec_elf_64.c b/arch/powerpc/kernel/kexec_elf_64.c new file mode 100644 index 000000000000..9a42309b091a --- /dev/null +++ b/arch/powerpc/kernel/kexec_elf_64.c @@ -0,0 +1,663 @@ +/* + * Load ELF vmlinux file for the kexec_file_load syscall. + * + * Copyright (C) 2004 Adam Litke (agl@us.ibm.com) + * Copyright (C) 2004 IBM Corp. + * Copyright (C) 2005 R Sharada (sharada@in.ibm.com) + * Copyright (C) 2006 Mohan Kumar M (mohan@in.ibm.com) + * Copyright (C) 2016 IBM Corporation + * + * Based on kexec-tools' kexec-elf-exec.c and kexec-elf-ppc64.c. + * Heavily modified for the kernel by + * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#define pr_fmt(fmt) "kexec_elf: " fmt + +#include <linux/elf.h> +#include <linux/kexec.h> +#include <linux/libfdt.h> +#include <linux/module.h> +#include <linux/of_fdt.h> +#include <linux/slab.h> +#include <linux/types.h> + +#define PURGATORY_STACK_SIZE (16 * 1024) + +#define elf_addr_to_cpu elf64_to_cpu + +#ifndef Elf_Rel +#define Elf_Rel Elf64_Rel +#endif /* Elf_Rel */ + +struct elf_info { + /* + * Where the ELF binary contents are kept. + * Memory managed by the user of the struct. + */ + const char *buffer; + + const struct elfhdr *ehdr; + const struct elf_phdr *proghdrs; + struct elf_shdr *sechdrs; +}; + +static inline bool elf_is_elf_file(const struct elfhdr *ehdr) +{ + return memcmp(ehdr->e_ident, ELFMAG, SELFMAG) == 0; +} + +static uint64_t elf64_to_cpu(const struct elfhdr *ehdr, uint64_t value) +{ + if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB) + value = le64_to_cpu(value); + else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB) + value = be64_to_cpu(value); + + return value; +} + +static uint16_t elf16_to_cpu(const struct elfhdr *ehdr, uint16_t value) +{ + if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB) + value = le16_to_cpu(value); + else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB) + value = be16_to_cpu(value); + + return value; +} + +static uint32_t elf32_to_cpu(const struct elfhdr *ehdr, uint32_t value) +{ + if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB) + value = le32_to_cpu(value); + else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB) + value = be32_to_cpu(value); + + return value; +} + +/** + * elf_is_ehdr_sane - check that it is safe to use the ELF header + * @buf_len: size of the buffer in which the ELF file is loaded. + */ +static bool elf_is_ehdr_sane(const struct elfhdr *ehdr, size_t buf_len) +{ + if (ehdr->e_phnum > 0 && ehdr->e_phentsize != sizeof(struct elf_phdr)) { + pr_debug("Bad program header size.\n"); + return false; + } else if (ehdr->e_shnum > 0 && + ehdr->e_shentsize != sizeof(struct elf_shdr)) { + pr_debug("Bad section header size.\n"); + return false; + } else if (ehdr->e_ident[EI_VERSION] != EV_CURRENT || + ehdr->e_version != EV_CURRENT) { + pr_debug("Unknown ELF version.\n"); + return false; + } + + if (ehdr->e_phoff > 0 && ehdr->e_phnum > 0) { + size_t phdr_size; + + /* + * e_phnum is at most 65535 so calculating the size of the + * program header cannot overflow. + */ + phdr_size = sizeof(struct elf_phdr) * ehdr->e_phnum; + + /* Sanity check the program header table location. */ + if (ehdr->e_phoff + phdr_size < ehdr->e_phoff) { + pr_debug("Program headers at invalid location.\n"); + return false; + } else if (ehdr->e_phoff + phdr_size > buf_len) { + pr_debug("Program headers truncated.\n"); + return false; + } + } + + if (ehdr->e_shoff > 0 && ehdr->e_shnum > 0) { + size_t shdr_size; + + /* + * e_shnum is at most 65536 so calculating + * the size of the section header cannot overflow. + */ + shdr_size = sizeof(struct elf_shdr) * ehdr->e_shnum; + + /* Sanity check the section header table location. */ + if (ehdr->e_shoff + shdr_size < ehdr->e_shoff) { + pr_debug("Section headers at invalid location.\n"); + return false; + } else if (ehdr->e_shoff + shdr_size > buf_len) { + pr_debug("Section headers truncated.\n"); + return false; + } + } + + return true; +} + +static int elf_read_ehdr(const char *buf, size_t len, struct elfhdr *ehdr) +{ + struct elfhdr *buf_ehdr; + + if (len < sizeof(*buf_ehdr)) { + pr_debug("Buffer is too small to hold ELF header.\n"); + return -ENOEXEC; + } + + memset(ehdr, 0, sizeof(*ehdr)); + memcpy(ehdr->e_ident, buf, sizeof(ehdr->e_ident)); + if (!elf_is_elf_file(ehdr)) { + pr_debug("No ELF header magic.\n"); + return -ENOEXEC; + } + + if (ehdr->e_ident[EI_CLASS] != ELF_CLASS) { + pr_debug("Not a supported ELF class.\n"); + return -ENOEXEC; + } else if (ehdr->e_ident[EI_DATA] != ELFDATA2LSB && + ehdr->e_ident[EI_DATA] != ELFDATA2MSB) { + pr_debug("Not a supported ELF data format.\n"); + return -ENOEXEC; + } + + buf_ehdr = (struct elfhdr *) buf; + if (elf16_to_cpu(ehdr, buf_ehdr->e_ehsize) != sizeof(*buf_ehdr)) { + pr_debug("Bad ELF header size.\n"); + return -ENOEXEC; + } + + ehdr->e_type = elf16_to_cpu(ehdr, buf_ehdr->e_type); + ehdr->e_machine = elf16_to_cpu(ehdr, buf_ehdr->e_machine); + ehdr->e_version = elf32_to_cpu(ehdr, buf_ehdr->e_version); + ehdr->e_entry = elf_addr_to_cpu(ehdr, buf_ehdr->e_entry); + ehdr->e_phoff = elf_addr_to_cpu(ehdr, buf_ehdr->e_phoff); + ehdr->e_shoff = elf_addr_to_cpu(ehdr, buf_ehdr->e_shoff); + ehdr->e_flags = elf32_to_cpu(ehdr, buf_ehdr->e_flags); + ehdr->e_phentsize = elf16_to_cpu(ehdr, buf_ehdr->e_phentsize); + ehdr->e_phnum = elf16_to_cpu(ehdr, buf_ehdr->e_phnum); + ehdr->e_shentsize = elf16_to_cpu(ehdr, buf_ehdr->e_shentsize); + ehdr->e_shnum = elf16_to_cpu(ehdr, buf_ehdr->e_shnum); + ehdr->e_shstrndx = elf16_to_cpu(ehdr, buf_ehdr->e_shstrndx); + + return elf_is_ehdr_sane(ehdr, len) ? 0 : -ENOEXEC; +} + +/** + * elf_is_phdr_sane - check that it is safe to use the program header + * @buf_len: size of the buffer in which the ELF file is loaded. + */ +static bool elf_is_phdr_sane(const struct elf_phdr *phdr, size_t buf_len) +{ + + if (phdr->p_offset + phdr->p_filesz < phdr->p_offset) { + pr_debug("ELF segment location wraps around.\n"); + return false; + } else if (phdr->p_offset + phdr->p_filesz > buf_len) { + pr_debug("ELF segment not in file.\n"); + return false; + } else if (phdr->p_paddr + phdr->p_memsz < phdr->p_paddr) { + pr_debug("ELF segment address wraps around.\n"); + return false; + } + + return true; +} + +static int elf_read_phdr(const char *buf, size_t len, struct elf_info *elf_info, + int idx) +{ + /* Override the const in proghdrs, we are the ones doing the loading. */ + struct elf_phdr *phdr = (struct elf_phdr *) &elf_info->proghdrs[idx]; + const char *pbuf; + struct elf_phdr *buf_phdr; + + pbuf = buf + elf_info->ehdr->e_phoff + (idx * sizeof(*buf_phdr)); + buf_phdr = (struct elf_phdr *) pbuf; + + phdr->p_type = elf32_to_cpu(elf_info->ehdr, buf_phdr->p_type); + phdr->p_offset = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_offset); + phdr->p_paddr = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_paddr); + phdr->p_vaddr = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_vaddr); + phdr->p_flags = elf32_to_cpu(elf_info->ehdr, buf_phdr->p_flags); + + /* + * The following fields have a type equivalent to Elf_Addr + * both in 32 bit and 64 bit ELF. + */ + phdr->p_filesz = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_filesz); + phdr->p_memsz = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_memsz); + phdr->p_align = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_align); + + return elf_is_phdr_sane(phdr, len) ? 0 : -ENOEXEC; +} + +/** + * elf_read_phdrs - read the program headers from the buffer + * + * This function assumes that the program header table was checked for sanity. + * Use elf_is_ehdr_sane() if it wasn't. + */ +static int elf_read_phdrs(const char *buf, size_t len, + struct elf_info *elf_info) +{ + size_t phdr_size, i; + const struct elfhdr *ehdr = elf_info->ehdr; + + /* + * e_phnum is at most 65535 so calculating the size of the + * program header cannot overflow. + */ + phdr_size = sizeof(struct elf_phdr) * ehdr->e_phnum; + + elf_info->proghdrs = kzalloc(phdr_size, GFP_KERNEL); + if (!elf_info->proghdrs) + return -ENOMEM; + + for (i = 0; i < ehdr->e_phnum; i++) { + int ret; + + ret = elf_read_phdr(buf, len, elf_info, i); + if (ret) { + kfree(elf_info->proghdrs); + elf_info->proghdrs = NULL; + return ret; + } + } + + return 0; +} + +/** + * elf_is_shdr_sane - check that it is safe to use the section header + * @buf_len: size of the buffer in which the ELF file is loaded. + */ +static bool elf_is_shdr_sane(const struct elf_shdr *shdr, size_t buf_len) +{ + bool size_ok; + + /* SHT_NULL headers have undefined values, so we can't check them. */ + if (shdr->sh_type == SHT_NULL) + return true; + + /* Now verify sh_entsize */ + switch (shdr->sh_type) { + case SHT_SYMTAB: + size_ok = shdr->sh_entsize == sizeof(Elf_Sym); + break; + case SHT_RELA: + size_ok = shdr->sh_entsize == sizeof(Elf_Rela); + break; + case SHT_DYNAMIC: + size_ok = shdr->sh_entsize == sizeof(Elf_Dyn); + break; + case SHT_REL: + size_ok = shdr->sh_entsize == sizeof(Elf_Rel); + break; + case SHT_NOTE: + case SHT_PROGBITS: + case SHT_HASH: + case SHT_NOBITS: + default: + /* + * This is a section whose entsize requirements + * I don't care about. If I don't know about + * the section I can't care about it's entsize + * requirements. + */ + size_ok = true; + break; + } + + if (!size_ok) { + pr_debug("ELF section with wrong entry size.\n"); + return false; + } else if (shdr->sh_addr + shdr->sh_size < shdr->sh_addr) { + pr_debug("ELF section address wraps around.\n"); + return false; + } + + if (shdr->sh_type != SHT_NOBITS) { + if (shdr->sh_offset + shdr->sh_size < shdr->sh_offset) { + pr_debug("ELF section location wraps around.\n"); + return false; + } else if (shdr->sh_offset + shdr->sh_size > buf_len) { + pr_debug("ELF section not in file.\n"); + return false; + } + } + + return true; +} + +static int elf_read_shdr(const char *buf, size_t len, struct elf_info *elf_info, + int idx) +{ + struct elf_shdr *shdr = &elf_info->sechdrs[idx]; + const struct elfhdr *ehdr = elf_info->ehdr; + const char *sbuf; + struct elf_shdr *buf_shdr; + + sbuf = buf + ehdr->e_shoff + idx * sizeof(*buf_shdr); + buf_shdr = (struct elf_shdr *) sbuf; + + shdr->sh_name = elf32_to_cpu(ehdr, buf_shdr->sh_name); + shdr->sh_type = elf32_to_cpu(ehdr, buf_shdr->sh_type); + shdr->sh_addr = elf_addr_to_cpu(ehdr, buf_shdr->sh_addr); + shdr->sh_offset = elf_addr_to_cpu(ehdr, buf_shdr->sh_offset); + shdr->sh_link = elf32_to_cpu(ehdr, buf_shdr->sh_link); + shdr->sh_info = elf32_to_cpu(ehdr, buf_shdr->sh_info); + + /* + * The following fields have a type equivalent to Elf_Addr + * both in 32 bit and 64 bit ELF. + */ + shdr->sh_flags = elf_addr_to_cpu(ehdr, buf_shdr->sh_flags); + shdr->sh_size = elf_addr_to_cpu(ehdr, buf_shdr->sh_size); + shdr->sh_addralign = elf_addr_to_cpu(ehdr, buf_shdr->sh_addralign); + shdr->sh_entsize = elf_addr_to_cpu(ehdr, buf_shdr->sh_entsize); + + return elf_is_shdr_sane(shdr, len) ? 0 : -ENOEXEC; +} + +/** + * elf_read_shdrs - read the section headers from the buffer + * + * This function assumes that the section header table was checked for sanity. + * Use elf_is_ehdr_sane() if it wasn't. + */ +static int elf_read_shdrs(const char *buf, size_t len, + struct elf_info *elf_info) +{ + size_t shdr_size, i; + + /* + * e_shnum is at most 65536 so calculating + * the size of the section header cannot overflow. + */ + shdr_size = sizeof(struct elf_shdr) * elf_info->ehdr->e_shnum; + + elf_info->sechdrs = kzalloc(shdr_size, GFP_KERNEL); + if (!elf_info->sechdrs) + return -ENOMEM; + + for (i = 0; i < elf_info->ehdr->e_shnum; i++) { + int ret; + + ret = elf_read_shdr(buf, len, elf_info, i); + if (ret) { + kfree(elf_info->sechdrs); + elf_info->sechdrs = NULL; + return ret; + } + } + + return 0; +} + +/** + * elf_read_from_buffer - read ELF file and sets up ELF header and ELF info + * @buf: Buffer to read ELF file from. + * @len: Size of @buf. + * @ehdr: Pointer to existing struct which will be populated. + * @elf_info: Pointer to existing struct which will be populated. + * + * This function allows reading ELF files with different byte order than + * the kernel, byte-swapping the fields as needed. + * + * Return: + * On success returns 0, and the caller should call elf_free_info(elf_info) to + * free the memory allocated for the section and program headers. + */ +int elf_read_from_buffer(const char *buf, size_t len, struct elfhdr *ehdr, + struct elf_info *elf_info) +{ + int ret; + + ret = elf_read_ehdr(buf, len, ehdr); + if (ret) + return ret; + + elf_info->buffer = buf; + elf_info->ehdr = ehdr; + if (ehdr->e_phoff > 0 && ehdr->e_phnum > 0) { + ret = elf_read_phdrs(buf, len, elf_info); + if (ret) + return ret; + } + if (ehdr->e_shoff > 0 && ehdr->e_shnum > 0) { + ret = elf_read_shdrs(buf, len, elf_info); + if (ret) { + kfree(elf_info->proghdrs); + return ret; + } + } + + return 0; +} + +/** + * elf_free_info - free memory allocated by elf_read_from_buffer + */ +void elf_free_info(struct elf_info *elf_info) +{ + kfree(elf_info->proghdrs); + kfree(elf_info->sechdrs); + memset(elf_info, 0, sizeof(*elf_info)); +} +/** + * build_elf_exec_info - read ELF executable and check that we can use it + */ +static int build_elf_exec_info(const char *buf, size_t len, struct elfhdr *ehdr, + struct elf_info *elf_info) +{ + int i; + int ret; + + ret = elf_read_from_buffer(buf, len, ehdr, elf_info); + if (ret) + return ret; + + /* Big endian vmlinux has type ET_DYN. */ + if (ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN) { + pr_err("Not an ELF executable.\n"); + goto error; + } else if (!elf_info->proghdrs) { + pr_err("No ELF program header.\n"); + goto error; + } + + for (i = 0; i < ehdr->e_phnum; i++) { + /* + * Kexec does not support loading interpreters. + * In addition this check keeps us from attempting + * to kexec ordinay executables. + */ + if (elf_info->proghdrs[i].p_type == PT_INTERP) { + pr_err("Requires an ELF interpreter.\n"); + goto error; + } + } + + return 0; +error: + elf_free_info(elf_info); + return -ENOEXEC; +} + +static int elf64_probe(const char *buf, unsigned long len) +{ + struct elfhdr ehdr; + struct elf_info elf_info; + int ret; + + ret = build_elf_exec_info(buf, len, &ehdr, &elf_info); + if (ret) + return ret; + + elf_free_info(&elf_info); + + return elf_check_arch(&ehdr) ? 0 : -ENOEXEC; +} + +/** + * elf_exec_load - load ELF executable image + * @lowest_load_addr: On return, will be the address where the first PT_LOAD + * section will be loaded in memory. + * + * Return: + * 0 on success, negative value on failure. + */ +static int elf_exec_load(struct kimage *image, struct elfhdr *ehdr, + struct elf_info *elf_info, + unsigned long *lowest_load_addr) +{ + unsigned long base = 0, lowest_addr = UINT_MAX; + int ret; + size_t i; + struct kexec_buf kbuf = { .image = image, .buf_max = ppc64_rma_size, + .top_down = false }; + + /* Read in the PT_LOAD segments. */ + for (i = 0; i < ehdr->e_phnum; i++) { + unsigned long load_addr; + size_t size; + const struct elf_phdr *phdr; + + phdr = &elf_info->proghdrs[i]; + if (phdr->p_type != PT_LOAD) + continue; + + size = phdr->p_filesz; + if (size > phdr->p_memsz) + size = phdr->p_memsz; + + kbuf.buffer = (void *) elf_info->buffer + phdr->p_offset; + kbuf.bufsz = size; + kbuf.memsz = phdr->p_memsz; + kbuf.buf_align = phdr->p_align; + kbuf.buf_min = phdr->p_paddr + base; + ret = kexec_add_buffer(&kbuf); + if (ret) + goto out; + load_addr = kbuf.mem; + + if (load_addr < lowest_addr) + lowest_addr = load_addr; + } + + /* Update entry point to reflect new load address. */ + ehdr->e_entry += base; + + *lowest_load_addr = lowest_addr; + ret = 0; + out: + return ret; +} + +static void *elf64_load(struct kimage *image, char *kernel_buf, + unsigned long kernel_len, char *initrd, + unsigned long initrd_len, char *cmdline, + unsigned long cmdline_len) +{ + int ret; + unsigned int fdt_size; + unsigned long kernel_load_addr, purgatory_load_addr; + unsigned long initrd_load_addr = 0, fdt_load_addr; + void *fdt; + const void *slave_code; + struct elfhdr ehdr; + struct elf_info elf_info; + struct kexec_buf kbuf = { .image = image, .buf_min = 0, + .buf_max = ppc64_rma_size }; + + ret = build_elf_exec_info(kernel_buf, kernel_len, &ehdr, &elf_info); + if (ret) + goto out; + + ret = elf_exec_load(image, &ehdr, &elf_info, &kernel_load_addr); + if (ret) + goto out; + + pr_debug("Loaded the kernel at 0x%lx\n", kernel_load_addr); + + ret = kexec_load_purgatory(image, 0, ppc64_rma_size, true, + &purgatory_load_addr); + if (ret) { + pr_err("Loading purgatory failed.\n"); + goto out; + } + + pr_debug("Loaded purgatory at 0x%lx\n", purgatory_load_addr); + + if (initrd != NULL) { + kbuf.buffer = initrd; + kbuf.bufsz = kbuf.memsz = initrd_len; + kbuf.buf_align = PAGE_SIZE; + kbuf.top_down = false; + ret = kexec_add_buffer(&kbuf); + if (ret) + goto out; + initrd_load_addr = kbuf.mem; + + pr_debug("Loaded initrd at 0x%lx\n", initrd_load_addr); + } + + fdt_size = fdt_totalsize(initial_boot_params) * 2; + fdt = kmalloc(fdt_size, GFP_KERNEL); + if (!fdt) { + pr_err("Not enough memory for the device tree.\n"); + ret = -ENOMEM; + goto out; + } + ret = fdt_open_into(initial_boot_params, fdt, fdt_size); + if (ret < 0) { + pr_err("Error setting up the new device tree.\n"); + ret = -EINVAL; + goto out; + } + + ret = setup_new_fdt(image, fdt, initrd_load_addr, initrd_len, cmdline); + if (ret) + goto out; + + fdt_pack(fdt); + + kbuf.buffer = fdt; + kbuf.bufsz = kbuf.memsz = fdt_size; + kbuf.buf_align = PAGE_SIZE; + kbuf.top_down = true; + ret = kexec_add_buffer(&kbuf); + if (ret) + goto out; + fdt_load_addr = kbuf.mem; + + pr_debug("Loaded device tree at 0x%lx\n", fdt_load_addr); + + slave_code = elf_info.buffer + elf_info.proghdrs[0].p_offset; + ret = setup_purgatory(image, slave_code, fdt, kernel_load_addr, + fdt_load_addr); + if (ret) + pr_err("Error setting up the purgatory.\n"); + +out: + elf_free_info(&elf_info); + + /* Make kimage_file_post_load_cleanup free the fdt buffer for us. */ + return ret ? ERR_PTR(ret) : fdt; +} + +struct kexec_file_ops kexec_elf64_ops = { + .probe = elf64_probe, + .load = elf64_load, +}; diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index e785cc9e1ecd..fce05a38851c 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -35,7 +35,7 @@ #include <asm/code-patching.h> #include <asm/cacheflush.h> #include <asm/sstep.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); @@ -140,13 +140,16 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, regs->link = (unsigned long)kretprobe_trampoline; } -static int __kprobes kprobe_handler(struct pt_regs *regs) +int __kprobes kprobe_handler(struct pt_regs *regs) { struct kprobe *p; int ret = 0; unsigned int *addr = (unsigned int *)regs->nip; struct kprobe_ctlblk *kcb; + if (user_mode(regs)) + return 0; + /* * We don't want to be preempted for the entire * duration of kprobe processing @@ -282,6 +285,7 @@ asm(".global kretprobe_trampoline\n" ".type kretprobe_trampoline, @function\n" "kretprobe_trampoline:\n" "nop\n" + "blr\n" ".size kretprobe_trampoline, .-kretprobe_trampoline\n"); /* @@ -334,6 +338,13 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, kretprobe_assert(ri, orig_ret_address, trampoline_address); regs->nip = orig_ret_address; + /* + * Make LR point to the orig_ret_address. + * When the 'nop' inside the kretprobe_trampoline + * is optimized, we can do a 'blr' after executing the + * detour buffer code. + */ + regs->link = orig_ret_address; reset_current_kprobe(); kretprobe_hash_unlock(current, &flags); @@ -359,12 +370,12 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, * single-stepped a copy of the instruction. The address of this * copy is p->ainsn.insn. */ -static int __kprobes post_kprobe_handler(struct pt_regs *regs) +int __kprobes kprobe_post_handler(struct pt_regs *regs) { struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - if (!cur) + if (!cur || user_mode(regs)) return 0; /* make sure we got here for instruction we have a kprobe on */ @@ -449,7 +460,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) * zero, try to fix up. */ if ((entry = search_exception_tables(regs->nip)) != NULL) { - regs->nip = entry->fixup; + regs->nip = extable_fixup(entry); return 1; } @@ -464,33 +475,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) return 0; } -/* - * Wrapper routine to for handling exceptions. - */ -int __kprobes kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data) -{ - struct die_args *args = (struct die_args *)data; - int ret = NOTIFY_DONE; - - if (args->regs && user_mode(args->regs)) - return ret; - - switch (val) { - case DIE_BPT: - if (kprobe_handler(args->regs)) - ret = NOTIFY_STOP; - break; - case DIE_SSTEP: - if (post_kprobe_handler(args->regs)) - ret = NOTIFY_STOP; - break; - default: - break; - } - return ret; -} - unsigned long arch_deref_entry_point(void *entry) { return ppc_global_function_entry(entry); diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index bc525ea0dc09..0694d20f85b6 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -233,7 +233,8 @@ static int __init add_legacy_isa_port(struct device_node *np, * * Note: Don't even try on P8 lpc, we know it's not directly mapped */ - if (!of_device_is_compatible(isa_brg, "ibm,power8-lpc")) { + if (!of_device_is_compatible(isa_brg, "ibm,power8-lpc") || + of_get_property(isa_brg, "ranges", NULL)) { taddr = of_translate_address(np, reg); if (taddr == OF_BAD_ADDR) taddr = 0; diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index a205fa3d9bf3..5c12e21d0d1a 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -310,7 +310,7 @@ void default_machine_kexec(struct kimage *image) if (!kdump_in_progress()) kexec_prepare_cpus(); - pr_debug("kexec: Starting switchover sequence.\n"); + printk("kexec: Starting switchover sequence.\n"); /* switch to a staticly allocated stack. Based on irq stack code. * We setup preempt_count to avoid using VMX in memcpy. diff --git a/arch/powerpc/kernel/machine_kexec_file_64.c b/arch/powerpc/kernel/machine_kexec_file_64.c new file mode 100644 index 000000000000..992c0d258e5d --- /dev/null +++ b/arch/powerpc/kernel/machine_kexec_file_64.c @@ -0,0 +1,347 @@ +/* + * ppc64 code to implement the kexec_file_load syscall + * + * Copyright (C) 2004 Adam Litke (agl@us.ibm.com) + * Copyright (C) 2004 IBM Corp. + * Copyright (C) 2004,2005 Milton D Miller II, IBM Corporation + * Copyright (C) 2005 R Sharada (sharada@in.ibm.com) + * Copyright (C) 2006 Mohan Kumar M (mohan@in.ibm.com) + * Copyright (C) 2016 IBM Corporation + * + * Based on kexec-tools' kexec-elf-ppc64.c, fs2dt.c. + * Heavily modified for the kernel by + * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/slab.h> +#include <linux/kexec.h> +#include <linux/memblock.h> +#include <linux/of_fdt.h> +#include <linux/libfdt.h> +#include <asm/ima.h> + +#define SLAVE_CODE_SIZE 256 + +static struct kexec_file_ops *kexec_file_loaders[] = { + &kexec_elf64_ops, +}; + +int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, + unsigned long buf_len) +{ + int i, ret = -ENOEXEC; + struct kexec_file_ops *fops; + + /* We don't support crash kernels yet. */ + if (image->type == KEXEC_TYPE_CRASH) + return -ENOTSUPP; + + for (i = 0; i < ARRAY_SIZE(kexec_file_loaders); i++) { + fops = kexec_file_loaders[i]; + if (!fops || !fops->probe) + continue; + + ret = fops->probe(buf, buf_len); + if (!ret) { + image->fops = fops; + return ret; + } + } + + return ret; +} + +void *arch_kexec_kernel_image_load(struct kimage *image) +{ + if (!image->fops || !image->fops->load) + return ERR_PTR(-ENOEXEC); + + return image->fops->load(image, image->kernel_buf, + image->kernel_buf_len, image->initrd_buf, + image->initrd_buf_len, image->cmdline_buf, + image->cmdline_buf_len); +} + +int arch_kimage_file_post_load_cleanup(struct kimage *image) +{ + if (!image->fops || !image->fops->cleanup) + return 0; + + return image->fops->cleanup(image->image_loader_data); +} + +/** + * arch_kexec_walk_mem - call func(data) for each unreserved memory block + * @kbuf: Context info for the search. Also passed to @func. + * @func: Function to call for each memory block. + * + * This function is used by kexec_add_buffer and kexec_locate_mem_hole + * to find unreserved memory to load kexec segments into. + * + * Return: The memory walk will stop when func returns a non-zero value + * and that value will be returned. If all free regions are visited without + * func returning non-zero, then zero will be returned. + */ +int arch_kexec_walk_mem(struct kexec_buf *kbuf, int (*func)(u64, u64, void *)) +{ + int ret = 0; + u64 i; + phys_addr_t mstart, mend; + + if (kbuf->top_down) { + for_each_free_mem_range_reverse(i, NUMA_NO_NODE, 0, + &mstart, &mend, NULL) { + /* + * In memblock, end points to the first byte after the + * range while in kexec, end points to the last byte + * in the range. + */ + ret = func(mstart, mend - 1, kbuf); + if (ret) + break; + } + } else { + for_each_free_mem_range(i, NUMA_NO_NODE, 0, &mstart, &mend, + NULL) { + /* + * In memblock, end points to the first byte after the + * range while in kexec, end points to the last byte + * in the range. + */ + ret = func(mstart, mend - 1, kbuf); + if (ret) + break; + } + } + + return ret; +} + +/** + * setup_purgatory - initialize the purgatory's global variables + * @image: kexec image. + * @slave_code: Slave code for the purgatory. + * @fdt: Flattened device tree for the next kernel. + * @kernel_load_addr: Address where the kernel is loaded. + * @fdt_load_addr: Address where the flattened device tree is loaded. + * + * Return: 0 on success, or negative errno on error. + */ +int setup_purgatory(struct kimage *image, const void *slave_code, + const void *fdt, unsigned long kernel_load_addr, + unsigned long fdt_load_addr) +{ + unsigned int *slave_code_buf, master_entry; + int ret; + + slave_code_buf = kmalloc(SLAVE_CODE_SIZE, GFP_KERNEL); + if (!slave_code_buf) + return -ENOMEM; + + /* Get the slave code from the new kernel and put it in purgatory. */ + ret = kexec_purgatory_get_set_symbol(image, "purgatory_start", + slave_code_buf, SLAVE_CODE_SIZE, + true); + if (ret) { + kfree(slave_code_buf); + return ret; + } + + master_entry = slave_code_buf[0]; + memcpy(slave_code_buf, slave_code, SLAVE_CODE_SIZE); + slave_code_buf[0] = master_entry; + ret = kexec_purgatory_get_set_symbol(image, "purgatory_start", + slave_code_buf, SLAVE_CODE_SIZE, + false); + kfree(slave_code_buf); + + ret = kexec_purgatory_get_set_symbol(image, "kernel", &kernel_load_addr, + sizeof(kernel_load_addr), false); + if (ret) + return ret; + ret = kexec_purgatory_get_set_symbol(image, "dt_offset", &fdt_load_addr, + sizeof(fdt_load_addr), false); + if (ret) + return ret; + + return 0; +} + +/** + * delete_fdt_mem_rsv - delete memory reservation with given address and size + * + * Return: 0 on success, or negative errno on error. + */ +int delete_fdt_mem_rsv(void *fdt, unsigned long start, unsigned long size) +{ + int i, ret, num_rsvs = fdt_num_mem_rsv(fdt); + + for (i = 0; i < num_rsvs; i++) { + uint64_t rsv_start, rsv_size; + + ret = fdt_get_mem_rsv(fdt, i, &rsv_start, &rsv_size); + if (ret) { + pr_err("Malformed device tree.\n"); + return -EINVAL; + } + + if (rsv_start == start && rsv_size == size) { + ret = fdt_del_mem_rsv(fdt, i); + if (ret) { + pr_err("Error deleting device tree reservation.\n"); + return -EINVAL; + } + + return 0; + } + } + + return -ENOENT; +} + +/* + * setup_new_fdt - modify /chosen and memory reservation for the next kernel + * @image: kexec image being loaded. + * @fdt: Flattened device tree for the next kernel. + * @initrd_load_addr: Address where the next initrd will be loaded. + * @initrd_len: Size of the next initrd, or 0 if there will be none. + * @cmdline: Command line for the next kernel, or NULL if there will + * be none. + * + * Return: 0 on success, or negative errno on error. + */ +int setup_new_fdt(const struct kimage *image, void *fdt, + unsigned long initrd_load_addr, unsigned long initrd_len, + const char *cmdline) +{ + int ret, chosen_node; + const void *prop; + + /* Remove memory reservation for the current device tree. */ + ret = delete_fdt_mem_rsv(fdt, __pa(initial_boot_params), + fdt_totalsize(initial_boot_params)); + if (ret == 0) + pr_debug("Removed old device tree reservation.\n"); + else if (ret != -ENOENT) + return ret; + + chosen_node = fdt_path_offset(fdt, "/chosen"); + if (chosen_node == -FDT_ERR_NOTFOUND) { + chosen_node = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"), + "chosen"); + if (chosen_node < 0) { + pr_err("Error creating /chosen.\n"); + return -EINVAL; + } + } else if (chosen_node < 0) { + pr_err("Malformed device tree: error reading /chosen.\n"); + return -EINVAL; + } + + /* Did we boot using an initrd? */ + prop = fdt_getprop(fdt, chosen_node, "linux,initrd-start", NULL); + if (prop) { + uint64_t tmp_start, tmp_end, tmp_size; + + tmp_start = fdt64_to_cpu(*((const fdt64_t *) prop)); + + prop = fdt_getprop(fdt, chosen_node, "linux,initrd-end", NULL); + if (!prop) { + pr_err("Malformed device tree.\n"); + return -EINVAL; + } + tmp_end = fdt64_to_cpu(*((const fdt64_t *) prop)); + + /* + * kexec reserves exact initrd size, while firmware may + * reserve a multiple of PAGE_SIZE, so check for both. + */ + tmp_size = tmp_end - tmp_start; + ret = delete_fdt_mem_rsv(fdt, tmp_start, tmp_size); + if (ret == -ENOENT) + ret = delete_fdt_mem_rsv(fdt, tmp_start, + round_up(tmp_size, PAGE_SIZE)); + if (ret == 0) + pr_debug("Removed old initrd reservation.\n"); + else if (ret != -ENOENT) + return ret; + + /* If there's no new initrd, delete the old initrd's info. */ + if (initrd_len == 0) { + ret = fdt_delprop(fdt, chosen_node, + "linux,initrd-start"); + if (ret) { + pr_err("Error deleting linux,initrd-start.\n"); + return -EINVAL; + } + + ret = fdt_delprop(fdt, chosen_node, "linux,initrd-end"); + if (ret) { + pr_err("Error deleting linux,initrd-end.\n"); + return -EINVAL; + } + } + } + + if (initrd_len) { + ret = fdt_setprop_u64(fdt, chosen_node, + "linux,initrd-start", + initrd_load_addr); + if (ret < 0) { + pr_err("Error setting up the new device tree.\n"); + return -EINVAL; + } + + /* initrd-end is the first address after the initrd image. */ + ret = fdt_setprop_u64(fdt, chosen_node, "linux,initrd-end", + initrd_load_addr + initrd_len); + if (ret < 0) { + pr_err("Error setting up the new device tree.\n"); + return -EINVAL; + } + + ret = fdt_add_mem_rsv(fdt, initrd_load_addr, initrd_len); + if (ret) { + pr_err("Error reserving initrd memory: %s\n", + fdt_strerror(ret)); + return -EINVAL; + } + } + + if (cmdline != NULL) { + ret = fdt_setprop_string(fdt, chosen_node, "bootargs", cmdline); + if (ret < 0) { + pr_err("Error setting up the new device tree.\n"); + return -EINVAL; + } + } else { + ret = fdt_delprop(fdt, chosen_node, "bootargs"); + if (ret && ret != -FDT_ERR_NOTFOUND) { + pr_err("Error deleting bootargs.\n"); + return -EINVAL; + } + } + + ret = setup_ima_buffer(image, fdt, chosen_node); + if (ret) { + pr_err("Error setting up the new device tree.\n"); + return ret; + } + + ret = fdt_setprop(fdt, chosen_node, "linux,booted-from-kexec", NULL, 0); + if (ret) { + pr_err("Error setting up the new device tree.\n"); + return -EINVAL; + } + + return 0; +} diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 5e7ece0fda9f..a1475e6aef3a 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -58,6 +58,15 @@ static void mce_set_error_info(struct machine_check_event *mce, case MCE_ERROR_TYPE_TLB: mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type; break; + case MCE_ERROR_TYPE_USER: + mce->u.user_error.user_error_type = mce_err->u.user_error_type; + break; + case MCE_ERROR_TYPE_RA: + mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type; + break; + case MCE_ERROR_TYPE_LINK: + mce->u.link_error.link_error_type = mce_err->u.link_error_type; + break; case MCE_ERROR_TYPE_UNKNOWN: default: break; @@ -72,7 +81,6 @@ void save_mce_event(struct pt_regs *regs, long handled, struct mce_error_info *mce_err, uint64_t nip, uint64_t addr) { - uint64_t srr1; int index = __this_cpu_inc_return(mce_nest_count) - 1; struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]); @@ -91,15 +99,14 @@ void save_mce_event(struct pt_regs *regs, long handled, mce->gpr3 = regs->gpr[3]; mce->in_use = 1; - mce->initiator = MCE_INITIATOR_CPU; /* Mark it recovered if we have handled it and MSR(RI=1). */ if (handled && (regs->msr & MSR_RI)) mce->disposition = MCE_DISPOSITION_RECOVERED; else mce->disposition = MCE_DISPOSITION_NOT_RECOVERED; - mce->severity = MCE_SEV_ERROR_SYNC; - srr1 = regs->msr; + mce->initiator = mce_err->initiator; + mce->severity = mce_err->severity; /* * Populate the mce error_type and type-specific error_type. @@ -118,6 +125,15 @@ void save_mce_event(struct pt_regs *regs, long handled, } else if (mce->error_type == MCE_ERROR_TYPE_ERAT) { mce->u.erat_error.effective_address_provided = true; mce->u.erat_error.effective_address = addr; + } else if (mce->error_type == MCE_ERROR_TYPE_USER) { + mce->u.user_error.effective_address_provided = true; + mce->u.user_error.effective_address = addr; + } else if (mce->error_type == MCE_ERROR_TYPE_RA) { + mce->u.ra_error.effective_address_provided = true; + mce->u.ra_error.effective_address = addr; + } else if (mce->error_type == MCE_ERROR_TYPE_LINK) { + mce->u.link_error.effective_address_provided = true; + mce->u.link_error.effective_address = addr; } else if (mce->error_type == MCE_ERROR_TYPE_UE) { mce->u.ue_error.effective_address_provided = true; mce->u.ue_error.effective_address = addr; @@ -242,6 +258,29 @@ void machine_check_print_event_info(struct machine_check_event *evt) "Parity", "Multihit", }; + static const char *mc_user_types[] = { + "Indeterminate", + "tlbie(l) invalid", + }; + static const char *mc_ra_types[] = { + "Indeterminate", + "Instruction fetch (bad)", + "Page table walk ifetch (bad)", + "Page table walk ifetch (foreign)", + "Load (bad)", + "Store (bad)", + "Page table walk Load/Store (bad)", + "Page table walk Load/Store (foreign)", + "Load/Store (foreign)", + }; + static const char *mc_link_types[] = { + "Indeterminate", + "Instruction fetch (timeout)", + "Page table walk ifetch (timeout)", + "Load (timeout)", + "Store (timeout)", + "Page table walk Load/Store (timeout)", + }; /* Print things out */ if (evt->version != MCE_V1) { @@ -318,6 +357,36 @@ void machine_check_print_event_info(struct machine_check_event *evt) printk("%s Effective address: %016llx\n", level, evt->u.tlb_error.effective_address); break; + case MCE_ERROR_TYPE_USER: + subtype = evt->u.user_error.user_error_type < + ARRAY_SIZE(mc_user_types) ? + mc_user_types[evt->u.user_error.user_error_type] + : "Unknown"; + printk("%s Error type: User [%s]\n", level, subtype); + if (evt->u.user_error.effective_address_provided) + printk("%s Effective address: %016llx\n", + level, evt->u.user_error.effective_address); + break; + case MCE_ERROR_TYPE_RA: + subtype = evt->u.ra_error.ra_error_type < + ARRAY_SIZE(mc_ra_types) ? + mc_ra_types[evt->u.ra_error.ra_error_type] + : "Unknown"; + printk("%s Error type: Real address [%s]\n", level, subtype); + if (evt->u.ra_error.effective_address_provided) + printk("%s Effective address: %016llx\n", + level, evt->u.ra_error.effective_address); + break; + case MCE_ERROR_TYPE_LINK: + subtype = evt->u.link_error.link_error_type < + ARRAY_SIZE(mc_link_types) ? + mc_link_types[evt->u.link_error.link_error_type] + : "Unknown"; + printk("%s Error type: Link [%s]\n", level, subtype); + if (evt->u.link_error.effective_address_provided) + printk("%s Effective address: %016llx\n", + level, evt->u.link_error.effective_address); + break; default: case MCE_ERROR_TYPE_UNKNOWN: printk("%s Error type: Unknown\n", level); @@ -344,6 +413,18 @@ uint64_t get_mce_fault_addr(struct machine_check_event *evt) if (evt->u.tlb_error.effective_address_provided) return evt->u.tlb_error.effective_address; break; + case MCE_ERROR_TYPE_USER: + if (evt->u.user_error.effective_address_provided) + return evt->u.user_error.effective_address; + break; + case MCE_ERROR_TYPE_RA: + if (evt->u.ra_error.effective_address_provided) + return evt->u.ra_error.effective_address; + break; + case MCE_ERROR_TYPE_LINK: + if (evt->u.link_error.effective_address_provided) + return evt->u.link_error.effective_address; + break; default: case MCE_ERROR_TYPE_UNKNOWN: break; diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index 7353991c4ece..763d6f58caa8 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -116,6 +116,51 @@ static void flush_and_reload_slb(void) } #endif +static void flush_erat(void) +{ + asm volatile(PPC_INVALIDATE_ERAT : : :"memory"); +} + +#define MCE_FLUSH_SLB 1 +#define MCE_FLUSH_TLB 2 +#define MCE_FLUSH_ERAT 3 + +static int mce_flush(int what) +{ +#ifdef CONFIG_PPC_STD_MMU_64 + if (what == MCE_FLUSH_SLB) { + flush_and_reload_slb(); + return 1; + } +#endif + if (what == MCE_FLUSH_ERAT) { + flush_erat(); + return 1; + } + if (what == MCE_FLUSH_TLB) { + if (cur_cpu_spec && cur_cpu_spec->flush_tlb) { + cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL); + return 1; + } + } + + return 0; +} + +static int mce_handle_flush_derrors(uint64_t dsisr, uint64_t slb, uint64_t tlb, uint64_t erat) +{ + if ((dsisr & slb) && mce_flush(MCE_FLUSH_SLB)) + dsisr &= ~slb; + if ((dsisr & erat) && mce_flush(MCE_FLUSH_ERAT)) + dsisr &= ~erat; + if ((dsisr & tlb) && mce_flush(MCE_FLUSH_TLB)) + dsisr &= ~tlb; + /* Any other errors we don't understand? */ + if (dsisr) + return 0; + return 1; +} + static long mce_handle_derror(uint64_t dsisr, uint64_t slb_error_bits) { long handled = 1; @@ -281,6 +326,9 @@ long __machine_check_early_realmode_p7(struct pt_regs *regs) long handled = 1; struct mce_error_info mce_error_info = { 0 }; + mce_error_info.severity = MCE_SEV_ERROR_SYNC; + mce_error_info.initiator = MCE_INITIATOR_CPU; + srr1 = regs->msr; nip = regs->nip; @@ -352,6 +400,9 @@ long __machine_check_early_realmode_p8(struct pt_regs *regs) long handled = 1; struct mce_error_info mce_error_info = { 0 }; + mce_error_info.severity = MCE_SEV_ERROR_SYNC; + mce_error_info.initiator = MCE_INITIATOR_CPU; + srr1 = regs->msr; nip = regs->nip; @@ -372,3 +423,189 @@ long __machine_check_early_realmode_p8(struct pt_regs *regs) save_mce_event(regs, handled, &mce_error_info, nip, addr); return handled; } + +static int mce_handle_derror_p9(struct pt_regs *regs) +{ + uint64_t dsisr = regs->dsisr; + + return mce_handle_flush_derrors(dsisr, + P9_DSISR_MC_SLB_PARITY_MFSLB | + P9_DSISR_MC_SLB_MULTIHIT_MFSLB, + + P9_DSISR_MC_TLB_MULTIHIT_MFTLB, + + P9_DSISR_MC_ERAT_MULTIHIT); +} + +static int mce_handle_ierror_p9(struct pt_regs *regs) +{ + uint64_t srr1 = regs->msr; + + switch (P9_SRR1_MC_IFETCH(srr1)) { + case P9_SRR1_MC_IFETCH_SLB_PARITY: + case P9_SRR1_MC_IFETCH_SLB_MULTIHIT: + return mce_flush(MCE_FLUSH_SLB); + case P9_SRR1_MC_IFETCH_TLB_MULTIHIT: + return mce_flush(MCE_FLUSH_TLB); + case P9_SRR1_MC_IFETCH_ERAT_MULTIHIT: + return mce_flush(MCE_FLUSH_ERAT); + default: + return 0; + } +} + +static void mce_get_derror_p9(struct pt_regs *regs, + struct mce_error_info *mce_err, uint64_t *addr) +{ + uint64_t dsisr = regs->dsisr; + + mce_err->severity = MCE_SEV_ERROR_SYNC; + mce_err->initiator = MCE_INITIATOR_CPU; + + if (dsisr & P9_DSISR_MC_USER_TLBIE) + *addr = regs->nip; + else + *addr = regs->dar; + + if (dsisr & P9_DSISR_MC_UE) { + mce_err->error_type = MCE_ERROR_TYPE_UE; + mce_err->u.ue_error_type = MCE_UE_ERROR_LOAD_STORE; + } else if (dsisr & P9_DSISR_MC_UE_TABLEWALK) { + mce_err->error_type = MCE_ERROR_TYPE_UE; + mce_err->u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE; + } else if (dsisr & P9_DSISR_MC_LINK_LOAD_TIMEOUT) { + mce_err->error_type = MCE_ERROR_TYPE_LINK; + mce_err->u.link_error_type = MCE_LINK_ERROR_LOAD_TIMEOUT; + } else if (dsisr & P9_DSISR_MC_LINK_TABLEWALK_TIMEOUT) { + mce_err->error_type = MCE_ERROR_TYPE_LINK; + mce_err->u.link_error_type = MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT; + } else if (dsisr & P9_DSISR_MC_ERAT_MULTIHIT) { + mce_err->error_type = MCE_ERROR_TYPE_ERAT; + mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT; + } else if (dsisr & P9_DSISR_MC_TLB_MULTIHIT_MFTLB) { + mce_err->error_type = MCE_ERROR_TYPE_TLB; + mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT; + } else if (dsisr & P9_DSISR_MC_USER_TLBIE) { + mce_err->error_type = MCE_ERROR_TYPE_USER; + mce_err->u.user_error_type = MCE_USER_ERROR_TLBIE; + } else if (dsisr & P9_DSISR_MC_SLB_PARITY_MFSLB) { + mce_err->error_type = MCE_ERROR_TYPE_SLB; + mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY; + } else if (dsisr & P9_DSISR_MC_SLB_MULTIHIT_MFSLB) { + mce_err->error_type = MCE_ERROR_TYPE_SLB; + mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT; + } else if (dsisr & P9_DSISR_MC_RA_LOAD) { + mce_err->error_type = MCE_ERROR_TYPE_RA; + mce_err->u.ra_error_type = MCE_RA_ERROR_LOAD; + } else if (dsisr & P9_DSISR_MC_RA_TABLEWALK) { + mce_err->error_type = MCE_ERROR_TYPE_RA; + mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE; + } else if (dsisr & P9_DSISR_MC_RA_TABLEWALK_FOREIGN) { + mce_err->error_type = MCE_ERROR_TYPE_RA; + mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN; + } else if (dsisr & P9_DSISR_MC_RA_FOREIGN) { + mce_err->error_type = MCE_ERROR_TYPE_RA; + mce_err->u.ra_error_type = MCE_RA_ERROR_LOAD_STORE_FOREIGN; + } +} + +static void mce_get_ierror_p9(struct pt_regs *regs, + struct mce_error_info *mce_err, uint64_t *addr) +{ + uint64_t srr1 = regs->msr; + + switch (P9_SRR1_MC_IFETCH(srr1)) { + case P9_SRR1_MC_IFETCH_RA_ASYNC_STORE: + case P9_SRR1_MC_IFETCH_LINK_ASYNC_STORE_TIMEOUT: + mce_err->severity = MCE_SEV_FATAL; + break; + default: + mce_err->severity = MCE_SEV_ERROR_SYNC; + break; + } + + mce_err->initiator = MCE_INITIATOR_CPU; + + *addr = regs->nip; + + switch (P9_SRR1_MC_IFETCH(srr1)) { + case P9_SRR1_MC_IFETCH_UE: + mce_err->error_type = MCE_ERROR_TYPE_UE; + mce_err->u.ue_error_type = MCE_UE_ERROR_IFETCH; + break; + case P9_SRR1_MC_IFETCH_SLB_PARITY: + mce_err->error_type = MCE_ERROR_TYPE_SLB; + mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY; + break; + case P9_SRR1_MC_IFETCH_SLB_MULTIHIT: + mce_err->error_type = MCE_ERROR_TYPE_SLB; + mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT; + break; + case P9_SRR1_MC_IFETCH_ERAT_MULTIHIT: + mce_err->error_type = MCE_ERROR_TYPE_ERAT; + mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT; + break; + case P9_SRR1_MC_IFETCH_TLB_MULTIHIT: + mce_err->error_type = MCE_ERROR_TYPE_TLB; + mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT; + break; + case P9_SRR1_MC_IFETCH_UE_TLB_RELOAD: + mce_err->error_type = MCE_ERROR_TYPE_UE; + mce_err->u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH; + break; + case P9_SRR1_MC_IFETCH_LINK_TIMEOUT: + mce_err->error_type = MCE_ERROR_TYPE_LINK; + mce_err->u.link_error_type = MCE_LINK_ERROR_IFETCH_TIMEOUT; + break; + case P9_SRR1_MC_IFETCH_LINK_TABLEWALK_TIMEOUT: + mce_err->error_type = MCE_ERROR_TYPE_LINK; + mce_err->u.link_error_type = MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT; + break; + case P9_SRR1_MC_IFETCH_RA: + mce_err->error_type = MCE_ERROR_TYPE_RA; + mce_err->u.ra_error_type = MCE_RA_ERROR_IFETCH; + break; + case P9_SRR1_MC_IFETCH_RA_TABLEWALK: + mce_err->error_type = MCE_ERROR_TYPE_RA; + mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH; + break; + case P9_SRR1_MC_IFETCH_RA_ASYNC_STORE: + mce_err->error_type = MCE_ERROR_TYPE_RA; + mce_err->u.ra_error_type = MCE_RA_ERROR_STORE; + break; + case P9_SRR1_MC_IFETCH_LINK_ASYNC_STORE_TIMEOUT: + mce_err->error_type = MCE_ERROR_TYPE_LINK; + mce_err->u.link_error_type = MCE_LINK_ERROR_STORE_TIMEOUT; + break; + case P9_SRR1_MC_IFETCH_RA_TABLEWALK_FOREIGN: + mce_err->error_type = MCE_ERROR_TYPE_RA; + mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN; + break; + default: + break; + } +} + +long __machine_check_early_realmode_p9(struct pt_regs *regs) +{ + uint64_t nip, addr; + long handled; + struct mce_error_info mce_error_info = { 0 }; + + nip = regs->nip; + + if (P9_SRR1_MC_LOADSTORE(regs->msr)) { + handled = mce_handle_derror_p9(regs); + mce_get_derror_p9(regs, &mce_error_info, &addr); + } else { + handled = mce_handle_ierror_p9(regs); + mce_get_ierror_p9(regs, &mce_error_info, &addr); + } + + /* Handle UE error. */ + if (mce_error_info.error_type == MCE_ERROR_TYPE_UE) + handled = mce_handle_ue_error(regs); + + save_mce_event(regs, handled, &mce_error_info, nip, addr); + return handled; +} diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 93cf7a5846a6..84db14e435f5 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -296,7 +296,7 @@ _GLOBAL(flush_instruction_cache) lis r3, KERNELBASE@h iccci 0,r3 #endif -#elif CONFIG_FSL_BOOKE +#elif defined(CONFIG_FSL_BOOKE) BEGIN_FTR_SECTION mfspr r3,SPRN_L1CSR0 ori r3,r3,L1CSR0_CFI|L1CSR0_CLFC @@ -614,7 +614,7 @@ _GLOBAL(start_secondary_resume) _GLOBAL(__main) blr -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE /* * Must be relocatable PIC code callable as a C function. */ diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 4f178671f230..c119044cad0d 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -67,7 +67,7 @@ PPC64_CACHES: * flush all bytes from start through stop-1 inclusive */ -_GLOBAL(flush_icache_range) +_GLOBAL_TOC(flush_icache_range) BEGIN_FTR_SECTION PURGE_PREFETCHED_INS blr @@ -80,12 +80,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) * each other. */ ld r10,PPC64_CACHES@toc(r2) - lwz r7,DCACHEL1LINESIZE(r10)/* Get cache line size */ + lwz r7,DCACHEL1BLOCKSIZE(r10)/* Get cache block size */ addi r5,r7,-1 andc r6,r3,r5 /* round low to line bdy */ subf r8,r6,r4 /* compute length */ add r8,r8,r5 /* ensure we get enough */ - lwz r9,DCACHEL1LOGLINESIZE(r10) /* Get log-2 of cache line size */ + lwz r9,DCACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of cache block size */ srw. r8,r8,r9 /* compute line count */ beqlr /* nothing to do? */ mtctr r8 @@ -96,12 +96,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) /* Now invalidate the instruction cache */ - lwz r7,ICACHEL1LINESIZE(r10) /* Get Icache line size */ + lwz r7,ICACHEL1BLOCKSIZE(r10) /* Get Icache block size */ addi r5,r7,-1 andc r6,r3,r5 /* round low to line bdy */ subf r8,r6,r4 /* compute length */ add r8,r8,r5 - lwz r9,ICACHEL1LOGLINESIZE(r10) /* Get log-2 of Icache line size */ + lwz r9,ICACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of Icache block size */ srw. r8,r8,r9 /* compute line count */ beqlr /* nothing to do? */ mtctr r8 @@ -120,7 +120,7 @@ EXPORT_SYMBOL(flush_icache_range) * * flush all bytes from start to stop-1 inclusive */ -_GLOBAL(flush_dcache_range) +_GLOBAL_TOC(flush_dcache_range) /* * Flush the data cache to memory @@ -128,12 +128,12 @@ _GLOBAL(flush_dcache_range) * Different systems have different cache line sizes */ ld r10,PPC64_CACHES@toc(r2) - lwz r7,DCACHEL1LINESIZE(r10) /* Get dcache line size */ + lwz r7,DCACHEL1BLOCKSIZE(r10) /* Get dcache block size */ addi r5,r7,-1 andc r6,r3,r5 /* round low to line bdy */ subf r8,r6,r4 /* compute length */ add r8,r8,r5 /* ensure we get enough */ - lwz r9,DCACHEL1LOGLINESIZE(r10) /* Get log-2 of dcache line size */ + lwz r9,DCACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of dcache block size */ srw. r8,r8,r9 /* compute line count */ beqlr /* nothing to do? */ mtctr r8 @@ -156,12 +156,12 @@ EXPORT_SYMBOL(flush_dcache_range) */ _GLOBAL(flush_dcache_phys_range) ld r10,PPC64_CACHES@toc(r2) - lwz r7,DCACHEL1LINESIZE(r10) /* Get dcache line size */ + lwz r7,DCACHEL1BLOCKSIZE(r10) /* Get dcache block size */ addi r5,r7,-1 andc r6,r3,r5 /* round low to line bdy */ subf r8,r6,r4 /* compute length */ add r8,r8,r5 /* ensure we get enough */ - lwz r9,DCACHEL1LOGLINESIZE(r10) /* Get log-2 of dcache line size */ + lwz r9,DCACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of dcache block size */ srw. r8,r8,r9 /* compute line count */ beqlr /* nothing to do? */ mfmsr r5 /* Disable MMU Data Relocation */ @@ -184,12 +184,12 @@ _GLOBAL(flush_dcache_phys_range) _GLOBAL(flush_inval_dcache_range) ld r10,PPC64_CACHES@toc(r2) - lwz r7,DCACHEL1LINESIZE(r10) /* Get dcache line size */ + lwz r7,DCACHEL1BLOCKSIZE(r10) /* Get dcache block size */ addi r5,r7,-1 andc r6,r3,r5 /* round low to line bdy */ subf r8,r6,r4 /* compute length */ add r8,r8,r5 /* ensure we get enough */ - lwz r9,DCACHEL1LOGLINESIZE(r10)/* Get log-2 of dcache line size */ + lwz r9,DCACHEL1LOGBLOCKSIZE(r10)/* Get log-2 of dcache block size */ srw. r8,r8,r9 /* compute line count */ beqlr /* nothing to do? */ sync @@ -225,8 +225,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) /* Flush the dcache */ ld r7,PPC64_CACHES@toc(r2) clrrdi r3,r3,PAGE_SHIFT /* Page align */ - lwz r4,DCACHEL1LINESPERPAGE(r7) /* Get # dcache lines per page */ - lwz r5,DCACHEL1LINESIZE(r7) /* Get dcache line size */ + lwz r4,DCACHEL1BLOCKSPERPAGE(r7) /* Get # dcache blocks per page */ + lwz r5,DCACHEL1BLOCKSIZE(r7) /* Get dcache block size */ mr r6,r3 mtctr r4 0: dcbst 0,r6 @@ -236,8 +236,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) /* Now invalidate the icache */ - lwz r4,ICACHEL1LINESPERPAGE(r7) /* Get # icache lines per page */ - lwz r5,ICACHEL1LINESIZE(r7) /* Get icache line size */ + lwz r4,ICACHEL1BLOCKSPERPAGE(r7) /* Get # icache blocks per page */ + lwz r5,ICACHEL1BLOCKSIZE(r7) /* Get icache block size */ mtctr r4 1: icbi 0,r3 add r3,r3,r5 @@ -478,7 +478,7 @@ _GLOBAL(kexec_wait) addi r5,r5,kexec_flag-1b 99: HMT_LOW -#ifdef CONFIG_KEXEC /* use no memory without kexec */ +#ifdef CONFIG_KEXEC_CORE /* use no memory without kexec */ lwz r4,0(r5) cmpwi 0,r4,0 beq 99b @@ -503,7 +503,7 @@ kexec_flag: .long 0 -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE #ifdef CONFIG_PPC_BOOK3E /* * BOOK3E has no real MMU mode, so we have to setup the initial TLB @@ -716,4 +716,4 @@ _GLOBAL(kexec_sequence) mtlr 4 li r5,0 blr /* image->start(physid, image->start, 0); */ -#endif /* CONFIG_KEXEC */ +#endif /* CONFIG_KEXEC_CORE */ diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index 30b89d5cbb03..3f7ba0f5bf29 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -22,7 +22,7 @@ #include <linux/vmalloc.h> #include <linux/bug.h> #include <asm/module.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/firmware.h> #include <linux/sort.h> #include <asm/setup.h> diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 183368e008cf..0b0f89685b67 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -286,14 +286,6 @@ static void dedotify_versions(struct modversion_info *vers, for (end = (void *)vers + size; vers < end; vers++) if (vers->name[0] == '.') { memmove(vers->name, vers->name+1, strlen(vers->name)); -#ifdef ARCH_RELOCATES_KCRCTAB - /* The TOC symbol has no CRC computed. To avoid CRC - * check failing, we must force it to the expected - * value (see CRC check in module.c). - */ - if (!strcmp(vers->name, "TOC.")) - vers->crc = -(unsigned long)reloc_start; -#endif } } @@ -652,6 +644,11 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, *location = value - (unsigned long)location; break; + case R_PPC64_REL32: + /* 32 bits relative (used by relative exception tables) */ + *(u32 *)location = value - (unsigned long)location; + break; + case R_PPC64_TOCSAVE: /* * Marker reloc indicates we don't have to save r2. diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index 34d2c595de23..d5e2b8309939 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -28,7 +28,7 @@ #include <linux/pagemap.h> #include <linux/pstore.h> #include <linux/zlib.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/nvram.h> #include <asm/rtas.h> #include <asm/prom.h> diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c index b60a67d92ebd..34aeac54f120 100644 --- a/arch/powerpc/kernel/of_platform.c +++ b/arch/powerpc/kernel/of_platform.c @@ -114,11 +114,6 @@ static struct platform_driver of_pci_phb_driver = { }, }; -static __init int of_pci_phb_init(void) -{ - return platform_driver_register(&of_pci_phb_driver); -} - -device_initcall(of_pci_phb_init); +builtin_platform_driver(of_pci_phb_driver); #endif /* CONFIG_PPC_OF_PLATFORM_PCI */ diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c new file mode 100644 index 000000000000..2282bf4e63cd --- /dev/null +++ b/arch/powerpc/kernel/optprobes.c @@ -0,0 +1,347 @@ +/* + * Code for Kernel probes Jump optimization. + * + * Copyright 2017, Anju T, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/kprobes.h> +#include <linux/jump_label.h> +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/list.h> +#include <asm/kprobes.h> +#include <asm/ptrace.h> +#include <asm/cacheflush.h> +#include <asm/code-patching.h> +#include <asm/sstep.h> +#include <asm/ppc-opcode.h> + +#define TMPL_CALL_HDLR_IDX \ + (optprobe_template_call_handler - optprobe_template_entry) +#define TMPL_EMULATE_IDX \ + (optprobe_template_call_emulate - optprobe_template_entry) +#define TMPL_RET_IDX \ + (optprobe_template_ret - optprobe_template_entry) +#define TMPL_OP_IDX \ + (optprobe_template_op_address - optprobe_template_entry) +#define TMPL_INSN_IDX \ + (optprobe_template_insn - optprobe_template_entry) +#define TMPL_END_IDX \ + (optprobe_template_end - optprobe_template_entry) + +DEFINE_INSN_CACHE_OPS(ppc_optinsn); + +static bool insn_page_in_use; + +static void *__ppc_alloc_insn_page(void) +{ + if (insn_page_in_use) + return NULL; + insn_page_in_use = true; + return &optinsn_slot; +} + +static void __ppc_free_insn_page(void *page __maybe_unused) +{ + insn_page_in_use = false; +} + +struct kprobe_insn_cache kprobe_ppc_optinsn_slots = { + .mutex = __MUTEX_INITIALIZER(kprobe_ppc_optinsn_slots.mutex), + .pages = LIST_HEAD_INIT(kprobe_ppc_optinsn_slots.pages), + /* insn_size initialized later */ + .alloc = __ppc_alloc_insn_page, + .free = __ppc_free_insn_page, + .nr_garbage = 0, +}; + +/* + * Check if we can optimize this probe. Returns NIP post-emulation if this can + * be optimized and 0 otherwise. + */ +static unsigned long can_optimize(struct kprobe *p) +{ + struct pt_regs regs; + struct instruction_op op; + unsigned long nip = 0; + + /* + * kprobe placed for kretprobe during boot time + * has a 'nop' instruction, which can be emulated. + * So further checks can be skipped. + */ + if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline) + return (unsigned long)p->addr + sizeof(kprobe_opcode_t); + + /* + * We only support optimizing kernel addresses, but not + * module addresses. + * + * FIXME: Optimize kprobes placed in module addresses. + */ + if (!is_kernel_addr((unsigned long)p->addr)) + return 0; + + memset(®s, 0, sizeof(struct pt_regs)); + regs.nip = (unsigned long)p->addr; + regs.trap = 0x0; + regs.msr = MSR_KERNEL; + + /* + * Kprobe placed in conditional branch instructions are + * not optimized, as we can't predict the nip prior with + * dummy pt_regs and can not ensure that the return branch + * from detour buffer falls in the range of address (i.e 32MB). + * A branch back from trampoline is set up in the detour buffer + * to the nip returned by the analyse_instr() here. + * + * Ensure that the instruction is not a conditional branch, + * and that can be emulated. + */ + if (!is_conditional_branch(*p->ainsn.insn) && + analyse_instr(&op, ®s, *p->ainsn.insn)) + nip = regs.nip; + + return nip; +} + +static void optimized_callback(struct optimized_kprobe *op, + struct pt_regs *regs) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + unsigned long flags; + + /* This is possible if op is under delayed unoptimizing */ + if (kprobe_disabled(&op->kp)) + return; + + local_irq_save(flags); + hard_irq_disable(); + + if (kprobe_running()) { + kprobes_inc_nmissed_count(&op->kp); + } else { + __this_cpu_write(current_kprobe, &op->kp); + regs->nip = (unsigned long)op->kp.addr; + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + opt_pre_handler(&op->kp, regs); + __this_cpu_write(current_kprobe, NULL); + } + + /* + * No need for an explicit __hard_irq_enable() here. + * local_irq_restore() will re-enable interrupts, + * if they were hard disabled. + */ + local_irq_restore(flags); +} +NOKPROBE_SYMBOL(optimized_callback); + +void arch_remove_optimized_kprobe(struct optimized_kprobe *op) +{ + if (op->optinsn.insn) { + free_ppc_optinsn_slot(op->optinsn.insn, 1); + op->optinsn.insn = NULL; + } +} + +/* + * emulate_step() requires insn to be emulated as + * second parameter. Load register 'r4' with the + * instruction. + */ +void patch_imm32_load_insns(unsigned int val, kprobe_opcode_t *addr) +{ + /* addis r4,0,(insn)@h */ + *addr++ = PPC_INST_ADDIS | ___PPC_RT(4) | + ((val >> 16) & 0xffff); + + /* ori r4,r4,(insn)@l */ + *addr = PPC_INST_ORI | ___PPC_RA(4) | ___PPC_RS(4) | + (val & 0xffff); +} + +/* + * Generate instructions to load provided immediate 64-bit value + * to register 'r3' and patch these instructions at 'addr'. + */ +void patch_imm64_load_insns(unsigned long val, kprobe_opcode_t *addr) +{ + /* lis r3,(op)@highest */ + *addr++ = PPC_INST_ADDIS | ___PPC_RT(3) | + ((val >> 48) & 0xffff); + + /* ori r3,r3,(op)@higher */ + *addr++ = PPC_INST_ORI | ___PPC_RA(3) | ___PPC_RS(3) | + ((val >> 32) & 0xffff); + + /* rldicr r3,r3,32,31 */ + *addr++ = PPC_INST_RLDICR | ___PPC_RA(3) | ___PPC_RS(3) | + __PPC_SH64(32) | __PPC_ME64(31); + + /* oris r3,r3,(op)@h */ + *addr++ = PPC_INST_ORIS | ___PPC_RA(3) | ___PPC_RS(3) | + ((val >> 16) & 0xffff); + + /* ori r3,r3,(op)@l */ + *addr = PPC_INST_ORI | ___PPC_RA(3) | ___PPC_RS(3) | + (val & 0xffff); +} + +int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) +{ + kprobe_opcode_t *buff, branch_op_callback, branch_emulate_step; + kprobe_opcode_t *op_callback_addr, *emulate_step_addr; + long b_offset; + unsigned long nip; + + kprobe_ppc_optinsn_slots.insn_size = MAX_OPTINSN_SIZE; + + nip = can_optimize(p); + if (!nip) + return -EILSEQ; + + /* Allocate instruction slot for detour buffer */ + buff = get_ppc_optinsn_slot(); + if (!buff) + return -ENOMEM; + + /* + * OPTPROBE uses 'b' instruction to branch to optinsn.insn. + * + * The target address has to be relatively nearby, to permit use + * of branch instruction in powerpc, because the address is specified + * in an immediate field in the instruction opcode itself, ie 24 bits + * in the opcode specify the address. Therefore the address should + * be within 32MB on either side of the current instruction. + */ + b_offset = (unsigned long)buff - (unsigned long)p->addr; + if (!is_offset_in_branch_range(b_offset)) + goto error; + + /* Check if the return address is also within 32MB range */ + b_offset = (unsigned long)(buff + TMPL_RET_IDX) - + (unsigned long)nip; + if (!is_offset_in_branch_range(b_offset)) + goto error; + + /* Setup template */ + memcpy(buff, optprobe_template_entry, + TMPL_END_IDX * sizeof(kprobe_opcode_t)); + + /* + * Fixup the template with instructions to: + * 1. load the address of the actual probepoint + */ + patch_imm64_load_insns((unsigned long)op, buff + TMPL_OP_IDX); + + /* + * 2. branch to optimized_callback() and emulate_step() + */ + kprobe_lookup_name("optimized_callback", op_callback_addr); + kprobe_lookup_name("emulate_step", emulate_step_addr); + if (!op_callback_addr || !emulate_step_addr) { + WARN(1, "kprobe_lookup_name() failed\n"); + goto error; + } + + branch_op_callback = create_branch((unsigned int *)buff + TMPL_CALL_HDLR_IDX, + (unsigned long)op_callback_addr, + BRANCH_SET_LINK); + + branch_emulate_step = create_branch((unsigned int *)buff + TMPL_EMULATE_IDX, + (unsigned long)emulate_step_addr, + BRANCH_SET_LINK); + + if (!branch_op_callback || !branch_emulate_step) + goto error; + + buff[TMPL_CALL_HDLR_IDX] = branch_op_callback; + buff[TMPL_EMULATE_IDX] = branch_emulate_step; + + /* + * 3. load instruction to be emulated into relevant register, and + */ + patch_imm32_load_insns(*p->ainsn.insn, buff + TMPL_INSN_IDX); + + /* + * 4. branch back from trampoline + */ + buff[TMPL_RET_IDX] = create_branch((unsigned int *)buff + TMPL_RET_IDX, + (unsigned long)nip, 0); + + flush_icache_range((unsigned long)buff, + (unsigned long)(&buff[TMPL_END_IDX])); + + op->optinsn.insn = buff; + + return 0; + +error: + free_ppc_optinsn_slot(buff, 0); + return -ERANGE; + +} + +int arch_prepared_optinsn(struct arch_optimized_insn *optinsn) +{ + return optinsn->insn != NULL; +} + +/* + * On powerpc, Optprobes always replaces one instruction (4 bytes + * aligned and 4 bytes long). It is impossible to encounter another + * kprobe in this address range. So always return 0. + */ +int arch_check_optimized_kprobe(struct optimized_kprobe *op) +{ + return 0; +} + +void arch_optimize_kprobes(struct list_head *oplist) +{ + struct optimized_kprobe *op; + struct optimized_kprobe *tmp; + + list_for_each_entry_safe(op, tmp, oplist, list) { + /* + * Backup instructions which will be replaced + * by jump address + */ + memcpy(op->optinsn.copied_insn, op->kp.addr, + RELATIVEJUMP_SIZE); + patch_instruction(op->kp.addr, + create_branch((unsigned int *)op->kp.addr, + (unsigned long)op->optinsn.insn, 0)); + list_del_init(&op->list); + } +} + +void arch_unoptimize_kprobe(struct optimized_kprobe *op) +{ + arch_arm_kprobe(&op->kp); +} + +void arch_unoptimize_kprobes(struct list_head *oplist, + struct list_head *done_list) +{ + struct optimized_kprobe *op; + struct optimized_kprobe *tmp; + + list_for_each_entry_safe(op, tmp, oplist, list) { + arch_unoptimize_kprobe(op); + list_move(&op->list, done_list); + } +} + +int arch_within_optimized_kprobe(struct optimized_kprobe *op, + unsigned long addr) +{ + return ((unsigned long)op->kp.addr <= addr && + (unsigned long)op->kp.addr + RELATIVEJUMP_SIZE > addr); +} diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S new file mode 100644 index 000000000000..4937bef7652f --- /dev/null +++ b/arch/powerpc/kernel/optprobes_head.S @@ -0,0 +1,142 @@ +/* + * Code to prepare detour buffer for optprobes in Kernel. + * + * Copyright 2017, Anju T, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <asm/ppc_asm.h> +#include <asm/ptrace.h> +#include <asm/asm-offsets.h> + +#define OPT_SLOT_SIZE 65536 + + .balign 4 + + /* + * Reserve an area to allocate slots for detour buffer. + * This is part of .text section (rather than vmalloc area) + * as this needs to be within 32MB of the probed address. + */ + .global optinsn_slot +optinsn_slot: + .space OPT_SLOT_SIZE + + /* + * Optprobe template: + * This template gets copied into one of the slots in optinsn_slot + * and gets fixed up with real optprobe structures et al. + */ + .global optprobe_template_entry +optprobe_template_entry: + /* Create an in-memory pt_regs */ + stdu r1,-INT_FRAME_SIZE(r1) + SAVE_GPR(0,r1) + /* Save the previous SP into stack */ + addi r0,r1,INT_FRAME_SIZE + std r0,GPR1(r1) + SAVE_10GPRS(2,r1) + SAVE_10GPRS(12,r1) + SAVE_10GPRS(22,r1) + /* Save SPRS */ + mfmsr r5 + std r5,_MSR(r1) + li r5,0x700 + std r5,_TRAP(r1) + li r5,0 + std r5,ORIG_GPR3(r1) + std r5,RESULT(r1) + mfctr r5 + std r5,_CTR(r1) + mflr r5 + std r5,_LINK(r1) + mfspr r5,SPRN_XER + std r5,_XER(r1) + mfcr r5 + std r5,_CCR(r1) + lbz r5,PACASOFTIRQEN(r13) + std r5,SOFTE(r1) + mfdar r5 + std r5,_DAR(r1) + mfdsisr r5 + std r5,_DSISR(r1) + + /* + * We may get here from a module, so load the kernel TOC in r2. + * The original TOC gets restored when pt_regs is restored + * further below. + */ + ld r2,PACATOC(r13) + + .global optprobe_template_op_address +optprobe_template_op_address: + /* + * Parameters to optimized_callback(): + * 1. optimized_kprobe structure in r3 + */ + nop + nop + nop + nop + nop + /* 2. pt_regs pointer in r4 */ + addi r4,r1,STACK_FRAME_OVERHEAD + + .global optprobe_template_call_handler +optprobe_template_call_handler: + /* Branch to optimized_callback() */ + nop + + /* + * Parameters for instruction emulation: + * 1. Pass SP in register r3. + */ + addi r3,r1,STACK_FRAME_OVERHEAD + + .global optprobe_template_insn +optprobe_template_insn: + /* 2, Pass instruction to be emulated in r4 */ + nop + nop + + .global optprobe_template_call_emulate +optprobe_template_call_emulate: + /* Branch to emulate_step() */ + nop + + /* + * All done. + * Now, restore the registers... + */ + ld r5,_MSR(r1) + mtmsr r5 + ld r5,_CTR(r1) + mtctr r5 + ld r5,_LINK(r1) + mtlr r5 + ld r5,_XER(r1) + mtxer r5 + ld r5,_CCR(r1) + mtcr r5 + ld r5,_DAR(r1) + mtdar r5 + ld r5,_DSISR(r1) + mtdsisr r5 + REST_GPR(0,r1) + REST_10GPRS(2,r1) + REST_10GPRS(12,r1) + REST_10GPRS(22,r1) + /* Restore the previous SP */ + addi r1,r1,INT_FRAME_SIZE + + .global optprobe_template_ret +optprobe_template_ret: + /* ... and jump back from trampoline */ + nop + + .global optprobe_template_end +optprobe_template_end: diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index fa20060ff7a5..dfc479df9634 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -10,6 +10,7 @@ #include <linux/smp.h> #include <linux/export.h> #include <linux/memblock.h> +#include <linux/sched/task.h> #include <asm/lppaca.h> #include <asm/paca.h> diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 74bec5498972..ffda24a38dda 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -25,6 +25,7 @@ #include <linux/of_address.h> #include <linux/of_pci.h> #include <linux/mm.h> +#include <linux/shmem_fs.h> #include <linux/list.h> #include <linux/syscalls.h> #include <linux/irq.h> @@ -59,14 +60,14 @@ resource_size_t isa_mem_base; EXPORT_SYMBOL(isa_mem_base); -static struct dma_map_ops *pci_dma_ops = &dma_direct_ops; +static const struct dma_map_ops *pci_dma_ops = &dma_direct_ops; -void set_pci_dma_ops(struct dma_map_ops *dma_ops) +void set_pci_dma_ops(const struct dma_map_ops *dma_ops) { pci_dma_ops = dma_ops; } -struct dma_map_ops *get_pci_dma_ops(void) +const struct dma_map_ops *get_pci_dma_ops(void) { return pci_dma_ops; } @@ -1559,16 +1560,10 @@ static void pcibios_setup_phb_resources(struct pci_controller *hose, /* Hookup PHB Memory resources */ for (i = 0; i < 3; ++i) { res = &hose->mem_resources[i]; - if (!res->flags) { - if (i == 0) - printk(KERN_ERR "PCI: Memory resource 0 not set for " - "host bridge %s (domain %d)\n", - hose->dn->full_name, hose->global_number); + if (!res->flags) continue; - } - offset = hose->mem_offset[i]; - + offset = hose->mem_offset[i]; pr_debug("PCI: PHB MEM resource %d = %pR off 0x%08llx\n", i, res, (unsigned long long)offset); diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index 678f87a63645..41c86c6b6e4d 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -24,7 +24,7 @@ #include <asm/pci-bridge.h> #include <asm/ppc-pci.h> #include <asm/byteorder.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/machdep.h> #undef DEBUG diff --git a/arch/powerpc/kernel/proc_powerpc.c b/arch/powerpc/kernel/proc_powerpc.c index c30612aad68e..56548bf6231f 100644 --- a/arch/powerpc/kernel/proc_powerpc.c +++ b/arch/powerpc/kernel/proc_powerpc.c @@ -24,7 +24,7 @@ #include <asm/machdep.h> #include <asm/vdso_datapage.h> #include <asm/rtas.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/prom.h> #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 49a680d5ae37..d645da302bf2 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -16,6 +16,9 @@ #include <linux/errno.h> #include <linux/sched.h> +#include <linux/sched/debug.h> +#include <linux/sched/task.h> +#include <linux/sched/task_stack.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/smp.h> @@ -730,6 +733,28 @@ static inline int __set_dabr(unsigned long dabr, unsigned long dabrx) mtspr(SPRN_DABRX, dabrx); return 0; } +#elif defined(CONFIG_PPC_8xx) +static inline int __set_dabr(unsigned long dabr, unsigned long dabrx) +{ + unsigned long addr = dabr & ~HW_BRK_TYPE_DABR; + unsigned long lctrl1 = 0x90000000; /* compare type: equal on E & F */ + unsigned long lctrl2 = 0x8e000002; /* watchpoint 1 on cmp E | F */ + + if ((dabr & HW_BRK_TYPE_RDWR) == HW_BRK_TYPE_READ) + lctrl1 |= 0xa0000; + else if ((dabr & HW_BRK_TYPE_RDWR) == HW_BRK_TYPE_WRITE) + lctrl1 |= 0xf0000; + else if ((dabr & HW_BRK_TYPE_RDWR) == 0) + lctrl2 = 0; + + mtspr(SPRN_LCTRL2, 0); + mtspr(SPRN_CMPE, addr); + mtspr(SPRN_CMPF, addr + 4); + mtspr(SPRN_LCTRL1, lctrl1); + mtspr(SPRN_LCTRL2, lctrl2); + + return 0; +} #else static inline int __set_dabr(unsigned long dabr, unsigned long dabrx) { @@ -1051,14 +1076,6 @@ static inline void save_sprs(struct thread_struct *t) */ t->tar = mfspr(SPRN_TAR); } - - if (cpu_has_feature(CPU_FTR_ARCH_300)) { - /* Conditionally save Load Monitor registers, if enabled */ - if (t->fscr & FSCR_LM) { - t->lmrr = mfspr(SPRN_LMRR); - t->lmser = mfspr(SPRN_LMSER); - } - } #endif } @@ -1094,16 +1111,6 @@ static inline void restore_sprs(struct thread_struct *old_thread, if (old_thread->tar != new_thread->tar) mtspr(SPRN_TAR, new_thread->tar); } - - if (cpu_has_feature(CPU_FTR_ARCH_300)) { - /* Conditionally restore Load Monitor registers, if enabled */ - if (new_thread->fscr & FSCR_LM) { - if (old_thread->lmrr != new_thread->lmrr) - mtspr(SPRN_LMRR, new_thread->lmrr); - if (old_thread->lmser != new_thread->lmser) - mtspr(SPRN_LMSER, new_thread->lmser); - } - } #endif } diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index b0245bed6f54..f5d399e46193 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -156,21 +156,22 @@ static struct ibm_pa_feature { unsigned char pabit; /* bit number (big-endian) */ unsigned char invert; /* if 1, pa bit set => clear feature */ } ibm_pa_features[] __initdata = { - {0, 0, PPC_FEATURE_HAS_MMU, 0, 0, 0, 0}, - {0, 0, PPC_FEATURE_HAS_FPU, 0, 0, 1, 0}, - {CPU_FTR_CTRL, 0, 0, 0, 0, 3, 0}, - {CPU_FTR_NOEXECUTE, 0, 0, 0, 0, 6, 0}, - {CPU_FTR_NODSISRALIGN, 0, 0, 0, 1, 1, 1}, - {0, MMU_FTR_CI_LARGE_PAGE, 0, 0, 1, 2, 0}, - {CPU_FTR_REAL_LE, 0, PPC_FEATURE_TRUE_LE, 0, 5, 0, 0}, + { .pabyte = 0, .pabit = 0, .cpu_user_ftrs = PPC_FEATURE_HAS_MMU }, + { .pabyte = 0, .pabit = 1, .cpu_user_ftrs = PPC_FEATURE_HAS_FPU }, + { .pabyte = 0, .pabit = 3, .cpu_features = CPU_FTR_CTRL }, + { .pabyte = 0, .pabit = 6, .cpu_features = CPU_FTR_NOEXECUTE }, + { .pabyte = 1, .pabit = 2, .mmu_features = MMU_FTR_CI_LARGE_PAGE }, + { .pabyte = 40, .pabit = 0, .mmu_features = MMU_FTR_TYPE_RADIX }, + { .pabyte = 1, .pabit = 1, .invert = 1, .cpu_features = CPU_FTR_NODSISRALIGN }, + { .pabyte = 5, .pabit = 0, .cpu_features = CPU_FTR_REAL_LE, + .cpu_user_ftrs = PPC_FEATURE_TRUE_LE }, /* * If the kernel doesn't support TM (ie CONFIG_PPC_TRANSACTIONAL_MEM=n), * we don't want to turn on TM here, so we use the *_COMP versions * which are 0 if the kernel doesn't support TM. */ - {CPU_FTR_TM_COMP, 0, 0, - PPC_FEATURE2_HTM_COMP|PPC_FEATURE2_HTM_NOSC_COMP, 22, 0, 0}, - {0, MMU_FTR_TYPE_RADIX, 0, 0, 40, 0, 0}, + { .pabyte = 22, .pabit = 0, .cpu_features = CPU_FTR_TM_COMP, + .cpu_user_ftrs2 = PPC_FEATURE2_HTM_COMP | PPC_FEATURE2_HTM_NOSC_COMP }, }; static void __init scan_features(unsigned long node, const unsigned char *ftrs, @@ -427,7 +428,7 @@ static int __init early_init_dt_scan_chosen_ppc(unsigned long node, tce_alloc_end = *lprop; #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE lprop = of_get_flat_dt_prop(node, "linux,crashkernel-base", NULL); if (lprop) crashk_res.start = *lprop; diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 88ac964f4858..1c1b44ec7642 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -168,6 +168,14 @@ static unsigned long __initdata prom_tce_alloc_start; static unsigned long __initdata prom_tce_alloc_end; #endif +static bool __initdata prom_radix_disable; + +struct platform_support { + bool hash_mmu; + bool radix_mmu; + bool radix_gtse; +}; + /* Platforms codes are now obsolete in the kernel. Now only used within this * file and ultimately gone too. Feel free to change them if you need, they * are not shared with anything outside of this file anymore @@ -461,14 +469,14 @@ static int __init prom_next_node(phandle *nodep) } } -static int inline prom_getprop(phandle node, const char *pname, +static inline int prom_getprop(phandle node, const char *pname, void *value, size_t valuelen) { return call_prom("getprop", 4, 1, node, ADDR(pname), (u32)(unsigned long) value, (u32) valuelen); } -static int inline prom_getproplen(phandle node, const char *pname) +static inline int prom_getproplen(phandle node, const char *pname) { return call_prom("getproplen", 2, 1, node, ADDR(pname)); } @@ -626,6 +634,12 @@ static void __init early_cmdline_parse(void) prom_memory_limit = ALIGN(prom_memory_limit, 0x1000000); #endif } + + opt = strstr(prom_cmd_line, "disable_radix"); + if (opt) { + prom_debug("Radix disabled from cmdline\n"); + prom_radix_disable = true; + } } #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) @@ -635,13 +649,7 @@ static void __init early_cmdline_parse(void) * * See prom.h for the definition of the bits specified in the * architecture vector. - * - * Because the description vector contains a mix of byte and word - * values, we declare it as an unsigned char array, and use this - * macro to put word values in. */ -#define W(x) ((x) >> 24) & 0xff, ((x) >> 16) & 0xff, \ - ((x) >> 8) & 0xff, (x) & 0xff /* Firmware expects the value to be n - 1, where n is the # of vectors */ #define NUM_VECTORS(n) ((n) - 1) @@ -652,92 +660,224 @@ static void __init early_cmdline_parse(void) */ #define VECTOR_LENGTH(n) (1 + (n) - 2) -unsigned char ibm_architecture_vec[] = { - W(0xfffe0000), W(0x003a0000), /* POWER5/POWER5+ */ - W(0xffff0000), W(0x003e0000), /* POWER6 */ - W(0xffff0000), W(0x003f0000), /* POWER7 */ - W(0xffff0000), W(0x004b0000), /* POWER8E */ - W(0xffff0000), W(0x004c0000), /* POWER8NVL */ - W(0xffff0000), W(0x004d0000), /* POWER8 */ - W(0xffffffff), W(0x0f000004), /* all 2.07-compliant */ - W(0xffffffff), W(0x0f000003), /* all 2.06-compliant */ - W(0xffffffff), W(0x0f000002), /* all 2.05-compliant */ - W(0xfffffffe), W(0x0f000001), /* all 2.04-compliant and earlier */ - NUM_VECTORS(6), /* 6 option vectors */ - - /* option vector 1: processor architectures supported */ - VECTOR_LENGTH(2), /* length */ - 0, /* don't ignore, don't halt */ - OV1_PPC_2_00 | OV1_PPC_2_01 | OV1_PPC_2_02 | OV1_PPC_2_03 | - OV1_PPC_2_04 | OV1_PPC_2_05 | OV1_PPC_2_06 | OV1_PPC_2_07, +struct option_vector1 { + u8 byte1; + u8 arch_versions; + u8 arch_versions3; +} __packed; + +struct option_vector2 { + u8 byte1; + __be16 reserved; + __be32 real_base; + __be32 real_size; + __be32 virt_base; + __be32 virt_size; + __be32 load_base; + __be32 min_rma; + __be32 min_load; + u8 min_rma_percent; + u8 max_pft_size; +} __packed; + +struct option_vector3 { + u8 byte1; + u8 byte2; +} __packed; + +struct option_vector4 { + u8 byte1; + u8 min_vp_cap; +} __packed; + +struct option_vector5 { + u8 byte1; + u8 byte2; + u8 byte3; + u8 cmo; + u8 associativity; + u8 bin_opts; + u8 micro_checkpoint; + u8 reserved0; + __be32 max_cpus; + __be16 papr_level; + __be16 reserved1; + u8 platform_facilities; + u8 reserved2; + __be16 reserved3; + u8 subprocessors; + u8 byte22; + u8 intarch; + u8 mmu; + u8 hash_ext; + u8 radix_ext; +} __packed; + +struct option_vector6 { + u8 reserved; + u8 secondary_pteg; + u8 os_name; +} __packed; + +struct ibm_arch_vec { + struct { u32 mask, val; } pvrs[12]; + + u8 num_vectors; + + u8 vec1_len; + struct option_vector1 vec1; + + u8 vec2_len; + struct option_vector2 vec2; + + u8 vec3_len; + struct option_vector3 vec3; + + u8 vec4_len; + struct option_vector4 vec4; + + u8 vec5_len; + struct option_vector5 vec5; + + u8 vec6_len; + struct option_vector6 vec6; +} __packed; + +struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = { + .pvrs = { + { + .mask = cpu_to_be32(0xfffe0000), /* POWER5/POWER5+ */ + .val = cpu_to_be32(0x003a0000), + }, + { + .mask = cpu_to_be32(0xffff0000), /* POWER6 */ + .val = cpu_to_be32(0x003e0000), + }, + { + .mask = cpu_to_be32(0xffff0000), /* POWER7 */ + .val = cpu_to_be32(0x003f0000), + }, + { + .mask = cpu_to_be32(0xffff0000), /* POWER8E */ + .val = cpu_to_be32(0x004b0000), + }, + { + .mask = cpu_to_be32(0xffff0000), /* POWER8NVL */ + .val = cpu_to_be32(0x004c0000), + }, + { + .mask = cpu_to_be32(0xffff0000), /* POWER8 */ + .val = cpu_to_be32(0x004d0000), + }, + { + .mask = cpu_to_be32(0xffff0000), /* POWER9 */ + .val = cpu_to_be32(0x004e0000), + }, + { + .mask = cpu_to_be32(0xffffffff), /* all 3.00-compliant */ + .val = cpu_to_be32(0x0f000005), + }, + { + .mask = cpu_to_be32(0xffffffff), /* all 2.07-compliant */ + .val = cpu_to_be32(0x0f000004), + }, + { + .mask = cpu_to_be32(0xffffffff), /* all 2.06-compliant */ + .val = cpu_to_be32(0x0f000003), + }, + { + .mask = cpu_to_be32(0xffffffff), /* all 2.05-compliant */ + .val = cpu_to_be32(0x0f000002), + }, + { + .mask = cpu_to_be32(0xfffffffe), /* all 2.04-compliant and earlier */ + .val = cpu_to_be32(0x0f000001), + }, + }, + + .num_vectors = NUM_VECTORS(6), + .vec1_len = VECTOR_LENGTH(sizeof(struct option_vector1)), + .vec1 = { + .byte1 = 0, + .arch_versions = OV1_PPC_2_00 | OV1_PPC_2_01 | OV1_PPC_2_02 | OV1_PPC_2_03 | + OV1_PPC_2_04 | OV1_PPC_2_05 | OV1_PPC_2_06 | OV1_PPC_2_07, + .arch_versions3 = OV1_PPC_3_00, + }, + + .vec2_len = VECTOR_LENGTH(sizeof(struct option_vector2)), /* option vector 2: Open Firmware options supported */ - VECTOR_LENGTH(33), /* length */ - OV2_REAL_MODE, - 0, 0, - W(0xffffffff), /* real_base */ - W(0xffffffff), /* real_size */ - W(0xffffffff), /* virt_base */ - W(0xffffffff), /* virt_size */ - W(0xffffffff), /* load_base */ - W(256), /* 256MB min RMA */ - W(0xffffffff), /* full client load */ - 0, /* min RMA percentage of total RAM */ - 48, /* max log_2(hash table size) */ + .vec2 = { + .byte1 = OV2_REAL_MODE, + .reserved = 0, + .real_base = cpu_to_be32(0xffffffff), + .real_size = cpu_to_be32(0xffffffff), + .virt_base = cpu_to_be32(0xffffffff), + .virt_size = cpu_to_be32(0xffffffff), + .load_base = cpu_to_be32(0xffffffff), + .min_rma = cpu_to_be32(256), /* 256MB min RMA */ + .min_load = cpu_to_be32(0xffffffff), /* full client load */ + .min_rma_percent = 0, /* min RMA percentage of total RAM */ + .max_pft_size = 48, /* max log_2(hash table size) */ + }, + .vec3_len = VECTOR_LENGTH(sizeof(struct option_vector3)), /* option vector 3: processor options supported */ - VECTOR_LENGTH(2), /* length */ - 0, /* don't ignore, don't halt */ - OV3_FP | OV3_VMX | OV3_DFP, + .vec3 = { + .byte1 = 0, /* don't ignore, don't halt */ + .byte2 = OV3_FP | OV3_VMX | OV3_DFP, + }, + .vec4_len = VECTOR_LENGTH(sizeof(struct option_vector4)), /* option vector 4: IBM PAPR implementation */ - VECTOR_LENGTH(2), /* length */ - 0, /* don't halt */ - OV4_MIN_ENT_CAP, /* minimum VP entitled capacity */ + .vec4 = { + .byte1 = 0, /* don't halt */ + .min_vp_cap = OV4_MIN_ENT_CAP, /* minimum VP entitled capacity */ + }, + .vec5_len = VECTOR_LENGTH(sizeof(struct option_vector5)), /* option vector 5: PAPR/OF options */ - VECTOR_LENGTH(21), /* length */ - 0, /* don't ignore, don't halt */ - OV5_FEAT(OV5_LPAR) | OV5_FEAT(OV5_SPLPAR) | OV5_FEAT(OV5_LARGE_PAGES) | - OV5_FEAT(OV5_DRCONF_MEMORY) | OV5_FEAT(OV5_DONATE_DEDICATE_CPU) | + .vec5 = { + .byte1 = 0, /* don't ignore, don't halt */ + .byte2 = OV5_FEAT(OV5_LPAR) | OV5_FEAT(OV5_SPLPAR) | OV5_FEAT(OV5_LARGE_PAGES) | + OV5_FEAT(OV5_DRCONF_MEMORY) | OV5_FEAT(OV5_DONATE_DEDICATE_CPU) | #ifdef CONFIG_PCI_MSI - /* PCIe/MSI support. Without MSI full PCIe is not supported */ - OV5_FEAT(OV5_MSI), + /* PCIe/MSI support. Without MSI full PCIe is not supported */ + OV5_FEAT(OV5_MSI), #else - 0, + 0, #endif - 0, + .byte3 = 0, + .cmo = #ifdef CONFIG_PPC_SMLPAR - OV5_FEAT(OV5_CMO) | OV5_FEAT(OV5_XCMO), + OV5_FEAT(OV5_CMO) | OV5_FEAT(OV5_XCMO), #else - 0, + 0, #endif - OV5_FEAT(OV5_TYPE1_AFFINITY) | OV5_FEAT(OV5_PRRN), - 0, - 0, - 0, - /* WARNING: The offset of the "number of cores" field below - * must match by the macro below. Update the definition if - * the structure layout changes. - */ -#define IBM_ARCH_VEC_NRCORES_OFFSET 133 - W(NR_CPUS), /* number of cores supported */ - 0, - 0, - 0, - 0, - OV5_FEAT(OV5_PFO_HW_RNG) | OV5_FEAT(OV5_PFO_HW_ENCR) | - OV5_FEAT(OV5_PFO_HW_842), /* Byte 17 */ - 0, /* Byte 18 */ - 0, /* Byte 19 */ - 0, /* Byte 20 */ - OV5_FEAT(OV5_SUB_PROCESSORS), /* Byte 21 */ + .associativity = OV5_FEAT(OV5_TYPE1_AFFINITY) | OV5_FEAT(OV5_PRRN), + .bin_opts = OV5_FEAT(OV5_RESIZE_HPT) | OV5_FEAT(OV5_HP_EVT), + .micro_checkpoint = 0, + .reserved0 = 0, + .max_cpus = cpu_to_be32(NR_CPUS), /* number of cores supported */ + .papr_level = 0, + .reserved1 = 0, + .platform_facilities = OV5_FEAT(OV5_PFO_HW_RNG) | OV5_FEAT(OV5_PFO_HW_ENCR) | OV5_FEAT(OV5_PFO_HW_842), + .reserved2 = 0, + .reserved3 = 0, + .subprocessors = 1, + .intarch = 0, + .mmu = 0, + .hash_ext = 0, + .radix_ext = 0, + }, /* option vector 6: IBM PAPR hints */ - VECTOR_LENGTH(3), /* length */ - 0, - 0, - OV6_LINUX, + .vec6_len = VECTOR_LENGTH(sizeof(struct option_vector6)), + .vec6 = { + .reserved = 0, + .secondary_pteg = 0, + .os_name = OV6_LINUX, + }, }; /* Old method - ELF header with PT_NOTE sections only works on BE */ @@ -867,13 +1007,101 @@ static int __init prom_count_smt_threads(void) } +static void __init prom_parse_mmu_model(u8 val, + struct platform_support *support) +{ + switch (val) { + case OV5_FEAT(OV5_MMU_DYNAMIC): + case OV5_FEAT(OV5_MMU_EITHER): /* Either Available */ + prom_debug("MMU - either supported\n"); + support->radix_mmu = !prom_radix_disable; + support->hash_mmu = true; + break; + case OV5_FEAT(OV5_MMU_RADIX): /* Only Radix */ + prom_debug("MMU - radix only\n"); + if (prom_radix_disable) { + /* + * If we __have__ to do radix, we're better off ignoring + * the command line rather than not booting. + */ + prom_printf("WARNING: Ignoring cmdline option disable_radix\n"); + } + support->radix_mmu = true; + break; + case OV5_FEAT(OV5_MMU_HASH): + prom_debug("MMU - hash only\n"); + support->hash_mmu = true; + break; + default: + prom_debug("Unknown mmu support option: 0x%x\n", val); + break; + } +} + +static void __init prom_parse_platform_support(u8 index, u8 val, + struct platform_support *support) +{ + switch (index) { + case OV5_INDX(OV5_MMU_SUPPORT): /* MMU Model */ + prom_parse_mmu_model(val & OV5_FEAT(OV5_MMU_SUPPORT), support); + break; + case OV5_INDX(OV5_RADIX_GTSE): /* Radix Extensions */ + if (val & OV5_FEAT(OV5_RADIX_GTSE)) { + prom_debug("Radix - GTSE supported\n"); + support->radix_gtse = true; + } + break; + } +} + +static void __init prom_check_platform_support(void) +{ + struct platform_support supported = { + .hash_mmu = false, + .radix_mmu = false, + .radix_gtse = false + }; + int prop_len = prom_getproplen(prom.chosen, + "ibm,arch-vec-5-platform-support"); + if (prop_len > 1) { + int i; + u8 vec[prop_len]; + prom_debug("Found ibm,arch-vec-5-platform-support, len: %d\n", + prop_len); + prom_getprop(prom.chosen, "ibm,arch-vec-5-platform-support", + &vec, sizeof(vec)); + for (i = 0; i < prop_len; i += 2) { + prom_debug("%d: index = 0x%x val = 0x%x\n", i / 2 + , vec[i] + , vec[i + 1]); + prom_parse_platform_support(vec[i], vec[i + 1], + &supported); + } + } + + if (supported.radix_mmu && supported.radix_gtse) { + /* Radix preferred - but we require GTSE for now */ + prom_debug("Asking for radix with GTSE\n"); + ibm_architecture_vec.vec5.mmu = OV5_FEAT(OV5_MMU_RADIX); + ibm_architecture_vec.vec5.radix_ext = OV5_FEAT(OV5_RADIX_GTSE); + } else if (supported.hash_mmu) { + /* Default to hash mmu (if we can) */ + prom_debug("Asking for hash\n"); + ibm_architecture_vec.vec5.mmu = OV5_FEAT(OV5_MMU_HASH); + } else { + /* We're probably on a legacy hypervisor */ + prom_debug("Assuming legacy hash support\n"); + } +} static void __init prom_send_capabilities(void) { ihandle root; prom_arg_t ret; u32 cores; - unsigned char *ptcores; + + /* Check ibm,arch-vec-5-platform-support and fixup vec5 if required */ + prom_check_platform_support(); root = call_prom("open", 1, 1, ADDR("/")); if (root != 0) { @@ -884,37 +1112,18 @@ static void __init prom_send_capabilities(void) * divide NR_CPUS. */ - /* The core value may start at an odd address. If such a word - * access is made at a cache line boundary, this leads to an - * exception which may not be handled at this time. - * Forcing a per byte access to avoid exception. - */ - ptcores = &ibm_architecture_vec[IBM_ARCH_VEC_NRCORES_OFFSET]; - cores = 0; - cores |= ptcores[0] << 24; - cores |= ptcores[1] << 16; - cores |= ptcores[2] << 8; - cores |= ptcores[3]; - if (cores != NR_CPUS) { - prom_printf("WARNING ! " - "ibm_architecture_vec structure inconsistent: %lu!\n", - cores); - } else { - cores = DIV_ROUND_UP(NR_CPUS, prom_count_smt_threads()); - prom_printf("Max number of cores passed to firmware: %lu (NR_CPUS = %lu)\n", - cores, NR_CPUS); - ptcores[0] = (cores >> 24) & 0xff; - ptcores[1] = (cores >> 16) & 0xff; - ptcores[2] = (cores >> 8) & 0xff; - ptcores[3] = cores & 0xff; - } + cores = DIV_ROUND_UP(NR_CPUS, prom_count_smt_threads()); + prom_printf("Max number of cores passed to firmware: %lu (NR_CPUS = %lu)\n", + cores, NR_CPUS); + + ibm_architecture_vec.vec5.max_cpus = cpu_to_be32(cores); /* try calling the ibm,client-architecture-support method */ prom_printf("Calling ibm,client-architecture-support..."); if (call_prom_ret("call-method", 3, 2, &ret, ADDR("ibm,client-architecture-support"), root, - ADDR(ibm_architecture_vec)) == 0) { + ADDR(&ibm_architecture_vec)) == 0) { /* the call exists... */ if (ret) prom_printf("\nWARNING: ibm,client-architecture" @@ -2747,6 +2956,9 @@ static void __init prom_find_boot_cpu(void) cpu_pkg = call_prom("instance-to-package", 1, 1, prom_cpu); + if (!PHANDLE_VALID(cpu_pkg)) + return; + prom_getprop(cpu_pkg, "reg", &rval, sizeof(rval)); prom.cpu = be32_to_cpu(rval); @@ -2887,6 +3099,11 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, */ prom_check_initrd(r3, r4); + /* + * Do early parsing of command line + */ + early_cmdline_parse(); + #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) /* * On pSeries, inform the firmware about our capabilities @@ -2903,11 +3120,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, copy_and_flush(0, kbase, 0x100, 0); /* - * Do early parsing of command line - */ - early_cmdline_parse(); - - /* * Initialize memory management within prom_init */ prom_init_mem(); diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index b1ec62f2cc31..925a4ef90559 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -34,7 +34,7 @@ #include <linux/perf_event.h> #include <linux/context_tracking.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/page.h> #include <asm/pgtable.h> #include <asm/switch_to.h> @@ -463,6 +463,10 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset, flush_fp_to_thread(target); + for (i = 0; i < 32 ; i++) + buf[i] = target->thread.TS_FPR(i); + buf[32] = target->thread.fp_state.fpscr; + /* copy to local buffer then write that out */ i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1); if (i) @@ -672,6 +676,9 @@ static int vsr_set(struct task_struct *target, const struct user_regset *regset, flush_altivec_to_thread(target); flush_vsx_to_thread(target); + for (i = 0; i < 32 ; i++) + buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET]; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, 32 * sizeof(double)); if (!ret) @@ -1019,6 +1026,10 @@ static int tm_cfpr_set(struct task_struct *target, flush_fp_to_thread(target); flush_altivec_to_thread(target); + for (i = 0; i < 32; i++) + buf[i] = target->thread.TS_CKFPR(i); + buf[32] = target->thread.ckfp_state.fpscr; + /* copy to local buffer then write that out */ i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1); if (i) @@ -1283,6 +1294,9 @@ static int tm_cvsx_set(struct task_struct *target, flush_altivec_to_thread(target); flush_vsx_to_thread(target); + for (i = 0; i < 32 ; i++) + buf[i] = target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET]; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, 32 * sizeof(double)); if (!ret) diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c index 010b7b310237..f37eb53de1a1 100644 --- a/arch/powerpc/kernel/ptrace32.c +++ b/arch/powerpc/kernel/ptrace32.c @@ -29,7 +29,7 @@ #include <linux/signal.h> #include <linux/compat.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/page.h> #include <asm/pgtable.h> #include <asm/switch_to.h> @@ -73,7 +73,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, if (get_user(addrOthers, (u32 __user * __user *)addr) != 0) break; - copied = access_process_vm(child, (u64)addrOthers, &tmp, + copied = ptrace_access_vm(child, (u64)addrOthers, &tmp, sizeof(tmp), FOLL_FORCE); if (copied != sizeof(tmp)) break; @@ -178,7 +178,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, if (get_user(addrOthers, (u32 __user * __user *)addr) != 0) break; ret = 0; - if (access_process_vm(child, (u64)addrOthers, &tmp, + if (ptrace_access_vm(child, (u64)addrOthers, &tmp, sizeof(tmp), FOLL_FORCE | FOLL_WRITE) == sizeof(tmp)) break; diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c index c82eed97bd22..df56dfc4b681 100644 --- a/arch/powerpc/kernel/rtas-proc.c +++ b/arch/powerpc/kernel/rtas-proc.c @@ -24,7 +24,7 @@ #include <linux/bitops.h> #include <linux/rtc.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/processor.h> #include <asm/io.h> #include <asm/prom.h> diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 6a3e5de544ce..b8a4987f58cf 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -35,7 +35,7 @@ #include <asm/page.h> #include <asm/param.h> #include <asm/delay.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/udbg.h> #include <asm/syscalls.h> #include <asm/smp.h> @@ -1145,31 +1145,29 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs) void __init rtas_initialize(void) { unsigned long rtas_region = RTAS_INSTANTIATE_MAX; + u32 base, size, entry; + int no_base, no_size, no_entry; /* Get RTAS dev node and fill up our "rtas" structure with infos * about it. */ rtas.dev = of_find_node_by_name(NULL, "rtas"); - if (rtas.dev) { - const __be32 *basep, *entryp, *sizep; - - basep = of_get_property(rtas.dev, "linux,rtas-base", NULL); - sizep = of_get_property(rtas.dev, "rtas-size", NULL); - if (basep != NULL && sizep != NULL) { - rtas.base = __be32_to_cpu(*basep); - rtas.size = __be32_to_cpu(*sizep); - entryp = of_get_property(rtas.dev, - "linux,rtas-entry", NULL); - if (entryp == NULL) /* Ugh */ - rtas.entry = rtas.base; - else - rtas.entry = __be32_to_cpu(*entryp); - } else - rtas.dev = NULL; - } if (!rtas.dev) return; + no_base = of_property_read_u32(rtas.dev, "linux,rtas-base", &base); + no_size = of_property_read_u32(rtas.dev, "rtas-size", &size); + if (no_base || no_size) { + of_node_put(rtas.dev); + rtas.dev = NULL; + return; + } + + rtas.base = base; + rtas.size = size; + no_entry = of_property_read_u32(rtas.dev, "linux,rtas-entry", &entry); + rtas.entry = no_entry ? rtas.base : entry; + /* If RTAS was found, allocate the RMO buffer for it and look for * the stop-self token if any */ diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index db2b482af658..f6f6a8a5103a 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -19,7 +19,7 @@ #include <linux/proc_fs.h> #include <linux/reboot.h> #include <asm/delay.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/rtas.h> #define MODULE_VERS "1.0" diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index a26a02006576..3650732639ed 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -21,8 +21,9 @@ #include <linux/cpu.h> #include <linux/workqueue.h> #include <linux/slab.h> +#include <linux/topology.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/io.h> #include <asm/rtas.h> #include <asm/prom.h> @@ -282,6 +283,7 @@ static void prrn_work_fn(struct work_struct *work) * the RTAS event. */ pseries_devicetree_update(-prrn_update_scope); + arch_update_cpu_topology(); } static DECLARE_WORK(prrn_work, prrn_work_fn); @@ -434,7 +436,10 @@ static void do_event_scan(void) } if (error == 0) { - pSeries_log_error(logdata, ERR_TYPE_RTAS_LOG, 0); + if (rtas_error_type((struct rtas_error_log *)logdata) != + RTAS_TYPE_PRRN) + pSeries_log_error(logdata, ERR_TYPE_RTAS_LOG, + 0); handle_rtas_event((struct rtas_error_log *)logdata); } diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 270ee30abdcf..4697da895133 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -87,6 +87,15 @@ EXPORT_SYMBOL(machine_id); int boot_cpuid = -1; EXPORT_SYMBOL_GPL(boot_cpuid); +/* + * These are used in binfmt_elf.c to put aux entries on the stack + * for each elf executable being started. + */ +int dcache_bsize; +int icache_bsize; +int ucache_bsize; + + unsigned long klimit = (unsigned long) _end; /* @@ -915,7 +924,7 @@ void __init setup_arch(char **cmdline_p) init_mm.context.pte_frag = NULL; #endif #ifdef CONFIG_SPAPR_TCE_IOMMU - mm_iommu_init(&init_mm.context); + mm_iommu_init(&init_mm); #endif irqstack_early_init(); exc_lvl_early_init(); diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 5fe79182f0fa..2f88f6cf1a42 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -29,7 +29,7 @@ #include <asm/bootx.h> #include <asm/btext.h> #include <asm/machdep.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/pmac_feature.h> #include <asm/sections.h> #include <asm/nvram.h> @@ -59,14 +59,6 @@ EXPORT_SYMBOL(DMA_MODE_READ); EXPORT_SYMBOL(DMA_MODE_WRITE); /* - * These are used in binfmt_elf.c to put aux entries on the stack - * for each elf executable being started. - */ -int dcache_bsize; -int icache_bsize; -int ucache_bsize; - -/* * We're called here very early in the boot. * * Note that the kernel may be running at an address which is different diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 8d586cff8a41..f997154dfc41 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -77,25 +77,18 @@ int spinning_secondaries; u64 ppc64_pft_size; -/* Pick defaults since we might want to patch instructions - * before we've read this from the device tree. - */ struct ppc64_caches ppc64_caches = { - .dline_size = 0x40, - .log_dline_size = 6, - .iline_size = 0x40, - .log_iline_size = 6 + .l1d = { + .block_size = 0x40, + .log_block_size = 6, + }, + .l1i = { + .block_size = 0x40, + .log_block_size = 6 + }, }; EXPORT_SYMBOL_GPL(ppc64_caches); -/* - * These are used in binfmt_elf.c to put aux entries on the stack - * for each elf executable being started. - */ -int dcache_bsize; -int icache_bsize; -int ucache_bsize; - #if defined(CONFIG_PPC_BOOK3E) && defined(CONFIG_SMP) void __init setup_tlb_core_data(void) { @@ -120,14 +113,12 @@ void __init setup_tlb_core_data(void) * If we have threads, we need either tlbsrx. * or e6500 tablewalk mode, or else TLB handlers * will be racy and could produce duplicate entries. + * Should we panic instead? */ - if (smt_enabled_at_boot >= 2 && - !mmu_has_feature(MMU_FTR_USE_TLBRSRV) && - book3e_htw_mode != PPC_HTW_E6500) { - /* Should we panic instead? */ - WARN_ONCE("%s: unsupported MMU configuration -- expect problems\n", - __func__); - } + WARN_ONCE(smt_enabled_at_boot >= 2 && + !mmu_has_feature(MMU_FTR_USE_TLBRSRV) && + book3e_htw_mode != PPC_HTW_E6500, + "%s: unsupported MMU configuration\n", __func__); } } #endif @@ -245,6 +236,15 @@ static void cpu_ready_for_interrupts(void) mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3); } + /* + * Fixup HFSCR:TM based on CPU features. The bit is set by our + * early asm init because at that point we haven't updated our + * CPU features from firmware and device-tree. Here we have, + * so let's do it. + */ + if (cpu_has_feature(CPU_FTR_HVMODE) && !cpu_has_feature(CPU_FTR_TM_COMP)) + mtspr(SPRN_HFSCR, mfspr(SPRN_HFSCR) & ~HFSCR_TM); + /* Set IR and DR in PACA MSR */ get_paca()->kernel_msr = MSR_KERNEL; } @@ -354,7 +354,7 @@ void early_setup_secondary(void) #endif /* CONFIG_SMP */ -#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC) +#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE) static bool use_spinloop(void) { if (!IS_ENABLED(CONFIG_PPC_BOOK3E)) @@ -399,7 +399,7 @@ void smp_release_cpus(void) DBG(" <- smp_release_cpus()\n"); } -#endif /* CONFIG_SMP || CONFIG_KEXEC */ +#endif /* CONFIG_SMP || CONFIG_KEXEC_CORE */ /* * Initialize some remaining members of the ppc64_caches and systemcfg @@ -408,74 +408,138 @@ void smp_release_cpus(void) * cache informations about the CPU that will be used by cache flush * routines and/or provided to userland */ + +static void init_cache_info(struct ppc_cache_info *info, u32 size, u32 lsize, + u32 bsize, u32 sets) +{ + info->size = size; + info->sets = sets; + info->line_size = lsize; + info->block_size = bsize; + info->log_block_size = __ilog2(bsize); + if (bsize) + info->blocks_per_page = PAGE_SIZE / bsize; + else + info->blocks_per_page = 0; + + if (sets == 0) + info->assoc = 0xffff; + else + info->assoc = size / (sets * lsize); +} + +static bool __init parse_cache_info(struct device_node *np, + bool icache, + struct ppc_cache_info *info) +{ + static const char *ipropnames[] __initdata = { + "i-cache-size", + "i-cache-sets", + "i-cache-block-size", + "i-cache-line-size", + }; + static const char *dpropnames[] __initdata = { + "d-cache-size", + "d-cache-sets", + "d-cache-block-size", + "d-cache-line-size", + }; + const char **propnames = icache ? ipropnames : dpropnames; + const __be32 *sizep, *lsizep, *bsizep, *setsp; + u32 size, lsize, bsize, sets; + bool success = true; + + size = 0; + sets = -1u; + lsize = bsize = cur_cpu_spec->dcache_bsize; + sizep = of_get_property(np, propnames[0], NULL); + if (sizep != NULL) + size = be32_to_cpu(*sizep); + setsp = of_get_property(np, propnames[1], NULL); + if (setsp != NULL) + sets = be32_to_cpu(*setsp); + bsizep = of_get_property(np, propnames[2], NULL); + lsizep = of_get_property(np, propnames[3], NULL); + if (bsizep == NULL) + bsizep = lsizep; + if (lsizep != NULL) + lsize = be32_to_cpu(*lsizep); + if (bsizep != NULL) + bsize = be32_to_cpu(*bsizep); + if (sizep == NULL || bsizep == NULL || lsizep == NULL) + success = false; + + /* + * OF is weird .. it represents fully associative caches + * as "1 way" which doesn't make much sense and doesn't + * leave room for direct mapped. We'll assume that 0 + * in OF means direct mapped for that reason. + */ + if (sets == 1) + sets = 0; + else if (sets == 0) + sets = 1; + + init_cache_info(info, size, lsize, bsize, sets); + + return success; +} + void __init initialize_cache_info(void) { - struct device_node *np; - unsigned long num_cpus = 0; + struct device_node *cpu = NULL, *l2, *l3 = NULL; + u32 pvr; DBG(" -> initialize_cache_info()\n"); - for_each_node_by_type(np, "cpu") { - num_cpus += 1; + /* + * All shipping POWER8 machines have a firmware bug that + * puts incorrect information in the device-tree. This will + * be (hopefully) fixed for future chips but for now hard + * code the values if we are running on one of these + */ + pvr = PVR_VER(mfspr(SPRN_PVR)); + if (pvr == PVR_POWER8 || pvr == PVR_POWER8E || + pvr == PVR_POWER8NVL) { + /* size lsize blk sets */ + init_cache_info(&ppc64_caches.l1i, 0x8000, 128, 128, 32); + init_cache_info(&ppc64_caches.l1d, 0x10000, 128, 128, 64); + init_cache_info(&ppc64_caches.l2, 0x80000, 128, 0, 512); + init_cache_info(&ppc64_caches.l3, 0x800000, 128, 0, 8192); + } else + cpu = of_find_node_by_type(NULL, "cpu"); + + /* + * We're assuming *all* of the CPUs have the same + * d-cache and i-cache sizes... -Peter + */ + if (cpu) { + if (!parse_cache_info(cpu, false, &ppc64_caches.l1d)) + DBG("Argh, can't find dcache properties !\n"); + + if (!parse_cache_info(cpu, true, &ppc64_caches.l1i)) + DBG("Argh, can't find icache properties !\n"); /* - * We're assuming *all* of the CPUs have the same - * d-cache and i-cache sizes... -Peter + * Try to find the L2 and L3 if any. Assume they are + * unified and use the D-side properties. */ - if (num_cpus == 1) { - const __be32 *sizep, *lsizep; - u32 size, lsize; - - size = 0; - lsize = cur_cpu_spec->dcache_bsize; - sizep = of_get_property(np, "d-cache-size", NULL); - if (sizep != NULL) - size = be32_to_cpu(*sizep); - lsizep = of_get_property(np, "d-cache-block-size", - NULL); - /* fallback if block size missing */ - if (lsizep == NULL) - lsizep = of_get_property(np, - "d-cache-line-size", - NULL); - if (lsizep != NULL) - lsize = be32_to_cpu(*lsizep); - if (sizep == NULL || lsizep == NULL) - DBG("Argh, can't find dcache properties ! " - "sizep: %p, lsizep: %p\n", sizep, lsizep); - - ppc64_caches.dsize = size; - ppc64_caches.dline_size = lsize; - ppc64_caches.log_dline_size = __ilog2(lsize); - ppc64_caches.dlines_per_page = PAGE_SIZE / lsize; - - size = 0; - lsize = cur_cpu_spec->icache_bsize; - sizep = of_get_property(np, "i-cache-size", NULL); - if (sizep != NULL) - size = be32_to_cpu(*sizep); - lsizep = of_get_property(np, "i-cache-block-size", - NULL); - if (lsizep == NULL) - lsizep = of_get_property(np, - "i-cache-line-size", - NULL); - if (lsizep != NULL) - lsize = be32_to_cpu(*lsizep); - if (sizep == NULL || lsizep == NULL) - DBG("Argh, can't find icache properties ! " - "sizep: %p, lsizep: %p\n", sizep, lsizep); - - ppc64_caches.isize = size; - ppc64_caches.iline_size = lsize; - ppc64_caches.log_iline_size = __ilog2(lsize); - ppc64_caches.ilines_per_page = PAGE_SIZE / lsize; + l2 = of_find_next_cache_node(cpu); + of_node_put(cpu); + if (l2) { + parse_cache_info(l2, false, &ppc64_caches.l2); + l3 = of_find_next_cache_node(l2); + of_node_put(l2); + } + if (l3) { + parse_cache_info(l3, false, &ppc64_caches.l3); + of_node_put(l3); } } /* For use by binfmt_elf */ - dcache_bsize = ppc64_caches.dline_size; - icache_bsize = ppc64_caches.iline_size; + dcache_bsize = ppc64_caches.l1d.block_size; + icache_bsize = ppc64_caches.l1i.block_size; DBG(" <- initialize_cache_info()\n"); } diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index bbe77aed198d..3a3671172436 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -15,7 +15,7 @@ #include <linux/key.h> #include <linux/context_tracking.h> #include <asm/hw_breakpoint.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/unistd.h> #include <asm/debug.h> #include <asm/tm.h> diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 27aa913ac91d..97bb1385e771 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -37,7 +37,7 @@ #include <linux/binfmts.h> #endif -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/cacheflush.h> #include <asm/syscalls.h> #include <asm/sigcontext.h> diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 96698fdf93b4..c83c115858c1 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -27,7 +27,7 @@ #include <asm/sigcontext.h> #include <asm/ucontext.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/pgtable.h> #include <asm/unistd.h> #include <asm/cacheflush.h> diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 9c6f3fd58059..46f89e66a273 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -19,7 +19,8 @@ #include <linux/kernel.h> #include <linux/export.h> -#include <linux/sched.h> +#include <linux/sched/mm.h> +#include <linux/sched/topology.h> #include <linux/smp.h> #include <linux/interrupt.h> #include <linux/delay.h> @@ -193,7 +194,7 @@ int smp_request_message_ipi(int virq, int msg) if (msg < 0 || msg > PPC_MSG_DEBUGGER_BREAK) { return -EINVAL; } -#if !defined(CONFIG_DEBUGGER) && !defined(CONFIG_KEXEC) +#if !defined(CONFIG_DEBUGGER) && !defined(CONFIG_KEXEC_CORE) if (msg == PPC_MSG_DEBUGGER_BREAK) { return 1; } @@ -325,7 +326,7 @@ void tick_broadcast(const struct cpumask *mask) } #endif -#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC) +#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE) void smp_send_debugger_break(void) { int cpu; @@ -340,7 +341,7 @@ void smp_send_debugger_break(void) } #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) { crash_ipi_function_ptr = crash_ipi_callback; @@ -707,7 +708,7 @@ void start_secondary(void *unused) unsigned int cpu = smp_processor_id(); int i, base; - atomic_inc(&init_mm.mm_count); + mmgrab(&init_mm); current->active_mm = &init_mm; smp_store_cpu_info(cpu); @@ -795,7 +796,7 @@ void __init smp_cpus_done(unsigned int max_cpus) * se we pin us down to CPU 0 for a short while */ alloc_cpumask_var(&old_mask, GFP_NOWAIT); - cpumask_copy(old_mask, tsk_cpus_allowed(current)); + cpumask_copy(old_mask, ¤t->cpus_allowed); set_cpus_allowed_ptr(current, cpumask_of(boot_cpuid)); if (smp_ops && smp_ops->setup_cpu) diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index 4f24606afc3f..66711958493c 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -12,6 +12,7 @@ #include <linux/export.h> #include <linux/sched.h> +#include <linux/sched/debug.h> #include <linux/stacktrace.h> #include <asm/ptrace.h> #include <asm/processor.h> diff --git a/arch/powerpc/kernel/swsusp_64.c b/arch/powerpc/kernel/swsusp_64.c index 0e899e47c325..51db012808f5 100644 --- a/arch/powerpc/kernel/swsusp_64.c +++ b/arch/powerpc/kernel/swsusp_64.c @@ -10,6 +10,7 @@ #include <linux/irq.h> #include <linux/sched.h> #include <linux/interrupt.h> +#include <linux/nmi.h> void do_after_copyback(void) { diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index 8a285876aef8..15f216d022e2 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -44,7 +44,7 @@ #include <asm/ptrace.h> #include <asm/types.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/unistd.h> #include <asm/time.h> #include <asm/mmu_context.h> diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index 644cce3d8dce..de04c9fbb5cd 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -36,7 +36,7 @@ #include <linux/file.h> #include <linux/personality.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/syscalls.h> #include <asm/time.h> #include <asm/unistd.h> diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index c4f1d1f7bae0..c1fb255a60d6 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -703,7 +703,7 @@ static struct device_attribute pa6t_attrs[] = { #endif /* HAS_PPC_PMC_PA6T */ #endif /* HAS_PPC_PMC_CLASSIC */ -static void register_cpu_online(unsigned int cpu) +static int register_cpu_online(unsigned int cpu) { struct cpu *c = &per_cpu(cpu_devices, cpu); struct device *s = &c->dev; @@ -782,11 +782,12 @@ static void register_cpu_online(unsigned int cpu) } #endif cacheinfo_cpu_online(cpu); + return 0; } -#ifdef CONFIG_HOTPLUG_CPU -static void unregister_cpu_online(unsigned int cpu) +static int unregister_cpu_online(unsigned int cpu) { +#ifdef CONFIG_HOTPLUG_CPU struct cpu *c = &per_cpu(cpu_devices, cpu); struct device *s = &c->dev; struct device_attribute *attrs, *pmc_attrs; @@ -863,6 +864,8 @@ static void unregister_cpu_online(unsigned int cpu) } #endif cacheinfo_cpu_offline(cpu); +#endif /* CONFIG_HOTPLUG_CPU */ + return 0; } #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE @@ -883,32 +886,6 @@ ssize_t arch_cpu_release(const char *buf, size_t count) } #endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */ -#endif /* CONFIG_HOTPLUG_CPU */ - -static int sysfs_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - unsigned int cpu = (unsigned int)(long)hcpu; - - switch (action) { - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - register_cpu_online(cpu); - break; -#ifdef CONFIG_HOTPLUG_CPU - case CPU_DEAD: - case CPU_DEAD_FROZEN: - unregister_cpu_online(cpu); - break; -#endif - } - return NOTIFY_OK; -} - -static struct notifier_block sysfs_cpu_nb = { - .notifier_call = sysfs_cpu_notify, -}; - static DEFINE_MUTEX(cpu_mutex); int cpu_add_dev_attr(struct device_attribute *attr) @@ -1023,12 +1000,10 @@ static DEVICE_ATTR(physical_id, 0444, show_physical_id, NULL); static int __init topology_init(void) { - int cpu; + int cpu, r; register_nodes(); - cpu_notifier_register_begin(); - for_each_possible_cpu(cpu) { struct cpu *c = &per_cpu(cpu_devices, cpu); @@ -1047,15 +1022,10 @@ static int __init topology_init(void) device_create_file(&c->dev, &dev_attr_physical_id); } - - if (cpu_online(cpu)) - register_cpu_online(cpu); } - - __register_cpu_notifier(&sysfs_cpu_nb); - - cpu_notifier_register_done(); - + r = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powerpc/topology:online", + register_cpu_online, unregister_cpu_online); + WARN_ON(r < 0); #ifdef CONFIG_PPC64 sysfs_create_dscr_default(); #endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index bc3f7d0d7b79..07b90725855e 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -34,6 +34,7 @@ #include <linux/errno.h> #include <linux/export.h> #include <linux/sched.h> +#include <linux/sched/clock.h> #include <linux/kernel.h> #include <linux/param.h> #include <linux/string.h> @@ -57,6 +58,7 @@ #include <linux/clk-provider.h> #include <linux/suspend.h> #include <linux/rtc.h> +#include <linux/sched/cputime.h> #include <asm/trace.h> #include <asm/io.h> @@ -64,7 +66,7 @@ #include <asm/nvram.h> #include <asm/cache.h> #include <asm/machdep.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/time.h> #include <asm/prom.h> #include <asm/irq.h> @@ -72,7 +74,6 @@ #include <asm/smp.h> #include <asm/vdso_datapage.h> #include <asm/firmware.h> -#include <asm/cputime.h> #include <asm/asm-prototypes.h> /* powerpc clocksource/clockevent code */ @@ -80,7 +81,7 @@ #include <linux/clockchips.h> #include <linux/timekeeper_internal.h> -static cycle_t rtc_read(struct clocksource *); +static u64 rtc_read(struct clocksource *); static struct clocksource clocksource_rtc = { .name = "rtc", .rating = 400, @@ -89,7 +90,7 @@ static struct clocksource clocksource_rtc = { .read = rtc_read, }; -static cycle_t timebase_read(struct clocksource *); +static u64 timebase_read(struct clocksource *); static struct clocksource clocksource_timebase = { .name = "timebase", .rating = 400, @@ -152,22 +153,11 @@ EXPORT_SYMBOL_GPL(ppc_tb_freq); #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE /* - * Factors for converting from cputime_t (timebase ticks) to - * jiffies, microseconds, seconds, and clock_t (1/USER_HZ seconds). - * These are all stored as 0.64 fixed-point binary fractions. + * Factor for converting from cputime_t (timebase ticks) to + * microseconds. This is stored as 0.64 fixed-point binary fraction. */ -u64 __cputime_jiffies_factor; -EXPORT_SYMBOL(__cputime_jiffies_factor); u64 __cputime_usec_factor; EXPORT_SYMBOL(__cputime_usec_factor); -u64 __cputime_sec_factor; -EXPORT_SYMBOL(__cputime_sec_factor); -u64 __cputime_clockt_factor; -EXPORT_SYMBOL(__cputime_clockt_factor); -DEFINE_PER_CPU(unsigned long, cputime_last_delta); -DEFINE_PER_CPU(unsigned long, cputime_scaled_last_delta); - -cputime_t cputime_one_jiffy; #ifdef CONFIG_PPC_SPLPAR void (*dtl_consumer)(struct dtl_entry *, u64); @@ -183,14 +173,8 @@ static void calc_cputime_factors(void) { struct div_result res; - div128_by_32(HZ, 0, tb_ticks_per_sec, &res); - __cputime_jiffies_factor = res.result_low; div128_by_32(1000000, 0, tb_ticks_per_sec, &res); __cputime_usec_factor = res.result_low; - div128_by_32(1, 0, tb_ticks_per_sec, &res); - __cputime_sec_factor = res.result_low; - div128_by_32(USER_HZ, 0, tb_ticks_per_sec, &res); - __cputime_clockt_factor = res.result_low; } /* @@ -273,25 +257,19 @@ void accumulate_stolen_time(void) sst = scan_dispatch_log(acct->starttime_user); ust = scan_dispatch_log(acct->starttime); - acct->system_time -= sst; - acct->user_time -= ust; - local_paca->stolen_time += ust + sst; + acct->stime -= sst; + acct->utime -= ust; + acct->steal_time += ust + sst; local_paca->soft_enabled = save_soft_enabled; } static inline u64 calculate_stolen_time(u64 stop_tb) { - u64 stolen = 0; + if (get_paca()->dtl_ridx != be64_to_cpu(get_lppaca()->dtl_idx)) + return scan_dispatch_log(stop_tb); - if (get_paca()->dtl_ridx != be64_to_cpu(get_lppaca()->dtl_idx)) { - stolen = scan_dispatch_log(stop_tb); - get_paca()->accounting.system_time -= stolen; - } - - stolen += get_paca()->stolen_time; - get_paca()->stolen_time = 0; - return stolen; + return 0; } #else /* CONFIG_PPC_SPLPAR */ @@ -307,28 +285,27 @@ static inline u64 calculate_stolen_time(u64 stop_tb) * or soft irq state. */ static unsigned long vtime_delta(struct task_struct *tsk, - unsigned long *sys_scaled, - unsigned long *stolen) + unsigned long *stime_scaled, + unsigned long *steal_time) { unsigned long now, nowscaled, deltascaled; - unsigned long udelta, delta, user_scaled; + unsigned long stime; + unsigned long utime, utime_scaled; struct cpu_accounting_data *acct = get_accounting(tsk); WARN_ON_ONCE(!irqs_disabled()); now = mftb(); nowscaled = read_spurr(now); - acct->system_time += now - acct->starttime; + stime = now - acct->starttime; acct->starttime = now; deltascaled = nowscaled - acct->startspurr; acct->startspurr = nowscaled; - *stolen = calculate_stolen_time(now); + *steal_time = calculate_stolen_time(now); - delta = acct->system_time; - acct->system_time = 0; - udelta = acct->user_time - acct->utime_sspurr; - acct->utime_sspurr = acct->user_time; + utime = acct->utime - acct->utime_sspurr; + acct->utime_sspurr = acct->utime; /* * Because we don't read the SPURR on every kernel entry/exit, @@ -340,60 +317,105 @@ static unsigned long vtime_delta(struct task_struct *tsk, * the user ticks get saved up in paca->user_time_scaled to be * used by account_process_tick. */ - *sys_scaled = delta; - user_scaled = udelta; - if (deltascaled != delta + udelta) { - if (udelta) { - *sys_scaled = deltascaled * delta / (delta + udelta); - user_scaled = deltascaled - *sys_scaled; + *stime_scaled = stime; + utime_scaled = utime; + if (deltascaled != stime + utime) { + if (utime) { + *stime_scaled = deltascaled * stime / (stime + utime); + utime_scaled = deltascaled - *stime_scaled; } else { - *sys_scaled = deltascaled; + *stime_scaled = deltascaled; } } - acct->user_time_scaled += user_scaled; + acct->utime_scaled += utime_scaled; - return delta; + return stime; } void vtime_account_system(struct task_struct *tsk) { - unsigned long delta, sys_scaled, stolen; + unsigned long stime, stime_scaled, steal_time; + struct cpu_accounting_data *acct = get_accounting(tsk); + + stime = vtime_delta(tsk, &stime_scaled, &steal_time); - delta = vtime_delta(tsk, &sys_scaled, &stolen); - account_system_time(tsk, 0, delta, sys_scaled); - if (stolen) - account_steal_time(stolen); + stime -= min(stime, steal_time); + acct->steal_time += steal_time; + + if ((tsk->flags & PF_VCPU) && !irq_count()) { + acct->gtime += stime; + acct->utime_scaled += stime_scaled; + } else { + if (hardirq_count()) + acct->hardirq_time += stime; + else if (in_serving_softirq()) + acct->softirq_time += stime; + else + acct->stime += stime; + + acct->stime_scaled += stime_scaled; + } } EXPORT_SYMBOL_GPL(vtime_account_system); void vtime_account_idle(struct task_struct *tsk) { - unsigned long delta, sys_scaled, stolen; + unsigned long stime, stime_scaled, steal_time; + struct cpu_accounting_data *acct = get_accounting(tsk); - delta = vtime_delta(tsk, &sys_scaled, &stolen); - account_idle_time(delta + stolen); + stime = vtime_delta(tsk, &stime_scaled, &steal_time); + acct->idle_time += stime + steal_time; } /* - * Transfer the user time accumulated in the paca - * by the exception entry and exit code to the generic - * process user time records. + * Account the whole cputime accumulated in the paca * Must be called with interrupts disabled. * Assumes that vtime_account_system/idle() has been called * recently (i.e. since the last entry from usermode) so that * get_paca()->user_time_scaled is up to date. */ -void vtime_account_user(struct task_struct *tsk) +void vtime_flush(struct task_struct *tsk) { - cputime_t utime, utimescaled; struct cpu_accounting_data *acct = get_accounting(tsk); - utime = acct->user_time; - utimescaled = acct->user_time_scaled; - acct->user_time = 0; - acct->user_time_scaled = 0; + if (acct->utime) + account_user_time(tsk, cputime_to_nsecs(acct->utime)); + + if (acct->utime_scaled) + tsk->utimescaled += cputime_to_nsecs(acct->utime_scaled); + + if (acct->gtime) + account_guest_time(tsk, cputime_to_nsecs(acct->gtime)); + + if (acct->steal_time) + account_steal_time(cputime_to_nsecs(acct->steal_time)); + + if (acct->idle_time) + account_idle_time(cputime_to_nsecs(acct->idle_time)); + + if (acct->stime) + account_system_index_time(tsk, cputime_to_nsecs(acct->stime), + CPUTIME_SYSTEM); + if (acct->stime_scaled) + tsk->stimescaled += cputime_to_nsecs(acct->stime_scaled); + + if (acct->hardirq_time) + account_system_index_time(tsk, cputime_to_nsecs(acct->hardirq_time), + CPUTIME_IRQ); + if (acct->softirq_time) + account_system_index_time(tsk, cputime_to_nsecs(acct->softirq_time), + CPUTIME_SOFTIRQ); + + acct->utime = 0; + acct->utime_scaled = 0; acct->utime_sspurr = 0; - account_user_time(tsk, utime, utimescaled); + acct->gtime = 0; + acct->steal_time = 0; + acct->idle_time = 0; + acct->stime = 0; + acct->stime_scaled = 0; + acct->hardirq_time = 0; + acct->softirq_time = 0; } #ifdef CONFIG_PPC32 @@ -407,8 +429,7 @@ void arch_vtime_task_switch(struct task_struct *prev) struct cpu_accounting_data *acct = get_accounting(current); acct->starttime = get_accounting(prev)->starttime; - acct->system_time = 0; - acct->user_time = 0; + acct->startspurr = get_accounting(prev)->startspurr; } #endif /* CONFIG_PPC32 */ @@ -689,7 +710,7 @@ unsigned long long running_clock(void) * time and on a host which doesn't do any virtualisation TB *should* equal * VTB so it makes no difference anyway. */ - return local_clock() - cputime_to_nsecs(kcpustat_this_cpu->cpustat[CPUTIME_STEAL]); + return local_clock() - kcpustat_this_cpu->cpustat[CPUTIME_STEAL]; } #endif @@ -802,18 +823,18 @@ void read_persistent_clock(struct timespec *ts) } /* clocksource code */ -static cycle_t rtc_read(struct clocksource *cs) +static u64 rtc_read(struct clocksource *cs) { - return (cycle_t)get_rtc(); + return (u64)get_rtc(); } -static cycle_t timebase_read(struct clocksource *cs) +static u64 timebase_read(struct clocksource *cs) { - return (cycle_t)get_tb(); + return (u64)get_tb(); } void update_vsyscall_old(struct timespec *wall_time, struct timespec *wtm, - struct clocksource *clock, u32 mult, cycle_t cycle_last) + struct clocksource *clock, u32 mult, u64 cycle_last) { u64 new_tb_to_xs, new_stamp_xsec; u32 frac_sec; @@ -1018,7 +1039,6 @@ void __init time_init(void) tb_ticks_per_sec = ppc_tb_freq; tb_ticks_per_usec = ppc_tb_freq / 1000000; calc_cputime_factors(); - setup_cputime_one_jiffy(); /* * Compute scale factor for sched_clock. diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 023a462725b5..ff365f9de27a 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -17,6 +17,7 @@ #include <linux/errno.h> #include <linux/sched.h> +#include <linux/sched/debug.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/stddef.h> @@ -40,7 +41,7 @@ #include <asm/emulated_ops.h> #include <asm/pgtable.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/io.h> #include <asm/machdep.h> #include <asm/rtas.h> @@ -64,8 +65,9 @@ #include <asm/asm-prototypes.h> #include <asm/hmi.h> #include <sysdev/fsl_pci.h> +#include <asm/kprobes.h> -#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC) +#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE) int (*__debugger)(struct pt_regs *regs) __read_mostly; int (*__debugger_ipi)(struct pt_regs *regs) __read_mostly; int (*__debugger_bpt)(struct pt_regs *regs) __read_mostly; @@ -122,9 +124,6 @@ static unsigned long oops_begin(struct pt_regs *regs) int cpu; unsigned long flags; - if (debugger(regs)) - return 1; - oops_enter(); /* racy, but better than risking deadlock. */ @@ -150,14 +149,15 @@ static void oops_end(unsigned long flags, struct pt_regs *regs, int signr) { bust_spinlocks(0); - die_owner = -1; add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); die_nest_count--; oops_exit(); printk("\n"); - if (!die_nest_count) + if (!die_nest_count) { /* Nest count reaches zero, release the lock. */ + die_owner = -1; arch_spin_unlock(&die_lock); + } raw_local_irq_restore(flags); crash_fadump(regs, "die oops"); @@ -227,8 +227,12 @@ NOKPROBE_SYMBOL(__die); void die(const char *str, struct pt_regs *regs, long err) { - unsigned long flags = oops_begin(regs); + unsigned long flags; + if (debugger(regs)) + return; + + flags = oops_begin(regs); if (__die(str, regs, err)) err = 0; oops_end(flags, regs, err); @@ -365,7 +369,7 @@ static inline int check_io_access(struct pt_regs *regs) (*nip & 0x100)? "OUT to": "IN from", regs->gpr[rb] - _IO_BASE, nip); regs->msr |= MSR_RI; - regs->nip = entry->fixup; + regs->nip = extable_fixup(entry); return 1; } } @@ -824,6 +828,9 @@ void single_step_exception(struct pt_regs *regs) clear_single_step(regs); + if (kprobe_post_handler(regs)) + return; + if (notify_die(DIE_SSTEP, "single_step", regs, 5, 5, SIGTRAP) == NOTIFY_STOP) goto bail; @@ -1177,6 +1184,9 @@ void program_check_exception(struct pt_regs *regs) if (debugger_bpt(regs)) goto bail; + if (kprobe_handler(regs)) + goto bail; + /* trap exception */ if (notify_die(DIE_BPT, "breakpoint", regs, 5, 5, SIGTRAP) == NOTIFY_STOP) @@ -1430,7 +1440,6 @@ void facility_unavailable_exception(struct pt_regs *regs) [FSCR_TM_LG] = "TM", [FSCR_EBB_LG] = "EBB", [FSCR_TAR_LG] = "TAR", - [FSCR_LM_LG] = "LM", }; char *facility = "unknown"; u64 value; @@ -1488,14 +1497,6 @@ void facility_unavailable_exception(struct pt_regs *regs) emulate_single_step(regs); } return; - } else if ((status == FSCR_LM_LG) && cpu_has_feature(CPU_FTR_ARCH_300)) { - /* - * This process has touched LM, so turn it on forever - * for this process - */ - current->thread.fscr |= FSCR_LM; - mtspr(SPRN_FSCR, current->thread.fscr); - return; } if (status == FSCR_TM_LG) { @@ -1519,7 +1520,8 @@ void facility_unavailable_exception(struct pt_regs *regs) return; } - if ((status < ARRAY_SIZE(facility_strings)) && + if ((hv || status >= 2) && + (status < ARRAY_SIZE(facility_strings)) && facility_strings[status]) facility = facility_strings[status]; @@ -1527,9 +1529,8 @@ void facility_unavailable_exception(struct pt_regs *regs) if (!arch_irq_disabled_regs(regs)) local_irq_enable(); - pr_err_ratelimited( - "%sFacility '%s' unavailable, exception at 0x%lx, MSR=%lx\n", - hv ? "Hypervisor " : "", facility, regs->nip, regs->msr); + pr_err_ratelimited("%sFacility '%s' unavailable (%d), exception at 0x%lx, MSR=%lx\n", + hv ? "Hypervisor " : "", facility, status, regs->nip, regs->msr); out: if (user_mode(regs)) { @@ -1754,6 +1755,9 @@ void DebugException(struct pt_regs *regs, unsigned long debug_status) return; } + if (kprobe_post_handler(regs)) + return; + if (notify_die(DIE_SSTEP, "block_step", regs, 5, 5, SIGTRAP) == NOTIFY_STOP) { return; @@ -1768,6 +1772,9 @@ void DebugException(struct pt_regs *regs, unsigned long debug_status) /* Clear the instruction completion event */ mtspr(SPRN_DBSR, DBSR_IC); + if (kprobe_post_handler(regs)) + return; + if (notify_die(DIE_SSTEP, "single_step", regs, 5, 5, SIGTRAP) == NOTIFY_STOP) { return; diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 4111d30badfa..22b01a3962f0 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -736,16 +736,14 @@ static int __init vdso_init(void) if (firmware_has_feature(FW_FEATURE_LPAR)) vdso_data->platform |= 1; vdso_data->physicalMemorySize = memblock_phys_mem_size(); - vdso_data->dcache_size = ppc64_caches.dsize; - vdso_data->dcache_line_size = ppc64_caches.dline_size; - vdso_data->icache_size = ppc64_caches.isize; - vdso_data->icache_line_size = ppc64_caches.iline_size; - - /* XXXOJN: Blocks should be added to ppc64_caches and used instead */ - vdso_data->dcache_block_size = ppc64_caches.dline_size; - vdso_data->icache_block_size = ppc64_caches.iline_size; - vdso_data->dcache_log_block_size = ppc64_caches.log_dline_size; - vdso_data->icache_log_block_size = ppc64_caches.log_iline_size; + vdso_data->dcache_size = ppc64_caches.l1d.size; + vdso_data->dcache_line_size = ppc64_caches.l1d.line_size; + vdso_data->icache_size = ppc64_caches.l1i.size; + vdso_data->icache_line_size = ppc64_caches.l1i.line_size; + vdso_data->dcache_block_size = ppc64_caches.l1d.block_size; + vdso_data->icache_block_size = ppc64_caches.l1i.block_size; + vdso_data->dcache_log_block_size = ppc64_caches.l1d.log_block_size; + vdso_data->icache_log_block_size = ppc64_caches.l1i.log_block_size; /* * Calculate the size of the 64 bits vDSO diff --git a/arch/powerpc/kernel/vecemu.c b/arch/powerpc/kernel/vecemu.c index c4bfadb2606b..2d8f6d8ccafc 100644 --- a/arch/powerpc/kernel/vecemu.c +++ b/arch/powerpc/kernel/vecemu.c @@ -7,7 +7,7 @@ #include <linux/sched.h> #include <asm/ptrace.h> #include <asm/processor.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> /* Functions in vector.S */ extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b); diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c deleted file mode 100644 index b3813ddb2fb4..000000000000 --- a/arch/powerpc/kernel/vio.c +++ /dev/null @@ -1,1702 +0,0 @@ -/* - * IBM PowerPC Virtual I/O Infrastructure Support. - * - * Copyright (c) 2003,2008 IBM Corp. - * Dave Engebretsen engebret@us.ibm.com - * Santiago Leon santil@us.ibm.com - * Hollis Blanchard <hollisb@us.ibm.com> - * Stephen Rothwell - * Robert Jennings <rcjenn@us.ibm.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/cpu.h> -#include <linux/types.h> -#include <linux/delay.h> -#include <linux/stat.h> -#include <linux/device.h> -#include <linux/init.h> -#include <linux/slab.h> -#include <linux/console.h> -#include <linux/export.h> -#include <linux/mm.h> -#include <linux/dma-mapping.h> -#include <linux/kobject.h> - -#include <asm/iommu.h> -#include <asm/dma.h> -#include <asm/vio.h> -#include <asm/prom.h> -#include <asm/firmware.h> -#include <asm/tce.h> -#include <asm/page.h> -#include <asm/hvcall.h> - -static struct vio_dev vio_bus_device = { /* fake "parent" device */ - .name = "vio", - .type = "", - .dev.init_name = "vio", - .dev.bus = &vio_bus_type, -}; - -#ifdef CONFIG_PPC_SMLPAR -/** - * vio_cmo_pool - A pool of IO memory for CMO use - * - * @size: The size of the pool in bytes - * @free: The amount of free memory in the pool - */ -struct vio_cmo_pool { - size_t size; - size_t free; -}; - -/* How many ms to delay queued balance work */ -#define VIO_CMO_BALANCE_DELAY 100 - -/* Portion out IO memory to CMO devices by this chunk size */ -#define VIO_CMO_BALANCE_CHUNK 131072 - -/** - * vio_cmo_dev_entry - A device that is CMO-enabled and requires entitlement - * - * @vio_dev: struct vio_dev pointer - * @list: pointer to other devices on bus that are being tracked - */ -struct vio_cmo_dev_entry { - struct vio_dev *viodev; - struct list_head list; -}; - -/** - * vio_cmo - VIO bus accounting structure for CMO entitlement - * - * @lock: spinlock for entire structure - * @balance_q: work queue for balancing system entitlement - * @device_list: list of CMO-enabled devices requiring entitlement - * @entitled: total system entitlement in bytes - * @reserve: pool of memory from which devices reserve entitlement, incl. spare - * @excess: pool of excess entitlement not needed for device reserves or spare - * @spare: IO memory for device hotplug functionality - * @min: minimum necessary for system operation - * @desired: desired memory for system operation - * @curr: bytes currently allocated - * @high: high water mark for IO data usage - */ -static struct vio_cmo { - spinlock_t lock; - struct delayed_work balance_q; - struct list_head device_list; - size_t entitled; - struct vio_cmo_pool reserve; - struct vio_cmo_pool excess; - size_t spare; - size_t min; - size_t desired; - size_t curr; - size_t high; -} vio_cmo; - -/** - * vio_cmo_OF_devices - Count the number of OF devices that have DMA windows - */ -static int vio_cmo_num_OF_devs(void) -{ - struct device_node *node_vroot; - int count = 0; - - /* - * Count the number of vdevice entries with an - * ibm,my-dma-window OF property - */ - node_vroot = of_find_node_by_name(NULL, "vdevice"); - if (node_vroot) { - struct device_node *of_node; - struct property *prop; - - for_each_child_of_node(node_vroot, of_node) { - prop = of_find_property(of_node, "ibm,my-dma-window", - NULL); - if (prop) - count++; - } - } - of_node_put(node_vroot); - return count; -} - -/** - * vio_cmo_alloc - allocate IO memory for CMO-enable devices - * - * @viodev: VIO device requesting IO memory - * @size: size of allocation requested - * - * Allocations come from memory reserved for the devices and any excess - * IO memory available to all devices. The spare pool used to service - * hotplug must be equal to %VIO_CMO_MIN_ENT for the excess pool to be - * made available. - * - * Return codes: - * 0 for successful allocation and -ENOMEM for a failure - */ -static inline int vio_cmo_alloc(struct vio_dev *viodev, size_t size) -{ - unsigned long flags; - size_t reserve_free = 0; - size_t excess_free = 0; - int ret = -ENOMEM; - - spin_lock_irqsave(&vio_cmo.lock, flags); - - /* Determine the amount of free entitlement available in reserve */ - if (viodev->cmo.entitled > viodev->cmo.allocated) - reserve_free = viodev->cmo.entitled - viodev->cmo.allocated; - - /* If spare is not fulfilled, the excess pool can not be used. */ - if (vio_cmo.spare >= VIO_CMO_MIN_ENT) - excess_free = vio_cmo.excess.free; - - /* The request can be satisfied */ - if ((reserve_free + excess_free) >= size) { - vio_cmo.curr += size; - if (vio_cmo.curr > vio_cmo.high) - vio_cmo.high = vio_cmo.curr; - viodev->cmo.allocated += size; - size -= min(reserve_free, size); - vio_cmo.excess.free -= size; - ret = 0; - } - - spin_unlock_irqrestore(&vio_cmo.lock, flags); - return ret; -} - -/** - * vio_cmo_dealloc - deallocate IO memory from CMO-enable devices - * @viodev: VIO device freeing IO memory - * @size: size of deallocation - * - * IO memory is freed by the device back to the correct memory pools. - * The spare pool is replenished first from either memory pool, then - * the reserve pool is used to reduce device entitlement, the excess - * pool is used to increase the reserve pool toward the desired entitlement - * target, and then the remaining memory is returned to the pools. - * - */ -static inline void vio_cmo_dealloc(struct vio_dev *viodev, size_t size) -{ - unsigned long flags; - size_t spare_needed = 0; - size_t excess_freed = 0; - size_t reserve_freed = size; - size_t tmp; - int balance = 0; - - spin_lock_irqsave(&vio_cmo.lock, flags); - vio_cmo.curr -= size; - - /* Amount of memory freed from the excess pool */ - if (viodev->cmo.allocated > viodev->cmo.entitled) { - excess_freed = min(reserve_freed, (viodev->cmo.allocated - - viodev->cmo.entitled)); - reserve_freed -= excess_freed; - } - - /* Remove allocation from device */ - viodev->cmo.allocated -= (reserve_freed + excess_freed); - - /* Spare is a subset of the reserve pool, replenish it first. */ - spare_needed = VIO_CMO_MIN_ENT - vio_cmo.spare; - - /* - * Replenish the spare in the reserve pool from the excess pool. - * This moves entitlement into the reserve pool. - */ - if (spare_needed && excess_freed) { - tmp = min(excess_freed, spare_needed); - vio_cmo.excess.size -= tmp; - vio_cmo.reserve.size += tmp; - vio_cmo.spare += tmp; - excess_freed -= tmp; - spare_needed -= tmp; - balance = 1; - } - - /* - * Replenish the spare in the reserve pool from the reserve pool. - * This removes entitlement from the device down to VIO_CMO_MIN_ENT, - * if needed, and gives it to the spare pool. The amount of used - * memory in this pool does not change. - */ - if (spare_needed && reserve_freed) { - tmp = min3(spare_needed, reserve_freed, (viodev->cmo.entitled - VIO_CMO_MIN_ENT)); - - vio_cmo.spare += tmp; - viodev->cmo.entitled -= tmp; - reserve_freed -= tmp; - spare_needed -= tmp; - balance = 1; - } - - /* - * Increase the reserve pool until the desired allocation is met. - * Move an allocation freed from the excess pool into the reserve - * pool and schedule a balance operation. - */ - if (excess_freed && (vio_cmo.desired > vio_cmo.reserve.size)) { - tmp = min(excess_freed, (vio_cmo.desired - vio_cmo.reserve.size)); - - vio_cmo.excess.size -= tmp; - vio_cmo.reserve.size += tmp; - excess_freed -= tmp; - balance = 1; - } - - /* Return memory from the excess pool to that pool */ - if (excess_freed) - vio_cmo.excess.free += excess_freed; - - if (balance) - schedule_delayed_work(&vio_cmo.balance_q, VIO_CMO_BALANCE_DELAY); - spin_unlock_irqrestore(&vio_cmo.lock, flags); -} - -/** - * vio_cmo_entitlement_update - Manage system entitlement changes - * - * @new_entitlement: new system entitlement to attempt to accommodate - * - * Increases in entitlement will be used to fulfill the spare entitlement - * and the rest is given to the excess pool. Decreases, if they are - * possible, come from the excess pool and from unused device entitlement - * - * Returns: 0 on success, -ENOMEM when change can not be made - */ -int vio_cmo_entitlement_update(size_t new_entitlement) -{ - struct vio_dev *viodev; - struct vio_cmo_dev_entry *dev_ent; - unsigned long flags; - size_t avail, delta, tmp; - - spin_lock_irqsave(&vio_cmo.lock, flags); - - /* Entitlement increases */ - if (new_entitlement > vio_cmo.entitled) { - delta = new_entitlement - vio_cmo.entitled; - - /* Fulfill spare allocation */ - if (vio_cmo.spare < VIO_CMO_MIN_ENT) { - tmp = min(delta, (VIO_CMO_MIN_ENT - vio_cmo.spare)); - vio_cmo.spare += tmp; - vio_cmo.reserve.size += tmp; - delta -= tmp; - } - - /* Remaining new allocation goes to the excess pool */ - vio_cmo.entitled += delta; - vio_cmo.excess.size += delta; - vio_cmo.excess.free += delta; - - goto out; - } - - /* Entitlement decreases */ - delta = vio_cmo.entitled - new_entitlement; - avail = vio_cmo.excess.free; - - /* - * Need to check how much unused entitlement each device can - * sacrifice to fulfill entitlement change. - */ - list_for_each_entry(dev_ent, &vio_cmo.device_list, list) { - if (avail >= delta) - break; - - viodev = dev_ent->viodev; - if ((viodev->cmo.entitled > viodev->cmo.allocated) && - (viodev->cmo.entitled > VIO_CMO_MIN_ENT)) - avail += viodev->cmo.entitled - - max_t(size_t, viodev->cmo.allocated, - VIO_CMO_MIN_ENT); - } - - if (delta <= avail) { - vio_cmo.entitled -= delta; - - /* Take entitlement from the excess pool first */ - tmp = min(vio_cmo.excess.free, delta); - vio_cmo.excess.size -= tmp; - vio_cmo.excess.free -= tmp; - delta -= tmp; - - /* - * Remove all but VIO_CMO_MIN_ENT bytes from devices - * until entitlement change is served - */ - list_for_each_entry(dev_ent, &vio_cmo.device_list, list) { - if (!delta) - break; - - viodev = dev_ent->viodev; - tmp = 0; - if ((viodev->cmo.entitled > viodev->cmo.allocated) && - (viodev->cmo.entitled > VIO_CMO_MIN_ENT)) - tmp = viodev->cmo.entitled - - max_t(size_t, viodev->cmo.allocated, - VIO_CMO_MIN_ENT); - viodev->cmo.entitled -= min(tmp, delta); - delta -= min(tmp, delta); - } - } else { - spin_unlock_irqrestore(&vio_cmo.lock, flags); - return -ENOMEM; - } - -out: - schedule_delayed_work(&vio_cmo.balance_q, 0); - spin_unlock_irqrestore(&vio_cmo.lock, flags); - return 0; -} - -/** - * vio_cmo_balance - Balance entitlement among devices - * - * @work: work queue structure for this operation - * - * Any system entitlement above the minimum needed for devices, or - * already allocated to devices, can be distributed to the devices. - * The list of devices is iterated through to recalculate the desired - * entitlement level and to determine how much entitlement above the - * minimum entitlement is allocated to devices. - * - * Small chunks of the available entitlement are given to devices until - * their requirements are fulfilled or there is no entitlement left to give. - * Upon completion sizes of the reserve and excess pools are calculated. - * - * The system minimum entitlement level is also recalculated here. - * Entitlement will be reserved for devices even after vio_bus_remove to - * accommodate reloading the driver. The OF tree is walked to count the - * number of devices present and this will remove entitlement for devices - * that have actually left the system after having vio_bus_remove called. - */ -static void vio_cmo_balance(struct work_struct *work) -{ - struct vio_cmo *cmo; - struct vio_dev *viodev; - struct vio_cmo_dev_entry *dev_ent; - unsigned long flags; - size_t avail = 0, level, chunk, need; - int devcount = 0, fulfilled; - - cmo = container_of(work, struct vio_cmo, balance_q.work); - - spin_lock_irqsave(&vio_cmo.lock, flags); - - /* Calculate minimum entitlement and fulfill spare */ - cmo->min = vio_cmo_num_OF_devs() * VIO_CMO_MIN_ENT; - BUG_ON(cmo->min > cmo->entitled); - cmo->spare = min_t(size_t, VIO_CMO_MIN_ENT, (cmo->entitled - cmo->min)); - cmo->min += cmo->spare; - cmo->desired = cmo->min; - - /* - * Determine how much entitlement is available and reset device - * entitlements - */ - avail = cmo->entitled - cmo->spare; - list_for_each_entry(dev_ent, &vio_cmo.device_list, list) { - viodev = dev_ent->viodev; - devcount++; - viodev->cmo.entitled = VIO_CMO_MIN_ENT; - cmo->desired += (viodev->cmo.desired - VIO_CMO_MIN_ENT); - avail -= max_t(size_t, viodev->cmo.allocated, VIO_CMO_MIN_ENT); - } - - /* - * Having provided each device with the minimum entitlement, loop - * over the devices portioning out the remaining entitlement - * until there is nothing left. - */ - level = VIO_CMO_MIN_ENT; - while (avail) { - fulfilled = 0; - list_for_each_entry(dev_ent, &vio_cmo.device_list, list) { - viodev = dev_ent->viodev; - - if (viodev->cmo.desired <= level) { - fulfilled++; - continue; - } - - /* - * Give the device up to VIO_CMO_BALANCE_CHUNK - * bytes of entitlement, but do not exceed the - * desired level of entitlement for the device. - */ - chunk = min_t(size_t, avail, VIO_CMO_BALANCE_CHUNK); - chunk = min(chunk, (viodev->cmo.desired - - viodev->cmo.entitled)); - viodev->cmo.entitled += chunk; - - /* - * If the memory for this entitlement increase was - * already allocated to the device it does not come - * from the available pool being portioned out. - */ - need = max(viodev->cmo.allocated, viodev->cmo.entitled)- - max(viodev->cmo.allocated, level); - avail -= need; - - } - if (fulfilled == devcount) - break; - level += VIO_CMO_BALANCE_CHUNK; - } - - /* Calculate new reserve and excess pool sizes */ - cmo->reserve.size = cmo->min; - cmo->excess.free = 0; - cmo->excess.size = 0; - need = 0; - list_for_each_entry(dev_ent, &vio_cmo.device_list, list) { - viodev = dev_ent->viodev; - /* Calculated reserve size above the minimum entitlement */ - if (viodev->cmo.entitled) - cmo->reserve.size += (viodev->cmo.entitled - - VIO_CMO_MIN_ENT); - /* Calculated used excess entitlement */ - if (viodev->cmo.allocated > viodev->cmo.entitled) - need += viodev->cmo.allocated - viodev->cmo.entitled; - } - cmo->excess.size = cmo->entitled - cmo->reserve.size; - cmo->excess.free = cmo->excess.size - need; - - cancel_delayed_work(to_delayed_work(work)); - spin_unlock_irqrestore(&vio_cmo.lock, flags); -} - -static void *vio_dma_iommu_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag, - unsigned long attrs) -{ - struct vio_dev *viodev = to_vio_dev(dev); - void *ret; - - if (vio_cmo_alloc(viodev, roundup(size, PAGE_SIZE))) { - atomic_inc(&viodev->cmo.allocs_failed); - return NULL; - } - - ret = dma_iommu_ops.alloc(dev, size, dma_handle, flag, attrs); - if (unlikely(ret == NULL)) { - vio_cmo_dealloc(viodev, roundup(size, PAGE_SIZE)); - atomic_inc(&viodev->cmo.allocs_failed); - } - - return ret; -} - -static void vio_dma_iommu_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle, - unsigned long attrs) -{ - struct vio_dev *viodev = to_vio_dev(dev); - - dma_iommu_ops.free(dev, size, vaddr, dma_handle, attrs); - - vio_cmo_dealloc(viodev, roundup(size, PAGE_SIZE)); -} - -static dma_addr_t vio_dma_iommu_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction direction, - unsigned long attrs) -{ - struct vio_dev *viodev = to_vio_dev(dev); - struct iommu_table *tbl; - dma_addr_t ret = DMA_ERROR_CODE; - - tbl = get_iommu_table_base(dev); - if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl)))) { - atomic_inc(&viodev->cmo.allocs_failed); - return ret; - } - - ret = dma_iommu_ops.map_page(dev, page, offset, size, direction, attrs); - if (unlikely(dma_mapping_error(dev, ret))) { - vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl))); - atomic_inc(&viodev->cmo.allocs_failed); - } - - return ret; -} - -static void vio_dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle, - size_t size, - enum dma_data_direction direction, - unsigned long attrs) -{ - struct vio_dev *viodev = to_vio_dev(dev); - struct iommu_table *tbl; - - tbl = get_iommu_table_base(dev); - dma_iommu_ops.unmap_page(dev, dma_handle, size, direction, attrs); - - vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl))); -} - -static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, - int nelems, enum dma_data_direction direction, - unsigned long attrs) -{ - struct vio_dev *viodev = to_vio_dev(dev); - struct iommu_table *tbl; - struct scatterlist *sgl; - int ret, count; - size_t alloc_size = 0; - - tbl = get_iommu_table_base(dev); - for_each_sg(sglist, sgl, nelems, count) - alloc_size += roundup(sgl->length, IOMMU_PAGE_SIZE(tbl)); - - if (vio_cmo_alloc(viodev, alloc_size)) { - atomic_inc(&viodev->cmo.allocs_failed); - return 0; - } - - ret = dma_iommu_ops.map_sg(dev, sglist, nelems, direction, attrs); - - if (unlikely(!ret)) { - vio_cmo_dealloc(viodev, alloc_size); - atomic_inc(&viodev->cmo.allocs_failed); - return ret; - } - - for_each_sg(sglist, sgl, ret, count) - alloc_size -= roundup(sgl->dma_length, IOMMU_PAGE_SIZE(tbl)); - if (alloc_size) - vio_cmo_dealloc(viodev, alloc_size); - - return ret; -} - -static void vio_dma_iommu_unmap_sg(struct device *dev, - struct scatterlist *sglist, int nelems, - enum dma_data_direction direction, - unsigned long attrs) -{ - struct vio_dev *viodev = to_vio_dev(dev); - struct iommu_table *tbl; - struct scatterlist *sgl; - size_t alloc_size = 0; - int count; - - tbl = get_iommu_table_base(dev); - for_each_sg(sglist, sgl, nelems, count) - alloc_size += roundup(sgl->dma_length, IOMMU_PAGE_SIZE(tbl)); - - dma_iommu_ops.unmap_sg(dev, sglist, nelems, direction, attrs); - - vio_cmo_dealloc(viodev, alloc_size); -} - -static int vio_dma_iommu_dma_supported(struct device *dev, u64 mask) -{ - return dma_iommu_ops.dma_supported(dev, mask); -} - -static u64 vio_dma_get_required_mask(struct device *dev) -{ - return dma_iommu_ops.get_required_mask(dev); -} - -static struct dma_map_ops vio_dma_mapping_ops = { - .alloc = vio_dma_iommu_alloc_coherent, - .free = vio_dma_iommu_free_coherent, - .mmap = dma_direct_mmap_coherent, - .map_sg = vio_dma_iommu_map_sg, - .unmap_sg = vio_dma_iommu_unmap_sg, - .map_page = vio_dma_iommu_map_page, - .unmap_page = vio_dma_iommu_unmap_page, - .dma_supported = vio_dma_iommu_dma_supported, - .get_required_mask = vio_dma_get_required_mask, -}; - -/** - * vio_cmo_set_dev_desired - Set desired entitlement for a device - * - * @viodev: struct vio_dev for device to alter - * @desired: new desired entitlement level in bytes - * - * For use by devices to request a change to their entitlement at runtime or - * through sysfs. The desired entitlement level is changed and a balancing - * of system resources is scheduled to run in the future. - */ -void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired) -{ - unsigned long flags; - struct vio_cmo_dev_entry *dev_ent; - int found = 0; - - if (!firmware_has_feature(FW_FEATURE_CMO)) - return; - - spin_lock_irqsave(&vio_cmo.lock, flags); - if (desired < VIO_CMO_MIN_ENT) - desired = VIO_CMO_MIN_ENT; - - /* - * Changes will not be made for devices not in the device list. - * If it is not in the device list, then no driver is loaded - * for the device and it can not receive entitlement. - */ - list_for_each_entry(dev_ent, &vio_cmo.device_list, list) - if (viodev == dev_ent->viodev) { - found = 1; - break; - } - if (!found) { - spin_unlock_irqrestore(&vio_cmo.lock, flags); - return; - } - - /* Increase/decrease in desired device entitlement */ - if (desired >= viodev->cmo.desired) { - /* Just bump the bus and device values prior to a balance*/ - vio_cmo.desired += desired - viodev->cmo.desired; - viodev->cmo.desired = desired; - } else { - /* Decrease bus and device values for desired entitlement */ - vio_cmo.desired -= viodev->cmo.desired - desired; - viodev->cmo.desired = desired; - /* - * If less entitlement is desired than current entitlement, move - * any reserve memory in the change region to the excess pool. - */ - if (viodev->cmo.entitled > desired) { - vio_cmo.reserve.size -= viodev->cmo.entitled - desired; - vio_cmo.excess.size += viodev->cmo.entitled - desired; - /* - * If entitlement moving from the reserve pool to the - * excess pool is currently unused, add to the excess - * free counter. - */ - if (viodev->cmo.allocated < viodev->cmo.entitled) - vio_cmo.excess.free += viodev->cmo.entitled - - max(viodev->cmo.allocated, desired); - viodev->cmo.entitled = desired; - } - } - schedule_delayed_work(&vio_cmo.balance_q, 0); - spin_unlock_irqrestore(&vio_cmo.lock, flags); -} - -/** - * vio_cmo_bus_probe - Handle CMO specific bus probe activities - * - * @viodev - Pointer to struct vio_dev for device - * - * Determine the devices IO memory entitlement needs, attempting - * to satisfy the system minimum entitlement at first and scheduling - * a balance operation to take care of the rest at a later time. - * - * Returns: 0 on success, -EINVAL when device doesn't support CMO, and - * -ENOMEM when entitlement is not available for device or - * device entry. - * - */ -static int vio_cmo_bus_probe(struct vio_dev *viodev) -{ - struct vio_cmo_dev_entry *dev_ent; - struct device *dev = &viodev->dev; - struct iommu_table *tbl; - struct vio_driver *viodrv = to_vio_driver(dev->driver); - unsigned long flags; - size_t size; - bool dma_capable = false; - - tbl = get_iommu_table_base(dev); - - /* A device requires entitlement if it has a DMA window property */ - switch (viodev->family) { - case VDEVICE: - if (of_get_property(viodev->dev.of_node, - "ibm,my-dma-window", NULL)) - dma_capable = true; - break; - case PFO: - dma_capable = false; - break; - default: - dev_warn(dev, "unknown device family: %d\n", viodev->family); - BUG(); - break; - } - - /* Configure entitlement for the device. */ - if (dma_capable) { - /* Check that the driver is CMO enabled and get desired DMA */ - if (!viodrv->get_desired_dma) { - dev_err(dev, "%s: device driver does not support CMO\n", - __func__); - return -EINVAL; - } - - viodev->cmo.desired = - IOMMU_PAGE_ALIGN(viodrv->get_desired_dma(viodev), tbl); - if (viodev->cmo.desired < VIO_CMO_MIN_ENT) - viodev->cmo.desired = VIO_CMO_MIN_ENT; - size = VIO_CMO_MIN_ENT; - - dev_ent = kmalloc(sizeof(struct vio_cmo_dev_entry), - GFP_KERNEL); - if (!dev_ent) - return -ENOMEM; - - dev_ent->viodev = viodev; - spin_lock_irqsave(&vio_cmo.lock, flags); - list_add(&dev_ent->list, &vio_cmo.device_list); - } else { - viodev->cmo.desired = 0; - size = 0; - spin_lock_irqsave(&vio_cmo.lock, flags); - } - - /* - * If the needs for vio_cmo.min have not changed since they - * were last set, the number of devices in the OF tree has - * been constant and the IO memory for this is already in - * the reserve pool. - */ - if (vio_cmo.min == ((vio_cmo_num_OF_devs() + 1) * - VIO_CMO_MIN_ENT)) { - /* Updated desired entitlement if device requires it */ - if (size) - vio_cmo.desired += (viodev->cmo.desired - - VIO_CMO_MIN_ENT); - } else { - size_t tmp; - - tmp = vio_cmo.spare + vio_cmo.excess.free; - if (tmp < size) { - dev_err(dev, "%s: insufficient free " - "entitlement to add device. " - "Need %lu, have %lu\n", __func__, - size, (vio_cmo.spare + tmp)); - spin_unlock_irqrestore(&vio_cmo.lock, flags); - return -ENOMEM; - } - - /* Use excess pool first to fulfill request */ - tmp = min(size, vio_cmo.excess.free); - vio_cmo.excess.free -= tmp; - vio_cmo.excess.size -= tmp; - vio_cmo.reserve.size += tmp; - - /* Use spare if excess pool was insufficient */ - vio_cmo.spare -= size - tmp; - - /* Update bus accounting */ - vio_cmo.min += size; - vio_cmo.desired += viodev->cmo.desired; - } - spin_unlock_irqrestore(&vio_cmo.lock, flags); - return 0; -} - -/** - * vio_cmo_bus_remove - Handle CMO specific bus removal activities - * - * @viodev - Pointer to struct vio_dev for device - * - * Remove the device from the cmo device list. The minimum entitlement - * will be reserved for the device as long as it is in the system. The - * rest of the entitlement the device had been allocated will be returned - * to the system. - */ -static void vio_cmo_bus_remove(struct vio_dev *viodev) -{ - struct vio_cmo_dev_entry *dev_ent; - unsigned long flags; - size_t tmp; - - spin_lock_irqsave(&vio_cmo.lock, flags); - if (viodev->cmo.allocated) { - dev_err(&viodev->dev, "%s: device had %lu bytes of IO " - "allocated after remove operation.\n", - __func__, viodev->cmo.allocated); - BUG(); - } - - /* - * Remove the device from the device list being maintained for - * CMO enabled devices. - */ - list_for_each_entry(dev_ent, &vio_cmo.device_list, list) - if (viodev == dev_ent->viodev) { - list_del(&dev_ent->list); - kfree(dev_ent); - break; - } - - /* - * Devices may not require any entitlement and they do not need - * to be processed. Otherwise, return the device's entitlement - * back to the pools. - */ - if (viodev->cmo.entitled) { - /* - * This device has not yet left the OF tree, it's - * minimum entitlement remains in vio_cmo.min and - * vio_cmo.desired - */ - vio_cmo.desired -= (viodev->cmo.desired - VIO_CMO_MIN_ENT); - - /* - * Save min allocation for device in reserve as long - * as it exists in OF tree as determined by later - * balance operation - */ - viodev->cmo.entitled -= VIO_CMO_MIN_ENT; - - /* Replenish spare from freed reserve pool */ - if (viodev->cmo.entitled && (vio_cmo.spare < VIO_CMO_MIN_ENT)) { - tmp = min(viodev->cmo.entitled, (VIO_CMO_MIN_ENT - - vio_cmo.spare)); - vio_cmo.spare += tmp; - viodev->cmo.entitled -= tmp; - } - - /* Remaining reserve goes to excess pool */ - vio_cmo.excess.size += viodev->cmo.entitled; - vio_cmo.excess.free += viodev->cmo.entitled; - vio_cmo.reserve.size -= viodev->cmo.entitled; - - /* - * Until the device is removed it will keep a - * minimum entitlement; this will guarantee that - * a module unload/load will result in a success. - */ - viodev->cmo.entitled = VIO_CMO_MIN_ENT; - viodev->cmo.desired = VIO_CMO_MIN_ENT; - atomic_set(&viodev->cmo.allocs_failed, 0); - } - - spin_unlock_irqrestore(&vio_cmo.lock, flags); -} - -static void vio_cmo_set_dma_ops(struct vio_dev *viodev) -{ - set_dma_ops(&viodev->dev, &vio_dma_mapping_ops); -} - -/** - * vio_cmo_bus_init - CMO entitlement initialization at bus init time - * - * Set up the reserve and excess entitlement pools based on available - * system entitlement and the number of devices in the OF tree that - * require entitlement in the reserve pool. - */ -static void vio_cmo_bus_init(void) -{ - struct hvcall_mpp_data mpp_data; - int err; - - memset(&vio_cmo, 0, sizeof(struct vio_cmo)); - spin_lock_init(&vio_cmo.lock); - INIT_LIST_HEAD(&vio_cmo.device_list); - INIT_DELAYED_WORK(&vio_cmo.balance_q, vio_cmo_balance); - - /* Get current system entitlement */ - err = h_get_mpp(&mpp_data); - - /* - * On failure, continue with entitlement set to 0, will panic() - * later when spare is reserved. - */ - if (err != H_SUCCESS) { - printk(KERN_ERR "%s: unable to determine system IO "\ - "entitlement. (%d)\n", __func__, err); - vio_cmo.entitled = 0; - } else { - vio_cmo.entitled = mpp_data.entitled_mem; - } - - /* Set reservation and check against entitlement */ - vio_cmo.spare = VIO_CMO_MIN_ENT; - vio_cmo.reserve.size = vio_cmo.spare; - vio_cmo.reserve.size += (vio_cmo_num_OF_devs() * - VIO_CMO_MIN_ENT); - if (vio_cmo.reserve.size > vio_cmo.entitled) { - printk(KERN_ERR "%s: insufficient system entitlement\n", - __func__); - panic("%s: Insufficient system entitlement", __func__); - } - - /* Set the remaining accounting variables */ - vio_cmo.excess.size = vio_cmo.entitled - vio_cmo.reserve.size; - vio_cmo.excess.free = vio_cmo.excess.size; - vio_cmo.min = vio_cmo.reserve.size; - vio_cmo.desired = vio_cmo.reserve.size; -} - -/* sysfs device functions and data structures for CMO */ - -#define viodev_cmo_rd_attr(name) \ -static ssize_t viodev_cmo_##name##_show(struct device *dev, \ - struct device_attribute *attr, \ - char *buf) \ -{ \ - return sprintf(buf, "%lu\n", to_vio_dev(dev)->cmo.name); \ -} - -static ssize_t viodev_cmo_allocs_failed_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct vio_dev *viodev = to_vio_dev(dev); - return sprintf(buf, "%d\n", atomic_read(&viodev->cmo.allocs_failed)); -} - -static ssize_t viodev_cmo_allocs_failed_reset(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct vio_dev *viodev = to_vio_dev(dev); - atomic_set(&viodev->cmo.allocs_failed, 0); - return count; -} - -static ssize_t viodev_cmo_desired_set(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct vio_dev *viodev = to_vio_dev(dev); - size_t new_desired; - int ret; - - ret = kstrtoul(buf, 10, &new_desired); - if (ret) - return ret; - - vio_cmo_set_dev_desired(viodev, new_desired); - return count; -} - -viodev_cmo_rd_attr(desired); -viodev_cmo_rd_attr(entitled); -viodev_cmo_rd_attr(allocated); - -static ssize_t name_show(struct device *, struct device_attribute *, char *); -static ssize_t devspec_show(struct device *, struct device_attribute *, char *); -static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, - char *buf); -static struct device_attribute vio_cmo_dev_attrs[] = { - __ATTR_RO(name), - __ATTR_RO(devspec), - __ATTR_RO(modalias), - __ATTR(cmo_desired, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH, - viodev_cmo_desired_show, viodev_cmo_desired_set), - __ATTR(cmo_entitled, S_IRUGO, viodev_cmo_entitled_show, NULL), - __ATTR(cmo_allocated, S_IRUGO, viodev_cmo_allocated_show, NULL), - __ATTR(cmo_allocs_failed, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH, - viodev_cmo_allocs_failed_show, viodev_cmo_allocs_failed_reset), - __ATTR_NULL -}; - -/* sysfs bus functions and data structures for CMO */ - -#define viobus_cmo_rd_attr(name) \ -static ssize_t cmo_##name##_show(struct bus_type *bt, char *buf) \ -{ \ - return sprintf(buf, "%lu\n", vio_cmo.name); \ -} \ -static BUS_ATTR_RO(cmo_##name) - -#define viobus_cmo_pool_rd_attr(name, var) \ -static ssize_t \ -cmo_##name##_##var##_show(struct bus_type *bt, char *buf) \ -{ \ - return sprintf(buf, "%lu\n", vio_cmo.name.var); \ -} \ -static BUS_ATTR_RO(cmo_##name##_##var) - -viobus_cmo_rd_attr(entitled); -viobus_cmo_rd_attr(spare); -viobus_cmo_rd_attr(min); -viobus_cmo_rd_attr(desired); -viobus_cmo_rd_attr(curr); -viobus_cmo_pool_rd_attr(reserve, size); -viobus_cmo_pool_rd_attr(excess, size); -viobus_cmo_pool_rd_attr(excess, free); - -static ssize_t cmo_high_show(struct bus_type *bt, char *buf) -{ - return sprintf(buf, "%lu\n", vio_cmo.high); -} - -static ssize_t cmo_high_store(struct bus_type *bt, const char *buf, - size_t count) -{ - unsigned long flags; - - spin_lock_irqsave(&vio_cmo.lock, flags); - vio_cmo.high = vio_cmo.curr; - spin_unlock_irqrestore(&vio_cmo.lock, flags); - - return count; -} -static BUS_ATTR_RW(cmo_high); - -static struct attribute *vio_bus_attrs[] = { - &bus_attr_cmo_entitled.attr, - &bus_attr_cmo_spare.attr, - &bus_attr_cmo_min.attr, - &bus_attr_cmo_desired.attr, - &bus_attr_cmo_curr.attr, - &bus_attr_cmo_high.attr, - &bus_attr_cmo_reserve_size.attr, - &bus_attr_cmo_excess_size.attr, - &bus_attr_cmo_excess_free.attr, - NULL, -}; -ATTRIBUTE_GROUPS(vio_bus); - -static void vio_cmo_sysfs_init(void) -{ - vio_bus_type.dev_attrs = vio_cmo_dev_attrs; - vio_bus_type.bus_groups = vio_bus_groups; -} -#else /* CONFIG_PPC_SMLPAR */ -int vio_cmo_entitlement_update(size_t new_entitlement) { return 0; } -void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired) {} -static int vio_cmo_bus_probe(struct vio_dev *viodev) { return 0; } -static void vio_cmo_bus_remove(struct vio_dev *viodev) {} -static void vio_cmo_set_dma_ops(struct vio_dev *viodev) {} -static void vio_cmo_bus_init(void) {} -static void vio_cmo_sysfs_init(void) { } -#endif /* CONFIG_PPC_SMLPAR */ -EXPORT_SYMBOL(vio_cmo_entitlement_update); -EXPORT_SYMBOL(vio_cmo_set_dev_desired); - - -/* - * Platform Facilities Option (PFO) support - */ - -/** - * vio_h_cop_sync - Perform a synchronous PFO co-processor operation - * - * @vdev - Pointer to a struct vio_dev for device - * @op - Pointer to a struct vio_pfo_op for the operation parameters - * - * Calls the hypervisor to synchronously perform the PFO operation - * described in @op. In the case of a busy response from the hypervisor, - * the operation will be re-submitted indefinitely unless a non-zero timeout - * is specified or an error occurs. The timeout places a limit on when to - * stop re-submitting a operation, the total time can be exceeded if an - * operation is in progress. - * - * If op->hcall_ret is not NULL, this will be set to the return from the - * last h_cop_op call or it will be 0 if an error not involving the h_call - * was encountered. - * - * Returns: - * 0 on success, - * -EINVAL if the h_call fails due to an invalid parameter, - * -E2BIG if the h_call can not be performed synchronously, - * -EBUSY if a timeout is specified and has elapsed, - * -EACCES if the memory area for data/status has been rescinded, or - * -EPERM if a hardware fault has been indicated - */ -int vio_h_cop_sync(struct vio_dev *vdev, struct vio_pfo_op *op) -{ - struct device *dev = &vdev->dev; - unsigned long deadline = 0; - long hret = 0; - int ret = 0; - - if (op->timeout) - deadline = jiffies + msecs_to_jiffies(op->timeout); - - while (true) { - hret = plpar_hcall_norets(H_COP, op->flags, - vdev->resource_id, - op->in, op->inlen, op->out, - op->outlen, op->csbcpb); - - if (hret == H_SUCCESS || - (hret != H_NOT_ENOUGH_RESOURCES && - hret != H_BUSY && hret != H_RESOURCE) || - (op->timeout && time_after(deadline, jiffies))) - break; - - dev_dbg(dev, "%s: hcall ret(%ld), retrying.\n", __func__, hret); - } - - switch (hret) { - case H_SUCCESS: - ret = 0; - break; - case H_OP_MODE: - case H_TOO_BIG: - ret = -E2BIG; - break; - case H_RESCINDED: - ret = -EACCES; - break; - case H_HARDWARE: - ret = -EPERM; - break; - case H_NOT_ENOUGH_RESOURCES: - case H_RESOURCE: - case H_BUSY: - ret = -EBUSY; - break; - default: - ret = -EINVAL; - break; - } - - if (ret) - dev_dbg(dev, "%s: Sync h_cop_op failure (ret:%d) (hret:%ld)\n", - __func__, ret, hret); - - op->hcall_err = hret; - return ret; -} -EXPORT_SYMBOL(vio_h_cop_sync); - -static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev) -{ - const __be32 *dma_window; - struct iommu_table *tbl; - unsigned long offset, size; - - dma_window = of_get_property(dev->dev.of_node, - "ibm,my-dma-window", NULL); - if (!dma_window) - return NULL; - - tbl = kzalloc(sizeof(*tbl), GFP_KERNEL); - if (tbl == NULL) - return NULL; - - of_parse_dma_window(dev->dev.of_node, dma_window, - &tbl->it_index, &offset, &size); - - /* TCE table size - measured in tce entries */ - tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K; - tbl->it_size = size >> tbl->it_page_shift; - /* offset for VIO should always be 0 */ - tbl->it_offset = offset >> tbl->it_page_shift; - tbl->it_busno = 0; - tbl->it_type = TCE_VB; - tbl->it_blocksize = 16; - - if (firmware_has_feature(FW_FEATURE_LPAR)) - tbl->it_ops = &iommu_table_lpar_multi_ops; - else - tbl->it_ops = &iommu_table_pseries_ops; - - return iommu_init_table(tbl, -1); -} - -/** - * vio_match_device: - Tell if a VIO device has a matching - * VIO device id structure. - * @ids: array of VIO device id structures to search in - * @dev: the VIO device structure to match against - * - * Used by a driver to check whether a VIO device present in the - * system is in its list of supported devices. Returns the matching - * vio_device_id structure or NULL if there is no match. - */ -static const struct vio_device_id *vio_match_device( - const struct vio_device_id *ids, const struct vio_dev *dev) -{ - while (ids->type[0] != '\0') { - if ((strncmp(dev->type, ids->type, strlen(ids->type)) == 0) && - of_device_is_compatible(dev->dev.of_node, - ids->compat)) - return ids; - ids++; - } - return NULL; -} - -/* - * Convert from struct device to struct vio_dev and pass to driver. - * dev->driver has already been set by generic code because vio_bus_match - * succeeded. - */ -static int vio_bus_probe(struct device *dev) -{ - struct vio_dev *viodev = to_vio_dev(dev); - struct vio_driver *viodrv = to_vio_driver(dev->driver); - const struct vio_device_id *id; - int error = -ENODEV; - - if (!viodrv->probe) - return error; - - id = vio_match_device(viodrv->id_table, viodev); - if (id) { - memset(&viodev->cmo, 0, sizeof(viodev->cmo)); - if (firmware_has_feature(FW_FEATURE_CMO)) { - error = vio_cmo_bus_probe(viodev); - if (error) - return error; - } - error = viodrv->probe(viodev, id); - if (error && firmware_has_feature(FW_FEATURE_CMO)) - vio_cmo_bus_remove(viodev); - } - - return error; -} - -/* convert from struct device to struct vio_dev and pass to driver. */ -static int vio_bus_remove(struct device *dev) -{ - struct vio_dev *viodev = to_vio_dev(dev); - struct vio_driver *viodrv = to_vio_driver(dev->driver); - struct device *devptr; - int ret = 1; - - /* - * Hold a reference to the device after the remove function is called - * to allow for CMO accounting cleanup for the device. - */ - devptr = get_device(dev); - - if (viodrv->remove) - ret = viodrv->remove(viodev); - - if (!ret && firmware_has_feature(FW_FEATURE_CMO)) - vio_cmo_bus_remove(viodev); - - put_device(devptr); - return ret; -} - -/** - * vio_register_driver: - Register a new vio driver - * @viodrv: The vio_driver structure to be registered. - */ -int __vio_register_driver(struct vio_driver *viodrv, struct module *owner, - const char *mod_name) -{ - pr_debug("%s: driver %s registering\n", __func__, viodrv->name); - - /* fill in 'struct driver' fields */ - viodrv->driver.name = viodrv->name; - viodrv->driver.pm = viodrv->pm; - viodrv->driver.bus = &vio_bus_type; - viodrv->driver.owner = owner; - viodrv->driver.mod_name = mod_name; - - return driver_register(&viodrv->driver); -} -EXPORT_SYMBOL(__vio_register_driver); - -/** - * vio_unregister_driver - Remove registration of vio driver. - * @viodrv: The vio_driver struct to be removed form registration - */ -void vio_unregister_driver(struct vio_driver *viodrv) -{ - driver_unregister(&viodrv->driver); -} -EXPORT_SYMBOL(vio_unregister_driver); - -/* vio_dev refcount hit 0 */ -static void vio_dev_release(struct device *dev) -{ - struct iommu_table *tbl = get_iommu_table_base(dev); - - if (tbl) - iommu_free_table(tbl, of_node_full_name(dev->of_node)); - of_node_put(dev->of_node); - kfree(to_vio_dev(dev)); -} - -/** - * vio_register_device_node: - Register a new vio device. - * @of_node: The OF node for this device. - * - * Creates and initializes a vio_dev structure from the data in - * of_node and adds it to the list of virtual devices. - * Returns a pointer to the created vio_dev or NULL if node has - * NULL device_type or compatible fields. - */ -struct vio_dev *vio_register_device_node(struct device_node *of_node) -{ - struct vio_dev *viodev; - struct device_node *parent_node; - const __be32 *prop; - enum vio_dev_family family; - const char *of_node_name = of_node->name ? of_node->name : "<unknown>"; - - /* - * Determine if this node is a under the /vdevice node or under the - * /ibm,platform-facilities node. This decides the device's family. - */ - parent_node = of_get_parent(of_node); - if (parent_node) { - if (!strcmp(parent_node->full_name, "/ibm,platform-facilities")) - family = PFO; - else if (!strcmp(parent_node->full_name, "/vdevice")) - family = VDEVICE; - else { - pr_warn("%s: parent(%s) of %s not recognized.\n", - __func__, - parent_node->full_name, - of_node_name); - of_node_put(parent_node); - return NULL; - } - of_node_put(parent_node); - } else { - pr_warn("%s: could not determine the parent of node %s.\n", - __func__, of_node_name); - return NULL; - } - - if (family == PFO) { - if (of_get_property(of_node, "interrupt-controller", NULL)) { - pr_debug("%s: Skipping the interrupt controller %s.\n", - __func__, of_node_name); - return NULL; - } - } - - /* allocate a vio_dev for this node */ - viodev = kzalloc(sizeof(struct vio_dev), GFP_KERNEL); - if (viodev == NULL) { - pr_warn("%s: allocation failure for VIO device.\n", __func__); - return NULL; - } - - /* we need the 'device_type' property, in order to match with drivers */ - viodev->family = family; - if (viodev->family == VDEVICE) { - unsigned int unit_address; - - if (of_node->type != NULL) - viodev->type = of_node->type; - else { - pr_warn("%s: node %s is missing the 'device_type' " - "property.\n", __func__, of_node_name); - goto out; - } - - prop = of_get_property(of_node, "reg", NULL); - if (prop == NULL) { - pr_warn("%s: node %s missing 'reg'\n", - __func__, of_node_name); - goto out; - } - unit_address = of_read_number(prop, 1); - dev_set_name(&viodev->dev, "%x", unit_address); - viodev->irq = irq_of_parse_and_map(of_node, 0); - viodev->unit_address = unit_address; - } else { - /* PFO devices need their resource_id for submitting COP_OPs - * This is an optional field for devices, but is required when - * performing synchronous ops */ - prop = of_get_property(of_node, "ibm,resource-id", NULL); - if (prop != NULL) - viodev->resource_id = of_read_number(prop, 1); - - dev_set_name(&viodev->dev, "%s", of_node_name); - viodev->type = of_node_name; - viodev->irq = 0; - } - - viodev->name = of_node->name; - viodev->dev.of_node = of_node_get(of_node); - - set_dev_node(&viodev->dev, of_node_to_nid(of_node)); - - /* init generic 'struct device' fields: */ - viodev->dev.parent = &vio_bus_device.dev; - viodev->dev.bus = &vio_bus_type; - viodev->dev.release = vio_dev_release; - - if (of_get_property(viodev->dev.of_node, "ibm,my-dma-window", NULL)) { - if (firmware_has_feature(FW_FEATURE_CMO)) - vio_cmo_set_dma_ops(viodev); - else - set_dma_ops(&viodev->dev, &dma_iommu_ops); - - set_iommu_table_base(&viodev->dev, - vio_build_iommu_table(viodev)); - - /* needed to ensure proper operation of coherent allocations - * later, in case driver doesn't set it explicitly */ - viodev->dev.coherent_dma_mask = DMA_BIT_MASK(64); - viodev->dev.dma_mask = &viodev->dev.coherent_dma_mask; - } - - /* register with generic device framework */ - if (device_register(&viodev->dev)) { - printk(KERN_ERR "%s: failed to register device %s\n", - __func__, dev_name(&viodev->dev)); - put_device(&viodev->dev); - return NULL; - } - - return viodev; - -out: /* Use this exit point for any return prior to device_register */ - kfree(viodev); - - return NULL; -} -EXPORT_SYMBOL(vio_register_device_node); - -/* - * vio_bus_scan_for_devices - Scan OF and register each child device - * @root_name - OF node name for the root of the subtree to search. - * This must be non-NULL - * - * Starting from the root node provide, register the device node for - * each child beneath the root. - */ -static void vio_bus_scan_register_devices(char *root_name) -{ - struct device_node *node_root, *node_child; - - if (!root_name) - return; - - node_root = of_find_node_by_name(NULL, root_name); - if (node_root) { - - /* - * Create struct vio_devices for each virtual device in - * the device tree. Drivers will associate with them later. - */ - node_child = of_get_next_child(node_root, NULL); - while (node_child) { - vio_register_device_node(node_child); - node_child = of_get_next_child(node_root, node_child); - } - of_node_put(node_root); - } -} - -/** - * vio_bus_init: - Initialize the virtual IO bus - */ -static int __init vio_bus_init(void) -{ - int err; - - if (firmware_has_feature(FW_FEATURE_CMO)) - vio_cmo_sysfs_init(); - - err = bus_register(&vio_bus_type); - if (err) { - printk(KERN_ERR "failed to register VIO bus\n"); - return err; - } - - /* - * The fake parent of all vio devices, just to give us - * a nice directory - */ - err = device_register(&vio_bus_device.dev); - if (err) { - printk(KERN_WARNING "%s: device_register returned %i\n", - __func__, err); - return err; - } - - if (firmware_has_feature(FW_FEATURE_CMO)) - vio_cmo_bus_init(); - - return 0; -} -postcore_initcall(vio_bus_init); - -static int __init vio_device_init(void) -{ - vio_bus_scan_register_devices("vdevice"); - vio_bus_scan_register_devices("ibm,platform-facilities"); - - return 0; -} -device_initcall(vio_device_init); - -static ssize_t name_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - return sprintf(buf, "%s\n", to_vio_dev(dev)->name); -} - -static ssize_t devspec_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct device_node *of_node = dev->of_node; - - return sprintf(buf, "%s\n", of_node_full_name(of_node)); -} - -static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - const struct vio_dev *vio_dev = to_vio_dev(dev); - struct device_node *dn; - const char *cp; - - dn = dev->of_node; - if (!dn) { - strcpy(buf, "\n"); - return strlen(buf); - } - cp = of_get_property(dn, "compatible", NULL); - if (!cp) { - strcpy(buf, "\n"); - return strlen(buf); - } - - return sprintf(buf, "vio:T%sS%s\n", vio_dev->type, cp); -} - -static struct device_attribute vio_dev_attrs[] = { - __ATTR_RO(name), - __ATTR_RO(devspec), - __ATTR_RO(modalias), - __ATTR_NULL -}; - -void vio_unregister_device(struct vio_dev *viodev) -{ - device_unregister(&viodev->dev); -} -EXPORT_SYMBOL(vio_unregister_device); - -static int vio_bus_match(struct device *dev, struct device_driver *drv) -{ - const struct vio_dev *vio_dev = to_vio_dev(dev); - struct vio_driver *vio_drv = to_vio_driver(drv); - const struct vio_device_id *ids = vio_drv->id_table; - - return (ids != NULL) && (vio_match_device(ids, vio_dev) != NULL); -} - -static int vio_hotplug(struct device *dev, struct kobj_uevent_env *env) -{ - const struct vio_dev *vio_dev = to_vio_dev(dev); - struct device_node *dn; - const char *cp; - - dn = dev->of_node; - if (!dn) - return -ENODEV; - cp = of_get_property(dn, "compatible", NULL); - if (!cp) - return -ENODEV; - - add_uevent_var(env, "MODALIAS=vio:T%sS%s", vio_dev->type, cp); - return 0; -} - -struct bus_type vio_bus_type = { - .name = "vio", - .dev_attrs = vio_dev_attrs, - .uevent = vio_hotplug, - .match = vio_bus_match, - .probe = vio_bus_probe, - .remove = vio_bus_remove, -}; - -/** - * vio_get_attribute: - get attribute for virtual device - * @vdev: The vio device to get property. - * @which: The property/attribute to be extracted. - * @length: Pointer to length of returned data size (unused if NULL). - * - * Calls prom.c's of_get_property() to return the value of the - * attribute specified by @which -*/ -const void *vio_get_attribute(struct vio_dev *vdev, char *which, int *length) -{ - return of_get_property(vdev->dev.of_node, which, length); -} -EXPORT_SYMBOL(vio_get_attribute); - -#ifdef CONFIG_PPC_PSERIES -/* vio_find_name() - internal because only vio.c knows how we formatted the - * kobject name - */ -static struct vio_dev *vio_find_name(const char *name) -{ - struct device *found; - - found = bus_find_device_by_name(&vio_bus_type, NULL, name); - if (!found) - return NULL; - - return to_vio_dev(found); -} - -/** - * vio_find_node - find an already-registered vio_dev - * @vnode: device_node of the virtual device we're looking for - */ -struct vio_dev *vio_find_node(struct device_node *vnode) -{ - char kobj_name[20]; - struct device_node *vnode_parent; - const char *dev_type; - - vnode_parent = of_get_parent(vnode); - if (!vnode_parent) - return NULL; - - dev_type = of_get_property(vnode_parent, "device_type", NULL); - of_node_put(vnode_parent); - if (!dev_type) - return NULL; - - /* construct the kobject name from the device node */ - if (!strcmp(dev_type, "vdevice")) { - const __be32 *prop; - - prop = of_get_property(vnode, "reg", NULL); - if (!prop) - return NULL; - snprintf(kobj_name, sizeof(kobj_name), "%x", - (uint32_t)of_read_number(prop, 1)); - } else if (!strcmp(dev_type, "ibm,platform-facilities")) - snprintf(kobj_name, sizeof(kobj_name), "%s", vnode->name); - else - return NULL; - - return vio_find_name(kobj_name); -} -EXPORT_SYMBOL(vio_find_node); - -int vio_enable_interrupts(struct vio_dev *dev) -{ - int rc = h_vio_signal(dev->unit_address, VIO_IRQ_ENABLE); - if (rc != H_SUCCESS) - printk(KERN_ERR "vio: Error 0x%x enabling interrupts\n", rc); - return rc; -} -EXPORT_SYMBOL(vio_enable_interrupts); - -int vio_disable_interrupts(struct vio_dev *dev) -{ - int rc = h_vio_signal(dev->unit_address, VIO_IRQ_DISABLE); - if (rc != H_SUCCESS) - printk(KERN_ERR "vio: Error 0x%x disabling interrupts\n", rc); - return rc; -} -EXPORT_SYMBOL(vio_disable_interrupts); -#endif /* CONFIG_PPC_PSERIES */ |