summaryrefslogtreecommitdiff
path: root/arch/powerpc/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r--arch/powerpc/kernel/Makefile5
-rw-r--r--arch/powerpc/kernel/cpu_setup_power.S2
-rw-r--r--arch/powerpc/kernel/cputable.c4
-rw-r--r--arch/powerpc/kernel/crash.c2
-rw-r--r--arch/powerpc/kernel/eeh_cache.c8
-rw-r--r--arch/powerpc/kernel/eeh_driver.c9
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S73
-rw-r--r--arch/powerpc/kernel/ftrace.c35
-rw-r--r--arch/powerpc/kernel/head_64.S5
-rw-r--r--arch/powerpc/kernel/idle_book3s.S (renamed from arch/powerpc/kernel/idle_power7.S)371
-rw-r--r--arch/powerpc/kernel/irq.c17
-rw-r--r--arch/powerpc/kernel/machine_kexec_64.c9
-rw-r--r--arch/powerpc/kernel/misc_64.S2
-rw-r--r--arch/powerpc/kernel/module_64.c5
-rw-r--r--arch/powerpc/kernel/pci-common.c6
-rw-r--r--arch/powerpc/kernel/pci_64.c1
-rw-r--r--arch/powerpc/kernel/process.c10
-rw-r--r--arch/powerpc/kernel/prom.c25
-rw-r--r--arch/powerpc/kernel/rtas.c6
-rw-r--r--arch/powerpc/kernel/rtasd.c22
-rw-r--r--arch/powerpc/kernel/setup-common.c189
-rw-r--r--arch/powerpc/kernel/setup.h58
-rw-r--r--arch/powerpc/kernel/setup_32.c113
-rw-r--r--arch/powerpc/kernel/setup_64.c285
-rw-r--r--arch/powerpc/kernel/tm.S64
-rw-r--r--arch/powerpc/kernel/vector.S9
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S2
27 files changed, 797 insertions, 540 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 2da380fcc34c..fe4c075bcf50 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -42,12 +42,11 @@ obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o
obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o
obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o
-obj64-$(CONFIG_RELOCATABLE) += reloc_64.o
obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o
obj-$(CONFIG_PPC64) += vdso64/
obj-$(CONFIG_ALTIVEC) += vecemu.o
obj-$(CONFIG_PPC_970_NAP) += idle_power4.o
-obj-$(CONFIG_PPC_P7_NAP) += idle_power7.o
+obj-$(CONFIG_PPC_P7_NAP) += idle_book3s.o
procfs-y := proc_powerpc.o
obj-$(CONFIG_PROC_FS) += $(procfs-y)
rtaspci-$(CONFIG_PPC64)-$(CONFIG_PCI) := rtas_pci.o
@@ -87,7 +86,7 @@ extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o
extra-$(CONFIG_8xx) := head_8xx.o
extra-y += vmlinux.lds
-obj-$(CONFIG_RELOCATABLE_PPC32) += reloc_32.o
+obj-$(CONFIG_RELOCATABLE) += reloc_$(CONFIG_WORD_SIZE).o
obj-$(CONFIG_PPC32) += entry_32.o setup_32.o
obj-$(CONFIG_PPC64) += dma-iommu.o iommu.o
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index ec8a228df2f6..52ff3f025437 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -99,6 +99,7 @@ _GLOBAL(__setup_cpu_power9)
mtspr SPRN_LPID,r0
mfspr r3,SPRN_LPCR
ori r3, r3, LPCR_PECEDH
+ ori r3, r3, LPCR_HVICE
bl __init_LPCR
bl __init_HFSCR
bl __init_tlb_power9
@@ -118,6 +119,7 @@ _GLOBAL(__restore_cpu_power9)
mtspr SPRN_LPID,r0
mfspr r3,SPRN_LPCR
ori r3, r3, LPCR_PECEDH
+ ori r3, r3, LPCR_HVICE
bl __init_LPCR
bl __init_HFSCR
bl __init_tlb_power9
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index eeeacf6235a3..d81f826d1029 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -137,7 +137,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_name = "POWER4 (gp)",
.cpu_features = CPU_FTRS_POWER4,
.cpu_user_features = COMMON_USER_POWER4,
- .mmu_features = MMU_FTRS_POWER4,
+ .mmu_features = MMU_FTRS_POWER4 | MMU_FTR_TLBIE_CROP_VA,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 8,
@@ -152,7 +152,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.cpu_name = "POWER4+ (gq)",
.cpu_features = CPU_FTRS_POWER4,
.cpu_user_features = COMMON_USER_POWER4,
- .mmu_features = MMU_FTRS_POWER4,
+ .mmu_features = MMU_FTRS_POWER4 | MMU_FTR_TLBIE_CROP_VA,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 8,
diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
index 888bdf198c3e..47b63de81f9b 100644
--- a/arch/powerpc/kernel/crash.c
+++ b/arch/powerpc/kernel/crash.c
@@ -351,7 +351,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs)
old_handler = __debugger_fault_handler;
__debugger_fault_handler = handle_fault;
crash_shutdown_cpu = smp_processor_id();
- for (i = 0; crash_shutdown_handles[i] && i < CRASH_HANDLER_MAX; i++) {
+ for (i = 0; i < CRASH_HANDLER_MAX && crash_shutdown_handles[i]; i++) {
if (setjmp(crash_shutdown_buf) == 0) {
/*
* Insert syncs and delay to ensure
diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c
index ddbcfab7efdf..d4cc26618809 100644
--- a/arch/powerpc/kernel/eeh_cache.c
+++ b/arch/powerpc/kernel/eeh_cache.c
@@ -114,9 +114,9 @@ static void eeh_addr_cache_print(struct pci_io_addr_cache *cache)
while (n) {
struct pci_io_addr_range *piar;
piar = rb_entry(n, struct pci_io_addr_range, rb_node);
- pr_debug("PCI: %s addr range %d [%lx-%lx]: %s\n",
+ pr_debug("PCI: %s addr range %d [%pap-%pap]: %s\n",
(piar->flags & IORESOURCE_IO) ? "i/o" : "mem", cnt,
- piar->addr_lo, piar->addr_hi, pci_name(piar->pcidev));
+ &piar->addr_lo, &piar->addr_hi, pci_name(piar->pcidev));
cnt++;
n = rb_next(n);
}
@@ -159,8 +159,8 @@ eeh_addr_cache_insert(struct pci_dev *dev, resource_size_t alo,
piar->flags = flags;
#ifdef DEBUG
- pr_debug("PIAR: insert range=[%lx:%lx] dev=%s\n",
- alo, ahi, pci_name(dev));
+ pr_debug("PIAR: insert range=[%pap:%pap] dev=%s\n",
+ &alo, &ahi, pci_name(dev));
#endif
rb_link_node(&piar->rb_node, parent, p);
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 389b0d3988dc..5f36e8a70daa 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -642,13 +642,12 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
if (pe->type & EEH_PE_VF) {
eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
} else {
- eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
pci_lock_rescan_remove();
pci_hp_remove_devices(bus);
pci_unlock_rescan_remove();
}
} else if (frozen_bus) {
- eeh_pe_dev_traverse(pe, eeh_rmv_device, &rmv_data);
+ eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data);
}
/*
@@ -692,10 +691,12 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
*/
edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
- if (pe->type & EEH_PE_VF)
+ if (pe->type & EEH_PE_VF) {
eeh_add_virt_device(edev, NULL);
- else
+ } else {
+ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
pci_hp_add_devices(bus);
+ }
} else if (frozen_bus && rmv_data->removed) {
pr_info("EEH: Sleep 5s ahead of partial hotplug\n");
ssleep(5);
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 4c9440629128..6200e4925d26 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -107,25 +107,9 @@ BEGIN_FTR_SECTION
beq 9f
cmpwi cr3,r13,2
-
- /*
- * Check if last bit of HSPGR0 is set. This indicates whether we are
- * waking up from winkle.
- */
GET_PACA(r13)
- clrldi r5,r13,63
- clrrdi r13,r13,1
- cmpwi cr4,r5,1
- mtspr SPRN_HSPRG0,r13
-
- lbz r0,PACA_THREAD_IDLE_STATE(r13)
- cmpwi cr2,r0,PNV_THREAD_NAP
- bgt cr2,8f /* Either sleep or Winkle */
-
- /* Waking up from nap should not cause hypervisor state loss */
- bgt cr3,.
+ bl pnv_restore_hyp_resource
- /* Waking up from nap */
li r0,PNV_THREAD_RUNNING
stb r0,PACA_THREAD_IDLE_STATE(r13) /* Clear thread state */
@@ -143,13 +127,9 @@ BEGIN_FTR_SECTION
/* Return SRR1 from power7_nap() */
mfspr r3,SPRN_SRR1
- beq cr3,2f
- b power7_wakeup_noloss
-2: b power7_wakeup_loss
-
- /* Fast Sleep wakeup on PowerNV */
-8: GET_PACA(r13)
- b power7_wakeup_tb_loss
+ blt cr3,2f
+ b pnv_wakeup_loss
+2: b pnv_wakeup_noloss
9:
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
@@ -351,6 +331,12 @@ hv_doorbell_trampoline:
EXCEPTION_PROLOG_0(PACA_EXGEN)
b h_doorbell_hv
+ . = 0xea0
+hv_virt_irq_trampoline:
+ SET_SCRATCH0(r13)
+ EXCEPTION_PROLOG_0(PACA_EXGEN)
+ b h_virt_irq_hv
+
/* We need to deal with the Altivec unavailable exception
* here which is at 0xf20, thus in the middle of the
* prolog code of the PerformanceMonitor one. A little
@@ -601,6 +587,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
MASKABLE_EXCEPTION_HV_OOL(0xe82, h_doorbell)
KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe82)
+ MASKABLE_EXCEPTION_HV_OOL(0xea2, h_virt_irq)
+ KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xea2)
+
/* moved from 0xf00 */
STD_EXCEPTION_PSERIES_OOL(0xf00, performance_monitor)
KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xf00)
@@ -680,6 +669,8 @@ _GLOBAL(__replay_interrupt)
BEGIN_FTR_SECTION
cmpwi r3,0xe80
beq h_doorbell_common
+ cmpwi r3,0xea0
+ beq h_virt_irq_common
FTR_SECTION_ELSE
cmpwi r3,0xa00
beq doorbell_super_common
@@ -754,6 +745,7 @@ kvmppc_skip_Hinterrupt:
#else
STD_EXCEPTION_COMMON_ASYNC(0xe80, h_doorbell, unknown_exception)
#endif
+ STD_EXCEPTION_COMMON_ASYNC(0xea0, h_virt_irq, do_IRQ)
STD_EXCEPTION_COMMON_ASYNC(0xf00, performance_monitor, performance_monitor_exception)
STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, instruction_breakpoint_exception)
STD_EXCEPTION_COMMON(0x1502, denorm, unknown_exception)
@@ -762,11 +754,6 @@ kvmppc_skip_Hinterrupt:
#else
STD_EXCEPTION_COMMON(0x1700, altivec_assist, unknown_exception)
#endif
-#ifdef CONFIG_CBE_RAS
- STD_EXCEPTION_COMMON(0x1200, cbe_system_error, cbe_system_error_exception)
- STD_EXCEPTION_COMMON(0x1600, cbe_maintenance, cbe_maintenance_exception)
- STD_EXCEPTION_COMMON(0x1800, cbe_thermal, cbe_thermal_exception)
-#endif /* CONFIG_CBE_RAS */
/*
* Relocation-on interrupts: A subset of the interrupts can be delivered
@@ -877,6 +864,12 @@ h_doorbell_relon_trampoline:
EXCEPTION_PROLOG_0(PACA_EXGEN)
b h_doorbell_relon_hv
+ . = 0x4ea0
+h_virt_irq_relon_trampoline:
+ SET_SCRATCH0(r13)
+ EXCEPTION_PROLOG_0(PACA_EXGEN)
+ b h_virt_irq_relon_hv
+
. = 0x4f00
performance_monitor_relon_pseries_trampoline:
SET_SCRATCH0(r13)
@@ -1131,12 +1124,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
bl vsx_unavailable_exception
b ret_from_except
- STD_EXCEPTION_COMMON(0xf60, facility_unavailable, facility_unavailable_exception)
- STD_EXCEPTION_COMMON(0xf80, hv_facility_unavailable, facility_unavailable_exception)
-
/* Equivalents to the above handlers for relocation-on interrupt vectors */
STD_RELON_EXCEPTION_HV_OOL(0xe40, emulation_assist)
MASKABLE_RELON_EXCEPTION_HV_OOL(0xe80, h_doorbell)
+ MASKABLE_RELON_EXCEPTION_HV_OOL(0xea0, h_virt_irq)
STD_RELON_EXCEPTION_PSERIES_OOL(0xf00, performance_monitor)
STD_RELON_EXCEPTION_PSERIES_OOL(0xf20, altivec_unavailable)
@@ -1170,6 +1161,15 @@ fwnmi_data_area:
. = 0x8000
#endif /* defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */
+ STD_EXCEPTION_COMMON(0xf60, facility_unavailable, facility_unavailable_exception)
+ STD_EXCEPTION_COMMON(0xf80, hv_facility_unavailable, facility_unavailable_exception)
+
+#ifdef CONFIG_CBE_RAS
+ STD_EXCEPTION_COMMON(0x1200, cbe_system_error, cbe_system_error_exception)
+ STD_EXCEPTION_COMMON(0x1600, cbe_maintenance, cbe_maintenance_exception)
+ STD_EXCEPTION_COMMON(0x1800, cbe_thermal, cbe_thermal_exception)
+#endif /* CONFIG_CBE_RAS */
+
.globl hmi_exception_early
hmi_exception_early:
EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0xe60)
@@ -1289,7 +1289,7 @@ machine_check_handle_early:
GET_PACA(r13)
ld r1,PACAR1(r13)
li r3,PNV_THREAD_NAP
- b power7_enter_nap_mode
+ b pnv_enter_arch207_idle_mode
4:
#endif
/*
@@ -1399,11 +1399,12 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_RADIX)
lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
mtlr r10
-BEGIN_MMU_FTR_SECTION
- b 2f
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_RADIX)
andi. r10,r12,MSR_RI /* check for unrecoverable exception */
+BEGIN_MMU_FTR_SECTION
beq- 2f
+FTR_SECTION_ELSE
+ b 2f
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_RADIX)
.machine push
.machine "power4"
diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
index 7af6c4de044b..cc52d9795f88 100644
--- a/arch/powerpc/kernel/ftrace.c
+++ b/arch/powerpc/kernel/ftrace.c
@@ -144,6 +144,21 @@ __ftrace_make_nop(struct module *mod,
return -EINVAL;
}
+#ifdef CC_USING_MPROFILE_KERNEL
+ /* When using -mkernel_profile there is no load to jump over */
+ pop = PPC_INST_NOP;
+
+ if (probe_kernel_read(&op, (void *)(ip - 4), 4)) {
+ pr_err("Fetching instruction at %lx failed.\n", ip - 4);
+ return -EFAULT;
+ }
+
+ /* We expect either a mflr r0, or a std r0, LRSAVE(r1) */
+ if (op != PPC_INST_MFLR && op != PPC_INST_STD_LR) {
+ pr_err("Unexpected instruction %08x around bl _mcount\n", op);
+ return -EINVAL;
+ }
+#else
/*
* Our original call site looks like:
*
@@ -170,24 +185,10 @@ __ftrace_make_nop(struct module *mod,
}
if (op != PPC_INST_LD_TOC) {
- unsigned int inst;
-
- if (probe_kernel_read(&inst, (void *)(ip - 4), 4)) {
- pr_err("Fetching instruction at %lx failed.\n", ip - 4);
- return -EFAULT;
- }
-
- /* We expect either a mlfr r0, or a std r0, LRSAVE(r1) */
- if (inst != PPC_INST_MFLR && inst != PPC_INST_STD_LR) {
- pr_err("Unexpected instructions around bl _mcount\n"
- "when enabling dynamic ftrace!\t"
- "(%08x,bl,%08x)\n", inst, op);
- return -EINVAL;
- }
-
- /* When using -mkernel_profile there is no load to jump over */
- pop = PPC_INST_NOP;
+ pr_err("Expected %08x found %08x\n", PPC_INST_LD_TOC, op);
+ return -EINVAL;
}
+#endif /* CC_USING_MPROFILE_KERNEL */
if (patch_instruction((unsigned int *)ip, pop)) {
pr_err("Patching NOP failed.\n");
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 064cd9397836..f765b0434731 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -941,7 +941,7 @@ start_here_multiplatform:
mtspr SPRN_SRR1,r4
RFI
b . /* prevent speculative execution */
-
+
/* This is where all platforms converge execution */
start_here_common:
@@ -951,9 +951,6 @@ start_here_common:
/* Load the TOC (virtual address) */
ld r2,PACATOC(r13)
- /* Do more system initializations in virtual mode */
- bl setup_system
-
/* Mark interrupts soft and hard disabled (they might be enabled
* in the PACA when doing hotplug)
*/
diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_book3s.S
index 470ceebd2d23..335eb6cedae5 100644
--- a/arch/powerpc/kernel/idle_power7.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -1,5 +1,6 @@
/*
- * This file contains the power_save function for Power7 CPUs.
+ * This file contains idle entry/exit functions for POWER7,
+ * POWER8 and POWER9 CPUs.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -20,6 +21,7 @@
#include <asm/opal.h>
#include <asm/cpuidle.h>
#include <asm/book3s/64/mmu-hash.h>
+#include <asm/mmu.h>
#undef DEBUG
@@ -36,6 +38,11 @@
#define _AMOR GPR9
#define _WORT GPR10
#define _WORC GPR11
+#define _PTCR GPR12
+
+#define PSSCR_HV_TEMPLATE PSSCR_ESL | PSSCR_EC | \
+ PSSCR_PSLL_MASK | PSSCR_TR_MASK | \
+ PSSCR_MTL_MASK
/* Idle state entry routines */
@@ -52,6 +59,45 @@
.text
/*
+ * Used by threads before entering deep idle states. Saves SPRs
+ * in interrupt stack frame
+ */
+save_sprs_to_stack:
+ /*
+ * Note all register i.e per-core, per-subcore or per-thread is saved
+ * here since any thread in the core might wake up first
+ */
+BEGIN_FTR_SECTION
+ mfspr r3,SPRN_PTCR
+ std r3,_PTCR(r1)
+ /*
+ * Note - SDR1 is dropped in Power ISA v3. Hence not restoring
+ * SDR1 here
+ */
+FTR_SECTION_ELSE
+ mfspr r3,SPRN_SDR1
+ std r3,_SDR1(r1)
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
+ mfspr r3,SPRN_RPR
+ std r3,_RPR(r1)
+ mfspr r3,SPRN_SPURR
+ std r3,_SPURR(r1)
+ mfspr r3,SPRN_PURR
+ std r3,_PURR(r1)
+ mfspr r3,SPRN_TSCR
+ std r3,_TSCR(r1)
+ mfspr r3,SPRN_DSCR
+ std r3,_DSCR(r1)
+ mfspr r3,SPRN_AMOR
+ std r3,_AMOR(r1)
+ mfspr r3,SPRN_WORT
+ std r3,_WORT(r1)
+ mfspr r3,SPRN_WORC
+ std r3,_WORC(r1)
+
+ blr
+
+/*
* Used by threads when the lock bit of core_idle_state is set.
* Threads will spin in HMT_LOW until the lock bit is cleared.
* r14 - pointer to core_idle_state
@@ -69,13 +115,16 @@ core_idle_lock_held:
/*
* Pass requested state in r3:
- * r3 - PNV_THREAD_NAP/SLEEP/WINKLE
+ * r3 - PNV_THREAD_NAP/SLEEP/WINKLE in POWER8
+ * - Requested STOP state in POWER9
*
* To check IRQ_HAPPENED in r4
* 0 - don't check
* 1 - check
+ *
+ * Address to 'rfid' to in r5
*/
-_GLOBAL(power7_powersave_common)
+_GLOBAL(pnv_powersave_common)
/* Use r3 to pass state nap/sleep/winkle */
/* NAP is a state loss, we create a regs frame on the
* stack, fill it up with the state we care about and
@@ -126,28 +175,28 @@ _GLOBAL(power7_powersave_common)
std r9,_MSR(r1)
std r1,PACAR1(r13)
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ /* Tell KVM we're entering idle */
+ li r4,KVM_HWTHREAD_IN_IDLE
+ stb r4,HSTATE_HWTHREAD_STATE(r13)
+#endif
+
/*
* Go to real mode to do the nap, as required by the architecture.
* Also, we need to be in real mode before setting hwthread_state,
* because as soon as we do that, another thread can switch
* the MMU context to the guest.
*/
- LOAD_REG_IMMEDIATE(r5, MSR_IDLE)
+ LOAD_REG_IMMEDIATE(r7, MSR_IDLE)
li r6, MSR_RI
andc r6, r9, r6
- LOAD_REG_ADDR(r7, power7_enter_nap_mode)
mtmsrd r6, 1 /* clear RI before setting SRR0/1 */
- mtspr SPRN_SRR0, r7
- mtspr SPRN_SRR1, r5
+ mtspr SPRN_SRR0, r5
+ mtspr SPRN_SRR1, r7
rfid
- .globl power7_enter_nap_mode
-power7_enter_nap_mode:
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- /* Tell KVM we're napping */
- li r4,KVM_HWTHREAD_IN_NAP
- stb r4,HSTATE_HWTHREAD_STATE(r13)
-#endif
+ .globl pnv_enter_arch207_idle_mode
+pnv_enter_arch207_idle_mode:
stb r3,PACA_THREAD_IDLE_STATE(r13)
cmpwi cr3,r3,PNV_THREAD_SLEEP
bge cr3,2f
@@ -196,8 +245,7 @@ fastsleep_workaround_at_entry:
/* Fast sleep workaround */
li r3,1
li r4,1
- li r0,OPAL_CONFIG_CPU_IDLE_STATE
- bl opal_call_realmode
+ bl opal_rm_config_cpu_idle_state
/* Clear Lock bit */
li r0,0
@@ -206,30 +254,45 @@ fastsleep_workaround_at_entry:
b common_enter
enter_winkle:
- /*
- * Note all register i.e per-core, per-subcore or per-thread is saved
- * here since any thread in the core might wake up first
- */
- mfspr r3,SPRN_SDR1
- std r3,_SDR1(r1)
- mfspr r3,SPRN_RPR
- std r3,_RPR(r1)
- mfspr r3,SPRN_SPURR
- std r3,_SPURR(r1)
- mfspr r3,SPRN_PURR
- std r3,_PURR(r1)
- mfspr r3,SPRN_TSCR
- std r3,_TSCR(r1)
- mfspr r3,SPRN_DSCR
- std r3,_DSCR(r1)
- mfspr r3,SPRN_AMOR
- std r3,_AMOR(r1)
- mfspr r3,SPRN_WORT
- std r3,_WORT(r1)
- mfspr r3,SPRN_WORC
- std r3,_WORC(r1)
+ bl save_sprs_to_stack
+
IDLE_STATE_ENTER_SEQ(PPC_WINKLE)
+/*
+ * r3 - requested stop state
+ */
+power_enter_stop:
+/*
+ * Check if the requested state is a deep idle state.
+ */
+ LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state)
+ ld r4,ADDROFF(pnv_first_deep_stop_state)(r5)
+ cmpd r3,r4
+ bge 2f
+ IDLE_STATE_ENTER_SEQ(PPC_STOP)
+2:
+/*
+ * Entering deep idle state.
+ * Clear thread bit in PACA_CORE_IDLE_STATE, save SPRs to
+ * stack and enter stop
+ */
+ lbz r7,PACA_THREAD_MASK(r13)
+ ld r14,PACA_CORE_IDLE_STATE_PTR(r13)
+
+lwarx_loop_stop:
+ lwarx r15,0,r14
+ andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT
+ bnel core_idle_lock_held
+ andc r15,r15,r7 /* Clear thread bit */
+
+ stwcx. r15,0,r14
+ bne- lwarx_loop_stop
+ isync
+
+ bl save_sprs_to_stack
+
+ IDLE_STATE_ENTER_SEQ(PPC_STOP)
+
_GLOBAL(power7_idle)
/* Now check if user or arch enabled NAP mode */
LOAD_REG_ADDRBASE(r3,powersave_nap)
@@ -242,19 +305,22 @@ _GLOBAL(power7_idle)
_GLOBAL(power7_nap)
mr r4,r3
li r3,PNV_THREAD_NAP
- b power7_powersave_common
+ LOAD_REG_ADDR(r5, pnv_enter_arch207_idle_mode)
+ b pnv_powersave_common
/* No return */
_GLOBAL(power7_sleep)
li r3,PNV_THREAD_SLEEP
li r4,1
- b power7_powersave_common
+ LOAD_REG_ADDR(r5, pnv_enter_arch207_idle_mode)
+ b pnv_powersave_common
/* No return */
_GLOBAL(power7_winkle)
- li r3,3
+ li r3,PNV_THREAD_WINKLE
li r4,1
- b power7_powersave_common
+ LOAD_REG_ADDR(r5, pnv_enter_arch207_idle_mode)
+ b pnv_powersave_common
/* No return */
#define CHECK_HMI_INTERRUPT \
@@ -270,25 +336,104 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \
ld r2,PACATOC(r13); \
ld r1,PACAR1(r13); \
std r3,ORIG_GPR3(r1); /* Save original r3 */ \
- li r0,OPAL_HANDLE_HMI; /* Pass opal token argument*/ \
- bl opal_call_realmode; \
+ bl opal_rm_handle_hmi; \
ld r3,ORIG_GPR3(r1); /* Restore original r3 */ \
20: nop;
-_GLOBAL(power7_wakeup_tb_loss)
+/*
+ * r3 - requested stop state
+ */
+_GLOBAL(power9_idle_stop)
+ LOAD_REG_IMMEDIATE(r4, PSSCR_HV_TEMPLATE)
+ or r4,r4,r3
+ mtspr SPRN_PSSCR, r4
+ li r4, 1
+ LOAD_REG_ADDR(r5,power_enter_stop)
+ b pnv_powersave_common
+ /* No return */
+/*
+ * Called from reset vector. Check whether we have woken up with
+ * hypervisor state loss. If yes, restore hypervisor state and return
+ * back to reset vector.
+ *
+ * r13 - Contents of HSPRG0
+ * cr3 - set to gt if waking up with partial/complete hypervisor state loss
+ */
+_GLOBAL(pnv_restore_hyp_resource)
ld r2,PACATOC(r13);
+BEGIN_FTR_SECTION
+ /*
+ * POWER ISA 3. Use PSSCR to determine if we
+ * are waking up from deep idle state
+ */
+ LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state)
+ ld r4,ADDROFF(pnv_first_deep_stop_state)(r5)
+
+ mfspr r5,SPRN_PSSCR
+ /*
+ * 0-3 bits correspond to Power-Saving Level Status
+ * which indicates the idle state we are waking up from
+ */
+ rldicl r5,r5,4,60
+ cmpd cr4,r5,r4
+ bge cr4,pnv_wakeup_tb_loss
+ /*
+ * Waking up without hypervisor state loss. Return to
+ * reset vector
+ */
+ blr
+
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+
+ /*
+ * POWER ISA 2.07 or less.
+ * Check if last bit of HSPGR0 is set. This indicates whether we are
+ * waking up from winkle.
+ */
+ clrldi r5,r13,63
+ clrrdi r13,r13,1
+ cmpwi cr4,r5,1
+ mtspr SPRN_HSPRG0,r13
+
+ lbz r0,PACA_THREAD_IDLE_STATE(r13)
+ cmpwi cr2,r0,PNV_THREAD_NAP
+ bgt cr2,pnv_wakeup_tb_loss /* Either sleep or Winkle */
+
+ /*
+ * We fall through here if PACA_THREAD_IDLE_STATE shows we are waking
+ * up from nap. At this stage CR3 shouldn't contains 'gt' since that
+ * indicates we are waking with hypervisor state loss from nap.
+ */
+ bgt cr3,.
+
+ blr /* Return back to System Reset vector from where
+ pnv_restore_hyp_resource was invoked */
+
+/*
+ * Called if waking up from idle state which can cause either partial or
+ * complete hyp state loss.
+ * In POWER8, called if waking up from fastsleep or winkle
+ * In POWER9, called if waking up from stop state >= pnv_first_deep_stop_state
+ *
+ * r13 - PACA
+ * cr3 - gt if waking up with partial/complete hypervisor state loss
+ * cr4 - eq if waking up from complete hypervisor state loss.
+ */
+_GLOBAL(pnv_wakeup_tb_loss)
ld r1,PACAR1(r13)
/*
* Before entering any idle state, the NVGPRs are saved in the stack
* and they are restored before switching to the process context. Hence
* until they are restored, they are free to be used.
*
- * Save SRR1 in a NVGPR as it might be clobbered in opal_call_realmode
- * (called in CHECK_HMI_INTERRUPT). SRR1 is required to determine the
- * wakeup reason if we branch to kvm_start_guest.
+ * Save SRR1 and LR in NVGPRs as they might be clobbered in
+ * opal_call() (called in CHECK_HMI_INTERRUPT). SRR1 is required
+ * to determine the wakeup reason if we branch to kvm_start_guest. LR
+ * is required to return back to reset vector after hypervisor state
+ * restore is complete.
*/
-
+ mflr r17
mfspr r16,SPRN_SRR1
BEGIN_FTR_SECTION
CHECK_HMI_INTERRUPT
@@ -310,35 +455,35 @@ lwarx_loop2:
bnel core_idle_lock_held
cmpwi cr2,r15,0
- lbz r4,PACA_SUBCORE_SIBLING_MASK(r13)
- and r4,r4,r15
- cmpwi cr1,r4,0 /* Check if first in subcore */
/*
* At this stage
- * cr1 - 0b0100 if first thread to wakeup in subcore
- * cr2 - 0b0100 if first thread to wakeup in core
- * cr3- 0b0010 if waking up from sleep or winkle
- * cr4 - 0b0100 if waking up from winkle
+ * cr2 - eq if first thread to wakeup in core
+ * cr3- gt if waking up with partial/complete hypervisor state loss
+ * cr4 - eq if waking up from complete hypervisor state loss.
*/
- or r15,r15,r7 /* Set thread bit */
-
- beq cr1,first_thread_in_subcore
-
- /* Not first thread in subcore to wake up */
- stwcx. r15,0,r14
- bne- lwarx_loop2
- isync
- b common_exit
-
-first_thread_in_subcore:
- /* First thread in subcore to wakeup */
ori r15,r15,PNV_CORE_IDLE_LOCK_BIT
stwcx. r15,0,r14
bne- lwarx_loop2
isync
+BEGIN_FTR_SECTION
+ lbz r4,PACA_SUBCORE_SIBLING_MASK(r13)
+ and r4,r4,r15
+ cmpwi r4,0 /* Check if first in subcore */
+
+ or r15,r15,r7 /* Set thread bit */
+ beq first_thread_in_subcore
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
+
+ or r15,r15,r7 /* Set thread bit */
+ beq cr2,first_thread_in_core
+
+ /* Not first thread in core or subcore to wake up */
+ b clear_lock
+
+first_thread_in_subcore:
/*
* If waking up from sleep, subcore state is not lost. Hence
* skip subcore state restore
@@ -348,6 +493,7 @@ first_thread_in_subcore:
/* Restore per-subcore state */
ld r4,_SDR1(r1)
mtspr SPRN_SDR1,r4
+
ld r4,_RPR(r1)
mtspr SPRN_RPR,r4
ld r4,_AMOR(r1)
@@ -363,32 +509,44 @@ subcore_state_restored:
first_thread_in_core:
/*
- * First thread in the core waking up from fastsleep. It needs to
+ * First thread in the core waking up from any state which can cause
+ * partial or complete hypervisor state loss. It needs to
* call the fastsleep workaround code if the platform requires it.
* Call it unconditionally here. The below branch instruction will
- * be patched out when the idle states are discovered if platform
- * does not require workaround.
+ * be patched out if the platform does not have fastsleep or does not
+ * require the workaround. Patching will be performed during the
+ * discovery of idle-states.
*/
.global pnv_fastsleep_workaround_at_exit
pnv_fastsleep_workaround_at_exit:
b fastsleep_workaround_at_exit
timebase_resync:
- /* Do timebase resync if we are waking up from sleep. Use cr3 value
- * set in exceptions-64s.S */
+ /*
+ * Use cr3 which indicates that we are waking up with atleast partial
+ * hypervisor state loss to determine if TIMEBASE RESYNC is needed.
+ */
ble cr3,clear_lock
/* Time base re-sync */
- li r0,OPAL_RESYNC_TIMEBASE
- bl opal_call_realmode;
- /* TODO: Check r3 for failure */
-
+ bl opal_rm_resync_timebase;
/*
* If waking up from sleep, per core state is not lost, skip to
* clear_lock.
*/
bne cr4,clear_lock
- /* Restore per core state */
+ /*
+ * First thread in the core to wake up and its waking up with
+ * complete hypervisor state loss. Restore per core hypervisor
+ * state.
+ */
+BEGIN_FTR_SECTION
+ ld r4,_PTCR(r1)
+ mtspr SPRN_PTCR,r4
+ ld r4,_RPR(r1)
+ mtspr SPRN_RPR,r4
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+
ld r4,_TSCR(r1)
mtspr SPRN_TSCR,r4
ld r4,_WORC(r1)
@@ -410,9 +568,9 @@ common_exit:
/* Waking up from winkle */
- /* Restore per thread state */
- bl __restore_cpu_power8
-
+BEGIN_MMU_FTR_SECTION
+ b no_segments
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_RADIX)
/* Restore SLB from PACA */
ld r8,PACA_SLBSHADOWPTR(r13)
@@ -426,6 +584,9 @@ common_exit:
slbmte r6,r5
1: addi r8,r8,16
.endr
+no_segments:
+
+ /* Restore per thread state */
ld r4,_SPURR(r1)
mtspr SPRN_SPURR,r4
@@ -436,48 +597,34 @@ common_exit:
ld r4,_WORT(r1)
mtspr SPRN_WORT,r4
-hypervisor_state_restored:
+ /* Call cur_cpu_spec->cpu_restore() */
+ LOAD_REG_ADDR(r4, cur_cpu_spec)
+ ld r4,0(r4)
+ ld r12,CPU_SPEC_RESTORE(r4)
+#ifdef PPC64_ELF_ABI_v1
+ ld r12,0(r12)
+#endif
+ mtctr r12
+ bctrl
- li r5,PNV_THREAD_RUNNING
- stb r5,PACA_THREAD_IDLE_STATE(r13)
+hypervisor_state_restored:
mtspr SPRN_SRR1,r16
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- li r0,KVM_HWTHREAD_IN_KERNEL
- stb r0,HSTATE_HWTHREAD_STATE(r13)
- /* Order setting hwthread_state vs. testing hwthread_req */
- sync
- lbz r0,HSTATE_HWTHREAD_REQ(r13)
- cmpwi r0,0
- beq 6f
- b kvm_start_guest
-6:
-#endif
-
- REST_NVGPRS(r1)
- REST_GPR(2, r1)
- ld r3,_CCR(r1)
- ld r4,_MSR(r1)
- ld r5,_NIP(r1)
- addi r1,r1,INT_FRAME_SIZE
- mtcr r3
- mfspr r3,SPRN_SRR1 /* Return SRR1 */
- mtspr SPRN_SRR1,r4
- mtspr SPRN_SRR0,r5
- rfid
+ mtlr r17
+ blr /* Return back to System Reset vector from where
+ pnv_restore_hyp_resource was invoked */
fastsleep_workaround_at_exit:
li r3,1
li r4,0
- li r0,OPAL_CONFIG_CPU_IDLE_STATE
- bl opal_call_realmode
+ bl opal_rm_config_cpu_idle_state
b timebase_resync
/*
* R3 here contains the value that will be returned to the caller
* of power7_nap.
*/
-_GLOBAL(power7_wakeup_loss)
+_GLOBAL(pnv_wakeup_loss)
ld r1,PACAR1(r13)
BEGIN_FTR_SECTION
CHECK_HMI_INTERRUPT
@@ -497,10 +644,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
* R3 here contains the value that will be returned to the caller
* of power7_nap.
*/
-_GLOBAL(power7_wakeup_noloss)
+_GLOBAL(pnv_wakeup_noloss)
lbz r0,PACA_NAPSTATELOST(r13)
cmpwi r0,0
- bne power7_wakeup_loss
+ bne pnv_wakeup_loss
BEGIN_FTR_SECTION
CHECK_HMI_INTERRUPT
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 3cb46a3b1de7..ac910d9982df 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -250,7 +250,7 @@ notrace void arch_local_irq_restore(unsigned long en)
if (WARN_ON(mfmsr() & MSR_EE))
__hard_irq_disable();
}
-#endif /* CONFIG_TRACE_IRQFLAG */
+#endif /* CONFIG_TRACE_IRQFLAGS */
set_soft_enabled(0);
@@ -342,6 +342,21 @@ bool prep_irq_for_idle(void)
return true;
}
+/*
+ * Force a replay of the external interrupt handler on this CPU.
+ */
+void force_external_irq_replay(void)
+{
+ /*
+ * This must only be called with interrupts soft-disabled,
+ * the replay will happen when re-enabling.
+ */
+ WARN_ON(!arch_irqs_disabled());
+
+ /* Indicate in the PACA that we have an interrupt to replay */
+ local_paca->irq_happened |= PACA_IRQ_EE;
+}
+
#endif /* CONFIG_PPC64 */
int arch_show_interrupts(struct seq_file *p, int prec)
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
index 50bf55135ef8..4c780a342282 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -55,7 +55,7 @@ int default_machine_kexec_prepare(struct kimage *image)
const unsigned long *basep;
const unsigned int *sizep;
- if (!ppc_md.hpte_clear_all)
+ if (!mmu_hash_ops.hpte_clear_all)
return -ENOENT;
/*
@@ -380,7 +380,12 @@ void default_machine_kexec(struct kimage *image)
*/
kexec_sequence(&kexec_stack, image->start, image,
page_address(image->control_code_page),
- ppc_md.hpte_clear_all);
+#ifdef CONFIG_PPC_STD_MMU
+ mmu_hash_ops.hpte_clear_all
+#else
+ NULL
+#endif
+ );
/* NOTREACHED */
}
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 7a8519052b14..cb195157b318 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -667,7 +667,7 @@ _GLOBAL(kexec_sequence)
mr r12,r27
#endif
mtctr r12
- bctrl /* ppc_md.hpte_clear_all(void); */
+ bctrl /* mmu_hash_ops.hpte_clear_all(void); */
#endif /* !CONFIG_PPC_BOOK3E */
/*
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index f703f343358e..183368e008cf 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -494,9 +494,10 @@ static bool is_early_mcount_callsite(u32 *instruction)
restore r2. */
static int restore_r2(u32 *instruction, struct module *me)
{
+ if (is_early_mcount_callsite(instruction - 1))
+ return 1;
+
if (*instruction != PPC_INST_NOP) {
- if (is_early_mcount_callsite(instruction - 1))
- return 1;
pr_err("%s: Expect noop after relocate, got %08x\n",
me->name, *instruction);
return 0;
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index c6ac4f01dd56..f93942b4b6a6 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -1418,8 +1418,10 @@ void __init pcibios_resource_survey(void)
/* Allocate and assign resources */
list_for_each_entry(b, &pci_root_buses, node)
pcibios_allocate_bus_resources(b);
- pcibios_allocate_resources(0);
- pcibios_allocate_resources(1);
+ if (!pci_has_flag(PCI_REASSIGN_ALL_RSRC)) {
+ pcibios_allocate_resources(0);
+ pcibios_allocate_resources(1);
+ }
/* Before we start assigning unassigned resource, we try to reserve
* the low IO area and the VGA memory area if they intersect the
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index f71b79a8992b..ed5e9ff61a68 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -47,7 +47,6 @@ static int __init pcibios_init(void)
printk(KERN_INFO "PCI: Probing PCI hardware\n");
- pci_io_base = ISA_IO_BASE;
/* For now, override phys_mem_access_prot. If we need it,g
* later, we may move that initialization to each ppc_md
*/
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index ddceeb96e8fb..a8cca88e972f 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1527,6 +1527,16 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
current->thread.regs = regs - 1;
}
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ /*
+ * Clear any transactional state, we're exec()ing. The cause is
+ * not important as there will never be a recheckpoint so it's not
+ * user visible.
+ */
+ if (MSR_TM_SUSPENDED(mfmsr()))
+ tm_reclaim_current(0);
+#endif
+
memset(regs->gpr, 0, sizeof(regs->gpr));
regs->ctr = 0;
regs->link = 0;
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 946e34ffeae9..bae3db791150 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -56,6 +56,8 @@
#include <asm/opal.h>
#include <asm/fadump.h>
#include <asm/debug.h>
+#include <asm/epapr_hcalls.h>
+#include <asm/firmware.h>
#include <mm/mmu_decl.h>
@@ -645,6 +647,14 @@ static void __init early_reserve_mem(void)
#endif
}
+static bool disable_radix;
+static int __init parse_disable_radix(char *p)
+{
+ disable_radix = true;
+ return 0;
+}
+early_param("disable_radix", parse_disable_radix);
+
void __init early_init_devtree(void *params)
{
phys_addr_t limit;
@@ -734,11 +744,26 @@ void __init early_init_devtree(void *params)
*/
spinning_secondaries = boot_cpu_count - 1;
#endif
+ /*
+ * now fixup radix MMU mode based on kernel command line
+ */
+ if (disable_radix)
+ cur_cpu_spec->mmu_features &= ~MMU_FTR_RADIX;
#ifdef CONFIG_PPC_POWERNV
/* Scan and build the list of machine check recoverable ranges */
of_scan_flat_dt(early_init_dt_scan_recoverable_ranges, NULL);
#endif
+ epapr_paravirt_early_init();
+
+ /* Now try to figure out if we are running on LPAR and so on */
+ pseries_probe_fw_features();
+
+#ifdef CONFIG_PPC_PS3
+ /* Identify PS3 firmware */
+ if (of_flat_dt_is_compatible(of_get_flat_dt_root(), "sony,ps3"))
+ powerpc_firmware_features |= FW_FEATURE_PS3_POSSIBLE;
+#endif
DBG(" <- early_init_devtree()\n");
}
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 8da209fdf480..6a3e5de544ce 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -685,7 +685,7 @@ int rtas_set_indicator_fast(int indicator, int index, int new_value)
return rc;
}
-void rtas_restart(char *cmd)
+void __noreturn rtas_restart(char *cmd)
{
if (rtas_flash_term_hook)
rtas_flash_term_hook(SYS_RESTART);
@@ -704,7 +704,7 @@ void rtas_power_off(void)
for (;;);
}
-void rtas_halt(void)
+void __noreturn rtas_halt(void)
{
if (rtas_flash_term_hook)
rtas_flash_term_hook(SYS_HALT);
@@ -1174,7 +1174,7 @@ void __init rtas_initialize(void)
* the stop-self token if any
*/
#ifdef CONFIG_PPC64
- if (machine_is(pseries) && firmware_has_feature(FW_FEATURE_LPAR)) {
+ if (firmware_has_feature(FW_FEATURE_LPAR)) {
rtas_region = min(ppc64_rma_size, RTAS_INSTANTIATE_MAX);
ibm_suspend_me_token = rtas_token("ibm,suspend-me");
}
diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
index e864b7c5884e..a26a02006576 100644
--- a/arch/powerpc/kernel/rtasd.c
+++ b/arch/powerpc/kernel/rtasd.c
@@ -526,10 +526,8 @@ void rtas_cancel_event_scan(void)
}
EXPORT_SYMBOL_GPL(rtas_cancel_event_scan);
-static int __init rtas_init(void)
+static int __init rtas_event_scan_init(void)
{
- struct proc_dir_entry *entry;
-
if (!machine_is(pseries) && !machine_is(chrp))
return 0;
@@ -562,13 +560,27 @@ static int __init rtas_init(void)
return -ENOMEM;
}
+ start_event_scan();
+
+ return 0;
+}
+arch_initcall(rtas_event_scan_init);
+
+static int __init rtas_init(void)
+{
+ struct proc_dir_entry *entry;
+
+ if (!machine_is(pseries) && !machine_is(chrp))
+ return 0;
+
+ if (!rtas_log_buf)
+ return -ENODEV;
+
entry = proc_create("powerpc/rtas/error_log", S_IRUSR, NULL,
&proc_rtas_log_operations);
if (!entry)
printk(KERN_ERR "Failed to create error_log proc entry\n");
- start_event_scan();
-
return 0;
}
__initcall(rtas_init);
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 8ca79b7503d8..714b4ba7ab86 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -35,6 +35,7 @@
#include <linux/percpu.h>
#include <linux/memblock.h>
#include <linux/of_platform.h>
+#include <linux/hugetlb.h>
#include <asm/io.h>
#include <asm/paca.h>
#include <asm/prom.h>
@@ -61,6 +62,12 @@
#include <asm/cputhreads.h>
#include <mm/mmu_decl.h>
#include <asm/fadump.h>
+#include <asm/udbg.h>
+#include <asm/hugetlb.h>
+#include <asm/livepatch.h>
+#include <asm/mmu_context.h>
+
+#include "setup.h"
#ifdef DEBUG
#include <asm/udbg.h>
@@ -494,7 +501,7 @@ void __init smp_setup_cpu_maps(void)
* On pSeries LPAR, we need to know how many cpus
* could possibly be added to this partition.
*/
- if (machine_is(pseries) && firmware_has_feature(FW_FEATURE_LPAR) &&
+ if (firmware_has_feature(FW_FEATURE_LPAR) &&
(dn = of_find_node_by_path("/rtas"))) {
int num_addr_cell, num_size_cell, maxcpus;
const __be32 *ireg;
@@ -575,6 +582,7 @@ void probe_machine(void)
{
extern struct machdep_calls __machine_desc_start;
extern struct machdep_calls __machine_desc_end;
+ unsigned int i;
/*
* Iterate all ppc_md structures until we find the proper
@@ -582,6 +590,17 @@ void probe_machine(void)
*/
DBG("Probing machine type ...\n");
+ /*
+ * Check ppc_md is empty, if not we have a bug, ie, we setup an
+ * entry before probe_machine() which will be overwritten
+ */
+ for (i = 0; i < (sizeof(ppc_md) / sizeof(void *)); i++) {
+ if (((void **)&ppc_md)[i]) {
+ printk(KERN_ERR "Entry %d in ppc_md non empty before"
+ " machine probe !\n", i);
+ }
+ }
+
for (machine_id = &__machine_desc_start;
machine_id < &__machine_desc_end;
machine_id++) {
@@ -676,6 +695,8 @@ static struct notifier_block ppc_panic_block = {
void __init setup_panic(void)
{
+ if (!ppc_md.panic)
+ return;
atomic_notifier_chain_register(&panic_notifier_list, &ppc_panic_block);
}
@@ -744,3 +765,169 @@ void arch_setup_pdev_archdata(struct platform_device *pdev)
pdev->dev.dma_mask = &pdev->archdata.dma_mask;
set_dma_ops(&pdev->dev, &dma_direct_ops);
}
+
+static __init void print_system_info(void)
+{
+ pr_info("-----------------------------------------------------\n");
+#ifdef CONFIG_PPC_STD_MMU_64
+ pr_info("ppc64_pft_size = 0x%llx\n", ppc64_pft_size);
+#endif
+#ifdef CONFIG_PPC_STD_MMU_32
+ pr_info("Hash_size = 0x%lx\n", Hash_size);
+#endif
+ pr_info("phys_mem_size = 0x%llx\n",
+ (unsigned long long)memblock_phys_mem_size());
+
+ pr_info("dcache_bsize = 0x%x\n", dcache_bsize);
+ pr_info("icache_bsize = 0x%x\n", icache_bsize);
+ if (ucache_bsize != 0)
+ pr_info("ucache_bsize = 0x%x\n", ucache_bsize);
+
+ pr_info("cpu_features = 0x%016lx\n", cur_cpu_spec->cpu_features);
+ pr_info(" possible = 0x%016lx\n",
+ (unsigned long)CPU_FTRS_POSSIBLE);
+ pr_info(" always = 0x%016lx\n",
+ (unsigned long)CPU_FTRS_ALWAYS);
+ pr_info("cpu_user_features = 0x%08x 0x%08x\n",
+ cur_cpu_spec->cpu_user_features,
+ cur_cpu_spec->cpu_user_features2);
+ pr_info("mmu_features = 0x%08x\n", cur_cpu_spec->mmu_features);
+#ifdef CONFIG_PPC64
+ pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features);
+#endif
+
+#ifdef CONFIG_PPC_STD_MMU_64
+ if (htab_address)
+ pr_info("htab_address = 0x%p\n", htab_address);
+ if (htab_hash_mask)
+ pr_info("htab_hash_mask = 0x%lx\n", htab_hash_mask);
+#endif
+#ifdef CONFIG_PPC_STD_MMU_32
+ if (Hash)
+ pr_info("Hash = 0x%p\n", Hash);
+ if (Hash_mask)
+ pr_info("Hash_mask = 0x%lx\n", Hash_mask);
+#endif
+
+ if (PHYSICAL_START > 0)
+ pr_info("physical_start = 0x%llx\n",
+ (unsigned long long)PHYSICAL_START);
+ pr_info("-----------------------------------------------------\n");
+}
+
+/*
+ * Called into from start_kernel this initializes memblock, which is used
+ * to manage page allocation until mem_init is called.
+ */
+void __init setup_arch(char **cmdline_p)
+{
+ *cmdline_p = boot_command_line;
+
+ /* Set a half-reasonable default so udelay does something sensible */
+ loops_per_jiffy = 500000000 / HZ;
+
+ /* Unflatten the device-tree passed by prom_init or kexec */
+ unflatten_device_tree();
+
+ /*
+ * Initialize cache line/block info from device-tree (on ppc64) or
+ * just cputable (on ppc32).
+ */
+ initialize_cache_info();
+
+ /* Initialize RTAS if available. */
+ rtas_initialize();
+
+ /* Check if we have an initrd provided via the device-tree. */
+ check_for_initrd();
+
+ /* Probe the machine type, establish ppc_md. */
+ probe_machine();
+
+ /* Setup panic notifier if requested by the platform. */
+ setup_panic();
+
+ /*
+ * Configure ppc_md.power_save (ppc32 only, 64-bit machines do
+ * it from their respective probe() function.
+ */
+ setup_power_save();
+
+ /* Discover standard serial ports. */
+ find_legacy_serial_ports();
+
+ /* Register early console with the printk subsystem. */
+ register_early_udbg_console();
+
+ /* Setup the various CPU maps based on the device-tree. */
+ smp_setup_cpu_maps();
+
+ /* Initialize xmon. */
+ xmon_setup();
+
+ /* Check the SMT related command line arguments (ppc64). */
+ check_smt_enabled();
+
+ /* On BookE, setup per-core TLB data structures. */
+ setup_tlb_core_data();
+
+ /*
+ * Release secondary cpus out of their spinloops at 0x60 now that
+ * we can map physical -> logical CPU ids.
+ *
+ * Freescale Book3e parts spin in a loop provided by firmware,
+ * so smp_release_cpus() does nothing for them.
+ */
+#ifdef CONFIG_SMP
+ smp_release_cpus();
+#endif
+
+ /* Print various info about the machine that has been gathered so far. */
+ print_system_info();
+
+ /* Reserve large chunks of memory for use by CMA for KVM. */
+ kvm_cma_reserve();
+
+ /*
+ * Reserve any gigantic pages requested on the command line.
+ * memblock needs to have been initialized by the time this is
+ * called since this will reserve memory.
+ */
+ reserve_hugetlb_gpages();
+
+ klp_init_thread_info(&init_thread_info);
+
+ init_mm.start_code = (unsigned long)_stext;
+ init_mm.end_code = (unsigned long) _etext;
+ init_mm.end_data = (unsigned long) _edata;
+ init_mm.brk = klimit;
+#ifdef CONFIG_PPC_64K_PAGES
+ init_mm.context.pte_frag = NULL;
+#endif
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+ mm_iommu_init(&init_mm.context);
+#endif
+ irqstack_early_init();
+ exc_lvl_early_init();
+ emergency_stack_init();
+
+ initmem_init();
+
+#ifdef CONFIG_DUMMY_CONSOLE
+ conswitchp = &dummy_con;
+#endif
+ if (ppc_md.setup_arch)
+ ppc_md.setup_arch();
+
+ paging_init();
+
+ /* Initialize the MMU context management stuff. */
+ mmu_context_init();
+
+#ifdef CONFIG_PPC64
+ /* Interrupt code needs to be 64K-aligned. */
+ if ((unsigned long)_stext & 0xffff)
+ panic("Kernelbase not 64K-aligned (0x%lx)!\n",
+ (unsigned long)_stext);
+#endif
+}
diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h
new file mode 100644
index 000000000000..cfba134b3024
--- /dev/null
+++ b/arch/powerpc/kernel/setup.h
@@ -0,0 +1,58 @@
+/*
+ * Prototypes for functions that are shared between setup_(32|64|common).c
+ *
+ * Copyright 2016 Michael Ellerman, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef __ARCH_POWERPC_KERNEL_SETUP_H
+#define __ARCH_POWERPC_KERNEL_SETUP_H
+
+void initialize_cache_info(void);
+void irqstack_early_init(void);
+
+#ifdef CONFIG_PPC32
+void setup_power_save(void);
+#else
+static inline void setup_power_save(void) { };
+#endif
+
+#if defined(CONFIG_PPC64) && defined(CONFIG_SMP)
+void check_smt_enabled(void);
+#else
+static inline void check_smt_enabled(void) { };
+#endif
+
+#if defined(CONFIG_PPC_BOOK3E) && defined(CONFIG_SMP)
+void setup_tlb_core_data(void);
+#else
+static inline void setup_tlb_core_data(void) { };
+#endif
+
+#if defined(CONFIG_PPC_BOOK3E) || defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+void exc_lvl_early_init(void);
+#else
+static inline void exc_lvl_early_init(void) { };
+#endif
+
+#ifdef CONFIG_PPC64
+void emergency_stack_init(void);
+#else
+static inline void emergency_stack_init(void) { };
+#endif
+
+/*
+ * Having this in kvm_ppc.h makes include dependencies too
+ * tricky to solve for setup-common.c so have it here.
+ */
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+void kvm_cma_reserve(void);
+#else
+static inline void kvm_cma_reserve(void) { };
+#endif
+
+#endif /* __ARCH_POWERPC_KERNEL_SETUP_H */
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index d544fa311757..00f57754407e 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -36,8 +36,6 @@
#include <asm/time.h>
#include <asm/serial.h>
#include <asm/udbg.h>
-#include <asm/mmu_context.h>
-#include <asm/epapr_hcalls.h>
#include <asm/code-patching.h>
#define DBG(fmt...)
@@ -62,9 +60,7 @@ int icache_bsize;
int ucache_bsize;
/*
- * We're called here very early in the boot. We determine the machine
- * type and call the appropriate low-level setup functions.
- * -- Cort <cort@fsmlabs.com>
+ * We're called here very early in the boot.
*
* Note that the kernel may be running at an address which is different
* from the address that it was linked at, so we must use RELOC/PTRRELOC
@@ -73,7 +69,6 @@ int ucache_bsize;
notrace unsigned long __init early_init(unsigned long dt_ptr)
{
unsigned long offset = reloc_offset();
- struct cpu_spec *spec;
/* First zero the BSS -- use memset_io, some platforms don't have
* caches on yet */
@@ -84,27 +79,19 @@ notrace unsigned long __init early_init(unsigned long dt_ptr)
* Identify the CPU type and fix up code sections
* that depend on which cpu we have.
*/
- spec = identify_cpu(offset, mfspr(SPRN_PVR));
+ identify_cpu(offset, mfspr(SPRN_PVR));
- do_feature_fixups(spec->cpu_features,
- PTRRELOC(&__start___ftr_fixup),
- PTRRELOC(&__stop___ftr_fixup));
-
- do_feature_fixups(spec->mmu_features,
- PTRRELOC(&__start___mmu_ftr_fixup),
- PTRRELOC(&__stop___mmu_ftr_fixup));
-
- do_lwsync_fixups(spec->cpu_features,
- PTRRELOC(&__start___lwsync_fixup),
- PTRRELOC(&__stop___lwsync_fixup));
-
- do_final_fixups();
+ apply_feature_fixups();
return KERNELBASE + offset;
}
/*
+ * This is run before start_kernel(), the kernel has been relocated
+ * and we are running with enough of the MMU enabled to have our
+ * proper kernel virtual addresses
+ *
* Find out what kind of machine we're on and save any data we need
* from the early boot process (devtree is copied on pmac by prom_init()).
* This is called very early on the boot process, after a minimal
@@ -123,27 +110,9 @@ notrace void __init machine_init(u64 dt_ptr)
/* Do some early initialization based on the flat device tree */
early_init_devtree(__va(dt_ptr));
- epapr_paravirt_early_init();
-
early_init_mmu();
- probe_machine();
-
setup_kdump_trampoline();
-
-#ifdef CONFIG_6xx
- if (cpu_has_feature(CPU_FTR_CAN_DOZE) ||
- cpu_has_feature(CPU_FTR_CAN_NAP))
- ppc_md.power_save = ppc6xx_idle;
-#endif
-
-#ifdef CONFIG_E500
- if (cpu_has_feature(CPU_FTR_CAN_DOZE) ||
- cpu_has_feature(CPU_FTR_CAN_NAP))
- ppc_md.power_save = e500_idle;
-#endif
- if (ppc_md.progress)
- ppc_md.progress("id mach(): done", 0x200);
}
/* Checks "l2cr=xxxx" command-line option */
@@ -221,7 +190,7 @@ int __init ppc_init(void)
arch_initcall(ppc_init);
-static void __init irqstack_early_init(void)
+void __init irqstack_early_init(void)
{
unsigned int i;
@@ -236,7 +205,7 @@ static void __init irqstack_early_init(void)
}
#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
-static void __init exc_lvl_early_init(void)
+void __init exc_lvl_early_init(void)
{
unsigned int i, hw_cpu;
@@ -259,33 +228,25 @@ static void __init exc_lvl_early_init(void)
#endif
}
}
-#else
-#define exc_lvl_early_init()
#endif
-/* Warning, IO base is not yet inited */
-void __init setup_arch(char **cmdline_p)
+void __init setup_power_save(void)
{
- *cmdline_p = boot_command_line;
-
- /* so udelay does something sensible, assume <= 1000 bogomips */
- loops_per_jiffy = 500000000 / HZ;
-
- unflatten_device_tree();
- check_for_initrd();
-
- if (ppc_md.init_early)
- ppc_md.init_early();
-
- find_legacy_serial_ports();
-
- smp_setup_cpu_maps();
-
- /* Register early console */
- register_early_udbg_console();
+#ifdef CONFIG_6xx
+ if (cpu_has_feature(CPU_FTR_CAN_DOZE) ||
+ cpu_has_feature(CPU_FTR_CAN_NAP))
+ ppc_md.power_save = ppc6xx_idle;
+#endif
- xmon_setup();
+#ifdef CONFIG_E500
+ if (cpu_has_feature(CPU_FTR_CAN_DOZE) ||
+ cpu_has_feature(CPU_FTR_CAN_NAP))
+ ppc_md.power_save = e500_idle;
+#endif
+}
+__init void initialize_cache_info(void)
+{
/*
* Set cache line size based on type of cpu as a default.
* Systems with OF can look in the properties on the cpu node(s)
@@ -296,32 +257,4 @@ void __init setup_arch(char **cmdline_p)
ucache_bsize = 0;
if (cpu_has_feature(CPU_FTR_UNIFIED_ID_CACHE))
ucache_bsize = icache_bsize = dcache_bsize;
-
- if (ppc_md.panic)
- setup_panic();
-
- init_mm.start_code = (unsigned long)_stext;
- init_mm.end_code = (unsigned long) _etext;
- init_mm.end_data = (unsigned long) _edata;
- init_mm.brk = klimit;
-
- exc_lvl_early_init();
-
- irqstack_early_init();
-
- initmem_init();
- if ( ppc_md.progress ) ppc_md.progress("setup_arch: initmem", 0x3eab);
-
-#ifdef CONFIG_DUMMY_CONSOLE
- conswitchp = &dummy_con;
-#endif
-
- if (ppc_md.setup_arch)
- ppc_md.setup_arch();
- if ( ppc_md.progress ) ppc_md.progress("arch: exit", 0x3eab);
-
- paging_init();
-
- /* Initialize the MMU context management stuff */
- mmu_context_init();
}
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 5530bb55a78b..d8216aed22b7 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -35,7 +35,6 @@
#include <linux/pci.h>
#include <linux/lockdep.h>
#include <linux/memblock.h>
-#include <linux/hugetlb.h>
#include <linux/memory.h>
#include <linux/nmi.h>
@@ -64,12 +63,10 @@
#include <asm/xmon.h>
#include <asm/udbg.h>
#include <asm/kexec.h>
-#include <asm/mmu_context.h>
#include <asm/code-patching.h>
-#include <asm/kvm_ppc.h>
-#include <asm/hugetlb.h>
-#include <asm/epapr_hcalls.h>
#include <asm/livepatch.h>
+#include <asm/opal.h>
+#include <asm/cputhreads.h>
#ifdef DEBUG
#define DBG(fmt...) udbg_printf(fmt)
@@ -100,7 +97,7 @@ int icache_bsize;
int ucache_bsize;
#if defined(CONFIG_PPC_BOOK3E) && defined(CONFIG_SMP)
-static void setup_tlb_core_data(void)
+void __init setup_tlb_core_data(void)
{
int cpu;
@@ -133,10 +130,6 @@ static void setup_tlb_core_data(void)
}
}
}
-#else
-static void setup_tlb_core_data(void)
-{
-}
#endif
#ifdef CONFIG_SMP
@@ -144,7 +137,7 @@ static void setup_tlb_core_data(void)
static char *smt_enabled_cmdline;
/* Look for ibm,smt-enabled OF option */
-static void check_smt_enabled(void)
+void __init check_smt_enabled(void)
{
struct device_node *dn;
const char *smt_option;
@@ -193,12 +186,10 @@ static int __init early_smt_enabled(char *p)
}
early_param("smt-enabled", early_smt_enabled);
-#else
-#define check_smt_enabled()
#endif /* CONFIG_SMP */
/** Fix up paca fields required for the boot cpu */
-static void fixup_boot_paca(void)
+static void __init fixup_boot_paca(void)
{
/* The boot cpu is started */
get_paca()->cpu_start = 1;
@@ -206,23 +197,50 @@ static void fixup_boot_paca(void)
get_paca()->data_offset = 0;
}
-static void cpu_ready_for_interrupts(void)
+static void __init configure_exceptions(void)
{
- /* Set IR and DR in PACA MSR */
- get_paca()->kernel_msr = MSR_KERNEL;
-
/*
- * Enable AIL if supported, and we are in hypervisor mode. If we are
- * not in hypervisor mode, we enable relocation-on interrupts later
- * in pSeries_setup_arch() using the H_SET_MODE hcall.
+ * Setup the trampolines from the lowmem exception vectors
+ * to the kdump kernel when not using a relocatable kernel.
*/
- if (cpu_has_feature(CPU_FTR_HVMODE) &&
- cpu_has_feature(CPU_FTR_ARCH_207S)) {
- unsigned long lpcr = mfspr(SPRN_LPCR);
- mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3);
+ setup_kdump_trampoline();
+
+ /* Under a PAPR hypervisor, we need hypercalls */
+ if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
+ /* Enable AIL if possible */
+ pseries_enable_reloc_on_exc();
+
+ /*
+ * Tell the hypervisor that we want our exceptions to
+ * be taken in little endian mode.
+ *
+ * We don't call this for big endian as our calling convention
+ * makes us always enter in BE, and the call may fail under
+ * some circumstances with kdump.
+ */
+#ifdef __LITTLE_ENDIAN__
+ pseries_little_endian_exceptions();
+#endif
+ } else {
+ /* Set endian mode using OPAL */
+ if (firmware_has_feature(FW_FEATURE_OPAL))
+ opal_configure_cores();
+
+ /* Enable AIL if supported, and we are in hypervisor mode */
+ if (cpu_has_feature(CPU_FTR_HVMODE) &&
+ cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ unsigned long lpcr = mfspr(SPRN_LPCR);
+ mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3);
+ }
}
}
+static void cpu_ready_for_interrupts(void)
+{
+ /* Set IR and DR in PACA MSR */
+ get_paca()->kernel_msr = MSR_KERNEL;
+}
+
/*
* Early initialization entry point. This is called by head.S
* with MMU translation disabled. We rely on the "feature" of
@@ -270,22 +288,22 @@ void __init early_setup(unsigned long dt_ptr)
*/
early_init_devtree(__va(dt_ptr));
- epapr_paravirt_early_init();
-
/* Now we know the logical id of our boot cpu, setup the paca. */
setup_paca(&paca[boot_cpuid]);
fixup_boot_paca();
- /* Probe the machine type */
- probe_machine();
-
- setup_kdump_trampoline();
-
- DBG("Found, Initializing memory management...\n");
+ /*
+ * Configure exception handlers. This include setting up trampolines
+ * if needed, setting exception endian mode, etc...
+ */
+ configure_exceptions();
/* Initialize the hash table or TLB handling */
early_init_mmu();
+ /* Apply all the dynamic patching */
+ apply_feature_fixups();
+
/*
* At this point, we can let interrupts switch to virtual mode
* (the MMU has been setup), so adjust the MSR in the PACA to
@@ -293,16 +311,6 @@ void __init early_setup(unsigned long dt_ptr)
*/
cpu_ready_for_interrupts();
- /* Reserve large chunks of memory for use by CMA for KVM */
- kvm_cma_reserve();
-
- /*
- * Reserve any gigantic pages requested on the command line.
- * memblock needs to have been initialized by the time this is
- * called since this will reserve memory.
- */
- reserve_hugetlb_gpages();
-
DBG(" <- early_setup()\n");
#ifdef CONFIG_PPC_EARLY_DEBUG_BOOTX
@@ -391,7 +399,7 @@ void smp_release_cpus(void)
* cache informations about the CPU that will be used by cache flush
* routines and/or provided to userland
*/
-static void __init initialize_cache_info(void)
+void __init initialize_cache_info(void)
{
struct device_node *np;
unsigned long num_cpus = 0;
@@ -456,127 +464,11 @@ static void __init initialize_cache_info(void)
}
}
- DBG(" <- initialize_cache_info()\n");
-}
-
-
-/*
- * Do some initial setup of the system. The parameters are those which
- * were passed in from the bootloader.
- */
-void __init setup_system(void)
-{
- DBG(" -> setup_system()\n");
-
- /* Apply the CPUs-specific and firmware specific fixups to kernel
- * text (nop out sections not relevant to this CPU or this firmware)
- */
- do_feature_fixups(cur_cpu_spec->cpu_features,
- &__start___ftr_fixup, &__stop___ftr_fixup);
- do_feature_fixups(cur_cpu_spec->mmu_features,
- &__start___mmu_ftr_fixup, &__stop___mmu_ftr_fixup);
- do_feature_fixups(powerpc_firmware_features,
- &__start___fw_ftr_fixup, &__stop___fw_ftr_fixup);
- do_lwsync_fixups(cur_cpu_spec->cpu_features,
- &__start___lwsync_fixup, &__stop___lwsync_fixup);
- do_final_fixups();
-
- /*
- * Unflatten the device-tree passed by prom_init or kexec
- */
- unflatten_device_tree();
-
- /*
- * Fill the ppc64_caches & systemcfg structures with informations
- * retrieved from the device-tree.
- */
- initialize_cache_info();
-
-#ifdef CONFIG_PPC_RTAS
- /*
- * Initialize RTAS if available
- */
- rtas_initialize();
-#endif /* CONFIG_PPC_RTAS */
-
- /*
- * Check if we have an initrd provided via the device-tree
- */
- check_for_initrd();
-
- /*
- * Do some platform specific early initializations, that includes
- * setting up the hash table pointers. It also sets up some interrupt-mapping
- * related options that will be used by finish_device_tree()
- */
- if (ppc_md.init_early)
- ppc_md.init_early();
-
- /*
- * We can discover serial ports now since the above did setup the
- * hash table management for us, thus ioremap works. We do that early
- * so that further code can be debugged
- */
- find_legacy_serial_ports();
-
- /*
- * Register early console
- */
- register_early_udbg_console();
-
- /*
- * Initialize xmon
- */
- xmon_setup();
-
- smp_setup_cpu_maps();
- check_smt_enabled();
- setup_tlb_core_data();
-
- /*
- * Freescale Book3e parts spin in a loop provided by firmware,
- * so smp_release_cpus() does nothing for them
- */
-#if defined(CONFIG_SMP)
- /* Release secondary cpus out of their spinloops at 0x60 now that
- * we can map physical -> logical CPU ids
- */
- smp_release_cpus();
-#endif
-
- pr_info("Starting Linux %s %s\n", init_utsname()->machine,
- init_utsname()->version);
-
- pr_info("-----------------------------------------------------\n");
- pr_info("ppc64_pft_size = 0x%llx\n", ppc64_pft_size);
- pr_info("phys_mem_size = 0x%llx\n", memblock_phys_mem_size());
-
- if (ppc64_caches.dline_size != 0x80)
- pr_info("dcache_line_size = 0x%x\n", ppc64_caches.dline_size);
- if (ppc64_caches.iline_size != 0x80)
- pr_info("icache_line_size = 0x%x\n", ppc64_caches.iline_size);
-
- pr_info("cpu_features = 0x%016lx\n", cur_cpu_spec->cpu_features);
- pr_info(" possible = 0x%016lx\n", CPU_FTRS_POSSIBLE);
- pr_info(" always = 0x%016lx\n", CPU_FTRS_ALWAYS);
- pr_info("cpu_user_features = 0x%08x 0x%08x\n", cur_cpu_spec->cpu_user_features,
- cur_cpu_spec->cpu_user_features2);
- pr_info("mmu_features = 0x%08x\n", cur_cpu_spec->mmu_features);
- pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features);
-
-#ifdef CONFIG_PPC_STD_MMU_64
- if (htab_address)
- pr_info("htab_address = 0x%p\n", htab_address);
-
- pr_info("htab_hash_mask = 0x%lx\n", htab_hash_mask);
-#endif
-
- if (PHYSICAL_START > 0)
- pr_info("physical_start = 0x%llx\n",
- (unsigned long long)PHYSICAL_START);
- pr_info("-----------------------------------------------------\n");
+ /* For use by binfmt_elf */
+ dcache_bsize = ppc64_caches.dline_size;
+ icache_bsize = ppc64_caches.iline_size;
- DBG(" <- setup_system()\n");
+ DBG(" <- initialize_cache_info()\n");
}
/* This returns the limit below which memory accesses to the linear
@@ -584,7 +476,7 @@ void __init setup_system(void)
* used to allocate interrupt or emergency stacks for which our
* exception entry path doesn't deal with being interrupted.
*/
-static u64 safe_stack_limit(void)
+static __init u64 safe_stack_limit(void)
{
#ifdef CONFIG_PPC_BOOK3E
/* Freescale BookE bolts the entire linear mapping */
@@ -600,7 +492,7 @@ static u64 safe_stack_limit(void)
#endif
}
-static void __init irqstack_early_init(void)
+void __init irqstack_early_init(void)
{
u64 limit = safe_stack_limit();
unsigned int i;
@@ -620,7 +512,7 @@ static void __init irqstack_early_init(void)
}
#ifdef CONFIG_PPC_BOOK3E
-static void __init exc_lvl_early_init(void)
+void __init exc_lvl_early_init(void)
{
unsigned int i;
unsigned long sp;
@@ -642,8 +534,6 @@ static void __init exc_lvl_early_init(void)
if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC))
patch_exception(0x040, exc_debug_debug_book3e);
}
-#else
-#define exc_lvl_early_init()
#endif
/*
@@ -651,7 +541,7 @@ static void __init exc_lvl_early_init(void)
* early in SMP boots before relocation is enabled. Exclusive emergency
* stack for machine checks.
*/
-static void __init emergency_stack_init(void)
+void __init emergency_stack_init(void)
{
u64 limit;
unsigned int i;
@@ -682,61 +572,6 @@ static void __init emergency_stack_init(void)
}
}
-/*
- * Called into from start_kernel this initializes memblock, which is used
- * to manage page allocation until mem_init is called.
- */
-void __init setup_arch(char **cmdline_p)
-{
- *cmdline_p = boot_command_line;
-
- /*
- * Set cache line size based on type of cpu as a default.
- * Systems with OF can look in the properties on the cpu node(s)
- * for a possibly more accurate value.
- */
- dcache_bsize = ppc64_caches.dline_size;
- icache_bsize = ppc64_caches.iline_size;
-
- if (ppc_md.panic)
- setup_panic();
-
- klp_init_thread_info(&init_thread_info);
-
- init_mm.start_code = (unsigned long)_stext;
- init_mm.end_code = (unsigned long) _etext;
- init_mm.end_data = (unsigned long) _edata;
- init_mm.brk = klimit;
-#ifdef CONFIG_PPC_64K_PAGES
- init_mm.context.pte_frag = NULL;
-#endif
-#ifdef CONFIG_SPAPR_TCE_IOMMU
- mm_iommu_init(&init_mm.context);
-#endif
- irqstack_early_init();
- exc_lvl_early_init();
- emergency_stack_init();
-
- initmem_init();
-
-#ifdef CONFIG_DUMMY_CONSOLE
- conswitchp = &dummy_con;
-#endif
-
- if (ppc_md.setup_arch)
- ppc_md.setup_arch();
-
- paging_init();
-
- /* Initialize the MMU context management stuff */
- mmu_context_init();
-
- /* Interrupt code needs to be 64K-aligned */
- if ((unsigned long)_stext & 0xffff)
- panic("Kernelbase not 64K-aligned (0x%lx)!\n",
- (unsigned long)_stext);
-}
-
#ifdef CONFIG_SMP
#define PCPU_DYN_SIZE ()
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index bf8f34a58670..298afcf3bf2a 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -110,17 +110,11 @@ _GLOBAL(tm_reclaim)
std r3, STK_PARAM(R3)(r1)
SAVE_NVGPRS(r1)
- /* We need to setup MSR for VSX register save instructions. Here we
- * also clear the MSR RI since when we do the treclaim, we won't have a
- * valid kernel pointer for a while. We clear RI here as it avoids
- * adding another mtmsr closer to the treclaim. This makes the region
- * maked as non-recoverable wider than it needs to be but it saves on
- * inserting another mtmsrd later.
- */
+ /* We need to setup MSR for VSX register save instructions. */
mfmsr r14
mr r15, r14
ori r15, r15, MSR_FP
- li r16, MSR_RI
+ li r16, 0
ori r16, r16, MSR_EE /* IRQs hard off */
andc r15, r15, r16
oris r15, r15, MSR_VEC@h
@@ -176,7 +170,17 @@ dont_backup_fp:
1: tdeqi r6, 0
EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0
- /* The moment we treclaim, ALL of our GPRs will switch
+ /* Clear MSR RI since we are about to change r1, EE is already off. */
+ li r4, 0
+ mtmsrd r4, 1
+
+ /*
+ * BE CAREFUL HERE:
+ * At this point we can't take an SLB miss since we have MSR_RI
+ * off. Load only to/from the stack/paca which are in SLB bolted regions
+ * until we turn MSR RI back on.
+ *
+ * The moment we treclaim, ALL of our GPRs will switch
* to user register state. (FPRs, CCR etc. also!)
* Use an sprg and a tm_scratch in the PACA to shuffle.
*/
@@ -197,6 +201,11 @@ dont_backup_fp:
/* Store the PPR in r11 and reset to decent value */
std r11, GPR11(r1) /* Temporary stash */
+
+ /* Reset MSR RI so we can take SLB faults again */
+ li r11, MSR_RI
+ mtmsrd r11, 1
+
mfspr r11, SPRN_PPR
HMT_MEDIUM
@@ -329,8 +338,6 @@ _GLOBAL(__tm_recheckpoint)
*/
subi r7, r7, STACK_FRAME_OVERHEAD
- SET_SCRATCH0(r1)
-
mfmsr r6
/* R4 = original MSR to indicate whether thread used FP/Vector etc. */
@@ -397,11 +404,6 @@ restore_gprs:
ld r5, THREAD_TM_DSCR(r3)
ld r6, THREAD_TM_PPR(r3)
- /* Clear the MSR RI since we are about to change R1. EE is already off
- */
- li r4, 0
- mtmsrd r4, 1
-
REST_GPR(0, r7) /* GPR0 */
REST_2GPRS(2, r7) /* GPR2-3 */
REST_GPR(4, r7) /* GPR4 */
@@ -439,10 +441,34 @@ restore_gprs:
ld r6, _CCR(r7)
mtcr r6
- REST_GPR(1, r7) /* GPR1 */
- REST_GPR(5, r7) /* GPR5-7 */
REST_GPR(6, r7)
- ld r7, GPR7(r7)
+
+ /*
+ * Store r1 and r5 on the stack so that we can access them
+ * after we clear MSR RI.
+ */
+
+ REST_GPR(5, r7)
+ std r5, -8(r1)
+ ld r5, GPR1(r7)
+ std r5, -16(r1)
+
+ REST_GPR(7, r7)
+
+ /* Clear MSR RI since we are about to change r1. EE is already off */
+ li r5, 0
+ mtmsrd r5, 1
+
+ /*
+ * BE CAREFUL HERE:
+ * At this point we can't take an SLB miss since we have MSR_RI
+ * off. Load only to/from the stack/paca which are in SLB bolted regions
+ * until we turn MSR RI back on.
+ */
+
+ SET_SCRATCH0(r1)
+ ld r5, -8(r1)
+ ld r1, -16(r1)
/* Commit register state as checkpointed state: */
TRECHKPT
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 1c2e7a343bf5..616a6d854638 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -70,10 +70,11 @@ _GLOBAL(load_up_altivec)
MTMSRD(r5) /* enable use of AltiVec now */
isync
- /* Hack: if we get an altivec unavailable trap with VRSAVE
- * set to all zeros, we assume this is a broken application
- * that fails to set it properly, and thus we switch it to
- * all 1's
+ /*
+ * While userspace in general ignores VRSAVE, glibc uses it as a boolean
+ * to optimise userspace context save/restore. Whenever we take an
+ * altivec unavailable exception we must set VRSAVE to something non
+ * zero. Set it to all 1s. See also the programming note in the ISA.
*/
mfspr r4,SPRN_VRSAVE
cmpwi 0,r4,0
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 2dd91f79de05..b5fba689fca6 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -165,7 +165,7 @@ SECTIONS
. = ALIGN(8);
.dynsym : AT(ADDR(.dynsym) - LOAD_OFFSET)
{
-#ifdef CONFIG_RELOCATABLE_PPC32
+#ifdef CONFIG_PPC32
__dynamic_symtab = .;
#endif
*(.dynsym)