Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--  arch/powerpc/mm/44x_mmu.c         |  82
-rw-r--r--  arch/powerpc/mm/Makefile          |   1
-rw-r--r--  arch/powerpc/mm/fault.c           |  42
-rw-r--r--  arch/powerpc/mm/hash_low_64.S     |   5
-rw-r--r--  arch/powerpc/mm/hash_native_64.c  |  82
-rw-r--r--  arch/powerpc/mm/hash_utils_64.c   | 144
-rw-r--r--  arch/powerpc/mm/hugetlbpage.c     | 549
-rw-r--r--  arch/powerpc/mm/init_64.c         |  22
-rw-r--r--  arch/powerpc/mm/mem.c             |  26
-rw-r--r--  arch/powerpc/mm/mmu_context_64.c  |  10
-rw-r--r--  arch/powerpc/mm/mmu_decl.h        |   3
-rw-r--r--  arch/powerpc/mm/numa.c            |   3
-rw-r--r--  arch/powerpc/mm/pgtable_32.c      |  28
-rw-r--r--  arch/powerpc/mm/ppc_mmu_32.c      |   2
-rw-r--r--  arch/powerpc/mm/slb.c             |  11
-rw-r--r--  arch/powerpc/mm/slb_low.S         |  52
-rw-r--r--  arch/powerpc/mm/slice.c           | 633
-rw-r--r--  arch/powerpc/mm/stab.c            |   2
-rw-r--r--  arch/powerpc/mm/tlb_32.c          |   4
-rw-r--r--  arch/powerpc/mm/tlb_64.c          |  12
20 files changed, 936 insertions, 777 deletions
diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c
index 0a0a0487b334..ca4dcb07a939 100644
--- a/arch/powerpc/mm/44x_mmu.c
+++ b/arch/powerpc/mm/44x_mmu.c
@@ -24,73 +24,38 @@
*
*/
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/ptrace.h>
-#include <linux/mman.h>
-#include <linux/mm.h>
-#include <linux/swap.h>
-#include <linux/stddef.h>
-#include <linux/vmalloc.h>
#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/highmem.h>
-
-#include <asm/pgalloc.h>
-#include <asm/prom.h>
-#include <asm/io.h>
-#include <asm/mmu_context.h>
-#include <asm/pgtable.h>
#include <asm/mmu.h>
-#include <asm/uaccess.h>
-#include <asm/smp.h>
-#include <asm/bootx.h>
-#include <asm/machdep.h>
-#include <asm/setup.h>
+#include <asm/system.h>
+#include <asm/page.h>
#include "mmu_decl.h"
-extern char etext[], _stext[];
-
/* Used by the 44x TLB replacement exception handler.
* Just needed it declared someplace.
*/
-unsigned int tlb_44x_index = 0;
-unsigned int tlb_44x_hwater = 62;
+unsigned int tlb_44x_index; /* = 0 */
+unsigned int tlb_44x_hwater = PPC44x_TLB_SIZE - 1 - PPC44x_EARLY_TLBS;
/*
* "Pins" a 256MB TLB entry in AS0 for kernel lowmem
*/
-static void __init
-ppc44x_pin_tlb(int slot, unsigned int virt, unsigned int phys)
+static void __init ppc44x_pin_tlb(unsigned int virt, unsigned int phys)
{
- unsigned long attrib = 0;
-
- __asm__ __volatile__("\
- clrrwi %2,%2,10\n\
- ori %2,%2,%4\n\
- clrrwi %1,%1,10\n\
- li %0,0\n\
- ori %0,%0,%5\n\
- tlbwe %2,%3,%6\n\
- tlbwe %1,%3,%7\n\
- tlbwe %0,%3,%8"
+ __asm__ __volatile__(
+ "tlbwe %2,%3,%4\n"
+ "tlbwe %1,%3,%5\n"
+ "tlbwe %0,%3,%6\n"
:
- : "r" (attrib), "r" (phys), "r" (virt), "r" (slot),
- "i" (PPC44x_TLB_VALID | PPC44x_TLB_256M),
- "i" (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G),
+ : "r" (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G),
+ "r" (phys),
+ "r" (virt | PPC44x_TLB_VALID | PPC44x_TLB_256M),
+ "r" (tlb_44x_hwater--), /* slot for this TLB entry */
"i" (PPC44x_TLB_PAGEID),
"i" (PPC44x_TLB_XLAT),
"i" (PPC44x_TLB_ATTRIB));
}
-/*
- * MMU_init_hw does the chip-specific initialization of the MMU hardware.
- */
void __init MMU_init_hw(void)
{
flush_instruction_cache();
@@ -98,22 +63,13 @@ void __init MMU_init_hw(void)
unsigned long __init mmu_mapin_ram(void)
{
- unsigned int pinned_tlbs = 1;
- int i;
-
- /* Determine number of entries necessary to cover lowmem */
- pinned_tlbs = (unsigned int)
- (_ALIGN(total_lowmem, PPC_PIN_SIZE) >> PPC44x_PIN_SHIFT);
-
- /* Write upper watermark to save location */
- tlb_44x_hwater = PPC44x_LOW_SLOT - pinned_tlbs;
+ unsigned long addr;
- /* If necessary, set additional pinned TLBs */
- if (pinned_tlbs > 1)
- for (i = (PPC44x_LOW_SLOT-(pinned_tlbs-1)); i < PPC44x_LOW_SLOT; i++) {
- unsigned int phys_addr = (PPC44x_LOW_SLOT-i) * PPC_PIN_SIZE;
- ppc44x_pin_tlb(i, phys_addr+PAGE_OFFSET, phys_addr);
- }
+ /* Pin in enough TLBs to cover any lowmem not covered by the
+ * initial 256M mapping established in head_44x.S */
+ for (addr = PPC_PIN_SIZE; addr < total_lowmem;
+ addr += PPC_PIN_SIZE)
+ ppc44x_pin_tlb(addr + PAGE_OFFSET, addr);
return total_lowmem;
}
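The rewritten mmu_mapin_ram() above relies on head_44x.S having already pinned the first 256MB; each further ppc44x_pin_tlb() call consumes one TLB slot downward from the high-water mark. A minimal user-space sketch of that accounting follows — the 64-entry TLB size, the PPC44x_EARLY_TLBS value of 1, and the PAGE_OFFSET are assumptions for the example, not values taken from this patch:

#include <stdio.h>

#define PPC_PIN_SIZE      (256UL << 20)  /* one pinned entry maps 256MB */
#define PPC44x_TLB_SIZE   64             /* assumed TLB entry count */
#define PPC44x_EARLY_TLBS 1              /* assumed: one entry from head_44x.S */
#define PAGE_OFFSET       0xc0000000UL   /* assumed kernel virtual base */

int main(void)
{
	unsigned int hwater = PPC44x_TLB_SIZE - 1 - PPC44x_EARLY_TLBS;
	unsigned long total_lowmem = 768UL << 20;   /* example: 768MB lowmem */
	unsigned long addr;

	/* Skip the first 256MB (already mapped early), pin the rest */
	for (addr = PPC_PIN_SIZE; addr < total_lowmem; addr += PPC_PIN_SIZE)
		printf("pin slot %u: virt 0x%08lx -> phys 0x%08lx\n",
		       hwater--, addr + PAGE_OFFSET, addr);
	return 0;
}

Each iteration mirrors the tlb_44x_hwater-- slot allocation in the inline asm, so the TLB replacement handler's index range never collides with a pinned entry.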
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 38a81967ca07..4f839c6a9768 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -18,4 +18,5 @@ obj-$(CONFIG_40x) += 4xx_mmu.o
obj-$(CONFIG_44x) += 44x_mmu.o
obj-$(CONFIG_FSL_BOOKE) += fsl_booke_mmu.o
obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o
+obj-$(CONFIG_PPC_MM_SLICES) += slice.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 03aeb3a46077..bfe901353142 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -28,6 +28,7 @@
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/kprobes.h>
+#include <linux/kdebug.h>
#include <asm/page.h>
#include <asm/pgtable.h>
@@ -36,40 +37,28 @@
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/tlbflush.h>
-#include <asm/kdebug.h>
#include <asm/siginfo.h>
-#ifdef CONFIG_KPROBES
-ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
-/* Hook to register for page fault notifications */
-int register_page_fault_notifier(struct notifier_block *nb)
+#ifdef CONFIG_KPROBES
+static inline int notify_page_fault(struct pt_regs *regs)
{
- return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
-}
+ int ret = 0;
-int unregister_page_fault_notifier(struct notifier_block *nb)
-{
- return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
-}
+ /* kprobe_running() needs smp_processor_id() */
+ if (!user_mode(regs)) {
+ preempt_disable();
+ if (kprobe_running() && kprobe_fault_handler(regs, 11))
+ ret = 1;
+ preempt_enable();
+ }
-static inline int notify_page_fault(enum die_val val, const char *str,
- struct pt_regs *regs, long err, int trap, int sig)
-{
- struct die_args args = {
- .regs = regs,
- .str = str,
- .err = err,
- .trapnr = trap,
- .signr = sig
- };
- return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
+ return ret;
}
#else
-static inline int notify_page_fault(enum die_val val, const char *str,
- struct pt_regs *regs, long err, int trap, int sig)
+static inline int notify_page_fault(struct pt_regs *regs)
{
- return NOTIFY_DONE;
+ return 0;
}
#endif
@@ -175,8 +164,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
is_write = error_code & ESR_DST;
#endif /* CONFIG_4xx || CONFIG_BOOKE */
- if (notify_page_fault(DIE_PAGE_FAULT, "page_fault", regs, error_code,
- 11, SIGSEGV) == NOTIFY_STOP)
+ if (notify_page_fault(regs))
return 0;
if (trap == 0x300) {
diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S
index e64ce3eec36e..4762ff7c14df 100644
--- a/arch/powerpc/mm/hash_low_64.S
+++ b/arch/powerpc/mm/hash_low_64.S
@@ -615,6 +615,9 @@ htab_pte_insert_failure:
li r3,-1
b htab_bail
+#endif /* CONFIG_PPC_64K_PAGES */
+
+#ifdef CONFIG_PPC_HAS_HASH_64K
/*****************************************************************************
* *
@@ -870,7 +873,7 @@ ht64_pte_insert_failure:
b ht64_bail
-#endif /* CONFIG_PPC_64K_PAGES */
+#endif /* CONFIG_PPC_HAS_HASH_64K */
/*****************************************************************************
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 79aedaf36f2b..7b7fe2d7b9dc 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -26,6 +26,7 @@
#include <asm/tlb.h>
#include <asm/cputable.h>
#include <asm/udbg.h>
+#include <asm/kexec.h>
#ifdef DEBUG_LOW
#define DBG_LOW(fmt...) udbg_printf(fmt)
@@ -340,31 +341,70 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long va,
local_irq_restore(flags);
}
-/*
- * XXX This need fixing based on page size. It's only used by
- * native_hpte_clear() for now which needs fixing too so they
- * make a good pair...
- */
-static unsigned long slot2va(unsigned long hpte_v, unsigned long slot)
+#define LP_SHIFT 12
+#define LP_BITS 8
+#define LP_MASK(i) ((0xFF >> (i)) << LP_SHIFT)
+
+static void hpte_decode(hpte_t *hpte, unsigned long slot,
+ int *psize, unsigned long *va)
{
- unsigned long avpn = HPTE_V_AVPN_VAL(hpte_v);
- unsigned long va;
+ unsigned long hpte_r = hpte->r;
+ unsigned long hpte_v = hpte->v;
+ unsigned long avpn;
+ int i, size, shift, penc, avpnm_bits;
+
+ if (!(hpte_v & HPTE_V_LARGE))
+ size = MMU_PAGE_4K;
+ else {
+ for (i = 0; i < LP_BITS; i++) {
+ if ((hpte_r & LP_MASK(i+1)) == LP_MASK(i+1))
+ break;
+ }
+ penc = LP_MASK(i+1) >> LP_SHIFT;
+ for (size = 0; size < MMU_PAGE_COUNT; size++) {
- va = avpn << 23;
+ /* 4K pages are not represented by LP */
+ if (size == MMU_PAGE_4K)
+ continue;
- if (! (hpte_v & HPTE_V_LARGE)) {
- unsigned long vpi, pteg;
+ /* valid entries have a shift value */
+ if (!mmu_psize_defs[size].shift)
+ continue;
+
+ if (penc == mmu_psize_defs[size].penc)
+ break;
+ }
+ }
- pteg = slot / HPTES_PER_GROUP;
- if (hpte_v & HPTE_V_SECONDARY)
- pteg = ~pteg;
+ /*
+ * FIXME, the code below works for 16M, 64K, and 4K pages as these
+ * fall under the p<=23 rules for calculating the virtual address.
+ * In the case of 16M pages, an extra bit is stolen from the AVPN
+ * field to achieve the requisite 24 bits.
+ *
+ * Does not work for 16G pages or 1 TB segments.
+ */
+ shift = mmu_psize_defs[size].shift;
+ if (mmu_psize_defs[size].avpnm)
+ avpnm_bits = __ilog2_u64(mmu_psize_defs[size].avpnm) + 1;
+ else
+ avpnm_bits = 0;
+ if (shift - avpnm_bits <= 23) {
+ avpn = HPTE_V_AVPN_VAL(hpte_v) << 23;
- vpi = ((va >> 28) ^ pteg) & htab_hash_mask;
+ if (shift < 23) {
+ unsigned long vpi, pteg;
- va |= vpi << PAGE_SHIFT;
+ pteg = slot / HPTES_PER_GROUP;
+ if (hpte_v & HPTE_V_SECONDARY)
+ pteg = ~pteg;
+ vpi = ((avpn >> 28) ^ pteg) & htab_hash_mask;
+ avpn |= (vpi << mmu_psize_defs[size].shift);
+ }
}
- return va;
+ *va = avpn;
+ *psize = size;
}
/*
@@ -374,15 +414,14 @@ static unsigned long slot2va(unsigned long hpte_v, unsigned long slot)
*
* TODO: add batching support when enabled. remember, no dynamic memory here,
* although there is the control page available...
- *
- * XXX FIXME: 4k only for now !
*/
static void native_hpte_clear(void)
{
unsigned long slot, slots, flags;
hpte_t *hptep = htab_address;
- unsigned long hpte_v;
+ unsigned long hpte_v, va;
unsigned long pteg_count;
+ int psize;
pteg_count = htab_hash_mask + 1;
@@ -408,8 +447,9 @@ static void native_hpte_clear(void)
* already hold the native_tlbie_lock.
*/
if (hpte_v & HPTE_V_VALID) {
+ hpte_decode(hptep, slot, &psize, &va);
hptep->v = 0;
- __tlbie(slot2va(hpte_v, slot), MMU_PAGE_4K);
+ __tlbie(va, psize);
}
}
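hpte_decode() recovers the page size by scanning the LP field of the second HPTE doubleword: it finds the widest LP mask that is fully set, and that mask value is the penc to match against mmu_psize_defs. A standalone sketch of just that scan (the hpte_r value is made up for illustration):

#include <stdio.h>

#define LP_SHIFT 12
#define LP_BITS  8
#define LP_MASK(i) ((0xFFUL >> (i)) << LP_SHIFT)

int main(void)
{
	unsigned long hpte_r = 0x7000;   /* hypothetical second doubleword */
	int i;

	/* Find the widest LP mask fully set in hpte_r */
	for (i = 0; i < LP_BITS; i++)
		if ((hpte_r & LP_MASK(i + 1)) == LP_MASK(i + 1))
			break;

	printf("penc = 0x%lx\n", LP_MASK(i + 1) >> LP_SHIFT);
	return 0;
}

In the kernel function that penc is then compared against each valid mmu_psize_defs[].penc to identify the size, which is what lets native_hpte_clear() issue a correctly sized tlbie instead of the old 4K-only slot2va() path.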
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 49618461defb..028ba4ed03d2 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -51,6 +51,7 @@
#include <asm/cputable.h>
#include <asm/abs_addr.h>
#include <asm/sections.h>
+#include <asm/spu.h>
#ifdef DEBUG
#define DBG(fmt...) udbg_printf(fmt)
@@ -103,7 +104,7 @@ int mmu_ci_restrictions;
#ifdef CONFIG_DEBUG_PAGEALLOC
static u8 *linear_map_hash_slots;
static unsigned long linear_map_hash_count;
-static spinlock_t linear_map_hash_lock;
+static DEFINE_SPINLOCK(linear_map_hash_lock);
#endif /* CONFIG_DEBUG_PAGEALLOC */
/* There are definitions of page sizes arrays to be used when none
@@ -419,7 +420,7 @@ static void __init htab_finish_init(void)
extern unsigned int *htab_call_hpte_remove;
extern unsigned int *htab_call_hpte_updatepp;
-#ifdef CONFIG_PPC_64K_PAGES
+#ifdef CONFIG_PPC_HAS_HASH_64K
extern unsigned int *ht64_call_hpte_insert1;
extern unsigned int *ht64_call_hpte_insert2;
extern unsigned int *ht64_call_hpte_remove;
@@ -596,22 +597,23 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
* Demote a segment to using 4k pages.
* For now this makes the whole process use 4k pages.
*/
-void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
-{
#ifdef CONFIG_PPC_64K_PAGES
+static void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
+{
if (mm->context.user_psize == MMU_PAGE_4K)
return;
+#ifdef CONFIG_PPC_MM_SLICES
+ slice_set_user_psize(mm, MMU_PAGE_4K);
+#else /* CONFIG_PPC_MM_SLICES */
mm->context.user_psize = MMU_PAGE_4K;
mm->context.sllp = SLB_VSID_USER | mmu_psize_defs[MMU_PAGE_4K].sllp;
- get_paca()->context = mm->context;
- slb_flush_and_rebolt();
+#endif /* CONFIG_PPC_MM_SLICES */
+
#ifdef CONFIG_SPE_BASE
spu_flush_all_slbs(mm);
#endif
-#endif
}
-
-EXPORT_SYMBOL_GPL(demote_segment_4k);
+#endif /* CONFIG_PPC_64K_PAGES */
/* Result code is:
* 0 - handled
@@ -646,7 +648,11 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
return 1;
}
vsid = get_vsid(mm->context.id, ea);
+#ifdef CONFIG_PPC_MM_SLICES
+ psize = get_slice_psize(mm, ea);
+#else
psize = mm->context.user_psize;
+#endif
break;
case VMALLOC_REGION_ID:
mm = &init_mm;
@@ -674,11 +680,22 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
if (user_region && cpus_equal(mm->cpu_vm_mask, tmp))
local = 1;
+#ifdef CONFIG_HUGETLB_PAGE
/* Handle hugepage regions */
- if (unlikely(in_hugepage_area(mm->context, ea))) {
+ if (HPAGE_SHIFT && psize == mmu_huge_psize) {
DBG_LOW(" -> huge page !\n");
return hash_huge_page(mm, access, ea, vsid, local, trap);
}
+#endif /* CONFIG_HUGETLB_PAGE */
+
+#ifndef CONFIG_PPC_64K_PAGES
+ /* If we use 4K pages and our psize is not 4K, then we are hitting
+ * a special driver mapping, we need to align the address before
+ * we fetch the PTE
+ */
+ if (psize != MMU_PAGE_4K)
+ ea &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
+#endif /* CONFIG_PPC_64K_PAGES */
/* Get PTE and page size from page tables */
ptep = find_linux_pte(pgdir, ea);
@@ -702,54 +719,56 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
}
/* Do actual hashing */
-#ifndef CONFIG_PPC_64K_PAGES
- rc = __hash_page_4K(ea, access, vsid, ptep, trap, local);
-#else
+#ifdef CONFIG_PPC_64K_PAGES
/* If _PAGE_4K_PFN is set, make sure this is a 4k segment */
if (pte_val(*ptep) & _PAGE_4K_PFN) {
demote_segment_4k(mm, ea);
psize = MMU_PAGE_4K;
}
- if (mmu_ci_restrictions) {
- /* If this PTE is non-cacheable, switch to 4k */
- if (psize == MMU_PAGE_64K &&
- (pte_val(*ptep) & _PAGE_NO_CACHE)) {
- if (user_region) {
- demote_segment_4k(mm, ea);
- psize = MMU_PAGE_4K;
- } else if (ea < VMALLOC_END) {
- /*
- * some driver did a non-cacheable mapping
- * in vmalloc space, so switch vmalloc
- * to 4k pages
- */
- printk(KERN_ALERT "Reducing vmalloc segment "
- "to 4kB pages because of "
- "non-cacheable mapping\n");
- psize = mmu_vmalloc_psize = MMU_PAGE_4K;
- }
+ /* If this PTE is non-cacheable and we have restrictions on
+ * using non cacheable large pages, then we switch to 4k
+ */
+ if (mmu_ci_restrictions && psize == MMU_PAGE_64K &&
+ (pte_val(*ptep) & _PAGE_NO_CACHE)) {
+ if (user_region) {
+ demote_segment_4k(mm, ea);
+ psize = MMU_PAGE_4K;
+ } else if (ea < VMALLOC_END) {
+ /*
+ * some driver did a non-cacheable mapping
+ * in vmalloc space, so switch vmalloc
+ * to 4k pages
+ */
+ printk(KERN_ALERT "Reducing vmalloc segment "
+ "to 4kB pages because of "
+ "non-cacheable mapping\n");
+ psize = mmu_vmalloc_psize = MMU_PAGE_4K;
#ifdef CONFIG_SPE_BASE
spu_flush_all_slbs(mm);
#endif
}
- if (user_region) {
- if (psize != get_paca()->context.user_psize) {
- get_paca()->context = mm->context;
- slb_flush_and_rebolt();
- }
- } else if (get_paca()->vmalloc_sllp !=
- mmu_psize_defs[mmu_vmalloc_psize].sllp) {
- get_paca()->vmalloc_sllp =
- mmu_psize_defs[mmu_vmalloc_psize].sllp;
+ }
+ if (user_region) {
+ if (psize != get_paca()->context.user_psize) {
+ get_paca()->context.user_psize =
+ mm->context.user_psize;
slb_flush_and_rebolt();
}
+ } else if (get_paca()->vmalloc_sllp !=
+ mmu_psize_defs[mmu_vmalloc_psize].sllp) {
+ get_paca()->vmalloc_sllp =
+ mmu_psize_defs[mmu_vmalloc_psize].sllp;
+ slb_flush_and_rebolt();
}
+#endif /* CONFIG_PPC_64K_PAGES */
+
+#ifdef CONFIG_PPC_HAS_HASH_64K
if (psize == MMU_PAGE_64K)
rc = __hash_page_64K(ea, access, vsid, ptep, trap, local);
else
+#endif /* CONFIG_PPC_HAS_HASH_64K */
rc = __hash_page_4K(ea, access, vsid, ptep, trap, local);
-#endif /* CONFIG_PPC_64K_PAGES */
#ifndef CONFIG_PPC_64K_PAGES
DBG_LOW(" o-pte: %016lx\n", pte_val(*ptep));
@@ -772,42 +791,55 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
unsigned long flags;
int local = 0;
- /* We don't want huge pages prefaulted for now
- */
- if (unlikely(in_hugepage_area(mm->context, ea)))
+ BUG_ON(REGION_ID(ea) != USER_REGION_ID);
+
+#ifdef CONFIG_PPC_MM_SLICES
+ /* We only prefault standard pages for now */
+ if (unlikely(get_slice_psize(mm, ea) != mm->context.user_psize))
return;
+#endif
DBG_LOW("hash_preload(mm=%p, mm->pgdir=%p, ea=%016lx, access=%lx,"
" trap=%lx\n", mm, mm->pgd, ea, access, trap);
- /* Get PTE, VSID, access mask */
+ /* Get Linux PTE if available */
pgdir = mm->pgd;
if (pgdir == NULL)
return;
ptep = find_linux_pte(pgdir, ea);
if (!ptep)
return;
+
+#ifdef CONFIG_PPC_64K_PAGES
+ /* If either _PAGE_4K_PFN or _PAGE_NO_CACHE is set (and we are on
+ * a 64K kernel), then we don't preload, hash_page() will take
+ * care of it once we actually try to access the page.
+ * That way we don't have to duplicate all of the logic for segment
+ * page size demotion here
+ */
+ if (pte_val(*ptep) & (_PAGE_4K_PFN | _PAGE_NO_CACHE))
+ return;
+#endif /* CONFIG_PPC_64K_PAGES */
+
+ /* Get VSID */
vsid = get_vsid(mm->context.id, ea);
- /* Hash it in */
+ /* Hash doesn't like irqs */
local_irq_save(flags);
+
+ /* Is that local to this CPU ? */
mask = cpumask_of_cpu(smp_processor_id());
if (cpus_equal(mm->cpu_vm_mask, mask))
local = 1;
-#ifndef CONFIG_PPC_64K_PAGES
- __hash_page_4K(ea, access, vsid, ptep, trap, local);
-#else
- if (mmu_ci_restrictions) {
- /* If this PTE is non-cacheable, switch to 4k */
- if (mm->context.user_psize == MMU_PAGE_64K &&
- (pte_val(*ptep) & _PAGE_NO_CACHE))
- demote_segment_4k(mm, ea);
- }
+
+ /* Hash it in */
+#ifdef CONFIG_PPC_HAS_HASH_64K
if (mm->context.user_psize == MMU_PAGE_64K)
__hash_page_64K(ea, access, vsid, ptep, trap, local);
else
- __hash_page_4K(ea, access, vsid, ptep, trap, local);
#endif /* CONFIG_PPC_64K_PAGES */
+ __hash_page_4K(ea, access, vsid, ptep, trap, local);
+
local_irq_restore(flags);
}
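With CONFIG_PPC_MM_SLICES, hash_page() no longer assumes a single per-mm page size; it asks get_slice_psize() for the 4-bit psize of the slice containing the faulting address. A toy version of that lookup, restricted to the low (256MB) slices — the packed mask value and psize indices are illustrative, mirroring the packing in the new slice.c further below:

#include <stdio.h>

#define SLICE_LOW_SHIFT 28               /* 256MB low slices */
#define MMU_PAGE_4K     0
#define MMU_PAGE_64K    2                /* illustrative index */

static unsigned int toy_get_slice_psize(unsigned long low_psizes,
					unsigned long addr)
{
	int index = addr >> SLICE_LOW_SHIFT;       /* which low slice */
	return (low_psizes >> (index * 4)) & 0xf;  /* 4 bits per slice */
}

int main(void)
{
	/* made-up mask: slice 0 runs 64K pages, slice 1 demoted to 4K */
	unsigned long psizes = (MMU_PAGE_4K << 4) | MMU_PAGE_64K;

	printf("ea 0x08000000 -> psize %u\n",
	       toy_get_slice_psize(psizes, 0x08000000UL));
	printf("ea 0x10000000 -> psize %u\n",
	       toy_get_slice_psize(psizes, 0x10000000UL));
	return 0;
}

demote_segment_4k() above rewrites this packed mask via slice_set_user_psize() rather than poking the paca copy of the context directly.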
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 1f07f70ac89f..92a1b16fb7e3 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -12,7 +12,6 @@
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
-#include <linux/smp_lock.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/sysctl.h>
@@ -92,7 +91,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
pgd_t *pg;
pud_t *pu;
- BUG_ON(! in_hugepage_area(mm->context, addr));
+ BUG_ON(get_slice_psize(mm, addr) != mmu_huge_psize);
addr &= HPAGE_MASK;
@@ -120,7 +119,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
pud_t *pu;
hugepd_t *hpdp = NULL;
- BUG_ON(! in_hugepage_area(mm->context, addr));
+ BUG_ON(get_slice_psize(mm, addr) != mmu_huge_psize);
addr &= HPAGE_MASK;
@@ -303,7 +302,7 @@ void hugetlb_free_pgd_range(struct mmu_gather **tlb,
start = addr;
pgd = pgd_offset((*tlb)->mm, addr);
do {
- BUG_ON(! in_hugepage_area((*tlb)->mm->context, addr));
+ BUG_ON(get_slice_psize((*tlb)->mm, addr) != mmu_huge_psize);
next = pgd_addr_end(addr, end);
if (pgd_none_or_clear_bad(pgd))
continue;
@@ -332,203 +331,13 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
return __pte(old);
}
-struct slb_flush_info {
- struct mm_struct *mm;
- u16 newareas;
-};
-
-static void flush_low_segments(void *parm)
-{
- struct slb_flush_info *fi = parm;
- unsigned long i;
-
- BUILD_BUG_ON((sizeof(fi->newareas)*8) != NUM_LOW_AREAS);
-
- if (current->active_mm != fi->mm)
- return;
-
- /* Only need to do anything if this CPU is working in the same
- * mm as the one which has changed */
-
- /* update the paca copy of the context struct */
- get_paca()->context = current->active_mm->context;
-
- asm volatile("isync" : : : "memory");
- for (i = 0; i < NUM_LOW_AREAS; i++) {
- if (! (fi->newareas & (1U << i)))
- continue;
- asm volatile("slbie %0"
- : : "r" ((i << SID_SHIFT) | SLBIE_C));
- }
- asm volatile("isync" : : : "memory");
-}
-
-static void flush_high_segments(void *parm)
-{
- struct slb_flush_info *fi = parm;
- unsigned long i, j;
-
-
- BUILD_BUG_ON((sizeof(fi->newareas)*8) != NUM_HIGH_AREAS);
-
- if (current->active_mm != fi->mm)
- return;
-
- /* Only need to do anything if this CPU is working in the same
- * mm as the one which has changed */
-
- /* update the paca copy of the context struct */
- get_paca()->context = current->active_mm->context;
-
- asm volatile("isync" : : : "memory");
- for (i = 0; i < NUM_HIGH_AREAS; i++) {
- if (! (fi->newareas & (1U << i)))
- continue;
- for (j = 0; j < (1UL << (HTLB_AREA_SHIFT-SID_SHIFT)); j++)
- asm volatile("slbie %0"
- :: "r" (((i << HTLB_AREA_SHIFT)
- + (j << SID_SHIFT)) | SLBIE_C));
- }
- asm volatile("isync" : : : "memory");
-}
-
-static int prepare_low_area_for_htlb(struct mm_struct *mm, unsigned long area)
-{
- unsigned long start = area << SID_SHIFT;
- unsigned long end = (area+1) << SID_SHIFT;
- struct vm_area_struct *vma;
-
- BUG_ON(area >= NUM_LOW_AREAS);
-
- /* Check no VMAs are in the region */
- vma = find_vma(mm, start);
- if (vma && (vma->vm_start < end))
- return -EBUSY;
-
- return 0;
-}
-
-static int prepare_high_area_for_htlb(struct mm_struct *mm, unsigned long area)
-{
- unsigned long start = area << HTLB_AREA_SHIFT;
- unsigned long end = (area+1) << HTLB_AREA_SHIFT;
- struct vm_area_struct *vma;
-
- BUG_ON(area >= NUM_HIGH_AREAS);
-
- /* Hack, so that each addresses is controlled by exactly one
- * of the high or low area bitmaps, the first high area starts
- * at 4GB, not 0 */
- if (start == 0)
- start = 0x100000000UL;
-
- /* Check no VMAs are in the region */
- vma = find_vma(mm, start);
- if (vma && (vma->vm_start < end))
- return -EBUSY;
-
- return 0;
-}
-
-static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas)
-{
- unsigned long i;
- struct slb_flush_info fi;
-
- BUILD_BUG_ON((sizeof(newareas)*8) != NUM_LOW_AREAS);
- BUILD_BUG_ON((sizeof(mm->context.low_htlb_areas)*8) != NUM_LOW_AREAS);
-
- newareas &= ~(mm->context.low_htlb_areas);
- if (! newareas)
- return 0; /* The segments we want are already open */
-
- for (i = 0; i < NUM_LOW_AREAS; i++)
- if ((1 << i) & newareas)
- if (prepare_low_area_for_htlb(mm, i) != 0)
- return -EBUSY;
-
- mm->context.low_htlb_areas |= newareas;
-
- /* the context change must make it to memory before the flush,
- * so that further SLB misses do the right thing. */
- mb();
-
- fi.mm = mm;
- fi.newareas = newareas;
- on_each_cpu(flush_low_segments, &fi, 0, 1);
-
- return 0;
-}
-
-static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas)
-{
- struct slb_flush_info fi;
- unsigned long i;
-
- BUILD_BUG_ON((sizeof(newareas)*8) != NUM_HIGH_AREAS);
- BUILD_BUG_ON((sizeof(mm->context.high_htlb_areas)*8)
- != NUM_HIGH_AREAS);
-
- newareas &= ~(mm->context.high_htlb_areas);
- if (! newareas)
- return 0; /* The areas we want are already open */
-
- for (i = 0; i < NUM_HIGH_AREAS; i++)
- if ((1 << i) & newareas)
- if (prepare_high_area_for_htlb(mm, i) != 0)
- return -EBUSY;
-
- mm->context.high_htlb_areas |= newareas;
-
- /* the context change must make it to memory before the flush,
- * so that further SLB misses do the right thing. */
- mb();
-
- fi.mm = mm;
- fi.newareas = newareas;
- on_each_cpu(flush_high_segments, &fi, 0, 1);
-
- return 0;
-}
-
-int prepare_hugepage_range(unsigned long addr, unsigned long len, pgoff_t pgoff)
-{
- int err = 0;
-
- if (pgoff & (~HPAGE_MASK >> PAGE_SHIFT))
- return -EINVAL;
- if (len & ~HPAGE_MASK)
- return -EINVAL;
- if (addr & ~HPAGE_MASK)
- return -EINVAL;
-
- if (addr < 0x100000000UL)
- err = open_low_hpage_areas(current->mm,
- LOW_ESID_MASK(addr, len));
- if ((addr + len) > 0x100000000UL)
- err = open_high_hpage_areas(current->mm,
- HTLB_AREA_MASK(addr, len));
-#ifdef CONFIG_SPE_BASE
- spu_flush_all_slbs(current->mm);
-#endif
- if (err) {
- printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)"
- " failed (lowmask: 0x%04hx, highmask: 0x%04hx)\n",
- addr, len,
- LOW_ESID_MASK(addr, len), HTLB_AREA_MASK(addr, len));
- return err;
- }
-
- return 0;
-}
-
struct page *
follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
{
pte_t *ptep;
struct page *page;
- if (! in_hugepage_area(mm->context, address))
+ if (get_slice_psize(mm, address) != mmu_huge_psize)
return ERR_PTR(-EINVAL);
ptep = huge_pte_offset(mm, address);
@@ -552,359 +361,13 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
return NULL;
}
-/* Because we have an exclusive hugepage region which lies within the
- * normal user address space, we have to take special measures to make
- * non-huge mmap()s evade the hugepage reserved regions. */
-unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
- unsigned long len, unsigned long pgoff,
- unsigned long flags)
-{
- struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
- unsigned long start_addr;
-
- if (len > TASK_SIZE)
- return -ENOMEM;
-
- /* handle fixed mapping: prevent overlap with huge pages */
- if (flags & MAP_FIXED) {
- if (is_hugepage_only_range(mm, addr, len))
- return -EINVAL;
- return addr;
- }
-
- if (addr) {
- addr = PAGE_ALIGN(addr);
- vma = find_vma(mm, addr);
- if (((TASK_SIZE - len) >= addr)
- && (!vma || (addr+len) <= vma->vm_start)
- && !is_hugepage_only_range(mm, addr,len))
- return addr;
- }
- if (len > mm->cached_hole_size) {
- start_addr = addr = mm->free_area_cache;
- } else {
- start_addr = addr = TASK_UNMAPPED_BASE;
- mm->cached_hole_size = 0;
- }
-
-full_search:
- vma = find_vma(mm, addr);
- while (TASK_SIZE - len >= addr) {
- BUG_ON(vma && (addr >= vma->vm_end));
-
- if (touches_hugepage_low_range(mm, addr, len)) {
- addr = ALIGN(addr+1, 1<<SID_SHIFT);
- vma = find_vma(mm, addr);
- continue;
- }
- if (touches_hugepage_high_range(mm, addr, len)) {
- addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT);
- vma = find_vma(mm, addr);
- continue;
- }
- if (!vma || addr + len <= vma->vm_start) {
- /*
- * Remember the place where we stopped the search:
- */
- mm->free_area_cache = addr + len;
- return addr;
- }
- if (addr + mm->cached_hole_size < vma->vm_start)
- mm->cached_hole_size = vma->vm_start - addr;
- addr = vma->vm_end;
- vma = vma->vm_next;
- }
-
- /* Make sure we didn't miss any holes */
- if (start_addr != TASK_UNMAPPED_BASE) {
- start_addr = addr = TASK_UNMAPPED_BASE;
- mm->cached_hole_size = 0;
- goto full_search;
- }
- return -ENOMEM;
-}
-
-/*
- * This mmap-allocator allocates new areas top-down from below the
- * stack's low limit (the base):
- *
- * Because we have an exclusive hugepage region which lies within the
- * normal user address space, we have to take special measures to make
- * non-huge mmap()s evade the hugepage reserved regions.
- */
-unsigned long
-arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
- const unsigned long len, const unsigned long pgoff,
- const unsigned long flags)
-{
- struct vm_area_struct *vma, *prev_vma;
- struct mm_struct *mm = current->mm;
- unsigned long base = mm->mmap_base, addr = addr0;
- unsigned long largest_hole = mm->cached_hole_size;
- int first_time = 1;
-
- /* requested length too big for entire address space */
- if (len > TASK_SIZE)
- return -ENOMEM;
-
- /* handle fixed mapping: prevent overlap with huge pages */
- if (flags & MAP_FIXED) {
- if (is_hugepage_only_range(mm, addr, len))
- return -EINVAL;
- return addr;
- }
-
- /* dont allow allocations above current base */
- if (mm->free_area_cache > base)
- mm->free_area_cache = base;
-
- /* requesting a specific address */
- if (addr) {
- addr = PAGE_ALIGN(addr);
- vma = find_vma(mm, addr);
- if (TASK_SIZE - len >= addr &&
- (!vma || addr + len <= vma->vm_start)
- && !is_hugepage_only_range(mm, addr,len))
- return addr;
- }
-
- if (len <= largest_hole) {
- largest_hole = 0;
- mm->free_area_cache = base;
- }
-try_again:
- /* make sure it can fit in the remaining address space */
- if (mm->free_area_cache < len)
- goto fail;
-
- /* either no address requested or cant fit in requested address hole */
- addr = (mm->free_area_cache - len) & PAGE_MASK;
- do {
-hugepage_recheck:
- if (touches_hugepage_low_range(mm, addr, len)) {
- addr = (addr & ((~0) << SID_SHIFT)) - len;
- goto hugepage_recheck;
- } else if (touches_hugepage_high_range(mm, addr, len)) {
- addr = (addr & ((~0UL) << HTLB_AREA_SHIFT)) - len;
- goto hugepage_recheck;
- }
-
- /*
- * Lookup failure means no vma is above this address,
- * i.e. return with success:
- */
- if (!(vma = find_vma_prev(mm, addr, &prev_vma)))
- return addr;
-
- /*
- * new region fits between prev_vma->vm_end and
- * vma->vm_start, use it:
- */
- if (addr+len <= vma->vm_start &&
- (!prev_vma || (addr >= prev_vma->vm_end))) {
- /* remember the address as a hint for next time */
- mm->cached_hole_size = largest_hole;
- return (mm->free_area_cache = addr);
- } else {
- /* pull free_area_cache down to the first hole */
- if (mm->free_area_cache == vma->vm_end) {
- mm->free_area_cache = vma->vm_start;
- mm->cached_hole_size = largest_hole;
- }
- }
-
- /* remember the largest hole we saw so far */
- if (addr + largest_hole < vma->vm_start)
- largest_hole = vma->vm_start - addr;
-
- /* try just below the current vma->vm_start */
- addr = vma->vm_start-len;
- } while (len <= vma->vm_start);
-
-fail:
- /*
- * if hint left us with no space for the requested
- * mapping then try again:
- */
- if (first_time) {
- mm->free_area_cache = base;
- largest_hole = 0;
- first_time = 0;
- goto try_again;
- }
- /*
- * A failed mmap() very likely causes application failure,
- * so fall back to the bottom-up function here. This scenario
- * can happen with large stack limits and large mmap()
- * allocations.
- */
- mm->free_area_cache = TASK_UNMAPPED_BASE;
- mm->cached_hole_size = ~0UL;
- addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
- /*
- * Restore the topdown base:
- */
- mm->free_area_cache = base;
- mm->cached_hole_size = ~0UL;
-
- return addr;
-}
-
-static int htlb_check_hinted_area(unsigned long addr, unsigned long len)
-{
- struct vm_area_struct *vma;
-
- vma = find_vma(current->mm, addr);
- if (TASK_SIZE - len >= addr &&
- (!vma || ((addr + len) <= vma->vm_start)))
- return 0;
-
- return -ENOMEM;
-}
-
-static unsigned long htlb_get_low_area(unsigned long len, u16 segmask)
-{
- unsigned long addr = 0;
- struct vm_area_struct *vma;
-
- vma = find_vma(current->mm, addr);
- while (addr + len <= 0x100000000UL) {
- BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */
-
- if (! __within_hugepage_low_range(addr, len, segmask)) {
- addr = ALIGN(addr+1, 1<<SID_SHIFT);
- vma = find_vma(current->mm, addr);
- continue;
- }
-
- if (!vma || (addr + len) <= vma->vm_start)
- return addr;
- addr = ALIGN(vma->vm_end, HPAGE_SIZE);
- /* Depending on segmask this might not be a confirmed
- * hugepage region, so the ALIGN could have skipped
- * some VMAs */
- vma = find_vma(current->mm, addr);
- }
-
- return -ENOMEM;
-}
-
-static unsigned long htlb_get_high_area(unsigned long len, u16 areamask)
-{
- unsigned long addr = 0x100000000UL;
- struct vm_area_struct *vma;
-
- vma = find_vma(current->mm, addr);
- while (addr + len <= TASK_SIZE_USER64) {
- BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */
-
- if (! __within_hugepage_high_range(addr, len, areamask)) {
- addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT);
- vma = find_vma(current->mm, addr);
- continue;
- }
-
- if (!vma || (addr + len) <= vma->vm_start)
- return addr;
- addr = ALIGN(vma->vm_end, HPAGE_SIZE);
- /* Depending on segmask this might not be a confirmed
- * hugepage region, so the ALIGN could have skipped
- * some VMAs */
- vma = find_vma(current->mm, addr);
- }
-
- return -ENOMEM;
-}
unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
unsigned long len, unsigned long pgoff,
unsigned long flags)
{
- int lastshift;
- u16 areamask, curareas;
-
- if (HPAGE_SHIFT == 0)
- return -EINVAL;
- if (len & ~HPAGE_MASK)
- return -EINVAL;
- if (len > TASK_SIZE)
- return -ENOMEM;
-
- if (!cpu_has_feature(CPU_FTR_16M_PAGE))
- return -EINVAL;
-
- /* Paranoia, caller should have dealt with this */
- BUG_ON((addr + len) < addr);
-
- /* Handle MAP_FIXED */
- if (flags & MAP_FIXED) {
- if (prepare_hugepage_range(addr, len, pgoff))
- return -EINVAL;
- return addr;
- }
-
- if (test_thread_flag(TIF_32BIT)) {
- curareas = current->mm->context.low_htlb_areas;
-
- /* First see if we can use the hint address */
- if (addr && (htlb_check_hinted_area(addr, len) == 0)) {
- areamask = LOW_ESID_MASK(addr, len);
- if (open_low_hpage_areas(current->mm, areamask) == 0)
- return addr;
- }
-
- /* Next see if we can map in the existing low areas */
- addr = htlb_get_low_area(len, curareas);
- if (addr != -ENOMEM)
- return addr;
-
- /* Finally go looking for areas to open */
- lastshift = 0;
- for (areamask = LOW_ESID_MASK(0x100000000UL-len, len);
- ! lastshift; areamask >>=1) {
- if (areamask & 1)
- lastshift = 1;
-
- addr = htlb_get_low_area(len, curareas | areamask);
- if ((addr != -ENOMEM)
- && open_low_hpage_areas(current->mm, areamask) == 0)
- return addr;
- }
- } else {
- curareas = current->mm->context.high_htlb_areas;
-
- /* First see if we can use the hint address */
- /* We discourage 64-bit processes from doing hugepage
- * mappings below 4GB (must use MAP_FIXED) */
- if ((addr >= 0x100000000UL)
- && (htlb_check_hinted_area(addr, len) == 0)) {
- areamask = HTLB_AREA_MASK(addr, len);
- if (open_high_hpage_areas(current->mm, areamask) == 0)
- return addr;
- }
-
- /* Next see if we can map in the existing high areas */
- addr = htlb_get_high_area(len, curareas);
- if (addr != -ENOMEM)
- return addr;
-
- /* Finally go looking for areas to open */
- lastshift = 0;
- for (areamask = HTLB_AREA_MASK(TASK_SIZE_USER64-len, len);
- ! lastshift; areamask >>=1) {
- if (areamask & 1)
- lastshift = 1;
-
- addr = htlb_get_high_area(len, curareas | areamask);
- if ((addr != -ENOMEM)
- && open_high_hpage_areas(current->mm, areamask) == 0)
- return addr;
- }
- }
- printk(KERN_DEBUG "hugetlb_get_unmapped_area() unable to open"
- " enough areas\n");
- return -ENOMEM;
+ return slice_get_unmapped_area(addr, len, flags,
+ mmu_huge_psize, 1, 0);
}
/*
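All of the removed low/high-area bitmask machinery collapses into slice_get_unmapped_area(), whose core primitive is turning an address range into a bitmask of touched slices. A toy version covering only the low 256MB slices (the example range is arbitrary):

#include <stdio.h>

#define SLICE_LOW_SHIFT 28
#define GET_LOW_SLICE_INDEX(addr) ((addr) >> SLICE_LOW_SHIFT)

static unsigned int low_range_to_mask(unsigned long start, unsigned long len)
{
	unsigned long end = start + len - 1;

	/* one bit per 256MB slice touched by [start, start + len) */
	return (1u << (GET_LOW_SLICE_INDEX(end) + 1))
	     - (1u << GET_LOW_SLICE_INDEX(start));
}

int main(void)
{
	/* a 512MB mapping at 256MB touches slices 1 and 2 -> 0x6 */
	printf("mask = 0x%x\n",
	       low_range_to_mask(0x10000000UL, 0x20000000UL));
	return 0;
}

hugetlb_get_unmapped_area() then simply requests a fit for mmu_huge_psize with topdown=1 and use_cache=0, leaving it to the slice code to decide which free slices to convert.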
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 4416d5140c53..7312a265545f 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -146,21 +146,16 @@ static void zero_ctor(void *addr, struct kmem_cache *cache, unsigned long flags)
memset(addr, 0, kmem_cache_size(cache));
}
-#ifdef CONFIG_PPC_64K_PAGES
-static const unsigned int pgtable_cache_size[3] = {
- PTE_TABLE_SIZE, PMD_TABLE_SIZE, PGD_TABLE_SIZE
-};
-static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
- "pte_pmd_cache", "pmd_cache", "pgd_cache",
-};
-#else
static const unsigned int pgtable_cache_size[2] = {
- PTE_TABLE_SIZE, PMD_TABLE_SIZE
+ PGD_TABLE_SIZE, PMD_TABLE_SIZE
};
static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
- "pgd_pte_cache", "pud_pmd_cache",
-};
+#ifdef CONFIG_PPC_64K_PAGES
+ "pgd_cache", "pmd_cache",
+#else
+ "pgd_cache", "pud_pmd_cache",
#endif /* CONFIG_PPC_64K_PAGES */
+};
#ifdef CONFIG_HUGETLB_PAGE
/* Hugepages need one extra cache, initialized in hugetlbpage.c. We
@@ -183,11 +178,8 @@ void pgtable_cache_init(void)
"for size: %08x...\n", name, i, size);
pgtable_cache[i] = kmem_cache_create(name,
size, size,
- 0,
+ SLAB_PANIC,
zero_ctor,
NULL);
- if (! pgtable_cache[i])
- panic("pgtable_cache_init(): could not create %s!\n",
- name);
}
}
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index c4bcd7546424..246eeea40ece 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -31,6 +31,7 @@
#include <linux/highmem.h>
#include <linux/initrd.h>
#include <linux/pagemap.h>
+#include <linux/suspend.h>
#include <asm/pgalloc.h>
#include <asm/prom.h>
@@ -80,7 +81,6 @@ int page_is_ram(unsigned long pfn)
return 0;
#endif
}
-EXPORT_SYMBOL(page_is_ram);
pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
unsigned long size, pgprot_t vma_prot)
@@ -277,6 +277,28 @@ void __init do_init_bootmem(void)
init_bootmem_done = 1;
}
+/* mark pages that don't exist as nosave */
+static int __init mark_nonram_nosave(void)
+{
+ unsigned long lmb_next_region_start_pfn,
+ lmb_region_max_pfn;
+ int i;
+
+ for (i = 0; i < lmb.memory.cnt - 1; i++) {
+ lmb_region_max_pfn =
+ (lmb.memory.region[i].base >> PAGE_SHIFT) +
+ (lmb.memory.region[i].size >> PAGE_SHIFT);
+ lmb_next_region_start_pfn =
+ lmb.memory.region[i+1].base >> PAGE_SHIFT;
+
+ if (lmb_region_max_pfn < lmb_next_region_start_pfn)
+ register_nosave_region(lmb_region_max_pfn,
+ lmb_next_region_start_pfn);
+ }
+
+ return 0;
+}
+
/*
* paging_init() sets up the page tables - in fact we've already done this.
*/
@@ -308,6 +330,8 @@ void __init paging_init(void)
max_zone_pfns[ZONE_DMA] = top_of_ram >> PAGE_SHIFT;
#endif
free_area_init_nodes(max_zone_pfns);
+
+ mark_nonram_nosave();
}
#endif /* ! CONFIG_NEED_MULTIPLE_NODES */
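mark_nonram_nosave() walks consecutive LMB regions and registers any PFN gap between them as nosave, so hibernation does not try to snapshot nonexistent memory. A self-contained model of the gap computation (the two-region layout is fabricated for the example):

#include <stdio.h>

#define PAGE_SHIFT 12

struct region { unsigned long base, size; };

int main(void)
{
	struct region mem[] = {                   /* hypothetical LMB layout */
		{ 0x00000000UL, 0x10000000UL },   /* 256MB at 0     */
		{ 0x20000000UL, 0x10000000UL },   /* 256MB at 512MB */
	};
	int i, cnt = sizeof(mem) / sizeof(mem[0]);

	for (i = 0; i < cnt - 1; i++) {
		unsigned long max_pfn  = (mem[i].base >> PAGE_SHIFT) +
					 (mem[i].size >> PAGE_SHIFT);
		unsigned long next_pfn = mem[i + 1].base >> PAGE_SHIFT;

		/* hole between regions: exclude it from the suspend image */
		if (max_pfn < next_pfn)
			printf("nosave pfns [0x%lx, 0x%lx)\n",
			       max_pfn, next_pfn);
	}
	return 0;
}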
diff --git a/arch/powerpc/mm/mmu_context_64.c b/arch/powerpc/mm/mmu_context_64.c
index 90a06ac02d5e..7a78cdc0515a 100644
--- a/arch/powerpc/mm/mmu_context_64.c
+++ b/arch/powerpc/mm/mmu_context_64.c
@@ -28,6 +28,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
int index;
int err;
+ int new_context = (mm->context.id == 0);
again:
if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL))
@@ -50,9 +51,18 @@ again:
}
mm->context.id = index;
+#ifdef CONFIG_PPC_MM_SLICES
+ /* The old code would re-promote on fork, we don't do that
+ * when using slices as it could cause problem promoting slices
+ * that have been forced down to 4K
+ */
+ if (new_context)
+ slice_set_user_psize(mm, mmu_virtual_psize);
+#else
mm->context.user_psize = mmu_virtual_psize;
mm->context.sllp = SLB_VSID_USER |
mmu_psize_defs[mmu_virtual_psize].sllp;
+#endif
return 0;
}
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 9c4538bb04b0..2558c34eedaa 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -40,7 +40,8 @@ extern int __map_without_bats;
extern unsigned long ioremap_base;
extern unsigned int rtas_data, rtas_size;
-extern PTE *Hash, *Hash_end;
+struct _PTE;
+extern struct _PTE *Hash, *Hash_end;
extern unsigned long Hash_size, Hash_mask;
extern unsigned int num_tlbcam_entries;
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index b3a592b25ab3..de45aa82d97b 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -252,12 +252,15 @@ static int __cpuinit cpu_numa_callback(struct notifier_block *nfb,
switch (action) {
case CPU_UP_PREPARE:
+ case CPU_UP_PREPARE_FROZEN:
numa_setup_cpu(lcpu);
ret = NOTIFY_OK;
break;
#ifdef CONFIG_HOTPLUG_CPU
case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
case CPU_UP_CANCELED:
+ case CPU_UP_CANCELED_FROZEN:
unmap_cpu_from_node(lcpu);
ret = NOTIFY_OK;
break;
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index bca560374927..d8232b7a08f7 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -261,7 +261,7 @@ int map_page(unsigned long va, phys_addr_t pa, int flags)
int err = -ENOMEM;
/* Use upper 10 bits of VA to index the first level map */
- pd = pmd_offset(pgd_offset_k(va), va);
+ pd = pmd_offset(pud_offset(pgd_offset_k(va), va), va);
/* Use middle 10 bits of VA to index the second-level map */
pg = pte_alloc_kernel(pd, va);
if (pg != 0) {
@@ -354,23 +354,27 @@ int
get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep, pmd_t **pmdp)
{
pgd_t *pgd;
+ pud_t *pud;
pmd_t *pmd;
pte_t *pte;
int retval = 0;
pgd = pgd_offset(mm, addr & PAGE_MASK);
if (pgd) {
- pmd = pmd_offset(pgd, addr & PAGE_MASK);
- if (pmd_present(*pmd)) {
- pte = pte_offset_map(pmd, addr & PAGE_MASK);
- if (pte) {
- retval = 1;
- *ptep = pte;
- if (pmdp)
- *pmdp = pmd;
- /* XXX caller needs to do pte_unmap, yuck */
- }
- }
+ pud = pud_offset(pgd, addr & PAGE_MASK);
+ if (pud && pud_present(*pud)) {
+ pmd = pmd_offset(pud, addr & PAGE_MASK);
+ if (pmd_present(*pmd)) {
+ pte = pte_offset_map(pmd, addr & PAGE_MASK);
+ if (pte) {
+ retval = 1;
+ *ptep = pte;
+ if (pmdp)
+ *pmdp = pmd;
+ /* XXX caller needs to do pte_unmap, yuck */
+ }
+ }
+ }
}
return(retval);
}
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c
index 05066674a7a0..ec1421a20aaa 100644
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/ppc_mmu_32.c
@@ -185,7 +185,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
if (Hash == 0)
return;
- pmd = pmd_offset(pgd_offset(mm, ea), ea);
+ pmd = pmd_offset(pud_offset(pgd_offset(mm, ea), ea), ea);
if (!pmd_none(*pmd))
add_hash_page(mm->context.id, ea, pmd_val(*pmd));
}
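Both 32-bit fixes above restore the canonical pgd -> pud -> pmd walk order that generic code expects; on configurations where the pud level is folded, pud_offset() simply passes the entry through. A minimal stand-in model of that folding (the types are placeholders, not kernel types):

#include <stdio.h>

typedef unsigned long pgd_t;
typedef unsigned long pud_t;
typedef unsigned long pmd_t;

static pud_t *pud_offset(pgd_t *pgd, unsigned long addr)
{
	return (pud_t *)pgd;     /* folded level: same entry, new type */
}

static pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
{
	return (pmd_t *)pud;     /* folded level on 32-bit ppc */
}

int main(void)
{
	pgd_t pgd = 0x1234;      /* stand-in top-level entry */
	pmd_t *pmd = pmd_offset(pud_offset(&pgd, 0), 0);

	printf("pmd entry: 0x%lx\n", *pmd);
	return 0;
}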
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 224e960650a0..304375a73574 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -198,12 +198,6 @@ void slb_initialize(void)
static int slb_encoding_inited;
extern unsigned int *slb_miss_kernel_load_linear;
extern unsigned int *slb_miss_kernel_load_io;
-#ifdef CONFIG_HUGETLB_PAGE
- extern unsigned int *slb_miss_user_load_huge;
- unsigned long huge_llp;
-
- huge_llp = mmu_psize_defs[mmu_huge_psize].sllp;
-#endif
/* Prepare our SLB miss handler based on our page size */
linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
@@ -220,11 +214,6 @@ void slb_initialize(void)
DBG("SLB: linear LLP = %04x\n", linear_llp);
DBG("SLB: io LLP = %04x\n", io_llp);
-#ifdef CONFIG_HUGETLB_PAGE
- patch_slb_encoding(slb_miss_user_load_huge,
- SLB_VSID_USER | huge_llp);
- DBG("SLB: huge LLP = %04x\n", huge_llp);
-#endif
}
get_paca()->stab_rr = SLB_NUM_BOLTED;
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index b10e4707d7c1..cd1a93d4948c 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -82,31 +82,45 @@ _GLOBAL(slb_miss_kernel_load_io)
srdi. r9,r10,USER_ESID_BITS
bne- 8f /* invalid ea bits set */
- /* Figure out if the segment contains huge pages */
-#ifdef CONFIG_HUGETLB_PAGE
-BEGIN_FTR_SECTION
- b 1f
-END_FTR_SECTION_IFCLR(CPU_FTR_16M_PAGE)
+
+ /* when using slices, we extract the psize off the slice bitmaps
+ * and then we need to get the sllp encoding off the mmu_psize_defs
+ * array.
+ *
+ * XXX This is a bit inefficient especially for the normal case,
+ * so we should try to implement a fast path for the standard page
+ * size using the old sllp value so we avoid the array. We cannot
+ * really do dynamic patching unfortunately as processes might flip
+ * between 4k and 64k standard page size
+ */
+#ifdef CONFIG_PPC_MM_SLICES
cmpldi r10,16
- lhz r9,PACALOWHTLBAREAS(r13)
- mr r11,r10
+ /* Get the slice index * 4 in r11 and matching slice size mask in r9 */
+ ld r9,PACALOWSLICESPSIZE(r13)
+ sldi r11,r10,2
blt 5f
+ ld r9,PACAHIGHSLICEPSIZE(r13)
+ srdi r11,r10,(SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT - 2)
+ andi. r11,r11,0x3c
- lhz r9,PACAHIGHHTLBAREAS(r13)
- srdi r11,r10,(HTLB_AREA_SHIFT-SID_SHIFT)
-
-5: srd r9,r9,r11
- andi. r9,r9,1
- beq 1f
-_GLOBAL(slb_miss_user_load_huge)
- li r11,0
- b 2f
-1:
-#endif /* CONFIG_HUGETLB_PAGE */
+5: /* Extract the psize and multiply to get an array offset */
+ srd r9,r9,r11
+ andi. r9,r9,0xf
+ mulli r9,r9,MMUPSIZEDEFSIZE
+ /* Now get to the array and obtain the sllp
+ */
+ ld r11,PACATOC(r13)
+ ld r11,mmu_psize_defs@got(r11)
+ add r11,r11,r9
+ ld r11,MMUPSIZESLLP(r11)
+ ori r11,r11,SLB_VSID_USER
+#else
+ /* paca context sllp already contains the SLB_VSID_USER bits */
lhz r11,PACACONTEXTSLLP(r13)
-2:
+#endif /* CONFIG_PPC_MM_SLICES */
+
ld r9,PACACONTEXTID(r13)
rldimi r10,r9,USER_ESID_BITS,0
b slb_finish_load
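The new assembly sequence is the slow(er) hot-path equivalent of: extract the 4-bit psize for the faulting address from the packed slice word in the paca, index mmu_psize_defs by that psize to fetch the sllp encoding, and OR in SLB_VSID_USER. In C, roughly as follows — the table contents and the SLB_VSID_USER value are made up for the example:

#include <stdio.h>

#define SLICE_LOW_SHIFT 28
#define SLB_VSID_USER   0x10UL       /* hypothetical flag value */

struct mmu_psize_def { unsigned int shift; unsigned long sllp; };

static const struct mmu_psize_def mmu_psize_defs[] = {
	[0] = { .shift = 12, .sllp = 0x0 },  /* 4K */
	[2] = { .shift = 16, .sllp = 0x5 },  /* 64K, made-up sllp */
};

int main(void)
{
	unsigned long low_psizes = 0x2;     /* slice 0 -> psize index 2 */
	unsigned long ea = 0x08000000UL;    /* faulting address, slice 0 */
	unsigned int idx = ea >> SLICE_LOW_SHIFT;
	unsigned int psize = (low_psizes >> (idx * 4)) & 0xf;

	printf("VSID flags = 0x%lx\n",
	       mmu_psize_defs[psize].sllp | SLB_VSID_USER);
	return 0;
}

As the XXX comment in the patch notes, this indexed load costs more than the old patched-immediate path; that is the price of letting a single process mix 4K and 64K slices.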
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
new file mode 100644
index 000000000000..f833dba2a028
--- /dev/null
+++ b/arch/powerpc/mm/slice.c
@@ -0,0 +1,633 @@
+/*
+ * address space "slices" (meta-segments) support
+ *
+ * Copyright (C) 2007 Benjamin Herrenschmidt, IBM Corporation.
+ *
+ * Based on hugetlb implementation
+ *
+ * Copyright (C) 2003 David Gibson, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/err.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <asm/mman.h>
+#include <asm/mmu.h>
+#include <asm/spu.h>
+
+static spinlock_t slice_convert_lock = SPIN_LOCK_UNLOCKED;
+
+
+#ifdef DEBUG
+int _slice_debug = 1;
+
+static void slice_print_mask(const char *label, struct slice_mask mask)
+{
+ char *p, buf[16 + 3 + 16 + 1];
+ int i;
+
+ if (!_slice_debug)
+ return;
+ p = buf;
+ for (i = 0; i < SLICE_NUM_LOW; i++)
+ *(p++) = (mask.low_slices & (1 << i)) ? '1' : '0';
+ *(p++) = ' ';
+ *(p++) = '-';
+ *(p++) = ' ';
+ for (i = 0; i < SLICE_NUM_HIGH; i++)
+ *(p++) = (mask.high_slices & (1 << i)) ? '1' : '0';
+ *(p++) = 0;
+
+ printk(KERN_DEBUG "%s:%s\n", label, buf);
+}
+
+#define slice_dbg(fmt...) do { if (_slice_debug) pr_debug(fmt); } while(0)
+
+#else
+
+static void slice_print_mask(const char *label, struct slice_mask mask) {}
+#define slice_dbg(fmt...)
+
+#endif
+
+static struct slice_mask slice_range_to_mask(unsigned long start,
+ unsigned long len)
+{
+ unsigned long end = start + len - 1;
+ struct slice_mask ret = { 0, 0 };
+
+ if (start < SLICE_LOW_TOP) {
+ unsigned long mend = min(end, SLICE_LOW_TOP);
+ unsigned long mstart = min(start, SLICE_LOW_TOP);
+
+ ret.low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1))
+ - (1u << GET_LOW_SLICE_INDEX(mstart));
+ }
+
+ if ((start + len) > SLICE_LOW_TOP)
+ ret.high_slices = (1u << (GET_HIGH_SLICE_INDEX(end) + 1))
+ - (1u << GET_HIGH_SLICE_INDEX(start));
+
+ return ret;
+}
+
+static int slice_area_is_free(struct mm_struct *mm, unsigned long addr,
+ unsigned long len)
+{
+ struct vm_area_struct *vma;
+
+ if ((mm->task_size - len) < addr)
+ return 0;
+ vma = find_vma(mm, addr);
+ return (!vma || (addr + len) <= vma->vm_start);
+}
+
+static int slice_low_has_vma(struct mm_struct *mm, unsigned long slice)
+{
+ return !slice_area_is_free(mm, slice << SLICE_LOW_SHIFT,
+ 1ul << SLICE_LOW_SHIFT);
+}
+
+static int slice_high_has_vma(struct mm_struct *mm, unsigned long slice)
+{
+ unsigned long start = slice << SLICE_HIGH_SHIFT;
+ unsigned long end = start + (1ul << SLICE_HIGH_SHIFT);
+
+ /* Hack, so that each address is controlled by exactly one
+ * of the high or low area bitmaps, the first high area starts
+ * at 4GB, not 0 */
+ if (start == 0)
+ start = SLICE_LOW_TOP;
+
+ return !slice_area_is_free(mm, start, end - start);
+}
+
+static struct slice_mask slice_mask_for_free(struct mm_struct *mm)
+{
+ struct slice_mask ret = { 0, 0 };
+ unsigned long i;
+
+ for (i = 0; i < SLICE_NUM_LOW; i++)
+ if (!slice_low_has_vma(mm, i))
+ ret.low_slices |= 1u << i;
+
+ if (mm->task_size <= SLICE_LOW_TOP)
+ return ret;
+
+ for (i = 0; i < SLICE_NUM_HIGH; i++)
+ if (!slice_high_has_vma(mm, i))
+ ret.high_slices |= 1u << i;
+
+ return ret;
+}
+
+static struct slice_mask slice_mask_for_size(struct mm_struct *mm, int psize)
+{
+ struct slice_mask ret = { 0, 0 };
+ unsigned long i;
+ u64 psizes;
+
+ psizes = mm->context.low_slices_psize;
+ for (i = 0; i < SLICE_NUM_LOW; i++)
+ if (((psizes >> (i * 4)) & 0xf) == psize)
+ ret.low_slices |= 1u << i;
+
+ psizes = mm->context.high_slices_psize;
+ for (i = 0; i < SLICE_NUM_HIGH; i++)
+ if (((psizes >> (i * 4)) & 0xf) == psize)
+ ret.high_slices |= 1u << i;
+
+ return ret;
+}
+
+static int slice_check_fit(struct slice_mask mask, struct slice_mask available)
+{
+ return (mask.low_slices & available.low_slices) == mask.low_slices &&
+ (mask.high_slices & available.high_slices) == mask.high_slices;
+}
+
+static void slice_flush_segments(void *parm)
+{
+ struct mm_struct *mm = parm;
+ unsigned long flags;
+
+ if (mm != current->active_mm)
+ return;
+
+ /* update the paca copy of the context struct */
+ get_paca()->context = current->active_mm->context;
+
+ local_irq_save(flags);
+ slb_flush_and_rebolt();
+ local_irq_restore(flags);
+}
+
+static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psize)
+{
+ /* Write the new slice psize bits */
+ u64 lpsizes, hpsizes;
+ unsigned long i, flags;
+
+ slice_dbg("slice_convert(mm=%p, psize=%d)\n", mm, psize);
+ slice_print_mask(" mask", mask);
+
+ /* We need to use a spinlock here to protect against
+ * concurrent 64k -> 4k demotion ...
+ */
+ spin_lock_irqsave(&slice_convert_lock, flags);
+
+ lpsizes = mm->context.low_slices_psize;
+ for (i = 0; i < SLICE_NUM_LOW; i++)
+ if (mask.low_slices & (1u << i))
+ lpsizes = (lpsizes & ~(0xful << (i * 4))) |
+ (((unsigned long)psize) << (i * 4));
+
+ hpsizes = mm->context.high_slices_psize;
+ for (i = 0; i < SLICE_NUM_HIGH; i++)
+ if (mask.high_slices & (1u << i))
+ hpsizes = (hpsizes & ~(0xful << (i * 4))) |
+ (((unsigned long)psize) << (i * 4));
+
+ mm->context.low_slices_psize = lpsizes;
+ mm->context.high_slices_psize = hpsizes;
+
+ slice_dbg(" lsps=%lx, hsps=%lx\n",
+ mm->context.low_slices_psize,
+ mm->context.high_slices_psize);
+
+ spin_unlock_irqrestore(&slice_convert_lock, flags);
+ mb();
+
+ /* XXX this is sub-optimal but will do for now */
+ on_each_cpu(slice_flush_segments, mm, 0, 1);
+#ifdef CONFIG_SPU_BASE
+ spu_flush_all_slbs(mm);
+#endif
+}
+
+static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
+ unsigned long len,
+ struct slice_mask available,
+ int psize, int use_cache)
+{
+ struct vm_area_struct *vma;
+ unsigned long start_addr, addr;
+ struct slice_mask mask;
+ int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
+
+ if (use_cache) {
+ if (len <= mm->cached_hole_size) {
+ start_addr = addr = TASK_UNMAPPED_BASE;
+ mm->cached_hole_size = 0;
+ } else
+ start_addr = addr = mm->free_area_cache;
+ } else
+ start_addr = addr = TASK_UNMAPPED_BASE;
+
+full_search:
+ for (;;) {
+ addr = _ALIGN_UP(addr, 1ul << pshift);
+ if ((TASK_SIZE - len) < addr)
+ break;
+ vma = find_vma(mm, addr);
+ BUG_ON(vma && (addr >= vma->vm_end));
+
+ mask = slice_range_to_mask(addr, len);
+ if (!slice_check_fit(mask, available)) {
+ if (addr < SLICE_LOW_TOP)
+ addr = _ALIGN_UP(addr + 1, 1ul << SLICE_LOW_SHIFT);
+ else
+ addr = _ALIGN_UP(addr + 1, 1ul << SLICE_HIGH_SHIFT);
+ continue;
+ }
+ if (!vma || addr + len <= vma->vm_start) {
+ /*
+ * Remember the place where we stopped the search:
+ */
+ if (use_cache)
+ mm->free_area_cache = addr + len;
+ return addr;
+ }
+ if (use_cache && (addr + mm->cached_hole_size) < vma->vm_start)
+ mm->cached_hole_size = vma->vm_start - addr;
+ addr = vma->vm_end;
+ }
+
+ /* Make sure we didn't miss any holes */
+ if (use_cache && start_addr != TASK_UNMAPPED_BASE) {
+ start_addr = addr = TASK_UNMAPPED_BASE;
+ mm->cached_hole_size = 0;
+ goto full_search;
+ }
+ return -ENOMEM;
+}
+
+static unsigned long slice_find_area_topdown(struct mm_struct *mm,
+ unsigned long len,
+ struct slice_mask available,
+ int psize, int use_cache)
+{
+ struct vm_area_struct *vma;
+ unsigned long addr;
+ struct slice_mask mask;
+ int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
+
+ /* check if free_area_cache is useful for us */
+ if (use_cache) {
+ if (len <= mm->cached_hole_size) {
+ mm->cached_hole_size = 0;
+ mm->free_area_cache = mm->mmap_base;
+ }
+
+ /* either no address requested or can't fit in requested
+ * address hole
+ */
+ addr = mm->free_area_cache;
+
+ /* make sure it can fit in the remaining address space */
+ if (addr > len) {
+ addr = _ALIGN_DOWN(addr - len, 1ul << pshift);
+ mask = slice_range_to_mask(addr, len);
+ if (slice_check_fit(mask, available) &&
+ slice_area_is_free(mm, addr, len))
+ /* remember the address as a hint for
+ * next time
+ */
+ return (mm->free_area_cache = addr);
+ }
+ }
+
+ addr = mm->mmap_base;
+ while (addr > len) {
+ /* Go down by chunk size */
+ addr = _ALIGN_DOWN(addr - len, 1ul << pshift);
+
+ /* Check for hit with different page size */
+ mask = slice_range_to_mask(addr, len);
+ if (!slice_check_fit(mask, available)) {
+ if (addr < SLICE_LOW_TOP)
+ addr = _ALIGN_DOWN(addr, 1ul << SLICE_LOW_SHIFT);
+ else if (addr < (1ul << SLICE_HIGH_SHIFT))
+ addr = SLICE_LOW_TOP;
+ else
+ addr = _ALIGN_DOWN(addr, 1ul << SLICE_HIGH_SHIFT);
+ continue;
+ }
+
+ /*
+ * Lookup failure means no vma is above this address,
+ * else if new region fits below vma->vm_start,
+ * return with success:
+ */
+ vma = find_vma(mm, addr);
+ if (!vma || (addr + len) <= vma->vm_start) {
+ /* remember the address as a hint for next time */
+ if (use_cache)
+ mm->free_area_cache = addr;
+ return addr;
+ }
+
+ /* remember the largest hole we saw so far */
+ if (use_cache && (addr + mm->cached_hole_size) < vma->vm_start)
+ mm->cached_hole_size = vma->vm_start - addr;
+
+ /* try just below the current vma->vm_start */
+ addr = vma->vm_start;
+ }
+
+ /*
+ * A failed mmap() very likely causes application failure,
+ * so fall back to the bottom-up function here. This scenario
+ * can happen with large stack limits and large mmap()
+ * allocations.
+ */
+ addr = slice_find_area_bottomup(mm, len, available, psize, 0);
+
+ /*
+ * Restore the topdown base:
+ */
+ if (use_cache) {
+ mm->free_area_cache = mm->mmap_base;
+ mm->cached_hole_size = ~0UL;
+ }
+
+ return addr;
+}
+
+
+static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,
+ struct slice_mask mask, int psize,
+ int topdown, int use_cache)
+{
+ if (topdown)
+ return slice_find_area_topdown(mm, len, mask, psize, use_cache);
+ else
+ return slice_find_area_bottomup(mm, len, mask, psize, use_cache);
+}
+
+unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
+ unsigned long flags, unsigned int psize,
+ int topdown, int use_cache)
+{
+ struct slice_mask mask;
+ struct slice_mask good_mask;
+ struct slice_mask potential_mask = {0,0} /* silence stupid warning */;
+ int pmask_set = 0;
+ int fixed = (flags & MAP_FIXED);
+ int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
+ struct mm_struct *mm = current->mm;
+
+ /* Sanity checks */
+ BUG_ON(mm->task_size == 0);
+
+ slice_dbg("slice_get_unmapped_area(mm=%p, psize=%d...\n", mm, psize);
+ slice_dbg(" addr=%lx, len=%lx, flags=%lx, topdown=%d, use_cache=%d\n",
+ addr, len, flags, topdown, use_cache);
+
+ if (len > mm->task_size)
+ return -ENOMEM;
+ if (fixed && (addr & ((1ul << pshift) - 1)))
+ return -EINVAL;
+ if (fixed && addr > (mm->task_size - len))
+ return -EINVAL;
+
+ /* If hint, make sure it matches our alignment restrictions */
+ if (!fixed && addr) {
+ addr = _ALIGN_UP(addr, 1ul << pshift);
+ slice_dbg(" aligned addr=%lx\n", addr);
+ }
+
+ /* First makeup a "good" mask of slices that have the right size
+ * already
+ */
+ good_mask = slice_mask_for_size(mm, psize);
+ slice_print_mask(" good_mask", good_mask);
+
+ /* First check hint if it's valid or if we have MAP_FIXED */
+ if ((addr != 0 || fixed) && (mm->task_size - len) >= addr) {
+
+ /* Don't bother with hint if it overlaps a VMA */
+ if (!fixed && !slice_area_is_free(mm, addr, len))
+ goto search;
+
+ /* Build a mask for the requested range */
+ mask = slice_range_to_mask(addr, len);
+ slice_print_mask(" mask", mask);
+
+ /* Check if we fit in the good mask. If we do, we just return,
+ * nothing else to do
+ */
+ if (slice_check_fit(mask, good_mask)) {
+ slice_dbg(" fits good !\n");
+ return addr;
+ }
+
+ /* We don't fit in the good mask, check what other slices are
+ * empty and thus can be converted
+ */
+ potential_mask = slice_mask_for_free(mm);
+ potential_mask.low_slices |= good_mask.low_slices;
+ potential_mask.high_slices |= good_mask.high_slices;
+ pmask_set = 1;
+ slice_print_mask(" potential", potential_mask);
+ if (slice_check_fit(mask, potential_mask)) {
+ slice_dbg(" fits potential !\n");
+ goto convert;
+ }
+ }
+
+ /* If we have MAP_FIXED and failed the above step, then error out */
+ if (fixed)
+ return -EBUSY;
+
+ search:
+ slice_dbg(" search...\n");
+
+ /* Now let's see if we can find something in the existing slices
+ * for that size
+ */
+ addr = slice_find_area(mm, len, good_mask, psize, topdown, use_cache);
+ if (addr != -ENOMEM) {
+		/* Found within the good mask: no conversion needed,
+		 * so return directly
+		 */
+ slice_dbg(" found area at 0x%lx\n", addr);
+ return addr;
+ }
+
+ /* Won't fit, check what can be converted */
+ if (!pmask_set) {
+ potential_mask = slice_mask_for_free(mm);
+ potential_mask.low_slices |= good_mask.low_slices;
+ potential_mask.high_slices |= good_mask.high_slices;
+ pmask_set = 1;
+ slice_print_mask(" potential", potential_mask);
+ }
+
+	/* Now let's see if we can find something in the potential mask,
+	 * which also includes free slices that can be converted
+	 */
+ addr = slice_find_area(mm, len, potential_mask, psize, topdown,
+ use_cache);
+ if (addr == -ENOMEM)
+ return -ENOMEM;
+
+ mask = slice_range_to_mask(addr, len);
+ slice_dbg(" found potential area at 0x%lx\n", addr);
+ slice_print_mask(" mask", mask);
+
+ convert:
+ slice_convert(mm, mask, psize);
+ return addr;
+}
+EXPORT_SYMBOL_GPL(slice_get_unmapped_area);
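+
+/*
+ * Usage sketch (illustrative only, not part of this file): a caller
+ * that wants len bytes backed by 64K pages, searched top-down with
+ * the free area cache enabled, could do something like:
+ *
+ *	addr = slice_get_unmapped_area(0, len, 0, MMU_PAGE_64K, 1, 1);
+ *	if ((long)addr < 0)
+ *		return addr;	(negative error code, e.g. -ENOMEM)
+ *
+ * On success, the slices covering [addr, addr + len) have already been
+ * converted to the requested page size by slice_convert().
+ */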
+
+unsigned long arch_get_unmapped_area(struct file *filp,
+ unsigned long addr,
+ unsigned long len,
+ unsigned long pgoff,
+ unsigned long flags)
+{
+ return slice_get_unmapped_area(addr, len, flags,
+ current->mm->context.user_psize,
+ 0, 1);
+}
+
+unsigned long arch_get_unmapped_area_topdown(struct file *filp,
+ const unsigned long addr0,
+ const unsigned long len,
+ const unsigned long pgoff,
+ const unsigned long flags)
+{
+ return slice_get_unmapped_area(addr0, len, flags,
+ current->mm->context.user_psize,
+ 1, 1);
+}
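+
+/*
+ * Note (illustrative, an assumption about the generic mm code rather
+ * than part of this patch): with the two hooks above, a typical
+ * anonymous mmap() resolves as
+ *
+ *	mmap() -> get_unmapped_area()
+ *	       -> arch_get_unmapped_area[_topdown]()
+ *	       -> slice_get_unmapped_area(..., mm->context.user_psize, ...)
+ *
+ * so normal mappings always honour the mm's default page size.
+ */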
+
+unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
+{
+ u64 psizes;
+ int index;
+
+ if (addr < SLICE_LOW_TOP) {
+ psizes = mm->context.low_slices_psize;
+ index = GET_LOW_SLICE_INDEX(addr);
+ } else {
+ psizes = mm->context.high_slices_psize;
+ index = GET_HIGH_SLICE_INDEX(addr);
+ }
+
+ return (psizes >> (index * 4)) & 0xf;
+}
+EXPORT_SYMBOL_GPL(get_slice_psize);
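+
+/*
+ * Worked example (illustrative): each slice uses one 4-bit nibble of
+ * the psize word, so with low_slices_psize == 0x410:
+ *
+ *	(0x410 >> (0 * 4)) & 0xf == 0	(slice 0: psize 0)
+ *	(0x410 >> (1 * 4)) & 0xf == 1	(slice 1: psize 1)
+ *	(0x410 >> (2 * 4)) & 0xf == 4	(slice 2: psize 4)
+ */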
+
+/*
+ * This is called by hash_page when it needs to do a lazy conversion of
+ * an address space from real 64K pages to combo 4K pages (typically
+ * when hitting a non-cacheable mapping on a processor or hypervisor
+ * that won't allow them for 64K pages).
+ *
+ * This is also called in init_new_context() to change back the user
+ * psize from whatever the parent context had it set to.
+ *
+ * This function will only change the content of the {low,high}_slice_psize
+ * masks; it will not flush SLBs, as that is handled lazily by the
+ * caller.
+ */
+void slice_set_user_psize(struct mm_struct *mm, unsigned int psize)
+{
+ unsigned long flags, lpsizes, hpsizes;
+ unsigned int old_psize;
+ int i;
+
+ slice_dbg("slice_set_user_psize(mm=%p, psize=%d)\n", mm, psize);
+
+ spin_lock_irqsave(&slice_convert_lock, flags);
+
+ old_psize = mm->context.user_psize;
+ slice_dbg(" old_psize=%d\n", old_psize);
+ if (old_psize == psize)
+ goto bail;
+
+ mm->context.user_psize = psize;
+ wmb();
+
+ lpsizes = mm->context.low_slices_psize;
+ for (i = 0; i < SLICE_NUM_LOW; i++)
+ if (((lpsizes >> (i * 4)) & 0xf) == old_psize)
+ lpsizes = (lpsizes & ~(0xful << (i * 4))) |
+ (((unsigned long)psize) << (i * 4));
+
+ hpsizes = mm->context.high_slices_psize;
+ for (i = 0; i < SLICE_NUM_HIGH; i++)
+ if (((hpsizes >> (i * 4)) & 0xf) == old_psize)
+ hpsizes = (hpsizes & ~(0xful << (i * 4))) |
+ (((unsigned long)psize) << (i * 4));
+
+ mm->context.low_slices_psize = lpsizes;
+ mm->context.high_slices_psize = hpsizes;
+
+ slice_dbg(" lsps=%lx, hsps=%lx\n",
+ mm->context.low_slices_psize,
+ mm->context.high_slices_psize);
+
+ bail:
+ spin_unlock_irqrestore(&slice_convert_lock, flags);
+}
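+
+/*
+ * Worked example for the two loops above (illustrative): converting
+ * old_psize 0 to psize 2 with lpsizes == 0x300 (slice 2 already at
+ * psize 3, every other low slice at 0) rewrites each 0-valued nibble,
+ * e.g. for i == 0:
+ *
+ *	lpsizes = (0x300 & ~0xful) | (2ul << 0) == 0x302
+ *
+ * and ends up as 0x2222222222222322 once all 16 low slices are done;
+ * the nibble already holding psize 3 is left untouched.
+ */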
+
+/*
+ * is_hugepage_only_range() is used by generic code to verify whether
+ * a normal mmap mapping (non-hugetlbfs) is valid on a given area.
+ *
+ * Until the generic code provides a more generic hook and/or starts
+ * calling arch get_unmapped_area for MAP_FIXED (which our implementation
+ * here knows how to deal with), we hijack it to keep standard mappings
+ * away from us.
+ *
+ * Because of that generic code limitation, a MAP_FIXED mapping cannot
+ * "convert" back a slice with no VMAs to the standard page size; only
+ * get_unmapped_area() can. It would be possible to fix it here, but I
+ * prefer working on fixing the generic code instead.
+ *
+ * WARNING: This will not work if hugetlbfs isn't enabled, since the
+ * generic code will then define this function away as 0. That is ok
+ * for now, as we only use slices with hugetlbfs enabled. This should
+ * be fixed as the generic code gets fixed.
+ */
+int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
+ unsigned long len)
+{
+ struct slice_mask mask, available;
+
+ mask = slice_range_to_mask(addr, len);
+ available = slice_mask_for_size(mm, mm->context.user_psize);
+
+#if 0 /* too verbose */
+ slice_dbg("is_hugepage_only_range(mm=%p, addr=%lx, len=%lx)\n",
+ mm, addr, len);
+ slice_print_mask(" mask", mask);
+ slice_print_mask(" available", available);
+#endif
+ return !slice_check_fit(mask, available);
+}
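+
+/*
+ * Illustrative example (assumption about typical state): while the
+ * whole address space still uses the default psize, slice_mask_for_size()
+ * covers every slice, slice_check_fit() succeeds and this returns 0
+ * (normal mmaps allowed anywhere). Once a slice has been converted to
+ * a different size, any range touching it fails the fit check and is
+ * reported as hugepage-only.
+ */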
+
diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c
index eeeacab548e6..132c6bc66ce1 100644
--- a/arch/powerpc/mm/stab.c
+++ b/arch/powerpc/mm/stab.c
@@ -227,7 +227,7 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm)
* the first (bolted) segment, so that do_stab_bolted won't get a
* recursive segment miss on the segment table itself.
*/
-void stabs_alloc(void)
+void __init stabs_alloc(void)
{
int cpu;
diff --git a/arch/powerpc/mm/tlb_32.c b/arch/powerpc/mm/tlb_32.c
index 925ff70be8ba..6a69417cbc0e 100644
--- a/arch/powerpc/mm/tlb_32.c
+++ b/arch/powerpc/mm/tlb_32.c
@@ -111,7 +111,7 @@ static void flush_range(struct mm_struct *mm, unsigned long start,
if (start >= end)
return;
end = (end - 1) | ~PAGE_MASK;
- pmd = pmd_offset(pgd_offset(mm, start), start);
+ pmd = pmd_offset(pud_offset(pgd_offset(mm, start), start), start);
for (;;) {
pmd_end = ((start + PGDIR_SIZE) & PGDIR_MASK) - 1;
if (pmd_end > end)
@@ -169,7 +169,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
return;
}
mm = (vmaddr < TASK_SIZE)? vma->vm_mm: &init_mm;
- pmd = pmd_offset(pgd_offset(mm, vmaddr), vmaddr);
+ pmd = pmd_offset(pud_offset(pgd_offset(mm, vmaddr), vmaddr), vmaddr);
if (!pmd_none(*pmd))
flush_hash_pages(mm->context.id, vmaddr, pmd_val(*pmd), 1);
FINISH_FLUSH;
diff --git a/arch/powerpc/mm/tlb_64.c b/arch/powerpc/mm/tlb_64.c
index fd8d08c325eb..2bfc4d7e1aa2 100644
--- a/arch/powerpc/mm/tlb_64.c
+++ b/arch/powerpc/mm/tlb_64.c
@@ -143,16 +143,22 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
*/
addr &= PAGE_MASK;
- /* Get page size (maybe move back to caller) */
+	/* Get page size (maybe move back to caller).
+	 *
+	 * NOTE: when using special 64K mappings in a 4K environment, as
+	 * for SPEs, we obtain the page size from the slice, which must
+	 * therefore still exist (and thus the VMA must not have been
+	 * reused) at the time of this call.
+	 */
if (huge) {
#ifdef CONFIG_HUGETLB_PAGE
psize = mmu_huge_psize;
#else
BUG();
- psize = pte_pagesize_index(pte); /* shutup gcc */
+ psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */
#endif
} else
- psize = pte_pagesize_index(pte);
+ psize = pte_pagesize_index(mm, addr, pte);
/* Build full vaddr */
if (!is_kernel_addr(addr)) {