diff options
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r-- | arch/powerpc/mm/44x_mmu.c | 82 | ||||
-rw-r--r-- | arch/powerpc/mm/Makefile | 1 | ||||
-rw-r--r-- | arch/powerpc/mm/fault.c | 42 | ||||
-rw-r--r-- | arch/powerpc/mm/hash_low_64.S | 5 | ||||
-rw-r--r-- | arch/powerpc/mm/hash_native_64.c | 82 | ||||
-rw-r--r-- | arch/powerpc/mm/hash_utils_64.c | 144 | ||||
-rw-r--r-- | arch/powerpc/mm/hugetlbpage.c | 549 | ||||
-rw-r--r-- | arch/powerpc/mm/init_64.c | 22 | ||||
-rw-r--r-- | arch/powerpc/mm/mem.c | 26 | ||||
-rw-r--r-- | arch/powerpc/mm/mmu_context_64.c | 10 | ||||
-rw-r--r-- | arch/powerpc/mm/mmu_decl.h | 3 | ||||
-rw-r--r-- | arch/powerpc/mm/numa.c | 3 | ||||
-rw-r--r-- | arch/powerpc/mm/pgtable_32.c | 28 | ||||
-rw-r--r-- | arch/powerpc/mm/ppc_mmu_32.c | 2 | ||||
-rw-r--r-- | arch/powerpc/mm/slb.c | 11 | ||||
-rw-r--r-- | arch/powerpc/mm/slb_low.S | 52 | ||||
-rw-r--r-- | arch/powerpc/mm/slice.c | 633 | ||||
-rw-r--r-- | arch/powerpc/mm/stab.c | 2 | ||||
-rw-r--r-- | arch/powerpc/mm/tlb_32.c | 4 | ||||
-rw-r--r-- | arch/powerpc/mm/tlb_64.c | 12 |
20 files changed, 936 insertions, 777 deletions
diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c index 0a0a0487b334..ca4dcb07a939 100644 --- a/arch/powerpc/mm/44x_mmu.c +++ b/arch/powerpc/mm/44x_mmu.c @@ -24,73 +24,38 @@ * */ -#include <linux/signal.h> -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/string.h> -#include <linux/types.h> -#include <linux/ptrace.h> -#include <linux/mman.h> -#include <linux/mm.h> -#include <linux/swap.h> -#include <linux/stddef.h> -#include <linux/vmalloc.h> #include <linux/init.h> -#include <linux/delay.h> -#include <linux/highmem.h> - -#include <asm/pgalloc.h> -#include <asm/prom.h> -#include <asm/io.h> -#include <asm/mmu_context.h> -#include <asm/pgtable.h> #include <asm/mmu.h> -#include <asm/uaccess.h> -#include <asm/smp.h> -#include <asm/bootx.h> -#include <asm/machdep.h> -#include <asm/setup.h> +#include <asm/system.h> +#include <asm/page.h> #include "mmu_decl.h" -extern char etext[], _stext[]; - /* Used by the 44x TLB replacement exception handler. * Just needed it declared someplace. */ -unsigned int tlb_44x_index = 0; -unsigned int tlb_44x_hwater = 62; +unsigned int tlb_44x_index; /* = 0 */ +unsigned int tlb_44x_hwater = PPC44x_TLB_SIZE - 1 - PPC44x_EARLY_TLBS; /* * "Pins" a 256MB TLB entry in AS0 for kernel lowmem */ -static void __init -ppc44x_pin_tlb(int slot, unsigned int virt, unsigned int phys) +static void __init ppc44x_pin_tlb(unsigned int virt, unsigned int phys) { - unsigned long attrib = 0; - - __asm__ __volatile__("\ - clrrwi %2,%2,10\n\ - ori %2,%2,%4\n\ - clrrwi %1,%1,10\n\ - li %0,0\n\ - ori %0,%0,%5\n\ - tlbwe %2,%3,%6\n\ - tlbwe %1,%3,%7\n\ - tlbwe %0,%3,%8" + __asm__ __volatile__( + "tlbwe %2,%3,%4\n" + "tlbwe %1,%3,%5\n" + "tlbwe %0,%3,%6\n" : - : "r" (attrib), "r" (phys), "r" (virt), "r" (slot), - "i" (PPC44x_TLB_VALID | PPC44x_TLB_256M), - "i" (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G), + : "r" (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G), + "r" (phys), + "r" (virt | PPC44x_TLB_VALID | PPC44x_TLB_256M), + "r" (tlb_44x_hwater--), /* slot for this TLB entry */ "i" (PPC44x_TLB_PAGEID), "i" (PPC44x_TLB_XLAT), "i" (PPC44x_TLB_ATTRIB)); } -/* - * MMU_init_hw does the chip-specific initialization of the MMU hardware. - */ void __init MMU_init_hw(void) { flush_instruction_cache(); @@ -98,22 +63,13 @@ void __init MMU_init_hw(void) unsigned long __init mmu_mapin_ram(void) { - unsigned int pinned_tlbs = 1; - int i; - - /* Determine number of entries necessary to cover lowmem */ - pinned_tlbs = (unsigned int) - (_ALIGN(total_lowmem, PPC_PIN_SIZE) >> PPC44x_PIN_SHIFT); - - /* Write upper watermark to save location */ - tlb_44x_hwater = PPC44x_LOW_SLOT - pinned_tlbs; + unsigned long addr; - /* If necessary, set additional pinned TLBs */ - if (pinned_tlbs > 1) - for (i = (PPC44x_LOW_SLOT-(pinned_tlbs-1)); i < PPC44x_LOW_SLOT; i++) { - unsigned int phys_addr = (PPC44x_LOW_SLOT-i) * PPC_PIN_SIZE; - ppc44x_pin_tlb(i, phys_addr+PAGE_OFFSET, phys_addr); - } + /* Pin in enough TLBs to cover any lowmem not covered by the + * initial 256M mapping established in head_44x.S */ + for (addr = PPC_PIN_SIZE; addr < total_lowmem; + addr += PPC_PIN_SIZE) + ppc44x_pin_tlb(addr + PAGE_OFFSET, addr); return total_lowmem; } diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 38a81967ca07..4f839c6a9768 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -18,4 +18,5 @@ obj-$(CONFIG_40x) += 4xx_mmu.o obj-$(CONFIG_44x) += 44x_mmu.o obj-$(CONFIG_FSL_BOOKE) += fsl_booke_mmu.o obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o +obj-$(CONFIG_PPC_MM_SLICES) += slice.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 03aeb3a46077..bfe901353142 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -28,6 +28,7 @@ #include <linux/highmem.h> #include <linux/module.h> #include <linux/kprobes.h> +#include <linux/kdebug.h> #include <asm/page.h> #include <asm/pgtable.h> @@ -36,40 +37,28 @@ #include <asm/system.h> #include <asm/uaccess.h> #include <asm/tlbflush.h> -#include <asm/kdebug.h> #include <asm/siginfo.h> -#ifdef CONFIG_KPROBES -ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain); -/* Hook to register for page fault notifications */ -int register_page_fault_notifier(struct notifier_block *nb) +#ifdef CONFIG_KPROBES +static inline int notify_page_fault(struct pt_regs *regs) { - return atomic_notifier_chain_register(¬ify_page_fault_chain, nb); -} + int ret = 0; -int unregister_page_fault_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_unregister(¬ify_page_fault_chain, nb); -} + /* kprobe_running() needs smp_processor_id() */ + if (!user_mode(regs)) { + preempt_disable(); + if (kprobe_running() && kprobe_fault_handler(regs, 11)) + ret = 1; + preempt_enable(); + } -static inline int notify_page_fault(enum die_val val, const char *str, - struct pt_regs *regs, long err, int trap, int sig) -{ - struct die_args args = { - .regs = regs, - .str = str, - .err = err, - .trapnr = trap, - .signr = sig - }; - return atomic_notifier_call_chain(¬ify_page_fault_chain, val, &args); + return ret; } #else -static inline int notify_page_fault(enum die_val val, const char *str, - struct pt_regs *regs, long err, int trap, int sig) +static inline int notify_page_fault(struct pt_regs *regs) { - return NOTIFY_DONE; + return 0; } #endif @@ -175,8 +164,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, is_write = error_code & ESR_DST; #endif /* CONFIG_4xx || CONFIG_BOOKE */ - if (notify_page_fault(DIE_PAGE_FAULT, "page_fault", regs, error_code, - 11, SIGSEGV) == NOTIFY_STOP) + if (notify_page_fault(regs)) return 0; if (trap == 0x300) { diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S index e64ce3eec36e..4762ff7c14df 100644 --- a/arch/powerpc/mm/hash_low_64.S +++ b/arch/powerpc/mm/hash_low_64.S @@ -615,6 +615,9 @@ htab_pte_insert_failure: li r3,-1 b htab_bail +#endif /* CONFIG_PPC_64K_PAGES */ + +#ifdef CONFIG_PPC_HAS_HASH_64K /***************************************************************************** * * @@ -870,7 +873,7 @@ ht64_pte_insert_failure: b ht64_bail -#endif /* CONFIG_PPC_64K_PAGES */ +#endif /* CONFIG_PPC_HAS_HASH_64K */ /***************************************************************************** diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 79aedaf36f2b..7b7fe2d7b9dc 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -26,6 +26,7 @@ #include <asm/tlb.h> #include <asm/cputable.h> #include <asm/udbg.h> +#include <asm/kexec.h> #ifdef DEBUG_LOW #define DBG_LOW(fmt...) udbg_printf(fmt) @@ -340,31 +341,70 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long va, local_irq_restore(flags); } -/* - * XXX This need fixing based on page size. It's only used by - * native_hpte_clear() for now which needs fixing too so they - * make a good pair... - */ -static unsigned long slot2va(unsigned long hpte_v, unsigned long slot) +#define LP_SHIFT 12 +#define LP_BITS 8 +#define LP_MASK(i) ((0xFF >> (i)) << LP_SHIFT) + +static void hpte_decode(hpte_t *hpte, unsigned long slot, + int *psize, unsigned long *va) { - unsigned long avpn = HPTE_V_AVPN_VAL(hpte_v); - unsigned long va; + unsigned long hpte_r = hpte->r; + unsigned long hpte_v = hpte->v; + unsigned long avpn; + int i, size, shift, penc, avpnm_bits; + + if (!(hpte_v & HPTE_V_LARGE)) + size = MMU_PAGE_4K; + else { + for (i = 0; i < LP_BITS; i++) { + if ((hpte_r & LP_MASK(i+1)) == LP_MASK(i+1)) + break; + } + penc = LP_MASK(i+1) >> LP_SHIFT; + for (size = 0; size < MMU_PAGE_COUNT; size++) { - va = avpn << 23; + /* 4K pages are not represented by LP */ + if (size == MMU_PAGE_4K) + continue; - if (! (hpte_v & HPTE_V_LARGE)) { - unsigned long vpi, pteg; + /* valid entries have a shift value */ + if (!mmu_psize_defs[size].shift) + continue; + + if (penc == mmu_psize_defs[size].penc) + break; + } + } - pteg = slot / HPTES_PER_GROUP; - if (hpte_v & HPTE_V_SECONDARY) - pteg = ~pteg; + /* + * FIXME, the code below works for 16M, 64K, and 4K pages as these + * fall under the p<=23 rules for calculating the virtual address. + * In the case of 16M pages, an extra bit is stolen from the AVPN + * field to achieve the requisite 24 bits. + * + * Does not work for 16G pages or 1 TB segments. + */ + shift = mmu_psize_defs[size].shift; + if (mmu_psize_defs[size].avpnm) + avpnm_bits = __ilog2_u64(mmu_psize_defs[size].avpnm) + 1; + else + avpnm_bits = 0; + if (shift - avpnm_bits <= 23) { + avpn = HPTE_V_AVPN_VAL(hpte_v) << 23; - vpi = ((va >> 28) ^ pteg) & htab_hash_mask; + if (shift < 23) { + unsigned long vpi, pteg; - va |= vpi << PAGE_SHIFT; + pteg = slot / HPTES_PER_GROUP; + if (hpte_v & HPTE_V_SECONDARY) + pteg = ~pteg; + vpi = ((avpn >> 28) ^ pteg) & htab_hash_mask; + avpn |= (vpi << mmu_psize_defs[size].shift); + } } - return va; + *va = avpn; + *psize = size; } /* @@ -374,15 +414,14 @@ static unsigned long slot2va(unsigned long hpte_v, unsigned long slot) * * TODO: add batching support when enabled. remember, no dynamic memory here, * athough there is the control page available... - * - * XXX FIXME: 4k only for now ! */ static void native_hpte_clear(void) { unsigned long slot, slots, flags; hpte_t *hptep = htab_address; - unsigned long hpte_v; + unsigned long hpte_v, va; unsigned long pteg_count; + int psize; pteg_count = htab_hash_mask + 1; @@ -408,8 +447,9 @@ static void native_hpte_clear(void) * already hold the native_tlbie_lock. */ if (hpte_v & HPTE_V_VALID) { + hpte_decode(hptep, slot, &psize, &va); hptep->v = 0; - __tlbie(slot2va(hpte_v, slot), MMU_PAGE_4K); + __tlbie(va, psize); } } diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 49618461defb..028ba4ed03d2 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -51,6 +51,7 @@ #include <asm/cputable.h> #include <asm/abs_addr.h> #include <asm/sections.h> +#include <asm/spu.h> #ifdef DEBUG #define DBG(fmt...) udbg_printf(fmt) @@ -103,7 +104,7 @@ int mmu_ci_restrictions; #ifdef CONFIG_DEBUG_PAGEALLOC static u8 *linear_map_hash_slots; static unsigned long linear_map_hash_count; -static spinlock_t linear_map_hash_lock; +static DEFINE_SPINLOCK(linear_map_hash_lock); #endif /* CONFIG_DEBUG_PAGEALLOC */ /* There are definitions of page sizes arrays to be used when none @@ -419,7 +420,7 @@ static void __init htab_finish_init(void) extern unsigned int *htab_call_hpte_remove; extern unsigned int *htab_call_hpte_updatepp; -#ifdef CONFIG_PPC_64K_PAGES +#ifdef CONFIG_PPC_HAS_HASH_64K extern unsigned int *ht64_call_hpte_insert1; extern unsigned int *ht64_call_hpte_insert2; extern unsigned int *ht64_call_hpte_remove; @@ -596,22 +597,23 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap) * Demote a segment to using 4k pages. * For now this makes the whole process use 4k pages. */ -void demote_segment_4k(struct mm_struct *mm, unsigned long addr) -{ #ifdef CONFIG_PPC_64K_PAGES +static void demote_segment_4k(struct mm_struct *mm, unsigned long addr) +{ if (mm->context.user_psize == MMU_PAGE_4K) return; +#ifdef CONFIG_PPC_MM_SLICES + slice_set_user_psize(mm, MMU_PAGE_4K); +#else /* CONFIG_PPC_MM_SLICES */ mm->context.user_psize = MMU_PAGE_4K; mm->context.sllp = SLB_VSID_USER | mmu_psize_defs[MMU_PAGE_4K].sllp; - get_paca()->context = mm->context; - slb_flush_and_rebolt(); +#endif /* CONFIG_PPC_MM_SLICES */ + #ifdef CONFIG_SPE_BASE spu_flush_all_slbs(mm); #endif -#endif } - -EXPORT_SYMBOL_GPL(demote_segment_4k); +#endif /* CONFIG_PPC_64K_PAGES */ /* Result code is: * 0 - handled @@ -646,7 +648,11 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) return 1; } vsid = get_vsid(mm->context.id, ea); +#ifdef CONFIG_PPC_MM_SLICES + psize = get_slice_psize(mm, ea); +#else psize = mm->context.user_psize; +#endif break; case VMALLOC_REGION_ID: mm = &init_mm; @@ -674,11 +680,22 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) if (user_region && cpus_equal(mm->cpu_vm_mask, tmp)) local = 1; +#ifdef CONFIG_HUGETLB_PAGE /* Handle hugepage regions */ - if (unlikely(in_hugepage_area(mm->context, ea))) { + if (HPAGE_SHIFT && psize == mmu_huge_psize) { DBG_LOW(" -> huge page !\n"); return hash_huge_page(mm, access, ea, vsid, local, trap); } +#endif /* CONFIG_HUGETLB_PAGE */ + +#ifndef CONFIG_PPC_64K_PAGES + /* If we use 4K pages and our psize is not 4K, then we are hitting + * a special driver mapping, we need to align the address before + * we fetch the PTE + */ + if (psize != MMU_PAGE_4K) + ea &= ~((1ul << mmu_psize_defs[psize].shift) - 1); +#endif /* CONFIG_PPC_64K_PAGES */ /* Get PTE and page size from page tables */ ptep = find_linux_pte(pgdir, ea); @@ -702,54 +719,56 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) } /* Do actual hashing */ -#ifndef CONFIG_PPC_64K_PAGES - rc = __hash_page_4K(ea, access, vsid, ptep, trap, local); -#else +#ifdef CONFIG_PPC_64K_PAGES /* If _PAGE_4K_PFN is set, make sure this is a 4k segment */ if (pte_val(*ptep) & _PAGE_4K_PFN) { demote_segment_4k(mm, ea); psize = MMU_PAGE_4K; } - if (mmu_ci_restrictions) { - /* If this PTE is non-cacheable, switch to 4k */ - if (psize == MMU_PAGE_64K && - (pte_val(*ptep) & _PAGE_NO_CACHE)) { - if (user_region) { - demote_segment_4k(mm, ea); - psize = MMU_PAGE_4K; - } else if (ea < VMALLOC_END) { - /* - * some driver did a non-cacheable mapping - * in vmalloc space, so switch vmalloc - * to 4k pages - */ - printk(KERN_ALERT "Reducing vmalloc segment " - "to 4kB pages because of " - "non-cacheable mapping\n"); - psize = mmu_vmalloc_psize = MMU_PAGE_4K; - } + /* If this PTE is non-cacheable and we have restrictions on + * using non cacheable large pages, then we switch to 4k + */ + if (mmu_ci_restrictions && psize == MMU_PAGE_64K && + (pte_val(*ptep) & _PAGE_NO_CACHE)) { + if (user_region) { + demote_segment_4k(mm, ea); + psize = MMU_PAGE_4K; + } else if (ea < VMALLOC_END) { + /* + * some driver did a non-cacheable mapping + * in vmalloc space, so switch vmalloc + * to 4k pages + */ + printk(KERN_ALERT "Reducing vmalloc segment " + "to 4kB pages because of " + "non-cacheable mapping\n"); + psize = mmu_vmalloc_psize = MMU_PAGE_4K; #ifdef CONFIG_SPE_BASE spu_flush_all_slbs(mm); #endif } - if (user_region) { - if (psize != get_paca()->context.user_psize) { - get_paca()->context = mm->context; - slb_flush_and_rebolt(); - } - } else if (get_paca()->vmalloc_sllp != - mmu_psize_defs[mmu_vmalloc_psize].sllp) { - get_paca()->vmalloc_sllp = - mmu_psize_defs[mmu_vmalloc_psize].sllp; + } + if (user_region) { + if (psize != get_paca()->context.user_psize) { + get_paca()->context.user_psize = + mm->context.user_psize; slb_flush_and_rebolt(); } + } else if (get_paca()->vmalloc_sllp != + mmu_psize_defs[mmu_vmalloc_psize].sllp) { + get_paca()->vmalloc_sllp = + mmu_psize_defs[mmu_vmalloc_psize].sllp; + slb_flush_and_rebolt(); } +#endif /* CONFIG_PPC_64K_PAGES */ + +#ifdef CONFIG_PPC_HAS_HASH_64K if (psize == MMU_PAGE_64K) rc = __hash_page_64K(ea, access, vsid, ptep, trap, local); else +#endif /* CONFIG_PPC_HAS_HASH_64K */ rc = __hash_page_4K(ea, access, vsid, ptep, trap, local); -#endif /* CONFIG_PPC_64K_PAGES */ #ifndef CONFIG_PPC_64K_PAGES DBG_LOW(" o-pte: %016lx\n", pte_val(*ptep)); @@ -772,42 +791,55 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, unsigned long flags; int local = 0; - /* We don't want huge pages prefaulted for now - */ - if (unlikely(in_hugepage_area(mm->context, ea))) + BUG_ON(REGION_ID(ea) != USER_REGION_ID); + +#ifdef CONFIG_PPC_MM_SLICES + /* We only prefault standard pages for now */ + if (unlikely(get_slice_psize(mm, ea) != mm->context.user_psize)); return; +#endif DBG_LOW("hash_preload(mm=%p, mm->pgdir=%p, ea=%016lx, access=%lx," " trap=%lx\n", mm, mm->pgd, ea, access, trap); - /* Get PTE, VSID, access mask */ + /* Get Linux PTE if available */ pgdir = mm->pgd; if (pgdir == NULL) return; ptep = find_linux_pte(pgdir, ea); if (!ptep) return; + +#ifdef CONFIG_PPC_64K_PAGES + /* If either _PAGE_4K_PFN or _PAGE_NO_CACHE is set (and we are on + * a 64K kernel), then we don't preload, hash_page() will take + * care of it once we actually try to access the page. + * That way we don't have to duplicate all of the logic for segment + * page size demotion here + */ + if (pte_val(*ptep) & (_PAGE_4K_PFN | _PAGE_NO_CACHE)) + return; +#endif /* CONFIG_PPC_64K_PAGES */ + + /* Get VSID */ vsid = get_vsid(mm->context.id, ea); - /* Hash it in */ + /* Hash doesn't like irqs */ local_irq_save(flags); + + /* Is that local to this CPU ? */ mask = cpumask_of_cpu(smp_processor_id()); if (cpus_equal(mm->cpu_vm_mask, mask)) local = 1; -#ifndef CONFIG_PPC_64K_PAGES - __hash_page_4K(ea, access, vsid, ptep, trap, local); -#else - if (mmu_ci_restrictions) { - /* If this PTE is non-cacheable, switch to 4k */ - if (mm->context.user_psize == MMU_PAGE_64K && - (pte_val(*ptep) & _PAGE_NO_CACHE)) - demote_segment_4k(mm, ea); - } + + /* Hash it in */ +#ifdef CONFIG_PPC_HAS_HASH_64K if (mm->context.user_psize == MMU_PAGE_64K) __hash_page_64K(ea, access, vsid, ptep, trap, local); else - __hash_page_4K(ea, access, vsid, ptep, trap, local); #endif /* CONFIG_PPC_64K_PAGES */ + __hash_page_4K(ea, access, vsid, ptep, trap, local); + local_irq_restore(flags); } diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 1f07f70ac89f..92a1b16fb7e3 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -12,7 +12,6 @@ #include <linux/mm.h> #include <linux/hugetlb.h> #include <linux/pagemap.h> -#include <linux/smp_lock.h> #include <linux/slab.h> #include <linux/err.h> #include <linux/sysctl.h> @@ -92,7 +91,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) pgd_t *pg; pud_t *pu; - BUG_ON(! in_hugepage_area(mm->context, addr)); + BUG_ON(get_slice_psize(mm, addr) != mmu_huge_psize); addr &= HPAGE_MASK; @@ -120,7 +119,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) pud_t *pu; hugepd_t *hpdp = NULL; - BUG_ON(! in_hugepage_area(mm->context, addr)); + BUG_ON(get_slice_psize(mm, addr) != mmu_huge_psize); addr &= HPAGE_MASK; @@ -303,7 +302,7 @@ void hugetlb_free_pgd_range(struct mmu_gather **tlb, start = addr; pgd = pgd_offset((*tlb)->mm, addr); do { - BUG_ON(! in_hugepage_area((*tlb)->mm->context, addr)); + BUG_ON(get_slice_psize((*tlb)->mm, addr) != mmu_huge_psize); next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; @@ -332,203 +331,13 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, return __pte(old); } -struct slb_flush_info { - struct mm_struct *mm; - u16 newareas; -}; - -static void flush_low_segments(void *parm) -{ - struct slb_flush_info *fi = parm; - unsigned long i; - - BUILD_BUG_ON((sizeof(fi->newareas)*8) != NUM_LOW_AREAS); - - if (current->active_mm != fi->mm) - return; - - /* Only need to do anything if this CPU is working in the same - * mm as the one which has changed */ - - /* update the paca copy of the context struct */ - get_paca()->context = current->active_mm->context; - - asm volatile("isync" : : : "memory"); - for (i = 0; i < NUM_LOW_AREAS; i++) { - if (! (fi->newareas & (1U << i))) - continue; - asm volatile("slbie %0" - : : "r" ((i << SID_SHIFT) | SLBIE_C)); - } - asm volatile("isync" : : : "memory"); -} - -static void flush_high_segments(void *parm) -{ - struct slb_flush_info *fi = parm; - unsigned long i, j; - - - BUILD_BUG_ON((sizeof(fi->newareas)*8) != NUM_HIGH_AREAS); - - if (current->active_mm != fi->mm) - return; - - /* Only need to do anything if this CPU is working in the same - * mm as the one which has changed */ - - /* update the paca copy of the context struct */ - get_paca()->context = current->active_mm->context; - - asm volatile("isync" : : : "memory"); - for (i = 0; i < NUM_HIGH_AREAS; i++) { - if (! (fi->newareas & (1U << i))) - continue; - for (j = 0; j < (1UL << (HTLB_AREA_SHIFT-SID_SHIFT)); j++) - asm volatile("slbie %0" - :: "r" (((i << HTLB_AREA_SHIFT) - + (j << SID_SHIFT)) | SLBIE_C)); - } - asm volatile("isync" : : : "memory"); -} - -static int prepare_low_area_for_htlb(struct mm_struct *mm, unsigned long area) -{ - unsigned long start = area << SID_SHIFT; - unsigned long end = (area+1) << SID_SHIFT; - struct vm_area_struct *vma; - - BUG_ON(area >= NUM_LOW_AREAS); - - /* Check no VMAs are in the region */ - vma = find_vma(mm, start); - if (vma && (vma->vm_start < end)) - return -EBUSY; - - return 0; -} - -static int prepare_high_area_for_htlb(struct mm_struct *mm, unsigned long area) -{ - unsigned long start = area << HTLB_AREA_SHIFT; - unsigned long end = (area+1) << HTLB_AREA_SHIFT; - struct vm_area_struct *vma; - - BUG_ON(area >= NUM_HIGH_AREAS); - - /* Hack, so that each addresses is controlled by exactly one - * of the high or low area bitmaps, the first high area starts - * at 4GB, not 0 */ - if (start == 0) - start = 0x100000000UL; - - /* Check no VMAs are in the region */ - vma = find_vma(mm, start); - if (vma && (vma->vm_start < end)) - return -EBUSY; - - return 0; -} - -static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas) -{ - unsigned long i; - struct slb_flush_info fi; - - BUILD_BUG_ON((sizeof(newareas)*8) != NUM_LOW_AREAS); - BUILD_BUG_ON((sizeof(mm->context.low_htlb_areas)*8) != NUM_LOW_AREAS); - - newareas &= ~(mm->context.low_htlb_areas); - if (! newareas) - return 0; /* The segments we want are already open */ - - for (i = 0; i < NUM_LOW_AREAS; i++) - if ((1 << i) & newareas) - if (prepare_low_area_for_htlb(mm, i) != 0) - return -EBUSY; - - mm->context.low_htlb_areas |= newareas; - - /* the context change must make it to memory before the flush, - * so that further SLB misses do the right thing. */ - mb(); - - fi.mm = mm; - fi.newareas = newareas; - on_each_cpu(flush_low_segments, &fi, 0, 1); - - return 0; -} - -static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas) -{ - struct slb_flush_info fi; - unsigned long i; - - BUILD_BUG_ON((sizeof(newareas)*8) != NUM_HIGH_AREAS); - BUILD_BUG_ON((sizeof(mm->context.high_htlb_areas)*8) - != NUM_HIGH_AREAS); - - newareas &= ~(mm->context.high_htlb_areas); - if (! newareas) - return 0; /* The areas we want are already open */ - - for (i = 0; i < NUM_HIGH_AREAS; i++) - if ((1 << i) & newareas) - if (prepare_high_area_for_htlb(mm, i) != 0) - return -EBUSY; - - mm->context.high_htlb_areas |= newareas; - - /* the context change must make it to memory before the flush, - * so that further SLB misses do the right thing. */ - mb(); - - fi.mm = mm; - fi.newareas = newareas; - on_each_cpu(flush_high_segments, &fi, 0, 1); - - return 0; -} - -int prepare_hugepage_range(unsigned long addr, unsigned long len, pgoff_t pgoff) -{ - int err = 0; - - if (pgoff & (~HPAGE_MASK >> PAGE_SHIFT)) - return -EINVAL; - if (len & ~HPAGE_MASK) - return -EINVAL; - if (addr & ~HPAGE_MASK) - return -EINVAL; - - if (addr < 0x100000000UL) - err = open_low_hpage_areas(current->mm, - LOW_ESID_MASK(addr, len)); - if ((addr + len) > 0x100000000UL) - err = open_high_hpage_areas(current->mm, - HTLB_AREA_MASK(addr, len)); -#ifdef CONFIG_SPE_BASE - spu_flush_all_slbs(current->mm); -#endif - if (err) { - printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)" - " failed (lowmask: 0x%04hx, highmask: 0x%04hx)\n", - addr, len, - LOW_ESID_MASK(addr, len), HTLB_AREA_MASK(addr, len)); - return err; - } - - return 0; -} - struct page * follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) { pte_t *ptep; struct page *page; - if (! in_hugepage_area(mm->context, address)) + if (get_slice_psize(mm, address) != mmu_huge_psize) return ERR_PTR(-EINVAL); ptep = huge_pte_offset(mm, address); @@ -552,359 +361,13 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, return NULL; } -/* Because we have an exclusive hugepage region which lies within the - * normal user address space, we have to take special measures to make - * non-huge mmap()s evade the hugepage reserved regions. */ -unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, - unsigned long len, unsigned long pgoff, - unsigned long flags) -{ - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - unsigned long start_addr; - - if (len > TASK_SIZE) - return -ENOMEM; - - /* handle fixed mapping: prevent overlap with huge pages */ - if (flags & MAP_FIXED) { - if (is_hugepage_only_range(mm, addr, len)) - return -EINVAL; - return addr; - } - - if (addr) { - addr = PAGE_ALIGN(addr); - vma = find_vma(mm, addr); - if (((TASK_SIZE - len) >= addr) - && (!vma || (addr+len) <= vma->vm_start) - && !is_hugepage_only_range(mm, addr,len)) - return addr; - } - if (len > mm->cached_hole_size) { - start_addr = addr = mm->free_area_cache; - } else { - start_addr = addr = TASK_UNMAPPED_BASE; - mm->cached_hole_size = 0; - } - -full_search: - vma = find_vma(mm, addr); - while (TASK_SIZE - len >= addr) { - BUG_ON(vma && (addr >= vma->vm_end)); - - if (touches_hugepage_low_range(mm, addr, len)) { - addr = ALIGN(addr+1, 1<<SID_SHIFT); - vma = find_vma(mm, addr); - continue; - } - if (touches_hugepage_high_range(mm, addr, len)) { - addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT); - vma = find_vma(mm, addr); - continue; - } - if (!vma || addr + len <= vma->vm_start) { - /* - * Remember the place where we stopped the search: - */ - mm->free_area_cache = addr + len; - return addr; - } - if (addr + mm->cached_hole_size < vma->vm_start) - mm->cached_hole_size = vma->vm_start - addr; - addr = vma->vm_end; - vma = vma->vm_next; - } - - /* Make sure we didn't miss any holes */ - if (start_addr != TASK_UNMAPPED_BASE) { - start_addr = addr = TASK_UNMAPPED_BASE; - mm->cached_hole_size = 0; - goto full_search; - } - return -ENOMEM; -} - -/* - * This mmap-allocator allocates new areas top-down from below the - * stack's low limit (the base): - * - * Because we have an exclusive hugepage region which lies within the - * normal user address space, we have to take special measures to make - * non-huge mmap()s evade the hugepage reserved regions. - */ -unsigned long -arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, - const unsigned long len, const unsigned long pgoff, - const unsigned long flags) -{ - struct vm_area_struct *vma, *prev_vma; - struct mm_struct *mm = current->mm; - unsigned long base = mm->mmap_base, addr = addr0; - unsigned long largest_hole = mm->cached_hole_size; - int first_time = 1; - - /* requested length too big for entire address space */ - if (len > TASK_SIZE) - return -ENOMEM; - - /* handle fixed mapping: prevent overlap with huge pages */ - if (flags & MAP_FIXED) { - if (is_hugepage_only_range(mm, addr, len)) - return -EINVAL; - return addr; - } - - /* dont allow allocations above current base */ - if (mm->free_area_cache > base) - mm->free_area_cache = base; - - /* requesting a specific address */ - if (addr) { - addr = PAGE_ALIGN(addr); - vma = find_vma(mm, addr); - if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start) - && !is_hugepage_only_range(mm, addr,len)) - return addr; - } - - if (len <= largest_hole) { - largest_hole = 0; - mm->free_area_cache = base; - } -try_again: - /* make sure it can fit in the remaining address space */ - if (mm->free_area_cache < len) - goto fail; - - /* either no address requested or cant fit in requested address hole */ - addr = (mm->free_area_cache - len) & PAGE_MASK; - do { -hugepage_recheck: - if (touches_hugepage_low_range(mm, addr, len)) { - addr = (addr & ((~0) << SID_SHIFT)) - len; - goto hugepage_recheck; - } else if (touches_hugepage_high_range(mm, addr, len)) { - addr = (addr & ((~0UL) << HTLB_AREA_SHIFT)) - len; - goto hugepage_recheck; - } - - /* - * Lookup failure means no vma is above this address, - * i.e. return with success: - */ - if (!(vma = find_vma_prev(mm, addr, &prev_vma))) - return addr; - - /* - * new region fits between prev_vma->vm_end and - * vma->vm_start, use it: - */ - if (addr+len <= vma->vm_start && - (!prev_vma || (addr >= prev_vma->vm_end))) { - /* remember the address as a hint for next time */ - mm->cached_hole_size = largest_hole; - return (mm->free_area_cache = addr); - } else { - /* pull free_area_cache down to the first hole */ - if (mm->free_area_cache == vma->vm_end) { - mm->free_area_cache = vma->vm_start; - mm->cached_hole_size = largest_hole; - } - } - - /* remember the largest hole we saw so far */ - if (addr + largest_hole < vma->vm_start) - largest_hole = vma->vm_start - addr; - - /* try just below the current vma->vm_start */ - addr = vma->vm_start-len; - } while (len <= vma->vm_start); - -fail: - /* - * if hint left us with no space for the requested - * mapping then try again: - */ - if (first_time) { - mm->free_area_cache = base; - largest_hole = 0; - first_time = 0; - goto try_again; - } - /* - * A failed mmap() very likely causes application failure, - * so fall back to the bottom-up function here. This scenario - * can happen with large stack limits and large mmap() - * allocations. - */ - mm->free_area_cache = TASK_UNMAPPED_BASE; - mm->cached_hole_size = ~0UL; - addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); - /* - * Restore the topdown base: - */ - mm->free_area_cache = base; - mm->cached_hole_size = ~0UL; - - return addr; -} - -static int htlb_check_hinted_area(unsigned long addr, unsigned long len) -{ - struct vm_area_struct *vma; - - vma = find_vma(current->mm, addr); - if (TASK_SIZE - len >= addr && - (!vma || ((addr + len) <= vma->vm_start))) - return 0; - - return -ENOMEM; -} - -static unsigned long htlb_get_low_area(unsigned long len, u16 segmask) -{ - unsigned long addr = 0; - struct vm_area_struct *vma; - - vma = find_vma(current->mm, addr); - while (addr + len <= 0x100000000UL) { - BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */ - - if (! __within_hugepage_low_range(addr, len, segmask)) { - addr = ALIGN(addr+1, 1<<SID_SHIFT); - vma = find_vma(current->mm, addr); - continue; - } - - if (!vma || (addr + len) <= vma->vm_start) - return addr; - addr = ALIGN(vma->vm_end, HPAGE_SIZE); - /* Depending on segmask this might not be a confirmed - * hugepage region, so the ALIGN could have skipped - * some VMAs */ - vma = find_vma(current->mm, addr); - } - - return -ENOMEM; -} - -static unsigned long htlb_get_high_area(unsigned long len, u16 areamask) -{ - unsigned long addr = 0x100000000UL; - struct vm_area_struct *vma; - - vma = find_vma(current->mm, addr); - while (addr + len <= TASK_SIZE_USER64) { - BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */ - - if (! __within_hugepage_high_range(addr, len, areamask)) { - addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT); - vma = find_vma(current->mm, addr); - continue; - } - - if (!vma || (addr + len) <= vma->vm_start) - return addr; - addr = ALIGN(vma->vm_end, HPAGE_SIZE); - /* Depending on segmask this might not be a confirmed - * hugepage region, so the ALIGN could have skipped - * some VMAs */ - vma = find_vma(current->mm, addr); - } - - return -ENOMEM; -} unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { - int lastshift; - u16 areamask, curareas; - - if (HPAGE_SHIFT == 0) - return -EINVAL; - if (len & ~HPAGE_MASK) - return -EINVAL; - if (len > TASK_SIZE) - return -ENOMEM; - - if (!cpu_has_feature(CPU_FTR_16M_PAGE)) - return -EINVAL; - - /* Paranoia, caller should have dealt with this */ - BUG_ON((addr + len) < addr); - - /* Handle MAP_FIXED */ - if (flags & MAP_FIXED) { - if (prepare_hugepage_range(addr, len, pgoff)) - return -EINVAL; - return addr; - } - - if (test_thread_flag(TIF_32BIT)) { - curareas = current->mm->context.low_htlb_areas; - - /* First see if we can use the hint address */ - if (addr && (htlb_check_hinted_area(addr, len) == 0)) { - areamask = LOW_ESID_MASK(addr, len); - if (open_low_hpage_areas(current->mm, areamask) == 0) - return addr; - } - - /* Next see if we can map in the existing low areas */ - addr = htlb_get_low_area(len, curareas); - if (addr != -ENOMEM) - return addr; - - /* Finally go looking for areas to open */ - lastshift = 0; - for (areamask = LOW_ESID_MASK(0x100000000UL-len, len); - ! lastshift; areamask >>=1) { - if (areamask & 1) - lastshift = 1; - - addr = htlb_get_low_area(len, curareas | areamask); - if ((addr != -ENOMEM) - && open_low_hpage_areas(current->mm, areamask) == 0) - return addr; - } - } else { - curareas = current->mm->context.high_htlb_areas; - - /* First see if we can use the hint address */ - /* We discourage 64-bit processes from doing hugepage - * mappings below 4GB (must use MAP_FIXED) */ - if ((addr >= 0x100000000UL) - && (htlb_check_hinted_area(addr, len) == 0)) { - areamask = HTLB_AREA_MASK(addr, len); - if (open_high_hpage_areas(current->mm, areamask) == 0) - return addr; - } - - /* Next see if we can map in the existing high areas */ - addr = htlb_get_high_area(len, curareas); - if (addr != -ENOMEM) - return addr; - - /* Finally go looking for areas to open */ - lastshift = 0; - for (areamask = HTLB_AREA_MASK(TASK_SIZE_USER64-len, len); - ! lastshift; areamask >>=1) { - if (areamask & 1) - lastshift = 1; - - addr = htlb_get_high_area(len, curareas | areamask); - if ((addr != -ENOMEM) - && open_high_hpage_areas(current->mm, areamask) == 0) - return addr; - } - } - printk(KERN_DEBUG "hugetlb_get_unmapped_area() unable to open" - " enough areas\n"); - return -ENOMEM; + return slice_get_unmapped_area(addr, len, flags, + mmu_huge_psize, 1, 0); } /* diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 4416d5140c53..7312a265545f 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -146,21 +146,16 @@ static void zero_ctor(void *addr, struct kmem_cache *cache, unsigned long flags) memset(addr, 0, kmem_cache_size(cache)); } -#ifdef CONFIG_PPC_64K_PAGES -static const unsigned int pgtable_cache_size[3] = { - PTE_TABLE_SIZE, PMD_TABLE_SIZE, PGD_TABLE_SIZE -}; -static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = { - "pte_pmd_cache", "pmd_cache", "pgd_cache", -}; -#else static const unsigned int pgtable_cache_size[2] = { - PTE_TABLE_SIZE, PMD_TABLE_SIZE + PGD_TABLE_SIZE, PMD_TABLE_SIZE }; static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = { - "pgd_pte_cache", "pud_pmd_cache", -}; +#ifdef CONFIG_PPC_64K_PAGES + "pgd_cache", "pmd_cache", +#else + "pgd_cache", "pud_pmd_cache", #endif /* CONFIG_PPC_64K_PAGES */ +}; #ifdef CONFIG_HUGETLB_PAGE /* Hugepages need one extra cache, initialized in hugetlbpage.c. We @@ -183,11 +178,8 @@ void pgtable_cache_init(void) "for size: %08x...\n", name, i, size); pgtable_cache[i] = kmem_cache_create(name, size, size, - 0, + SLAB_PANIC, zero_ctor, NULL); - if (! pgtable_cache[i]) - panic("pgtable_cache_init(): could not create %s!\n", - name); } } diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index c4bcd7546424..246eeea40ece 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -31,6 +31,7 @@ #include <linux/highmem.h> #include <linux/initrd.h> #include <linux/pagemap.h> +#include <linux/suspend.h> #include <asm/pgalloc.h> #include <asm/prom.h> @@ -80,7 +81,6 @@ int page_is_ram(unsigned long pfn) return 0; #endif } -EXPORT_SYMBOL(page_is_ram); pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot) @@ -277,6 +277,28 @@ void __init do_init_bootmem(void) init_bootmem_done = 1; } +/* mark pages that don't exist as nosave */ +static int __init mark_nonram_nosave(void) +{ + unsigned long lmb_next_region_start_pfn, + lmb_region_max_pfn; + int i; + + for (i = 0; i < lmb.memory.cnt - 1; i++) { + lmb_region_max_pfn = + (lmb.memory.region[i].base >> PAGE_SHIFT) + + (lmb.memory.region[i].size >> PAGE_SHIFT); + lmb_next_region_start_pfn = + lmb.memory.region[i+1].base >> PAGE_SHIFT; + + if (lmb_region_max_pfn < lmb_next_region_start_pfn) + register_nosave_region(lmb_region_max_pfn, + lmb_next_region_start_pfn); + } + + return 0; +} + /* * paging_init() sets up the page tables - in fact we've already done this. */ @@ -308,6 +330,8 @@ void __init paging_init(void) max_zone_pfns[ZONE_DMA] = top_of_ram >> PAGE_SHIFT; #endif free_area_init_nodes(max_zone_pfns); + + mark_nonram_nosave(); } #endif /* ! CONFIG_NEED_MULTIPLE_NODES */ diff --git a/arch/powerpc/mm/mmu_context_64.c b/arch/powerpc/mm/mmu_context_64.c index 90a06ac02d5e..7a78cdc0515a 100644 --- a/arch/powerpc/mm/mmu_context_64.c +++ b/arch/powerpc/mm/mmu_context_64.c @@ -28,6 +28,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { int index; int err; + int new_context = (mm->context.id == 0); again: if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL)) @@ -50,9 +51,18 @@ again: } mm->context.id = index; +#ifdef CONFIG_PPC_MM_SLICES + /* The old code would re-promote on fork, we don't do that + * when using slices as it could cause problem promoting slices + * that have been forced down to 4K + */ + if (new_context) + slice_set_user_psize(mm, mmu_virtual_psize); +#else mm->context.user_psize = mmu_virtual_psize; mm->context.sllp = SLB_VSID_USER | mmu_psize_defs[mmu_virtual_psize].sllp; +#endif return 0; } diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 9c4538bb04b0..2558c34eedaa 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -40,7 +40,8 @@ extern int __map_without_bats; extern unsigned long ioremap_base; extern unsigned int rtas_data, rtas_size; -extern PTE *Hash, *Hash_end; +struct _PTE; +extern struct _PTE *Hash, *Hash_end; extern unsigned long Hash_size, Hash_mask; extern unsigned int num_tlbcam_entries; diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index b3a592b25ab3..de45aa82d97b 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -252,12 +252,15 @@ static int __cpuinit cpu_numa_callback(struct notifier_block *nfb, switch (action) { case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: numa_setup_cpu(lcpu); ret = NOTIFY_OK; break; #ifdef CONFIG_HOTPLUG_CPU case CPU_DEAD: + case CPU_DEAD_FROZEN: case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: unmap_cpu_from_node(lcpu); break; ret = NOTIFY_OK; diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index bca560374927..d8232b7a08f7 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -261,7 +261,7 @@ int map_page(unsigned long va, phys_addr_t pa, int flags) int err = -ENOMEM; /* Use upper 10 bits of VA to index the first level map */ - pd = pmd_offset(pgd_offset_k(va), va); + pd = pmd_offset(pud_offset(pgd_offset_k(va), va), va); /* Use middle 10 bits of VA to index the second-level map */ pg = pte_alloc_kernel(pd, va); if (pg != 0) { @@ -354,23 +354,27 @@ int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep, pmd_t **pmdp) { pgd_t *pgd; + pud_t *pud; pmd_t *pmd; pte_t *pte; int retval = 0; pgd = pgd_offset(mm, addr & PAGE_MASK); if (pgd) { - pmd = pmd_offset(pgd, addr & PAGE_MASK); - if (pmd_present(*pmd)) { - pte = pte_offset_map(pmd, addr & PAGE_MASK); - if (pte) { - retval = 1; - *ptep = pte; - if (pmdp) - *pmdp = pmd; - /* XXX caller needs to do pte_unmap, yuck */ - } - } + pud = pud_offset(pgd, addr & PAGE_MASK); + if (pud && pud_present(*pud)) { + pmd = pmd_offset(pud, addr & PAGE_MASK); + if (pmd_present(*pmd)) { + pte = pte_offset_map(pmd, addr & PAGE_MASK); + if (pte) { + retval = 1; + *ptep = pte; + if (pmdp) + *pmdp = pmd; + /* XXX caller needs to do pte_unmap, yuck */ + } + } + } } return(retval); } diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index 05066674a7a0..ec1421a20aaa 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -185,7 +185,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, if (Hash == 0) return; - pmd = pmd_offset(pgd_offset(mm, ea), ea); + pmd = pmd_offset(pud_offset(pgd_offset(mm, ea), ea), ea); if (!pmd_none(*pmd)) add_hash_page(mm->context.id, ea, pmd_val(*pmd)); } diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index 224e960650a0..304375a73574 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -198,12 +198,6 @@ void slb_initialize(void) static int slb_encoding_inited; extern unsigned int *slb_miss_kernel_load_linear; extern unsigned int *slb_miss_kernel_load_io; -#ifdef CONFIG_HUGETLB_PAGE - extern unsigned int *slb_miss_user_load_huge; - unsigned long huge_llp; - - huge_llp = mmu_psize_defs[mmu_huge_psize].sllp; -#endif /* Prepare our SLB miss handler based on our page size */ linear_llp = mmu_psize_defs[mmu_linear_psize].sllp; @@ -220,11 +214,6 @@ void slb_initialize(void) DBG("SLB: linear LLP = %04x\n", linear_llp); DBG("SLB: io LLP = %04x\n", io_llp); -#ifdef CONFIG_HUGETLB_PAGE - patch_slb_encoding(slb_miss_user_load_huge, - SLB_VSID_USER | huge_llp); - DBG("SLB: huge LLP = %04x\n", huge_llp); -#endif } get_paca()->stab_rr = SLB_NUM_BOLTED; diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S index b10e4707d7c1..cd1a93d4948c 100644 --- a/arch/powerpc/mm/slb_low.S +++ b/arch/powerpc/mm/slb_low.S @@ -82,31 +82,45 @@ _GLOBAL(slb_miss_kernel_load_io) srdi. r9,r10,USER_ESID_BITS bne- 8f /* invalid ea bits set */ - /* Figure out if the segment contains huge pages */ -#ifdef CONFIG_HUGETLB_PAGE -BEGIN_FTR_SECTION - b 1f -END_FTR_SECTION_IFCLR(CPU_FTR_16M_PAGE) + + /* when using slices, we extract the psize off the slice bitmaps + * and then we need to get the sllp encoding off the mmu_psize_defs + * array. + * + * XXX This is a bit inefficient especially for the normal case, + * so we should try to implement a fast path for the standard page + * size using the old sllp value so we avoid the array. We cannot + * really do dynamic patching unfortunately as processes might flip + * between 4k and 64k standard page size + */ +#ifdef CONFIG_PPC_MM_SLICES cmpldi r10,16 - lhz r9,PACALOWHTLBAREAS(r13) - mr r11,r10 + /* Get the slice index * 4 in r11 and matching slice size mask in r9 */ + ld r9,PACALOWSLICESPSIZE(r13) + sldi r11,r10,2 blt 5f + ld r9,PACAHIGHSLICEPSIZE(r13) + srdi r11,r10,(SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT - 2) + andi. r11,r11,0x3c - lhz r9,PACAHIGHHTLBAREAS(r13) - srdi r11,r10,(HTLB_AREA_SHIFT-SID_SHIFT) - -5: srd r9,r9,r11 - andi. r9,r9,1 - beq 1f -_GLOBAL(slb_miss_user_load_huge) - li r11,0 - b 2f -1: -#endif /* CONFIG_HUGETLB_PAGE */ +5: /* Extract the psize and multiply to get an array offset */ + srd r9,r9,r11 + andi. r9,r9,0xf + mulli r9,r9,MMUPSIZEDEFSIZE + /* Now get to the array and obtain the sllp + */ + ld r11,PACATOC(r13) + ld r11,mmu_psize_defs@got(r11) + add r11,r11,r9 + ld r11,MMUPSIZESLLP(r11) + ori r11,r11,SLB_VSID_USER +#else + /* paca context sllp already contains the SLB_VSID_USER bits */ lhz r11,PACACONTEXTSLLP(r13) -2: +#endif /* CONFIG_PPC_MM_SLICES */ + ld r9,PACACONTEXTID(r13) rldimi r10,r9,USER_ESID_BITS,0 b slb_finish_load diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c new file mode 100644 index 000000000000..f833dba2a028 --- /dev/null +++ b/arch/powerpc/mm/slice.c @@ -0,0 +1,633 @@ +/* + * address space "slices" (meta-segments) support + * + * Copyright (C) 2007 Benjamin Herrenschmidt, IBM Corporation. + * + * Based on hugetlb implementation + * + * Copyright (C) 2003 David Gibson, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#undef DEBUG + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/pagemap.h> +#include <linux/err.h> +#include <linux/spinlock.h> +#include <linux/module.h> +#include <asm/mman.h> +#include <asm/mmu.h> +#include <asm/spu.h> + +static spinlock_t slice_convert_lock = SPIN_LOCK_UNLOCKED; + + +#ifdef DEBUG +int _slice_debug = 1; + +static void slice_print_mask(const char *label, struct slice_mask mask) +{ + char *p, buf[16 + 3 + 16 + 1]; + int i; + + if (!_slice_debug) + return; + p = buf; + for (i = 0; i < SLICE_NUM_LOW; i++) + *(p++) = (mask.low_slices & (1 << i)) ? '1' : '0'; + *(p++) = ' '; + *(p++) = '-'; + *(p++) = ' '; + for (i = 0; i < SLICE_NUM_HIGH; i++) + *(p++) = (mask.high_slices & (1 << i)) ? '1' : '0'; + *(p++) = 0; + + printk(KERN_DEBUG "%s:%s\n", label, buf); +} + +#define slice_dbg(fmt...) do { if (_slice_debug) pr_debug(fmt); } while(0) + +#else + +static void slice_print_mask(const char *label, struct slice_mask mask) {} +#define slice_dbg(fmt...) + +#endif + +static struct slice_mask slice_range_to_mask(unsigned long start, + unsigned long len) +{ + unsigned long end = start + len - 1; + struct slice_mask ret = { 0, 0 }; + + if (start < SLICE_LOW_TOP) { + unsigned long mend = min(end, SLICE_LOW_TOP); + unsigned long mstart = min(start, SLICE_LOW_TOP); + + ret.low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1)) + - (1u << GET_LOW_SLICE_INDEX(mstart)); + } + + if ((start + len) > SLICE_LOW_TOP) + ret.high_slices = (1u << (GET_HIGH_SLICE_INDEX(end) + 1)) + - (1u << GET_HIGH_SLICE_INDEX(start)); + + return ret; +} + +static int slice_area_is_free(struct mm_struct *mm, unsigned long addr, + unsigned long len) +{ + struct vm_area_struct *vma; + + if ((mm->task_size - len) < addr) + return 0; + vma = find_vma(mm, addr); + return (!vma || (addr + len) <= vma->vm_start); +} + +static int slice_low_has_vma(struct mm_struct *mm, unsigned long slice) +{ + return !slice_area_is_free(mm, slice << SLICE_LOW_SHIFT, + 1ul << SLICE_LOW_SHIFT); +} + +static int slice_high_has_vma(struct mm_struct *mm, unsigned long slice) +{ + unsigned long start = slice << SLICE_HIGH_SHIFT; + unsigned long end = start + (1ul << SLICE_HIGH_SHIFT); + + /* Hack, so that each addresses is controlled by exactly one + * of the high or low area bitmaps, the first high area starts + * at 4GB, not 0 */ + if (start == 0) + start = SLICE_LOW_TOP; + + return !slice_area_is_free(mm, start, end - start); +} + +static struct slice_mask slice_mask_for_free(struct mm_struct *mm) +{ + struct slice_mask ret = { 0, 0 }; + unsigned long i; + + for (i = 0; i < SLICE_NUM_LOW; i++) + if (!slice_low_has_vma(mm, i)) + ret.low_slices |= 1u << i; + + if (mm->task_size <= SLICE_LOW_TOP) + return ret; + + for (i = 0; i < SLICE_NUM_HIGH; i++) + if (!slice_high_has_vma(mm, i)) + ret.high_slices |= 1u << i; + + return ret; +} + +static struct slice_mask slice_mask_for_size(struct mm_struct *mm, int psize) +{ + struct slice_mask ret = { 0, 0 }; + unsigned long i; + u64 psizes; + + psizes = mm->context.low_slices_psize; + for (i = 0; i < SLICE_NUM_LOW; i++) + if (((psizes >> (i * 4)) & 0xf) == psize) + ret.low_slices |= 1u << i; + + psizes = mm->context.high_slices_psize; + for (i = 0; i < SLICE_NUM_HIGH; i++) + if (((psizes >> (i * 4)) & 0xf) == psize) + ret.high_slices |= 1u << i; + + return ret; +} + +static int slice_check_fit(struct slice_mask mask, struct slice_mask available) +{ + return (mask.low_slices & available.low_slices) == mask.low_slices && + (mask.high_slices & available.high_slices) == mask.high_slices; +} + +static void slice_flush_segments(void *parm) +{ + struct mm_struct *mm = parm; + unsigned long flags; + + if (mm != current->active_mm) + return; + + /* update the paca copy of the context struct */ + get_paca()->context = current->active_mm->context; + + local_irq_save(flags); + slb_flush_and_rebolt(); + local_irq_restore(flags); +} + +static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psize) +{ + /* Write the new slice psize bits */ + u64 lpsizes, hpsizes; + unsigned long i, flags; + + slice_dbg("slice_convert(mm=%p, psize=%d)\n", mm, psize); + slice_print_mask(" mask", mask); + + /* We need to use a spinlock here to protect against + * concurrent 64k -> 4k demotion ... + */ + spin_lock_irqsave(&slice_convert_lock, flags); + + lpsizes = mm->context.low_slices_psize; + for (i = 0; i < SLICE_NUM_LOW; i++) + if (mask.low_slices & (1u << i)) + lpsizes = (lpsizes & ~(0xful << (i * 4))) | + (((unsigned long)psize) << (i * 4)); + + hpsizes = mm->context.high_slices_psize; + for (i = 0; i < SLICE_NUM_HIGH; i++) + if (mask.high_slices & (1u << i)) + hpsizes = (hpsizes & ~(0xful << (i * 4))) | + (((unsigned long)psize) << (i * 4)); + + mm->context.low_slices_psize = lpsizes; + mm->context.high_slices_psize = hpsizes; + + slice_dbg(" lsps=%lx, hsps=%lx\n", + mm->context.low_slices_psize, + mm->context.high_slices_psize); + + spin_unlock_irqrestore(&slice_convert_lock, flags); + mb(); + + /* XXX this is sub-optimal but will do for now */ + on_each_cpu(slice_flush_segments, mm, 0, 1); +#ifdef CONFIG_SPU_BASE + spu_flush_all_slbs(mm); +#endif +} + +static unsigned long slice_find_area_bottomup(struct mm_struct *mm, + unsigned long len, + struct slice_mask available, + int psize, int use_cache) +{ + struct vm_area_struct *vma; + unsigned long start_addr, addr; + struct slice_mask mask; + int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT); + + if (use_cache) { + if (len <= mm->cached_hole_size) { + start_addr = addr = TASK_UNMAPPED_BASE; + mm->cached_hole_size = 0; + } else + start_addr = addr = mm->free_area_cache; + } else + start_addr = addr = TASK_UNMAPPED_BASE; + +full_search: + for (;;) { + addr = _ALIGN_UP(addr, 1ul << pshift); + if ((TASK_SIZE - len) < addr) + break; + vma = find_vma(mm, addr); + BUG_ON(vma && (addr >= vma->vm_end)); + + mask = slice_range_to_mask(addr, len); + if (!slice_check_fit(mask, available)) { + if (addr < SLICE_LOW_TOP) + addr = _ALIGN_UP(addr + 1, 1ul << SLICE_LOW_SHIFT); + else + addr = _ALIGN_UP(addr + 1, 1ul << SLICE_HIGH_SHIFT); + continue; + } + if (!vma || addr + len <= vma->vm_start) { + /* + * Remember the place where we stopped the search: + */ + if (use_cache) + mm->free_area_cache = addr + len; + return addr; + } + if (use_cache && (addr + mm->cached_hole_size) < vma->vm_start) + mm->cached_hole_size = vma->vm_start - addr; + addr = vma->vm_end; + } + + /* Make sure we didn't miss any holes */ + if (use_cache && start_addr != TASK_UNMAPPED_BASE) { + start_addr = addr = TASK_UNMAPPED_BASE; + mm->cached_hole_size = 0; + goto full_search; + } + return -ENOMEM; +} + +static unsigned long slice_find_area_topdown(struct mm_struct *mm, + unsigned long len, + struct slice_mask available, + int psize, int use_cache) +{ + struct vm_area_struct *vma; + unsigned long addr; + struct slice_mask mask; + int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT); + + /* check if free_area_cache is useful for us */ + if (use_cache) { + if (len <= mm->cached_hole_size) { + mm->cached_hole_size = 0; + mm->free_area_cache = mm->mmap_base; + } + + /* either no address requested or can't fit in requested + * address hole + */ + addr = mm->free_area_cache; + + /* make sure it can fit in the remaining address space */ + if (addr > len) { + addr = _ALIGN_DOWN(addr - len, 1ul << pshift); + mask = slice_range_to_mask(addr, len); + if (slice_check_fit(mask, available) && + slice_area_is_free(mm, addr, len)) + /* remember the address as a hint for + * next time + */ + return (mm->free_area_cache = addr); + } + } + + addr = mm->mmap_base; + while (addr > len) { + /* Go down by chunk size */ + addr = _ALIGN_DOWN(addr - len, 1ul << pshift); + + /* Check for hit with different page size */ + mask = slice_range_to_mask(addr, len); + if (!slice_check_fit(mask, available)) { + if (addr < SLICE_LOW_TOP) + addr = _ALIGN_DOWN(addr, 1ul << SLICE_LOW_SHIFT); + else if (addr < (1ul << SLICE_HIGH_SHIFT)) + addr = SLICE_LOW_TOP; + else + addr = _ALIGN_DOWN(addr, 1ul << SLICE_HIGH_SHIFT); + continue; + } + + /* + * Lookup failure means no vma is above this address, + * else if new region fits below vma->vm_start, + * return with success: + */ + vma = find_vma(mm, addr); + if (!vma || (addr + len) <= vma->vm_start) { + /* remember the address as a hint for next time */ + if (use_cache) + mm->free_area_cache = addr; + return addr; + } + + /* remember the largest hole we saw so far */ + if (use_cache && (addr + mm->cached_hole_size) < vma->vm_start) + mm->cached_hole_size = vma->vm_start - addr; + + /* try just below the current vma->vm_start */ + addr = vma->vm_start; + } + + /* + * A failed mmap() very likely causes application failure, + * so fall back to the bottom-up function here. This scenario + * can happen with large stack limits and large mmap() + * allocations. + */ + addr = slice_find_area_bottomup(mm, len, available, psize, 0); + + /* + * Restore the topdown base: + */ + if (use_cache) { + mm->free_area_cache = mm->mmap_base; + mm->cached_hole_size = ~0UL; + } + + return addr; +} + + +static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len, + struct slice_mask mask, int psize, + int topdown, int use_cache) +{ + if (topdown) + return slice_find_area_topdown(mm, len, mask, psize, use_cache); + else + return slice_find_area_bottomup(mm, len, mask, psize, use_cache); +} + +unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, + unsigned long flags, unsigned int psize, + int topdown, int use_cache) +{ + struct slice_mask mask; + struct slice_mask good_mask; + struct slice_mask potential_mask = {0,0} /* silence stupid warning */; + int pmask_set = 0; + int fixed = (flags & MAP_FIXED); + int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT); + struct mm_struct *mm = current->mm; + + /* Sanity checks */ + BUG_ON(mm->task_size == 0); + + slice_dbg("slice_get_unmapped_area(mm=%p, psize=%d...\n", mm, psize); + slice_dbg(" addr=%lx, len=%lx, flags=%lx, topdown=%d, use_cache=%d\n", + addr, len, flags, topdown, use_cache); + + if (len > mm->task_size) + return -ENOMEM; + if (fixed && (addr & ((1ul << pshift) - 1))) + return -EINVAL; + if (fixed && addr > (mm->task_size - len)) + return -EINVAL; + + /* If hint, make sure it matches our alignment restrictions */ + if (!fixed && addr) { + addr = _ALIGN_UP(addr, 1ul << pshift); + slice_dbg(" aligned addr=%lx\n", addr); + } + + /* First makeup a "good" mask of slices that have the right size + * already + */ + good_mask = slice_mask_for_size(mm, psize); + slice_print_mask(" good_mask", good_mask); + + /* First check hint if it's valid or if we have MAP_FIXED */ + if ((addr != 0 || fixed) && (mm->task_size - len) >= addr) { + + /* Don't bother with hint if it overlaps a VMA */ + if (!fixed && !slice_area_is_free(mm, addr, len)) + goto search; + + /* Build a mask for the requested range */ + mask = slice_range_to_mask(addr, len); + slice_print_mask(" mask", mask); + + /* Check if we fit in the good mask. If we do, we just return, + * nothing else to do + */ + if (slice_check_fit(mask, good_mask)) { + slice_dbg(" fits good !\n"); + return addr; + } + + /* We don't fit in the good mask, check what other slices are + * empty and thus can be converted + */ + potential_mask = slice_mask_for_free(mm); + potential_mask.low_slices |= good_mask.low_slices; + potential_mask.high_slices |= good_mask.high_slices; + pmask_set = 1; + slice_print_mask(" potential", potential_mask); + if (slice_check_fit(mask, potential_mask)) { + slice_dbg(" fits potential !\n"); + goto convert; + } + } + + /* If we have MAP_FIXED and failed the above step, then error out */ + if (fixed) + return -EBUSY; + + search: + slice_dbg(" search...\n"); + + /* Now let's see if we can find something in the existing slices + * for that size + */ + addr = slice_find_area(mm, len, good_mask, psize, topdown, use_cache); + if (addr != -ENOMEM) { + /* Found within the good mask, we don't have to setup, + * we thus return directly + */ + slice_dbg(" found area at 0x%lx\n", addr); + return addr; + } + + /* Won't fit, check what can be converted */ + if (!pmask_set) { + potential_mask = slice_mask_for_free(mm); + potential_mask.low_slices |= good_mask.low_slices; + potential_mask.high_slices |= good_mask.high_slices; + pmask_set = 1; + slice_print_mask(" potential", potential_mask); + } + + /* Now let's see if we can find something in the existing slices + * for that size + */ + addr = slice_find_area(mm, len, potential_mask, psize, topdown, + use_cache); + if (addr == -ENOMEM) + return -ENOMEM; + + mask = slice_range_to_mask(addr, len); + slice_dbg(" found potential area at 0x%lx\n", addr); + slice_print_mask(" mask", mask); + + convert: + slice_convert(mm, mask, psize); + return addr; + +} +EXPORT_SYMBOL_GPL(slice_get_unmapped_area); + +unsigned long arch_get_unmapped_area(struct file *filp, + unsigned long addr, + unsigned long len, + unsigned long pgoff, + unsigned long flags) +{ + return slice_get_unmapped_area(addr, len, flags, + current->mm->context.user_psize, + 0, 1); +} + +unsigned long arch_get_unmapped_area_topdown(struct file *filp, + const unsigned long addr0, + const unsigned long len, + const unsigned long pgoff, + const unsigned long flags) +{ + return slice_get_unmapped_area(addr0, len, flags, + current->mm->context.user_psize, + 1, 1); +} + +unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr) +{ + u64 psizes; + int index; + + if (addr < SLICE_LOW_TOP) { + psizes = mm->context.low_slices_psize; + index = GET_LOW_SLICE_INDEX(addr); + } else { + psizes = mm->context.high_slices_psize; + index = GET_HIGH_SLICE_INDEX(addr); + } + + return (psizes >> (index * 4)) & 0xf; +} +EXPORT_SYMBOL_GPL(get_slice_psize); + +/* + * This is called by hash_page when it needs to do a lazy conversion of + * an address space from real 64K pages to combo 4K pages (typically + * when hitting a non cacheable mapping on a processor or hypervisor + * that won't allow them for 64K pages). + * + * This is also called in init_new_context() to change back the user + * psize from whatever the parent context had it set to + * + * This function will only change the content of the {low,high)_slice_psize + * masks, it will not flush SLBs as this shall be handled lazily by the + * caller. + */ +void slice_set_user_psize(struct mm_struct *mm, unsigned int psize) +{ + unsigned long flags, lpsizes, hpsizes; + unsigned int old_psize; + int i; + + slice_dbg("slice_set_user_psize(mm=%p, psize=%d)\n", mm, psize); + + spin_lock_irqsave(&slice_convert_lock, flags); + + old_psize = mm->context.user_psize; + slice_dbg(" old_psize=%d\n", old_psize); + if (old_psize == psize) + goto bail; + + mm->context.user_psize = psize; + wmb(); + + lpsizes = mm->context.low_slices_psize; + for (i = 0; i < SLICE_NUM_LOW; i++) + if (((lpsizes >> (i * 4)) & 0xf) == old_psize) + lpsizes = (lpsizes & ~(0xful << (i * 4))) | + (((unsigned long)psize) << (i * 4)); + + hpsizes = mm->context.high_slices_psize; + for (i = 0; i < SLICE_NUM_HIGH; i++) + if (((hpsizes >> (i * 4)) & 0xf) == old_psize) + hpsizes = (hpsizes & ~(0xful << (i * 4))) | + (((unsigned long)psize) << (i * 4)); + + mm->context.low_slices_psize = lpsizes; + mm->context.high_slices_psize = hpsizes; + + slice_dbg(" lsps=%lx, hsps=%lx\n", + mm->context.low_slices_psize, + mm->context.high_slices_psize); + + bail: + spin_unlock_irqrestore(&slice_convert_lock, flags); +} + +/* + * is_hugepage_only_range() is used by generic code to verify wether + * a normal mmap mapping (non hugetlbfs) is valid on a given area. + * + * until the generic code provides a more generic hook and/or starts + * calling arch get_unmapped_area for MAP_FIXED (which our implementation + * here knows how to deal with), we hijack it to keep standard mappings + * away from us. + * + * because of that generic code limitation, MAP_FIXED mapping cannot + * "convert" back a slice with no VMAs to the standard page size, only + * get_unmapped_area() can. It would be possible to fix it here but I + * prefer working on fixing the generic code instead. + * + * WARNING: This will not work if hugetlbfs isn't enabled since the + * generic code will redefine that function as 0 in that. This is ok + * for now as we only use slices with hugetlbfs enabled. This should + * be fixed as the generic code gets fixed. + */ +int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, + unsigned long len) +{ + struct slice_mask mask, available; + + mask = slice_range_to_mask(addr, len); + available = slice_mask_for_size(mm, mm->context.user_psize); + +#if 0 /* too verbose */ + slice_dbg("is_hugepage_only_range(mm=%p, addr=%lx, len=%lx)\n", + mm, addr, len); + slice_print_mask(" mask", mask); + slice_print_mask(" available", available); +#endif + return !slice_check_fit(mask, available); +} + diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c index eeeacab548e6..132c6bc66ce1 100644 --- a/arch/powerpc/mm/stab.c +++ b/arch/powerpc/mm/stab.c @@ -227,7 +227,7 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm) * the first (bolted) segment, so that do_stab_bolted won't get a * recursive segment miss on the segment table itself. */ -void stabs_alloc(void) +void __init stabs_alloc(void) { int cpu; diff --git a/arch/powerpc/mm/tlb_32.c b/arch/powerpc/mm/tlb_32.c index 925ff70be8ba..6a69417cbc0e 100644 --- a/arch/powerpc/mm/tlb_32.c +++ b/arch/powerpc/mm/tlb_32.c @@ -111,7 +111,7 @@ static void flush_range(struct mm_struct *mm, unsigned long start, if (start >= end) return; end = (end - 1) | ~PAGE_MASK; - pmd = pmd_offset(pgd_offset(mm, start), start); + pmd = pmd_offset(pud_offset(pgd_offset(mm, start), start), start); for (;;) { pmd_end = ((start + PGDIR_SIZE) & PGDIR_MASK) - 1; if (pmd_end > end) @@ -169,7 +169,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) return; } mm = (vmaddr < TASK_SIZE)? vma->vm_mm: &init_mm; - pmd = pmd_offset(pgd_offset(mm, vmaddr), vmaddr); + pmd = pmd_offset(pud_offset(pgd_offset(mm, vmaddr), vmaddr), vmaddr); if (!pmd_none(*pmd)) flush_hash_pages(mm->context.id, vmaddr, pmd_val(*pmd), 1); FINISH_FLUSH; diff --git a/arch/powerpc/mm/tlb_64.c b/arch/powerpc/mm/tlb_64.c index fd8d08c325eb..2bfc4d7e1aa2 100644 --- a/arch/powerpc/mm/tlb_64.c +++ b/arch/powerpc/mm/tlb_64.c @@ -143,16 +143,22 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr, */ addr &= PAGE_MASK; - /* Get page size (maybe move back to caller) */ + /* Get page size (maybe move back to caller). + * + * NOTE: when using special 64K mappings in 4K environment like + * for SPEs, we obtain the page size from the slice, which thus + * must still exist (and thus the VMA not reused) at the time + * of this call + */ if (huge) { #ifdef CONFIG_HUGETLB_PAGE psize = mmu_huge_psize; #else BUG(); - psize = pte_pagesize_index(pte); /* shutup gcc */ + psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */ #endif } else - psize = pte_pagesize_index(pte); + psize = pte_pagesize_index(mm, addr, pte); /* Build full vaddr */ if (!is_kernel_addr(addr)) { |