diff options
Diffstat (limited to 'arch/powerpc')
31 files changed, 248 insertions, 237 deletions
diff --git a/arch/powerpc/configs/mpc512x_defconfig b/arch/powerpc/configs/mpc512x_defconfig index c2b1c4404683..e4bfb1101c0e 100644 --- a/arch/powerpc/configs/mpc512x_defconfig +++ b/arch/powerpc/configs/mpc512x_defconfig @@ -120,6 +120,5 @@ CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set # CONFIG_CRYPTO_HW is not set diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index 53687c3a70c4..7c6baf6df139 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -1123,7 +1123,6 @@ CONFIG_NLS_ISO8859_15=m CONFIG_NLS_KOI8_R=m CONFIG_NLS_KOI8_U=m CONFIG_DEBUG_INFO=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set CONFIG_UNUSED_SYMBOLS=y CONFIG_HEADERS_CHECK=y CONFIG_MAGIC_SYSRQ=y diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h index 5b0177733994..9d85dc2c5b86 100644 --- a/arch/powerpc/include/asm/book3s/64/hugetlb.h +++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h @@ -13,6 +13,10 @@ radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); +extern void radix__huge_ptep_modify_prot_commit(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t old_pte, pte_t pte); + static inline int hstate_get_psize(struct hstate *hstate) { unsigned long shift; @@ -32,8 +36,8 @@ static inline int hstate_get_psize(struct hstate *hstate) } } -#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE -static inline bool gigantic_page_supported(void) +#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE_RUNTIME_ALLOCATION +static inline bool gigantic_page_runtime_allocation_supported(void) { return true; } @@ -42,4 +46,12 @@ static inline bool gigantic_page_supported(void) /* hugepd entry valid bit */ #define HUGEPD_VAL_BITS (0x8000000000000000UL) +#define huge_ptep_modify_prot_start huge_ptep_modify_prot_start +extern pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep); + +#define huge_ptep_modify_prot_commit huge_ptep_modify_prot_commit +extern void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t old_pte, pte_t new_pte); #endif diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 404e0f48f3f3..5043f846def1 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1306,6 +1306,24 @@ static inline int pud_pfn(pud_t pud) BUILD_BUG(); return 0; } +#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION +pte_t ptep_modify_prot_start(struct vm_area_struct *, unsigned long, pte_t *); +void ptep_modify_prot_commit(struct vm_area_struct *, unsigned long, + pte_t *, pte_t, pte_t); + +/* + * Returns true for a R -> RW upgrade of pte + */ +static inline bool is_pte_rw_upgrade(unsigned long old_val, unsigned long new_val) +{ + if (!(old_val & _PAGE_READ)) + return false; + + if ((!(old_val & _PAGE_WRITE)) && (new_val & _PAGE_WRITE)) + return true; + + return false; +} #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ */ diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index 7d1a3d1543fc..5ab134eeed20 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -127,6 +127,10 @@ extern void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep pte_t entry, unsigned long address, int psize); +extern void radix__ptep_modify_prot_commit(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t old_pte, pte_t pte); + static inline unsigned long __radix_pte_update(pte_t *ptep, unsigned long clr, unsigned long set) { diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index aee4fcc24990..77fc21278fa2 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -10,6 +10,7 @@ #include <linux/pci.h> #include <linux/list.h> #include <linux/ioport.h> +#include <linux/numa.h> struct device_node; @@ -265,7 +266,7 @@ extern int pcibios_map_io_space(struct pci_bus *bus); #ifdef CONFIG_NUMA #define PHB_SET_NODE(PHB, NODE) ((PHB)->node = (NODE)) #else -#define PHB_SET_NODE(PHB, NODE) ((PHB)->node = -1) +#define PHB_SET_NODE(PHB, NODE) ((PHB)->node = NUMA_NO_NODE) #endif #endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 913bfca09c4f..8c890c6557ed 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -11,6 +11,7 @@ #include <linux/export.h> #include <linux/memblock.h> #include <linux/sched/task.h> +#include <linux/numa.h> #include <asm/lppaca.h> #include <asm/paca.h> @@ -27,7 +28,7 @@ static void *__init alloc_paca_data(unsigned long size, unsigned long align, unsigned long limit, int cpu) { - unsigned long pa; + void *ptr; int nid; /* @@ -36,23 +37,21 @@ static void *__init alloc_paca_data(unsigned long size, unsigned long align, * which will put its paca in the right place. */ if (cpu == boot_cpuid) { - nid = -1; + nid = NUMA_NO_NODE; memblock_set_bottom_up(true); } else { nid = early_cpu_to_node(cpu); } - pa = memblock_alloc_base_nid(size, align, limit, nid, MEMBLOCK_NONE); - if (!pa) { - pa = memblock_alloc_base(size, align, limit); - if (!pa) - panic("cannot allocate paca data"); - } + ptr = memblock_alloc_try_nid(size, align, MEMBLOCK_LOW_LIMIT, + limit, nid); + if (!ptr) + panic("cannot allocate paca data"); if (cpu == boot_cpuid) memblock_set_bottom_up(false); - return __va(pa); + return ptr; } #ifdef CONFIG_PPC_PSERIES @@ -118,7 +117,6 @@ static struct slb_shadow * __init new_slb_shadow(int cpu, unsigned long limit) } s = alloc_paca_data(sizeof(*s), L1_CACHE_BYTES, limit, cpu); - memset(s, 0, sizeof(*s)); s->persistent = cpu_to_be32(SLB_NUM_BOLTED); s->buffer_length = cpu_to_be32(sizeof(*s)); @@ -222,7 +220,6 @@ void __init allocate_paca(int cpu) paca = alloc_paca_data(sizeof(struct paca_struct), L1_CACHE_BYTES, limit, cpu); paca_ptrs[cpu] = paca; - memset(paca, 0, sizeof(struct paca_struct)); initialise_paca(paca, cpu); #ifdef CONFIG_PPC_PSERIES diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 88e4f69a09e5..4538e8ddde80 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -32,6 +32,7 @@ #include <linux/vmalloc.h> #include <linux/slab.h> #include <linux/vgaarb.h> +#include <linux/numa.h> #include <asm/processor.h> #include <asm/io.h> @@ -132,7 +133,7 @@ struct pci_controller *pcibios_alloc_controller(struct device_node *dev) int nid = of_node_to_nid(dev); if (nid < 0 || !node_online(nid)) - nid = -1; + nid = NUMA_NO_NODE; PHB_SET_NODE(phb, nid); } diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index ce393df243aa..6a4b59d574c2 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1298,16 +1298,6 @@ void show_user_instructions(struct pt_regs *regs) pc = regs->nip - (NR_INSN_TO_PRINT * 3 / 4 * sizeof(int)); - /* - * Make sure the NIP points at userspace, not kernel text/data or - * elsewhere. - */ - if (!__access_ok(pc, NR_INSN_TO_PRINT * sizeof(int), USER_DS)) { - pr_info("%s[%d]: Bad NIP, not dumping instructions.\n", - current->comm, current->pid); - return; - } - seq_buf_init(&s, buf, sizeof(buf)); while (n) { @@ -1318,7 +1308,7 @@ void show_user_instructions(struct pt_regs *regs) for (i = 0; i < 8 && n; i++, n--, pc += sizeof(int)) { int instr; - if (probe_kernel_address((const void *)pc, instr)) { + if (probe_user_read(&instr, (void __user *)pc, sizeof(instr))) { seq_buf_printf(&s, "XXXXXXXX "); continue; } diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index ca00fbb97cf8..82be48c123cf 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -459,8 +459,8 @@ void __init smp_setup_cpu_maps(void) DBG("smp_setup_cpu_maps()\n"); - cpu_to_phys_id = __va(memblock_phys_alloc(nr_cpu_ids * sizeof(u32), __alignof__(u32))); - memset(cpu_to_phys_id, 0, nr_cpu_ids * sizeof(u32)); + cpu_to_phys_id = memblock_alloc(nr_cpu_ids * sizeof(u32), + __alignof__(u32)); for_each_node_by_type(dn, "cpu") { const __be32 *intserv; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 236c1151a3a7..3dcd779aef44 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -634,19 +634,17 @@ __init u64 ppc64_bolted_size(void) static void *__init alloc_stack(unsigned long limit, int cpu) { - unsigned long pa; + void *ptr; BUILD_BUG_ON(STACK_INT_FRAME_SIZE % 16); - pa = memblock_alloc_base_nid(THREAD_SIZE, THREAD_SIZE, limit, - early_cpu_to_node(cpu), MEMBLOCK_NONE); - if (!pa) { - pa = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit); - if (!pa) - panic("cannot allocate stacks"); - } + ptr = memblock_alloc_try_nid(THREAD_SIZE, THREAD_SIZE, + MEMBLOCK_LOW_LIMIT, limit, + early_cpu_to_node(cpu)); + if (!ptr) + panic("cannot allocate stacks"); - return __va(pa); + return ptr; } void __init irqstack_early_init(void) @@ -739,20 +737,17 @@ void __init emergency_stack_init(void) struct thread_info *ti; ti = alloc_stack(limit, i); - memset(ti, 0, THREAD_SIZE); emerg_stack_init_thread_info(ti, i); paca_ptrs[i]->emergency_sp = (void *)ti + THREAD_SIZE; #ifdef CONFIG_PPC_BOOK3S_64 /* emergency stack for NMI exception handling. */ ti = alloc_stack(limit, i); - memset(ti, 0, THREAD_SIZE); emerg_stack_init_thread_info(ti, i); paca_ptrs[i]->nmi_emergency_sp = (void *)ti + THREAD_SIZE; /* emergency stack for machine check exception handling. */ ti = alloc_stack(limit, i); - memset(ti, 0, THREAD_SIZE); emerg_stack_init_thread_info(ti, i); paca_ptrs[i]->mc_emergency_sp = (void *)ti + THREAD_SIZE; #endif @@ -933,8 +928,9 @@ static void __ref init_fallback_flush(void) * hardware prefetch runoff. We don't have a recipe for load patterns to * reliably avoid the prefetcher. */ - l1d_flush_fallback_area = __va(memblock_alloc_base(l1d_size * 2, l1d_size, limit)); - memset(l1d_flush_fallback_area, 0, l1d_size * 2); + l1d_flush_fallback_area = memblock_alloc_try_nid(l1d_size * 2, + l1d_size, MEMBLOCK_LOW_LIMIT, + limit, NUMA_NO_NODE); for_each_possible_cpu(cpu) { struct paca_struct *paca = paca_ptrs[cpu]; diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 7725a9714736..a31b6234fcd7 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -798,7 +798,6 @@ static int __init vdso_init(void) BUG_ON(vdso32_pagelist == NULL); for (i = 0; i < vdso32_pages; i++) { struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE); - ClearPageReserved(pg); get_page(pg); vdso32_pagelist[i] = pg; } @@ -812,7 +811,6 @@ static int __init vdso_init(void) BUG_ON(vdso64_pagelist == NULL); for (i = 0; i < vdso64_pages; i++) { struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE); - ClearPageReserved(pg); get_page(pg); vdso64_pagelist[i] = pg; } diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 887f11bcf330..ec74305fa330 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -276,12 +276,8 @@ static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address, if ((flags & FAULT_FLAG_WRITE) && (flags & FAULT_FLAG_USER) && access_ok(nip, sizeof(*nip))) { unsigned int inst; - int res; - pagefault_disable(); - res = __get_user_inatomic(inst, nip); - pagefault_enable(); - if (!res) + if (!probe_user_read(&inst, nip, sizeof(inst))) return !store_updates_sp(inst); *must_retry = true; } diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 4aa0797000f7..3d4b2399192f 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -908,9 +908,9 @@ static void __init htab_initialize(void) #ifdef CONFIG_DEBUG_PAGEALLOC if (debug_pagealloc_enabled()) { linear_map_hash_count = memblock_end_of_DRAM() >> PAGE_SHIFT; - linear_map_hash_slots = __va(memblock_alloc_base( - linear_map_hash_count, 1, ppc64_rma_size)); - memset(linear_map_hash_slots, 0, linear_map_hash_count); + linear_map_hash_slots = memblock_alloc_try_nid( + linear_map_hash_count, 1, MEMBLOCK_LOW_LIMIT, + ppc64_rma_size, NUMA_NO_NODE); } #endif /* CONFIG_DEBUG_PAGEALLOC */ diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c index 2e6a8f9345d3..367ce3a4a503 100644 --- a/arch/powerpc/mm/hugetlbpage-hash64.c +++ b/arch/powerpc/mm/hugetlbpage-hash64.c @@ -121,3 +121,28 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, *ptep = __pte(new_pte & ~H_PAGE_BUSY); return 0; } + +pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ + unsigned long pte_val; + /* + * Clear the _PAGE_PRESENT so that no hardware parallel update is + * possible. Also keep the pte_present true so that we don't take + * wrong fault. + */ + pte_val = pte_update(vma->vm_mm, addr, ptep, + _PAGE_PRESENT, _PAGE_INVALID, 1); + + return __pte(pte_val); +} + +void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, + pte_t *ptep, pte_t old_pte, pte_t pte) +{ + + if (radix_enabled()) + return radix__huge_ptep_modify_prot_commit(vma, addr, ptep, + old_pte, pte); + set_huge_pte_at(vma->vm_mm, addr, ptep, pte); +} diff --git a/arch/powerpc/mm/hugetlbpage-radix.c b/arch/powerpc/mm/hugetlbpage-radix.c index 2486bee0f93e..11d9ea28a816 100644 --- a/arch/powerpc/mm/hugetlbpage-radix.c +++ b/arch/powerpc/mm/hugetlbpage-radix.c @@ -90,3 +90,20 @@ radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr, return vm_unmapped_area(&info); } + +void radix__huge_ptep_modify_prot_commit(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t old_pte, pte_t pte) +{ + struct mm_struct *mm = vma->vm_mm; + + /* + * To avoid NMMU hang while relaxing access we need to flush the tlb before + * we set the new value. + */ + if (is_pte_rw_upgrade(pte_val(old_pte), pte_val(pte)) && + (atomic_read(&mm->context.copros) > 0)) + radix__flush_hugetlb_page(vma, addr); + + set_huge_pte_at(vma->vm_mm, addr, ptep, pte); +} diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c index a712a650a8b6..74d43f522dc2 100644 --- a/arch/powerpc/mm/mmu_context_iommu.c +++ b/arch/powerpc/mm/mmu_context_iommu.c @@ -21,6 +21,7 @@ #include <linux/sizes.h> #include <asm/mmu_context.h> #include <asm/pte-walk.h> +#include <linux/mm_inline.h> static DEFINE_MUTEX(mem_list_mutex); @@ -34,8 +35,18 @@ struct mm_iommu_table_group_mem_t { atomic64_t mapped; unsigned int pageshift; u64 ua; /* userspace address */ - u64 entries; /* number of entries in hpas[] */ - u64 *hpas; /* vmalloc'ed */ + u64 entries; /* number of entries in hpas/hpages[] */ + /* + * in mm_iommu_get we temporarily use this to store + * struct page address. + * + * We need to convert ua to hpa in real mode. Make it + * simpler by storing physical address. + */ + union { + struct page **hpages; /* vmalloc'ed */ + phys_addr_t *hpas; + }; #define MM_IOMMU_TABLE_INVALID_HPA ((uint64_t)-1) u64 dev_hpa; /* Device memory base address */ }; @@ -80,64 +91,13 @@ bool mm_iommu_preregistered(struct mm_struct *mm) } EXPORT_SYMBOL_GPL(mm_iommu_preregistered); -/* - * Taken from alloc_migrate_target with changes to remove CMA allocations - */ -struct page *new_iommu_non_cma_page(struct page *page, unsigned long private) -{ - gfp_t gfp_mask = GFP_USER; - struct page *new_page; - - if (PageCompound(page)) - return NULL; - - if (PageHighMem(page)) - gfp_mask |= __GFP_HIGHMEM; - - /* - * We don't want the allocation to force an OOM if possibe - */ - new_page = alloc_page(gfp_mask | __GFP_NORETRY | __GFP_NOWARN); - return new_page; -} - -static int mm_iommu_move_page_from_cma(struct page *page) -{ - int ret = 0; - LIST_HEAD(cma_migrate_pages); - - /* Ignore huge pages for now */ - if (PageCompound(page)) - return -EBUSY; - - lru_add_drain(); - ret = isolate_lru_page(page); - if (ret) - return ret; - - list_add(&page->lru, &cma_migrate_pages); - put_page(page); /* Drop the gup reference */ - - ret = migrate_pages(&cma_migrate_pages, new_iommu_non_cma_page, - NULL, 0, MIGRATE_SYNC, MR_CONTIG_RANGE); - if (ret) { - if (!list_empty(&cma_migrate_pages)) - putback_movable_pages(&cma_migrate_pages); - } - - return 0; -} - static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua, - unsigned long entries, unsigned long dev_hpa, - struct mm_iommu_table_group_mem_t **pmem) + unsigned long entries, unsigned long dev_hpa, + struct mm_iommu_table_group_mem_t **pmem) { struct mm_iommu_table_group_mem_t *mem; - long i, j, ret = 0, locked_entries = 0; + long i, ret = 0, locked_entries = 0; unsigned int pageshift; - unsigned long flags; - unsigned long cur_ua; - struct page *page = NULL; mutex_lock(&mem_list_mutex); @@ -187,58 +147,40 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua, goto unlock_exit; } + down_read(&mm->mmap_sem); + ret = get_user_pages_longterm(ua, entries, FOLL_WRITE, mem->hpages, NULL); + up_read(&mm->mmap_sem); + if (ret != entries) { + /* free the reference taken */ + for (i = 0; i < ret; i++) + put_page(mem->hpages[i]); + + vfree(mem->hpas); + kfree(mem); + ret = -EFAULT; + goto unlock_exit; + } else { + ret = 0; + } + + pageshift = PAGE_SHIFT; for (i = 0; i < entries; ++i) { - cur_ua = ua + (i << PAGE_SHIFT); - if (1 != get_user_pages_fast(cur_ua, - 1/* pages */, 1/* iswrite */, &page)) { - ret = -EFAULT; - for (j = 0; j < i; ++j) - put_page(pfn_to_page(mem->hpas[j] >> - PAGE_SHIFT)); - vfree(mem->hpas); - kfree(mem); - goto unlock_exit; - } + struct page *page = mem->hpages[i]; + /* - * If we get a page from the CMA zone, since we are going to - * be pinning these entries, we might as well move them out - * of the CMA zone if possible. NOTE: faulting in + migration - * can be expensive. Batching can be considered later + * Allow to use larger than 64k IOMMU pages. Only do that + * if we are backed by hugetlb. */ - if (is_migrate_cma_page(page)) { - if (mm_iommu_move_page_from_cma(page)) - goto populate; - if (1 != get_user_pages_fast(cur_ua, - 1/* pages */, 1/* iswrite */, - &page)) { - ret = -EFAULT; - for (j = 0; j < i; ++j) - put_page(pfn_to_page(mem->hpas[j] >> - PAGE_SHIFT)); - vfree(mem->hpas); - kfree(mem); - goto unlock_exit; - } - } -populate: - pageshift = PAGE_SHIFT; - if (mem->pageshift > PAGE_SHIFT && PageCompound(page)) { - pte_t *pte; + if ((mem->pageshift > PAGE_SHIFT) && PageHuge(page)) { struct page *head = compound_head(page); - unsigned int compshift = compound_order(head); - unsigned int pteshift; - - local_irq_save(flags); /* disables as well */ - pte = find_linux_pte(mm->pgd, cur_ua, NULL, &pteshift); - - /* Double check it is still the same pinned page */ - if (pte && pte_page(*pte) == head && - pteshift == compshift + PAGE_SHIFT) - pageshift = max_t(unsigned int, pteshift, - PAGE_SHIFT); - local_irq_restore(flags); + + pageshift = compound_order(head) + PAGE_SHIFT; } mem->pageshift = min(mem->pageshift, pageshift); + /* + * We don't need struct page reference any more, switch + * to physical address. + */ mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT; } diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index b5d1c45c1475..ac49e4158e50 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -84,7 +84,7 @@ static void __init setup_node_to_cpumask_map(void) alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]); /* cpumask_of_node() will now work */ - dbg("Node to cpumask map for %d nodes\n", nr_node_ids); + dbg("Node to cpumask map for %u nodes\n", nr_node_ids); } static int __init fake_numa_create_new_node(unsigned long end_pfn, @@ -215,7 +215,7 @@ static void initialize_distance_lookup_table(int nid, */ static int associativity_to_nid(const __be32 *associativity) { - int nid = -1; + int nid = NUMA_NO_NODE; if (min_common_depth == -1) goto out; @@ -225,7 +225,7 @@ static int associativity_to_nid(const __be32 *associativity) /* POWER4 LPAR uses 0xffff as invalid node */ if (nid == 0xffff || nid >= MAX_NUMNODES) - nid = -1; + nid = NUMA_NO_NODE; if (nid > 0 && of_read_number(associativity, 1) >= distance_ref_points_depth) { @@ -244,7 +244,7 @@ out: */ static int of_node_to_nid_single(struct device_node *device) { - int nid = -1; + int nid = NUMA_NO_NODE; const __be32 *tmp; tmp = of_get_associativity(device); @@ -256,7 +256,7 @@ static int of_node_to_nid_single(struct device_node *device) /* Walk the device tree upwards, looking for an associativity id */ int of_node_to_nid(struct device_node *device) { - int nid = -1; + int nid = NUMA_NO_NODE; of_node_get(device); while (device) { @@ -454,7 +454,7 @@ static int of_drconf_to_nid_single(struct drmem_lmb *lmb) */ static int numa_setup_cpu(unsigned long lcpu) { - int nid = -1; + int nid = NUMA_NO_NODE; struct device_node *cpu; /* @@ -930,7 +930,7 @@ static int hot_add_drconf_scn_to_nid(unsigned long scn_addr) { struct drmem_lmb *lmb; unsigned long lmb_size; - int nid = -1; + int nid = NUMA_NO_NODE; lmb_size = drmem_lmb_size(); @@ -960,7 +960,7 @@ static int hot_add_drconf_scn_to_nid(unsigned long scn_addr) static int hot_add_node_scn_to_nid(unsigned long scn_addr) { struct device_node *memory; - int nid = -1; + int nid = NUMA_NO_NODE; for_each_node_by_type(memory, "memory") { unsigned long start, size; diff --git a/arch/powerpc/mm/pgtable-book3e.c b/arch/powerpc/mm/pgtable-book3e.c index e0ccf36714b2..53cbc7dc2df2 100644 --- a/arch/powerpc/mm/pgtable-book3e.c +++ b/arch/powerpc/mm/pgtable-book3e.c @@ -57,12 +57,8 @@ void vmemmap_remove_mapping(unsigned long start, static __ref void *early_alloc_pgtable(unsigned long size) { - void *pt; - - pt = __va(memblock_alloc_base(size, size, __pa(MAX_DMA_ADDRESS))); - memset(pt, 0, size); - - return pt; + return memblock_alloc_try_nid(size, size, MEMBLOCK_LOW_LIMIT, + __pa(MAX_DMA_ADDRESS), NUMA_NO_NODE); } /* diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c index ecd31569a120..92a3e4c39540 100644 --- a/arch/powerpc/mm/pgtable-book3s64.c +++ b/arch/powerpc/mm/pgtable-book3s64.c @@ -195,11 +195,8 @@ void __init mmu_partition_table_init(void) unsigned long ptcr; BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 36), "Partition table size too large."); - partition_tb = __va(memblock_alloc_base(patb_size, patb_size, - MEMBLOCK_ALLOC_ANYWHERE)); - /* Initialize the Partition Table with no entries */ - memset((void *)partition_tb, 0, patb_size); + partition_tb = memblock_alloc(patb_size, patb_size); /* * update partition table control register, @@ -401,6 +398,31 @@ void arch_report_meminfo(struct seq_file *m) } #endif /* CONFIG_PROC_FS */ +pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, + pte_t *ptep) +{ + unsigned long pte_val; + + /* + * Clear the _PAGE_PRESENT so that no hardware parallel update is + * possible. Also keep the pte_present true so that we don't take + * wrong fault. + */ + pte_val = pte_update(vma->vm_mm, addr, ptep, _PAGE_PRESENT, _PAGE_INVALID, 0); + + return __pte(pte_val); + +} + +void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, + pte_t *ptep, pte_t old_pte, pte_t pte) +{ + if (radix_enabled()) + return radix__ptep_modify_prot_commit(vma, addr, + ptep, old_pte, pte); + set_pte_at(vma->vm_mm, addr, ptep, pte); +} + /* * For hash translation mode, we use the deposited table to store hash slot * information and they are stored at PTRS_PER_PMD offset from related pmd diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index 931156069a81..e377684ac6ad 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -51,26 +51,15 @@ static int native_register_process_table(unsigned long base, unsigned long pg_sz static __ref void *early_alloc_pgtable(unsigned long size, int nid, unsigned long region_start, unsigned long region_end) { - unsigned long pa = 0; - void *pt; + phys_addr_t min_addr = MEMBLOCK_LOW_LIMIT; + phys_addr_t max_addr = MEMBLOCK_ALLOC_ANYWHERE; - if (region_start || region_end) /* has region hint */ - pa = memblock_alloc_range(size, size, region_start, region_end, - MEMBLOCK_NONE); - else if (nid != -1) /* has node hint */ - pa = memblock_alloc_base_nid(size, size, - MEMBLOCK_ALLOC_ANYWHERE, - nid, MEMBLOCK_NONE); + if (region_start) + min_addr = region_start; + if (region_end) + max_addr = region_end; - if (!pa) - pa = memblock_alloc_base(size, size, MEMBLOCK_ALLOC_ANYWHERE); - - BUG_ON(!pa); - - pt = __va(pa); - memset(pt, 0, size); - - return pt; + return memblock_alloc_try_nid(size, size, min_addr, max_addr, nid); } static int early_map_kernel_page(unsigned long ea, unsigned long pa, @@ -1063,3 +1052,21 @@ void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep, } /* See ptesync comment in radix__set_pte_at */ } + +void radix__ptep_modify_prot_commit(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t old_pte, pte_t pte) +{ + struct mm_struct *mm = vma->vm_mm; + + /* + * To avoid NMMU hang while relaxing access we need to flush the tlb before + * we set the new value. We need to do this only for radix, because hash + * translation does flush when updating the linux pte. + */ + if (is_pte_rw_upgrade(pte_val(old_pte), pte_val(pte)) && + (atomic_read(&mm->context.copros) > 0)) + radix__flush_tlb_page(vma, addr); + + set_pte_at(mm, addr, ptep, pte); +} diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index 3f4193201ee7..36a664f06c65 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -211,8 +211,7 @@ void __init MMU_init_hw(void) * Find some memory for the hash table. */ if ( ppc_md.progress ) ppc_md.progress("hash:find piece", 0x322); - Hash = __va(memblock_phys_alloc(Hash_size, Hash_size)); - memset(Hash, 0, Hash_size); + Hash = memblock_alloc(Hash_size, Hash_size); _SDR1 = __pa(Hash) | SDR1_LOW_BITS; Hash_end = (struct hash_pte *) ((unsigned long)Hash + Hash_size); diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c index 0af051a1974e..0680efb2237b 100644 --- a/arch/powerpc/perf/callchain.c +++ b/arch/powerpc/perf/callchain.c @@ -159,12 +159,8 @@ static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret) ((unsigned long)ptr & 7)) return -EFAULT; - pagefault_disable(); - if (!__get_user_inatomic(*ret, ptr)) { - pagefault_enable(); + if (!probe_user_read(ret, ptr, sizeof(*ret))) return 0; - } - pagefault_enable(); return read_user_stack_slow(ptr, ret, 8); } @@ -175,12 +171,8 @@ static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) ((unsigned long)ptr & 3)) return -EFAULT; - pagefault_disable(); - if (!__get_user_inatomic(*ret, ptr)) { - pagefault_enable(); + if (!probe_user_read(ret, ptr, sizeof(*ret))) return 0; - } - pagefault_enable(); return read_user_stack_slow(ptr, ret, 4); } @@ -307,17 +299,11 @@ static inline int current_is_64bit(void) */ static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) { - int rc; - if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) || ((unsigned long)ptr & 3)) return -EFAULT; - pagefault_disable(); - rc = __get_user_inatomic(*ret, ptr); - pagefault_enable(); - - return rc; + return probe_user_read(ret, ptr, sizeof(*ret)); } static inline void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry, diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index b0723002a396..4b64ddf0db68 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -416,7 +416,6 @@ static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) static __u64 power_pmu_bhrb_to(u64 addr) { unsigned int instr; - int ret; __u64 target; if (is_kernel_addr(addr)) { @@ -427,13 +426,8 @@ static __u64 power_pmu_bhrb_to(u64 addr) } /* Userspace: need copy instruction here then translate it */ - pagefault_disable(); - ret = __get_user_inatomic(instr, (unsigned int __user *)addr); - if (ret) { - pagefault_enable(); + if (probe_user_read(&instr, (unsigned int __user *)addr, sizeof(instr))) return 0; - } - pagefault_enable(); target = branch_target(&instr); if ((!target) || (instr & BRANCH_ABSOLUTE)) diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 7c544607ba32..d07b753d6b20 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -325,7 +325,7 @@ config ARCH_ENABLE_SPLIT_PMD_PTLOCK config PPC_RADIX_MMU bool "Radix MMU Support" depends on PPC_BOOK3S_64 - select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA + select ARCH_HAS_GIGANTIC_PAGE_RUNTIME_ALLOCATION if (MEMORY_ISOLATION && COMPACTION) || CMA default y help Enable support for the Power ISA 3.0 Radix style MMU. Currently this diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c index f2971522fb4a..f62930f839ca 100644 --- a/arch/powerpc/platforms/pasemi/iommu.c +++ b/arch/powerpc/platforms/pasemi/iommu.c @@ -208,7 +208,9 @@ static int __init iob_init(struct device_node *dn) pr_debug(" -> %s\n", __func__); /* For 2G space, 8x64 pages (2^21 bytes) is max total l2 size */ - iob_l2_base = (u32 *)__va(memblock_alloc_base(1UL<<21, 1UL<<21, 0x80000000)); + iob_l2_base = memblock_alloc_try_nid_raw(1UL << 21, 1UL << 21, + MEMBLOCK_LOW_LIMIT, 0x80000000, + NUMA_NO_NODE); pr_info("IOBMAP L2 allocated at: %p\n", iob_l2_base); @@ -269,4 +271,3 @@ void __init iommu_init_early_pasemi(void) pasemi_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pasemi; set_pci_dma_ops(&dma_iommu_ops); } - diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c index 84d038ed3882..248a38ad25c7 100644 --- a/arch/powerpc/platforms/powernv/memtrace.c +++ b/arch/powerpc/platforms/powernv/memtrace.c @@ -20,6 +20,7 @@ #include <linux/slab.h> #include <linux/memory.h> #include <linux/memory_hotplug.h> +#include <linux/numa.h> #include <asm/machdep.h> #include <asm/debugfs.h> @@ -223,7 +224,7 @@ static int memtrace_online(void) ent = &memtrace_array[i]; /* We have onlined this chunk previously */ - if (ent->nid == -1) + if (ent->nid == NUMA_NO_NODE) continue; /* Remove from io mappings */ @@ -257,7 +258,7 @@ static int memtrace_online(void) */ debugfs_remove_recursive(ent->dir); pr_info("Added trace memory back to node %d\n", ent->nid); - ent->size = ent->start = ent->nid = -1; + ent->size = ent->start = ent->nid = NUMA_NO_NODE; } if (ret) return ret; diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index d1072d464e99..0711e2455f72 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -170,8 +170,7 @@ int __init early_init_dt_scan_recoverable_ranges(unsigned long node, /* * Allocate a buffer to hold the MC recoverable ranges. */ - mc_recoverable_range =__va(memblock_phys_alloc(size, __alignof__(u64))); - memset(mc_recoverable_range, 0, size); + mc_recoverable_range = memblock_alloc(size, __alignof__(u64)); for (i = 0; i < mc_recoverable_range_len; i++) { mc_recoverable_range[i].start_addr = diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 41f62ca27c63..e4f0dfd4ae33 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -130,8 +130,13 @@ static void __init fwnmi_init(void) * It will be used in real mode mce handler, hence it needs to be * below RMA. */ - mce_data_buf = __va(memblock_alloc_base(RTAS_ERROR_LOG_MAX * nr_cpus, - RTAS_ERROR_LOG_MAX, ppc64_rma_size)); + mce_data_buf = memblock_alloc_try_nid_raw(RTAS_ERROR_LOG_MAX * nr_cpus, + RTAS_ERROR_LOG_MAX, MEMBLOCK_LOW_LIMIT, + ppc64_rma_size, NUMA_NO_NODE); + if (!mce_data_buf) + panic("Failed to allocate %d bytes below %pa for MCE buffer\n", + RTAS_ERROR_LOG_MAX * nr_cpus, &ppc64_rma_size); + for_each_possible_cpu(i) { paca_ptrs[i]->mce_data_buf = mce_data_buf + (RTAS_ERROR_LOG_MAX * i); @@ -140,8 +145,13 @@ static void __init fwnmi_init(void) #ifdef CONFIG_PPC_BOOK3S_64 /* Allocate per cpu slb area to save old slb contents during MCE */ size = sizeof(struct slb_entry) * mmu_slb_size * nr_cpus; - slb_ptr = __va(memblock_alloc_base(size, sizeof(struct slb_entry), - ppc64_rma_size)); + slb_ptr = memblock_alloc_try_nid_raw(size, sizeof(struct slb_entry), + MEMBLOCK_LOW_LIMIT, ppc64_rma_size, + NUMA_NO_NODE); + if (!slb_ptr) + panic("Failed to allocate %zu bytes below %pa for slb area\n", + size, &ppc64_rma_size); + for_each_possible_cpu(i) paca_ptrs[i]->mce_faulty_slbs = slb_ptr + (mmu_slb_size * i); #endif diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c index a5b40d1460f1..25bc25fe0d93 100644 --- a/arch/powerpc/sysdev/dart_iommu.c +++ b/arch/powerpc/sysdev/dart_iommu.c @@ -251,8 +251,11 @@ static void allocate_dart(void) * 16MB (1 << 24) alignment. We allocate a full 16Mb chuck since we * will blow up an entire large page anyway in the kernel mapping. */ - dart_tablebase = __va(memblock_alloc_base(1UL<<24, - 1UL<<24, 0x80000000L)); + dart_tablebase = memblock_alloc_try_nid_raw(SZ_16M, SZ_16M, + MEMBLOCK_LOW_LIMIT, SZ_2G, + NUMA_NO_NODE); + if (!dart_tablebase) + panic("Failed to allocate 16MB below 2GB for DART table\n"); /* There is no point scanning the DART space for leaks*/ kmemleak_no_scan((void *)dart_tablebase); diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 918be816b097..c8a1b26489f5 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -1068,13 +1068,11 @@ int fsl_pci_mcheck_exception(struct pt_regs *regs) addr += mfspr(SPRN_MCAR); if (is_in_pci_mem_space(addr)) { - if (user_mode(regs)) { - pagefault_disable(); - ret = get_user(inst, (__u32 __user *)regs->nip); - pagefault_enable(); - } else { + if (user_mode(regs)) + ret = probe_user_read(&inst, (void __user *)regs->nip, + sizeof(inst)); + else ret = probe_kernel_address((void *)regs->nip, inst); - } if (!ret && mcheck_handle_load(regs, inst)) { regs->nip += 4; |