From ea1754a084760e68886f5b725c8eaada9cc57155 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 1 Apr 2016 15:29:48 +0300 Subject: mm, fs: remove remaining PAGE_CACHE_* and page_cache_{get,release} usage Mostly direct substitution with occasional adjustment or removing outdated comments. Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/mm.h') diff --git a/include/linux/mm.h b/include/linux/mm.h index ed6407d1b7b5..ffcff53e3b2b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -623,7 +623,7 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address, * * A page may belong to an inode's memory mapping. In this case, page->mapping * is the pointer to the inode, and page->index is the file offset of the page, - * in units of PAGE_CACHE_SIZE. + * in units of PAGE_SIZE. * * If pagecache pages are not associated with an inode, they are said to be * anonymous pages. These may become associated with the swapcache, and in that -- cgit v1.2.3 From c12d2da56d0e07d230968ee2305aaa86b93a6832 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 4 Apr 2016 10:24:58 +0200 Subject: mm/gup: Remove the macro overload API migration helpers from the get_user*() APIs The pkeys changes brought about a truly hideous set of macros in: cde70140fed8 ("mm/gup: Overload get_user_pages() functions") ... which macros are (ab-)using the fact that __VA_ARGS__ can be used to shift parameter positions in macro arguments without breaking the build and so can be used to call separate C functions depending on the number of arguments of the macro. This allowed easy migration of these 3 GUP APIs, as both these variants worked at the C level: old: ret = get_user_pages(current, current->mm, address, 1, 1, 0, &page, NULL); new: ret = get_user_pages(address, 1, 1, 0, &page, NULL); ... 
while we also generated a (functionally harmless but noticeable) build time warning if the old API was used. As there are over 300 uses of these APIs, this trick eased the migration of the API and avoided excessive migration pain in linux-next. Now, with its work done, get rid of all of that complication and ugliness: 3 files changed, 16 insertions(+), 140 deletions(-) ... where the linecount of the migration hack was further inflated by the fact that there are NOMMU variants of these GUP APIs as well. Much of the conversion was done in linux-next over the past couple of months, and Linus recently removed all remaining old API uses from the upstream tree in the following upstream commit: cb107161df3c ("Convert straggling drivers to new six-argument get_user_pages()") There was one more old-API usage in mm/gup.c, in the CONFIG_HAVE_GENERIC_RCU_GUP code path that ARM, ARM64 and PowerPC use. After this commit any old API usage will break the build. [ Also fixed a PowerPC/HAVE_GENERIC_RCU_GUP warning reported by Stephen Rothwell. 
] Cc: Andrew Morton Cc: Dave Hansen Cc: Dave Hansen Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephen Rothwell Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar --- include/linux/mm.h | 64 +++--------------------------------------------------- mm/gup.c | 52 ++++++-------------------------------------- mm/nommu.c | 44 ++++++------------------------------- 3 files changed, 17 insertions(+), 143 deletions(-) (limited to 'include/linux/mm.h') diff --git a/include/linux/mm.h b/include/linux/mm.h index ed6407d1b7b5..d6508a025a31 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1250,78 +1250,20 @@ long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, struct vm_area_struct **vmas); -long get_user_pages6(unsigned long start, unsigned long nr_pages, +long get_user_pages(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, struct vm_area_struct **vmas); -long get_user_pages_locked6(unsigned long start, unsigned long nr_pages, +long get_user_pages_locked(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, int *locked); long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, unsigned int gup_flags); -long get_user_pages_unlocked5(unsigned long start, unsigned long nr_pages, +long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages); int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages); -/* suppress warnings from use in EXPORT_SYMBOL() */ -#ifndef __DISABLE_GUP_DEPRECATED -#define __gup_deprecated __deprecated -#else -#define __gup_deprecated -#endif -/* - * These macros provide backward-compatibility with the old - * get_user_pages() 
variants which took tsk/mm. These - * functions/macros provide both compile-time __deprecated so we - * can catch old-style use and not break the build. The actual - * functions also have WARN_ON()s to let us know at runtime if - * the get_user_pages() should have been the "remote" variant. - * - * These are hideous, but temporary. - * - * If you run into one of these __deprecated warnings, look - * at how you are calling get_user_pages(). If you are calling - * it with current/current->mm as the first two arguments, - * simply remove those arguments. The behavior will be the same - * as it is now. If you are calling it on another task, use - * get_user_pages_remote() instead. - * - * Any questions? Ask Dave Hansen - */ -long -__gup_deprecated -get_user_pages8(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, - struct vm_area_struct **vmas); -#define GUP_MACRO(_1, _2, _3, _4, _5, _6, _7, _8, get_user_pages, ...) \ - get_user_pages -#define get_user_pages(...) GUP_MACRO(__VA_ARGS__, \ - get_user_pages8, x, \ - get_user_pages6, x, x, x, x, x)(__VA_ARGS__) - -__gup_deprecated -long get_user_pages_locked8(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, - int *locked); -#define GUPL_MACRO(_1, _2, _3, _4, _5, _6, _7, _8, get_user_pages_locked, ...) \ - get_user_pages_locked -#define get_user_pages_locked(...) GUPL_MACRO(__VA_ARGS__, \ - get_user_pages_locked8, x, \ - get_user_pages_locked6, x, x, x, x)(__VA_ARGS__) - -__gup_deprecated -long get_user_pages_unlocked7(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages); -#define GUPU_MACRO(_1, _2, _3, _4, _5, _6, _7, get_user_pages_unlocked, ...) \ - get_user_pages_unlocked -#define get_user_pages_unlocked(...) 
GUPU_MACRO(__VA_ARGS__, \ - get_user_pages_unlocked7, x, \ - get_user_pages_unlocked5, x, x, x, x)(__VA_ARGS__) - /* Container for pinned pfns / pages */ struct frame_vector { unsigned int nr_allocated; /* Number of frames we have space for */ diff --git a/mm/gup.c b/mm/gup.c index 7f1c4fb77cfa..63d5e002768b 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1,4 +1,3 @@ -#define __DISABLE_GUP_DEPRECATED 1 #include #include #include @@ -839,7 +838,7 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk, * if (locked) * up_read(&mm->mmap_sem); */ -long get_user_pages_locked6(unsigned long start, unsigned long nr_pages, +long get_user_pages_locked(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, int *locked) { @@ -847,7 +846,7 @@ long get_user_pages_locked6(unsigned long start, unsigned long nr_pages, write, force, pages, NULL, locked, true, FOLL_TOUCH); } -EXPORT_SYMBOL(get_user_pages_locked6); +EXPORT_SYMBOL(get_user_pages_locked); /* * Same as get_user_pages_unlocked(...., FOLL_TOUCH) but it allows to @@ -892,13 +891,13 @@ EXPORT_SYMBOL(__get_user_pages_unlocked); * or if "force" shall be set to 1 (get_user_pages_fast misses the * "force" parameter). */ -long get_user_pages_unlocked5(unsigned long start, unsigned long nr_pages, +long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages) { return __get_user_pages_unlocked(current, current->mm, start, nr_pages, write, force, pages, FOLL_TOUCH); } -EXPORT_SYMBOL(get_user_pages_unlocked5); +EXPORT_SYMBOL(get_user_pages_unlocked); /* * get_user_pages_remote() - pin user pages in memory @@ -972,7 +971,7 @@ EXPORT_SYMBOL(get_user_pages_remote); * and mm being operated on are the current task's. We also * obviously don't pass FOLL_REMOTE in here. 
*/ -long get_user_pages6(unsigned long start, unsigned long nr_pages, +long get_user_pages(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, struct vm_area_struct **vmas) { @@ -980,7 +979,7 @@ long get_user_pages6(unsigned long start, unsigned long nr_pages, write, force, pages, vmas, NULL, false, FOLL_TOUCH); } -EXPORT_SYMBOL(get_user_pages6); +EXPORT_SYMBOL(get_user_pages); /** * populate_vma_page_range() - populate a range of pages in the vma. @@ -1491,7 +1490,6 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages) { - struct mm_struct *mm = current->mm; int nr, ret; start &= PAGE_MASK; @@ -1503,8 +1501,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, start += nr << PAGE_SHIFT; pages += nr; - ret = get_user_pages_unlocked(current, mm, start, - nr_pages - nr, write, 0, pages); + ret = get_user_pages_unlocked(start, nr_pages - nr, write, 0, pages); /* Have to be a bit careful with return values */ if (nr > 0) { @@ -1519,38 +1516,3 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, } #endif /* CONFIG_HAVE_GENERIC_RCU_GUP */ - -long get_user_pages8(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, - struct vm_area_struct **vmas) -{ - WARN_ONCE(tsk != current, "get_user_pages() called on remote task"); - WARN_ONCE(mm != current->mm, "get_user_pages() called on remote mm"); - - return get_user_pages6(start, nr_pages, write, force, pages, vmas); -} -EXPORT_SYMBOL(get_user_pages8); - -long get_user_pages_locked8(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, int *locked) -{ - WARN_ONCE(tsk != current, "get_user_pages_locked() called on remote task"); - WARN_ONCE(mm != current->mm, 
"get_user_pages_locked() called on remote mm"); - - return get_user_pages_locked6(start, nr_pages, write, force, pages, locked); -} -EXPORT_SYMBOL(get_user_pages_locked8); - -long get_user_pages_unlocked7(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages) -{ - WARN_ONCE(tsk != current, "get_user_pages_unlocked() called on remote task"); - WARN_ONCE(mm != current->mm, "get_user_pages_unlocked() called on remote mm"); - - return get_user_pages_unlocked5(start, nr_pages, write, force, pages); -} -EXPORT_SYMBOL(get_user_pages_unlocked7); - diff --git a/mm/nommu.c b/mm/nommu.c index de8b6b6580c1..bf94913dbbb6 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -15,8 +15,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#define __DISABLE_GUP_DEPRECATED - #include #include #include @@ -161,7 +159,7 @@ finish_or_fault: * slab page or a secondary page from a compound page * - don't permit access to VMAs that don't support it, such as I/O mappings */ -long get_user_pages6(unsigned long start, unsigned long nr_pages, +long get_user_pages(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, struct vm_area_struct **vmas) { @@ -175,15 +173,15 @@ long get_user_pages6(unsigned long start, unsigned long nr_pages, return __get_user_pages(current, current->mm, start, nr_pages, flags, pages, vmas, NULL); } -EXPORT_SYMBOL(get_user_pages6); +EXPORT_SYMBOL(get_user_pages); -long get_user_pages_locked6(unsigned long start, unsigned long nr_pages, +long get_user_pages_locked(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, int *locked) { - return get_user_pages6(start, nr_pages, write, force, pages, NULL); + return get_user_pages(start, nr_pages, write, force, pages, NULL); } -EXPORT_SYMBOL(get_user_pages_locked6); +EXPORT_SYMBOL(get_user_pages_locked); long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, unsigned 
long start, unsigned long nr_pages, @@ -199,13 +197,13 @@ long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, } EXPORT_SYMBOL(__get_user_pages_unlocked); -long get_user_pages_unlocked5(unsigned long start, unsigned long nr_pages, +long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages) { return __get_user_pages_unlocked(current, current->mm, start, nr_pages, write, force, pages, 0); } -EXPORT_SYMBOL(get_user_pages_unlocked5); +EXPORT_SYMBOL(get_user_pages_unlocked); /** * follow_pfn - look up PFN at a user virtual address @@ -1989,31 +1987,3 @@ static int __meminit init_admin_reserve(void) return 0; } subsys_initcall(init_admin_reserve); - -long get_user_pages8(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, - struct vm_area_struct **vmas) -{ - return get_user_pages6(start, nr_pages, write, force, pages, vmas); -} -EXPORT_SYMBOL(get_user_pages8); - -long get_user_pages_locked8(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, - int *locked) -{ - return get_user_pages_locked6(start, nr_pages, write, - force, pages, locked); -} -EXPORT_SYMBOL(get_user_pages_locked8); - -long get_user_pages_unlocked7(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages) -{ - return get_user_pages_unlocked5(start, nr_pages, write, force, pages); -} -EXPORT_SYMBOL(get_user_pages_unlocked7); - -- cgit v1.2.3 From 66ee95d16a7f1b7b4f1dd74a2d81c6e19dc29a14 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Thu, 28 Apr 2016 16:18:24 -0700 Subject: mm: exclude HugeTLB pages from THP page_mapped() logic HugeTLB pages cannot be split, so we use the compound_mapcount to track rmaps. 
Currently page_mapped() will check the compound_mapcount, but will also go through the constituent pages of a THP compound page and query the individual _mapcount's too. Unfortunately, page_mapped() does not distinguish between HugeTLB and THP compound pages and assumes that a compound page always needs to have HPAGE_PMD_NR pages queried. For most cases when dealing with HugeTLB this is just inefficient, but for scenarios where the HugeTLB page size is less than the pmd block size (e.g. when using contiguous bit on ARM) this can lead to crashes. This patch adjusts the page_mapped function such that we skip the unnecessary THP reference checks for HugeTLB pages. Fixes: e1534ae95004 ("mm: differentiate page_mapped() from page_mapcount() for compound pages") Signed-off-by: Steve Capper Acked-by: Kirill A. Shutemov Cc: Will Deacon Cc: Catalin Marinas Cc: Michal Hocko Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/mm.h') diff --git a/include/linux/mm.h b/include/linux/mm.h index a55e5be0894f..79b6c18d0a38 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1031,6 +1031,8 @@ static inline bool page_mapped(struct page *page) page = compound_head(page); if (atomic_read(compound_mapcount_ptr(page)) >= 0) return true; + if (PageHuge(page)) + return false; for (i = 0; i < hpage_nr_pages(page); i++) { if (atomic_read(&page[i]._mapcount) >= 0) return true; -- cgit v1.2.3 From 28093f9f34cedeaea0f481c58446d9dac6dd620f Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Thu, 28 Apr 2016 16:18:35 -0700 Subject: numa: fix /proc/<pid>/numa_maps for THP In gather_pte_stats() a THP pmd is cast into a pte, which is wrong because the layouts may differ depending on the architecture. On s390 this will lead to inaccurate numa_maps accounting in /proc because of misguided pte_present() and pte_dirty() checks on the fake pte. 
On other architectures pte_present() and pte_dirty() may work by chance, but there may be an issue with direct-access (dax) mappings w/o underlying struct pages when HAVE_PTE_SPECIAL is set and THP is available. In vm_normal_page() the fake pte will be checked with pte_special() and because there is no "special" bit in a pmd, this will always return false and the VM_PFNMAP | VM_MIXEDMAP checking will be skipped. On dax mappings w/o struct pages, an invalid struct page pointer would then be returned that can crash the kernel. This patch fixes the numa_maps THP handling by introducing new "_pmd" variants of the can_gather_numa_stats() and vm_normal_page() functions. Signed-off-by: Gerald Schaefer Cc: Naoya Horiguchi Cc: "Kirill A . Shutemov" Cc: Konstantin Khlebnikov Cc: Michal Hocko Cc: Vlastimil Babka Cc: Jerome Marchand Cc: Johannes Weiner Cc: Dave Hansen Cc: Mel Gorman Cc: Dan Williams Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Michael Holzheu Cc: [4.3+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 33 ++++++++++++++++++++++++++++++--- include/linux/mm.h | 2 ++ mm/memory.c | 40 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 72 insertions(+), 3 deletions(-) (limited to 'include/linux/mm.h') diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 229cb546bee0..541583510cfb 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1518,6 +1518,32 @@ static struct page *can_gather_numa_stats(pte_t pte, struct vm_area_struct *vma, return page; } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static struct page *can_gather_numa_stats_pmd(pmd_t pmd, + struct vm_area_struct *vma, + unsigned long addr) +{ + struct page *page; + int nid; + + if (!pmd_present(pmd)) + return NULL; + + page = vm_normal_page_pmd(vma, addr, pmd); + if (!page) + return NULL; + + if (PageReserved(page)) + return NULL; + + nid = page_to_nid(page); + if (!node_isset(nid, node_states[N_MEMORY])) + return NULL; + + return page; +} +#endif + static int 
gather_pte_stats(pmd_t *pmd, unsigned long addr, unsigned long end, struct mm_walk *walk) { @@ -1527,14 +1553,14 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, pte_t *orig_pte; pte_t *pte; +#ifdef CONFIG_TRANSPARENT_HUGEPAGE ptl = pmd_trans_huge_lock(pmd, vma); if (ptl) { - pte_t huge_pte = *(pte_t *)pmd; struct page *page; - page = can_gather_numa_stats(huge_pte, vma, addr); + page = can_gather_numa_stats_pmd(*pmd, vma, addr); if (page) - gather_stats(page, md, pte_dirty(huge_pte), + gather_stats(page, md, pmd_dirty(*pmd), HPAGE_PMD_SIZE/PAGE_SIZE); spin_unlock(ptl); return 0; @@ -1542,6 +1568,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, if (pmd_trans_unstable(pmd)) return 0; +#endif orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); do { struct page *page = can_gather_numa_stats(*pte, vma, addr); diff --git a/include/linux/mm.h b/include/linux/mm.h index 79b6c18d0a38..864d7221de84 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1140,6 +1140,8 @@ struct zap_details { struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_t pte); +struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, + pmd_t pmd); int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, unsigned long size); diff --git a/mm/memory.c b/mm/memory.c index 93897f23cc11..305537fc8640 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -789,6 +789,46 @@ out: return pfn_to_page(pfn); } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, + pmd_t pmd) +{ + unsigned long pfn = pmd_pfn(pmd); + + /* + * There is no pmd_special() but there may be special pmds, e.g. + * in a direct-access (dax) mapping, so let's just replicate the + * !HAVE_PTE_SPECIAL case from vm_normal_page() here. 
+ */ + if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) { + if (vma->vm_flags & VM_MIXEDMAP) { + if (!pfn_valid(pfn)) + return NULL; + goto out; + } else { + unsigned long off; + off = (addr - vma->vm_start) >> PAGE_SHIFT; + if (pfn == vma->vm_pgoff + off) + return NULL; + if (!is_cow_mapping(vma->vm_flags)) + return NULL; + } + } + + if (is_zero_pfn(pfn)) + return NULL; + if (unlikely(pfn > highest_memmap_pfn)) + return NULL; + + /* + * NOTE! We still have PageReserved() pages in the page tables. + * eg. VDSO mappings can cause them to exist. + */ +out: + return pfn_to_page(pfn); +} +#endif + /* * copy one vm_area from one task to the other. Assumes the page tables * already present in the new task to be cleared in the whole range -- cgit v1.2.3