From ddc58f27f9eee9117219936f77e90ad5b2e00e96 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 15 Jan 2016 16:52:56 -0800 Subject: mm: drop tail page refcounting Tail page refcounting is utterly complicated and painful to support. It uses ->_mapcount on tail pages to store how many times this page is pinned. get_page() bumps ->_mapcount on tail page in addition to ->_count on head. This information is required by split_huge_page() to be able to distribute pins from head of compound page to tails during the split. We will need ->_mapcount to account PTE mappings of subpages of the compound page. We eliminate need in current meaning of ->_mapcount in tail pages by forbidding split entirely if the page is pinned. The only user of tail page refcounting is THP which is marked BROKEN for now. Let's drop all this mess. It makes get_page() and put_page() much simpler. Signed-off-by: Kirill A. Shutemov Tested-by: Sasha Levin Tested-by: Aneesh Kumar K.V Acked-by: Vlastimil Babka Acked-by: Jerome Marchand Cc: Andrea Arcangeli Cc: Hugh Dickins Cc: Dave Hansen Cc: Mel Gorman Cc: Rik van Riel Cc: Naoya Horiguchi Cc: Steve Capper Cc: Johannes Weiner Cc: Michal Hocko Cc: Christoph Lameter Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/mm/hugetlbpage.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) (limited to 'arch/powerpc/mm/hugetlbpage.c') diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 61b8b7ccea4f..6bd3afa775d3 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -999,7 +999,7 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, { unsigned long mask; unsigned long pte_end; - struct page *head, *page, *tail; + struct page *head, *page; pte_t pte; int refs; @@ -1022,7 +1022,6 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, head = pte_page(pte); page = head + ((addr & (sz-1)) >> PAGE_SHIFT); - tail = page; do { VM_BUG_ON(compound_head(page) != head); pages[*nr] = page; @@ -1044,15 +1043,5 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, return 0; } - /* - * Any tail page need their mapcount reference taken before we - * return. - */ - while (refs--) { - if (PageTail(tail)) - get_huge_page_tail(tail); - tail++; - } - return 1; } -- cgit v1.2.3 From 7aa9a23c69eae5bfba4f1f92c58d89edc334c8ae Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 15 Jan 2016 16:53:21 -0800 Subject: powerpc, thp: remove infrastructure for handling splitting PMDs With new refcounting we don't need to mark PMDs splitting. Let's drop code to handle this. pmdp_splitting_flush() is not needed too: on splitting PMD we will do pmdp_clear_flush() + set_pte_at(). pmdp_clear_flush() will do IPI as needed for fast_gup. Signed-off-by: Kirill A. Shutemov Tested-by: Aneesh Kumar K.V Reviewed-by: Aneesh Kumar K.V Cc: Sasha Levin Cc: Jerome Marchand Cc: Vlastimil Babka Cc: Andrea Arcangeli Cc: Hugh Dickins Cc: Dave Hansen Cc: Mel Gorman Cc: Rik van Riel Cc: Naoya Horiguchi Cc: Steve Capper Cc: Johannes Weiner Cc: Michal Hocko Cc: Christoph Lameter Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/include/asm/book3s/64/hash-64k.h | 12 ------- arch/powerpc/include/asm/book3s/64/hash.h | 10 ++---- arch/powerpc/include/asm/book3s/64/pgtable.h | 4 --- arch/powerpc/mm/hugepage-hash64.c | 3 -- arch/powerpc/mm/hugetlbpage.c | 4 --- arch/powerpc/mm/pgtable_64.c | 49 --------------------------- 6 files changed, 2 insertions(+), 80 deletions(-) (limited to 'arch/powerpc/mm/hugetlbpage.c') diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h index 9e55e3b1fef0..849bbec80f7b 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -256,13 +256,6 @@ static inline int pmd_trans_huge(pmd_t pmd) (_PAGE_PTE | _PAGE_THP_HUGE)); } -static inline int pmd_trans_splitting(pmd_t pmd) -{ - if (pmd_trans_huge(pmd)) - return pmd_val(pmd) & _PAGE_SPLITTING; - return 0; -} - static inline int pmd_large(pmd_t pmd) { return !!(pmd_val(pmd) & _PAGE_PTE); @@ -273,11 +266,6 @@ static inline pmd_t pmd_mknotpresent(pmd_t pmd) return __pmd(pmd_val(pmd) & ~_PAGE_PRESENT); } -static inline pmd_t pmd_mksplitting(pmd_t pmd) -{ - return __pmd(pmd_val(pmd) | _PAGE_SPLITTING); -} - #define __HAVE_ARCH_PMD_SAME static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b) { diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index 2ff8b3df553d..06f17e778c27 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -40,11 +40,6 @@ #define _PAGE_SOFT_DIRTY 0x00000 #endif -/* - * THP pages can't be special. So use the _PAGE_SPECIAL - */ -#define _PAGE_SPLITTING _PAGE_SPECIAL - /* * We need to differentiate between explicit huge page and THP huge * page, since THP huge page also need to track real subpage details @@ -54,9 +49,8 @@ /* * set of bits not changed in pmd_modify. */ -#define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | \ - _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPLITTING | \ - _PAGE_THP_HUGE | _PAGE_PTE | _PAGE_SOFT_DIRTY) +#define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \ + _PAGE_ACCESSED | _PAGE_THP_HUGE) #ifdef CONFIG_PPC_64K_PAGES #include diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index b3a5badab69f..da11c4ac8a09 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -266,10 +266,6 @@ extern int pmdp_clear_flush_young(struct vm_area_struct *vma, extern pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp); -#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH -extern void pmdp_splitting_flush(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp); - extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp); #define pmdp_collapse_flush pmdp_collapse_flush diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c index baf1301ded0c..49b152b0f926 100644 --- a/arch/powerpc/mm/hugepage-hash64.c +++ b/arch/powerpc/mm/hugepage-hash64.c @@ -39,9 +39,6 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, /* If PMD busy, retry the access */ if (unlikely(old_pmd & _PAGE_BUSY)) return 0; - /* If PMD is trans splitting retry the access */ - if (unlikely(old_pmd & _PAGE_SPLITTING)) - return 0; /* If PMD permissions don't match, take page fault */ if (unlikely(access & ~old_pmd)) return 1; diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 6bd3afa775d3..744e24bcb85c 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -958,10 +958,6 @@ pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, /* * A hugepage collapse is captured by pmd_none, because * it mark the pmd none and do a hpte invalidate. - * - * We don't worry about pmd_trans_splitting here, The - * caller if it needs to handle the splitting case - * should check for that. */ if (pmd_none(pmd)) return NULL; diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index ea6bc31debb0..3124a20d0fab 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -603,55 +603,6 @@ int pmdp_clear_flush_young(struct vm_area_struct *vma, return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp); } -/* - * We mark the pmd splitting and invalidate all the hpte - * entries for this hugepage. - */ -void pmdp_splitting_flush(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp) -{ - unsigned long old, tmp; - - VM_BUG_ON(address & ~HPAGE_PMD_MASK); - -#ifdef CONFIG_DEBUG_VM - WARN_ON(!pmd_trans_huge(*pmdp)); - assert_spin_locked(&vma->vm_mm->page_table_lock); -#endif - -#ifdef PTE_ATOMIC_UPDATES - - __asm__ __volatile__( - "1: ldarx %0,0,%3\n\ - andi. %1,%0,%6\n\ - bne- 1b \n\ - oris %1,%0,%4@h \n\ - stdcx. %1,0,%3 \n\ - bne- 1b" - : "=&r" (old), "=&r" (tmp), "=m" (*pmdp) - : "r" (pmdp), "i" (_PAGE_SPLITTING), "m" (*pmdp), "i" (_PAGE_BUSY) - : "cc" ); -#else - old = pmd_val(*pmdp); - *pmdp = __pmd(old | _PAGE_SPLITTING); -#endif - /* - * If we didn't had the splitting flag set, go and flush the - * HPTE entries. - */ - trace_hugepage_splitting(address, old); - if (!(old & _PAGE_SPLITTING)) { - /* We need to flush the hpte */ - if (old & _PAGE_HASHPTE) - hpte_do_hugepage_flush(vma->vm_mm, address, pmdp, old); - } - /* - * This ensures that generic code that rely on IRQ disabling - * to prevent a parallel THP split work as expected. - */ - kick_all_cpus_sync(); -} - /* * We want to put the pgtable in pmd and use pgtable for tracking * the base page size hptes -- cgit v1.2.3