From f1e61557f0230d51a3df8d825f2c156e75563bff Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 6 Nov 2015 16:29:50 -0800 Subject: mm: pack compound_dtor and compound_order into one word in struct page The patch halves space occupied by compound_dtor and compound_order in struct page. For compound_order, it's trivial long -> short conversion. For get_compound_page_dtor(), we now use hardcoded table for destructor lookup and store its index in the struct page instead of direct pointer to destructor. It shouldn't be a big trouble to maintain the table: we have only two destructor and NULL currently. This patch free up one word in tail pages for reuse. This is preparation for the next patch. Signed-off-by: Kirill A. Shutemov Reviewed-by: Michal Hocko Acked-by: Vlastimil Babka Reviewed-by: Andrea Arcangeli Cc: "Paul E. McKenney" Cc: Andi Kleen Cc: Aneesh Kumar K.V Cc: Christoph Lameter Cc: David Rientjes Cc: Hugh Dickins Cc: Joonsoo Kim Cc: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) (limited to 'include/linux/mm.h') diff --git a/include/linux/mm.h b/include/linux/mm.h index 906c46a05707..6581c21320cb 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -568,18 +568,32 @@ int split_free_page(struct page *page); /* * Compound pages have a destructor function. Provide a * prototype for that function and accessor functions. - * These are _only_ valid on the head of a PG_compound page. + * These are _only_ valid on the head of a compound page. */ +typedef void compound_page_dtor(struct page *); + +/* Keep the enum in sync with compound_page_dtors array in mm/page_alloc.c */ +enum compound_dtor_id { + NULL_COMPOUND_DTOR, + COMPOUND_PAGE_DTOR, +#ifdef CONFIG_HUGETLB_PAGE + HUGETLB_PAGE_DTOR, +#endif + NR_COMPOUND_DTORS, +}; +extern compound_page_dtor * const compound_page_dtors[]; static inline void set_compound_page_dtor(struct page *page, - compound_page_dtor *dtor) + enum compound_dtor_id compound_dtor) { - page[1].compound_dtor = dtor; + VM_BUG_ON_PAGE(compound_dtor >= NR_COMPOUND_DTORS, page); + page[1].compound_dtor = compound_dtor; } static inline compound_page_dtor *get_compound_page_dtor(struct page *page) { - return page[1].compound_dtor; + VM_BUG_ON_PAGE(page[1].compound_dtor >= NR_COMPOUND_DTORS, page); + return compound_page_dtors[page[1].compound_dtor]; } static inline int compound_order(struct page *page) @@ -589,7 +603,7 @@ static inline int compound_order(struct page *page) return page[1].compound_order; } -static inline void set_compound_order(struct page *page, unsigned long order) +static inline void set_compound_order(struct page *page, unsigned int order) { page[1].compound_order = order; } -- cgit v1.2.3 From 1d798ca3f16437c71ff63e36597ff07f9c12e4d6 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 6 Nov 2015 16:29:54 -0800 Subject: mm: make compound_head() robust Hugh has pointed that compound_head() call can be unsafe in some context. There's one example: CPU0 CPU1 isolate_migratepages_block() page_count() compound_head() !!PageTail() == true put_page() tail->first_page = NULL head = tail->first_page alloc_pages(__GFP_COMP) prep_compound_page() tail->first_page = head __SetPageTail(p); !!PageTail() == true The race is pure theoretical. I don't it's possible to trigger it in practice. But who knows. We can fix the race by changing how encode PageTail() and compound_head() within struct page to be able to update them in one shot. The patch introduces page->compound_head into third double word block in front of compound_dtor and compound_order. Bit 0 encodes PageTail() and the rest bits are pointer to head page if bit zero is set. The patch moves page->pmd_huge_pte out of word, just in case if an architecture defines pgtable_t into something what can have the bit 0 set. hugetlb_cgroup uses page->lru.next in the second tail page to store pointer struct hugetlb_cgroup. The patch switch it to use page->private in the second tail page instead. The space is free since ->first_page is removed from the union. The patch also opens possibility to remove HUGETLB_CGROUP_MIN_ORDER limitation, since there's now space in first tail page to store struct hugetlb_cgroup pointer. But that's out of scope of the patch. That means page->compound_head shares storage space with: - page->lru.next; - page->next; - page->rcu_head.next; That's too long list to be absolutely sure, but looks like nobody uses bit 0 of the word. page->rcu_head.next guaranteed[1] to have bit 0 clean as long as we use call_rcu(), call_rcu_bh(), call_rcu_sched(), or call_srcu(). But future call_rcu_lazy() is not allowed as it makes use of the bit and we can get false positive PageTail(). [1] http://lkml.kernel.org/g/20150827163634.GD4029@linux.vnet.ibm.com Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Reviewed-by: Andrea Arcangeli Cc: Hugh Dickins Cc: David Rientjes Cc: Vlastimil Babka Acked-by: Paul E. McKenney Cc: Aneesh Kumar K.V Cc: Andi Kleen Cc: Christoph Lameter Cc: Joonsoo Kim Cc: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/vm/split_page_table_lock | 4 +- arch/xtensa/configs/iss_defconfig | 1 - include/linux/hugetlb_cgroup.h | 4 +- include/linux/mm.h | 53 ++-------------------- include/linux/mm_types.h | 22 ++++++++-- include/linux/page-flags.h | 80 ++++++++-------------------------- mm/Kconfig | 12 ----- mm/debug.c | 5 --- mm/huge_memory.c | 3 +- mm/hugetlb.c | 8 +--- mm/hugetlb_cgroup.c | 2 +- mm/internal.h | 4 +- mm/memory-failure.c | 7 --- mm/page_alloc.c | 48 ++++++++++++-------- mm/swap.c | 4 +- 15 files changed, 82 insertions(+), 175 deletions(-) (limited to 'include/linux/mm.h') diff --git a/Documentation/vm/split_page_table_lock b/Documentation/vm/split_page_table_lock index 6dea4fd5c961..62842a857dab 100644 --- a/Documentation/vm/split_page_table_lock +++ b/Documentation/vm/split_page_table_lock @@ -54,8 +54,8 @@ everything required is done by pgtable_page_ctor() and pgtable_page_dtor(), which must be called on PTE table allocation / freeing. Make sure the architecture doesn't use slab allocator for page table -allocation: slab uses page->slab_cache and page->first_page for its pages. -These fields share storage with page->ptl. +allocation: slab uses page->slab_cache for its pages. +This field shares storage with page->ptl. PMD split lock only makes sense if you have more than two page table levels. diff --git a/arch/xtensa/configs/iss_defconfig b/arch/xtensa/configs/iss_defconfig index f3dfe0d921c2..44c6764d9146 100644 --- a/arch/xtensa/configs/iss_defconfig +++ b/arch/xtensa/configs/iss_defconfig @@ -169,7 +169,6 @@ CONFIG_FLATMEM_MANUAL=y # CONFIG_SPARSEMEM_MANUAL is not set CONFIG_FLATMEM=y CONFIG_FLAT_NODE_MEM_MAP=y -CONFIG_PAGEFLAGS_EXTENDED=y CONFIG_SPLIT_PTLOCK_CPUS=4 # CONFIG_PHYS_ADDR_T_64BIT is not set CONFIG_ZONE_DMA_FLAG=1 diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h index 7edd30515298..24154c26d469 100644 --- a/include/linux/hugetlb_cgroup.h +++ b/include/linux/hugetlb_cgroup.h @@ -32,7 +32,7 @@ static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page) if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER) return NULL; - return (struct hugetlb_cgroup *)page[2].lru.next; + return (struct hugetlb_cgroup *)page[2].private; } static inline @@ -42,7 +42,7 @@ int set_hugetlb_cgroup(struct page *page, struct hugetlb_cgroup *h_cg) if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER) return -1; - page[2].lru.next = (void *)h_cg; + page[2].private = (unsigned long)h_cg; return 0; } diff --git a/include/linux/mm.h b/include/linux/mm.h index 6581c21320cb..9671b6f23eda 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -430,46 +430,6 @@ static inline void compound_unlock_irqrestore(struct page *page, #endif } -static inline struct page *compound_head_by_tail(struct page *tail) -{ - struct page *head = tail->first_page; - - /* - * page->first_page may be a dangling pointer to an old - * compound page, so recheck that it is still a tail - * page before returning. - */ - smp_rmb(); - if (likely(PageTail(tail))) - return head; - return tail; -} - -/* - * Since either compound page could be dismantled asynchronously in THP - * or we access asynchronously arbitrary positioned struct page, there - * would be tail flag race. To handle this race, we should call - * smp_rmb() before checking tail flag. compound_head_by_tail() did it. - */ -static inline struct page *compound_head(struct page *page) -{ - if (unlikely(PageTail(page))) - return compound_head_by_tail(page); - return page; -} - -/* - * If we access compound page synchronously such as access to - * allocated page, there is no need to handle tail flag race, so we can - * check tail flag directly without any synchronization primitive. - */ -static inline struct page *compound_head_fast(struct page *page) -{ - if (unlikely(PageTail(page))) - return page->first_page; - return page; -} - /* * The atomic page->_mapcount, starts from -1: so that transitions * both from it and to it can be tracked, using atomic_inc_and_test @@ -518,7 +478,7 @@ static inline void get_huge_page_tail(struct page *page) VM_BUG_ON_PAGE(!PageTail(page), page); VM_BUG_ON_PAGE(page_mapcount(page) < 0, page); VM_BUG_ON_PAGE(atomic_read(&page->_count) != 0, page); - if (compound_tail_refcounted(page->first_page)) + if (compound_tail_refcounted(compound_head(page))) atomic_inc(&page->_mapcount); } @@ -541,13 +501,7 @@ static inline struct page *virt_to_head_page(const void *x) { struct page *page = virt_to_page(x); - /* - * We don't need to worry about synchronization of tail flag - * when we call virt_to_head_page() since it is only called for - * already allocated page and this page won't be freed until - * this virt_to_head_page() is finished. So use _fast variant. - */ - return compound_head_fast(page); + return compound_head(page); } /* @@ -1586,8 +1540,7 @@ static inline bool ptlock_init(struct page *page) * with 0. Make sure nobody took it in use in between. * * It can happen if arch try to use slab for page table allocation: - * slab code uses page->slab_cache and page->first_page (for tail - * pages), which share storage with page->ptl. + * slab code uses page->slab_cache, which share storage with page->ptl. */ VM_BUG_ON_PAGE(*(unsigned long *)&page->ptl, page); if (!ptlock_alloc(page)) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index e334ef79cb43..bb91658c603f 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -111,7 +111,13 @@ struct page { }; }; - /* Third double word block */ + /* + * Third double word block + * + * WARNING: bit 0 of the first word encode PageTail(). That means + * the rest users of the storage space MUST NOT use the bit to + * avoid collision and false-positive PageTail(). + */ union { struct list_head lru; /* Pageout list, eg. active_list * protected by zone->lru_lock ! @@ -132,14 +138,23 @@ struct page { struct rcu_head rcu_head; /* Used by SLAB * when destroying via RCU */ - /* First tail page of compound page */ + /* Tail pages of compound page */ struct { + unsigned long compound_head; /* If bit zero is set */ + + /* First tail page only */ unsigned short int compound_dtor; unsigned short int compound_order; }; #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS - pgtable_t pmd_huge_pte; /* protected by page->ptl */ + struct { + unsigned long __pad; /* do not overlay pmd_huge_pte + * with compound_head to avoid + * possible bit 0 collision. + */ + pgtable_t pmd_huge_pte; /* protected by page->ptl */ + }; #endif }; @@ -160,7 +175,6 @@ struct page { #endif #endif struct kmem_cache *slab_cache; /* SL[AU]B: Pointer to slab */ - struct page *first_page; /* Compound tail pages */ }; #ifdef CONFIG_MEMCG diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index a525e5067484..bb53c7b86315 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -86,12 +86,7 @@ enum pageflags { PG_private, /* If pagecache, has fs-private data */ PG_private_2, /* If pagecache, has fs aux data */ PG_writeback, /* Page is under writeback */ -#ifdef CONFIG_PAGEFLAGS_EXTENDED PG_head, /* A head page */ - PG_tail, /* A tail page */ -#else - PG_compound, /* A compound page */ -#endif PG_swapcache, /* Swap page: swp_entry_t in private */ PG_mappedtodisk, /* Has blocks allocated on-disk */ PG_reclaim, /* To be reclaimed asap */ @@ -398,85 +393,46 @@ static inline void set_page_writeback_keepwrite(struct page *page) test_set_page_writeback_keepwrite(page); } -#ifdef CONFIG_PAGEFLAGS_EXTENDED -/* - * System with lots of page flags available. This allows separate - * flags for PageHead() and PageTail() checks of compound pages so that bit - * tests can be used in performance sensitive paths. PageCompound is - * generally not used in hot code paths except arch/powerpc/mm/init_64.c - * and arch/powerpc/kvm/book3s_64_vio_hv.c which use it to detect huge pages - * and avoid handling those in real mode. - */ __PAGEFLAG(Head, head) CLEARPAGEFLAG(Head, head) -__PAGEFLAG(Tail, tail) -static inline int PageCompound(struct page *page) -{ - return page->flags & ((1L << PG_head) | (1L << PG_tail)); - -} -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -static inline void ClearPageCompound(struct page *page) +static inline int PageTail(struct page *page) { - BUG_ON(!PageHead(page)); - ClearPageHead(page); + return READ_ONCE(page->compound_head) & 1; } -#endif - -#define PG_head_mask ((1L << PG_head)) -#else -/* - * Reduce page flag use as much as possible by overlapping - * compound page flags with the flags used for page cache pages. Possible - * because PageCompound is always set for compound pages and not for - * pages on the LRU and/or pagecache. - */ -TESTPAGEFLAG(Compound, compound) -__SETPAGEFLAG(Head, compound) __CLEARPAGEFLAG(Head, compound) - -/* - * PG_reclaim is used in combination with PG_compound to mark the - * head and tail of a compound page. This saves one page flag - * but makes it impossible to use compound pages for the page cache. - * The PG_reclaim bit would have to be used for reclaim or readahead - * if compound pages enter the page cache. - * - * PG_compound & PG_reclaim => Tail page - * PG_compound & ~PG_reclaim => Head page - */ -#define PG_head_mask ((1L << PG_compound)) -#define PG_head_tail_mask ((1L << PG_compound) | (1L << PG_reclaim)) - -static inline int PageHead(struct page *page) +static inline void set_compound_head(struct page *page, struct page *head) { - return ((page->flags & PG_head_tail_mask) == PG_head_mask); + WRITE_ONCE(page->compound_head, (unsigned long)head + 1); } -static inline int PageTail(struct page *page) +static inline void clear_compound_head(struct page *page) { - return ((page->flags & PG_head_tail_mask) == PG_head_tail_mask); + WRITE_ONCE(page->compound_head, 0); } -static inline void __SetPageTail(struct page *page) +static inline struct page *compound_head(struct page *page) { - page->flags |= PG_head_tail_mask; + unsigned long head = READ_ONCE(page->compound_head); + + if (unlikely(head & 1)) + return (struct page *) (head - 1); + return page; } -static inline void __ClearPageTail(struct page *page) +static inline int PageCompound(struct page *page) { - page->flags &= ~PG_head_tail_mask; -} + return PageHead(page) || PageTail(page); +} #ifdef CONFIG_TRANSPARENT_HUGEPAGE static inline void ClearPageCompound(struct page *page) { - BUG_ON((page->flags & PG_head_tail_mask) != (1 << PG_compound)); - clear_bit(PG_compound, &page->flags); + BUG_ON(!PageHead(page)); + ClearPageHead(page); } #endif -#endif /* !PAGEFLAGS_EXTENDED */ +#define PG_head_mask ((1L << PG_head)) #ifdef CONFIG_HUGETLB_PAGE int PageHuge(struct page *page); diff --git a/mm/Kconfig b/mm/Kconfig index 0d9fdcd01e47..97a4e06b15c0 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -200,18 +200,6 @@ config MEMORY_HOTREMOVE depends on MEMORY_HOTPLUG && ARCH_ENABLE_MEMORY_HOTREMOVE depends on MIGRATION -# -# If we have space for more page flags then we can enable additional -# optimizations and functionality. -# -# Regular Sparsemem takes page flag bits for the sectionid if it does not -# use a virtual memmap. Disable extended page flags for 32 bit platforms -# that require the use of a sectionid in the page flags. -# -config PAGEFLAGS_EXTENDED - def_bool y - depends on 64BIT || SPARSEMEM_VMEMMAP || !SPARSEMEM - # Heavily threaded applications may benefit from splitting the mm-wide # page_table_lock, so that faults on different parts of the user address # space can be handled with less contention: split it at this NR_CPUS. diff --git a/mm/debug.c b/mm/debug.c index e784110fb51d..668aa35191ca 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -25,12 +25,7 @@ static const struct trace_print_flags pageflag_names[] = { {1UL << PG_private, "private" }, {1UL << PG_private_2, "private_2" }, {1UL << PG_writeback, "writeback" }, -#ifdef CONFIG_PAGEFLAGS_EXTENDED {1UL << PG_head, "head" }, - {1UL << PG_tail, "tail" }, -#else - {1UL << PG_compound, "compound" }, -#endif {1UL << PG_swapcache, "swapcache" }, {1UL << PG_mappedtodisk, "mappedtodisk" }, {1UL << PG_reclaim, "reclaim" }, diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 73266ee7274c..e1ccc83f73d3 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1755,8 +1755,7 @@ static void __split_huge_page_refcount(struct page *page, (1L << PG_unevictable))); page_tail->flags |= (1L << PG_dirty); - /* clear PageTail before overwriting first_page */ - smp_wmb(); + clear_compound_head(page_tail); if (page_is_young(page)) set_page_young(page_tail); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index e90a29024c5c..4eb0f0964883 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1001,9 +1001,8 @@ static void destroy_compound_gigantic_page(struct page *page, struct page *p = page + 1; for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) { - __ClearPageTail(p); + clear_compound_head(p); set_page_refcounted(p); - p->first_page = NULL; } set_compound_order(page, 0); @@ -1276,10 +1275,7 @@ static void prep_compound_gigantic_page(struct page *page, unsigned long order) */ __ClearPageReserved(p); set_page_count(p, 0); - p->first_page = page; - /* Make sure p->first_page is always valid for PageTail() */ - smp_wmb(); - __SetPageTail(p); + set_compound_head(p, page); } } diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index 33d59abe91f1..d8fb10de0f14 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -385,7 +385,7 @@ void __init hugetlb_cgroup_file_init(void) /* * Add cgroup control files only if the huge page consists * of more than two normal pages. This is because we use - * page[2].lru.next for storing cgroup details. + * page[2].private for storing cgroup details. */ if (huge_page_order(h) >= HUGETLB_CGROUP_MIN_ORDER) __hugetlb_cgroup_file_init(hstate_index(h)); diff --git a/mm/internal.h b/mm/internal.h index 5b7841f6fa27..a7f5670fea23 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -80,9 +80,9 @@ static inline void __get_page_tail_foll(struct page *page, * speculative page access (like in * page_cache_get_speculative()) on tail pages. */ - VM_BUG_ON_PAGE(atomic_read(&page->first_page->_count) <= 0, page); + VM_BUG_ON_PAGE(atomic_read(&compound_head(page)->_count) <= 0, page); if (get_page_head) - atomic_inc(&page->first_page->_count); + atomic_inc(&compound_head(page)->_count); get_huge_page_tail(page); } diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 16a0ec385320..8424b64711ac 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -776,8 +776,6 @@ static int me_huge_page(struct page *p, unsigned long pfn) #define lru (1UL << PG_lru) #define swapbacked (1UL << PG_swapbacked) #define head (1UL << PG_head) -#define tail (1UL << PG_tail) -#define compound (1UL << PG_compound) #define slab (1UL << PG_slab) #define reserved (1UL << PG_reserved) @@ -800,12 +798,7 @@ static struct page_state { */ { slab, slab, MF_MSG_SLAB, me_kernel }, -#ifdef CONFIG_PAGEFLAGS_EXTENDED { head, head, MF_MSG_HUGE, me_huge_page }, - { tail, tail, MF_MSG_HUGE, me_huge_page }, -#else - { compound, compound, MF_MSG_HUGE, me_huge_page }, -#endif { sc|dirty, sc|dirty, MF_MSG_DIRTY_SWAPCACHE, me_swapcache_dirty }, { sc|dirty, sc, MF_MSG_CLEAN_SWAPCACHE, me_swapcache_clean }, diff --git a/mm/page_alloc.c b/mm/page_alloc.c index fae1bd6f9f37..e361001519d3 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -445,15 +445,15 @@ out: /* * Higher-order pages are called "compound pages". They are structured thusly: * - * The first PAGE_SIZE page is called the "head page". + * The first PAGE_SIZE page is called the "head page" and have PG_head set. * - * The remaining PAGE_SIZE pages are called "tail pages". + * The remaining PAGE_SIZE pages are called "tail pages". PageTail() is encoded + * in bit 0 of page->compound_head. The rest of bits is pointer to head page. * - * All pages have PG_compound set. All tail pages have their ->first_page - * pointing at the head page. + * The first tail page's ->compound_dtor holds the offset in array of compound + * page destructors. See compound_page_dtors. * - * The first tail page's ->lru.next holds the address of the compound page's - * put_page() function. Its ->lru.prev holds the order of allocation. + * The first tail page's ->compound_order holds the order of allocation. * This usage means that zero-order pages may not be compound. */ @@ -473,10 +473,7 @@ void prep_compound_page(struct page *page, unsigned long order) for (i = 1; i < nr_pages; i++) { struct page *p = page + i; set_page_count(p, 0); - p->first_page = page; - /* Make sure p->first_page is always valid for PageTail() */ - smp_wmb(); - __SetPageTail(p); + set_compound_head(p, page); } } @@ -854,17 +851,30 @@ static void free_one_page(struct zone *zone, static int free_tail_pages_check(struct page *head_page, struct page *page) { - if (!IS_ENABLED(CONFIG_DEBUG_VM)) - return 0; + int ret = 1; + + /* + * We rely page->lru.next never has bit 0 set, unless the page + * is PageTail(). Let's make sure that's true even for poisoned ->lru. + */ + BUILD_BUG_ON((unsigned long)LIST_POISON1 & 1); + + if (!IS_ENABLED(CONFIG_DEBUG_VM)) { + ret = 0; + goto out; + } if (unlikely(!PageTail(page))) { bad_page(page, "PageTail not set", 0); - return 1; + goto out; } - if (unlikely(page->first_page != head_page)) { - bad_page(page, "first_page not consistent", 0); - return 1; + if (unlikely(compound_head(page) != head_page)) { + bad_page(page, "compound_head not consistent", 0); + goto out; } - return 0; + ret = 0; +out: + clear_compound_head(page); + return ret; } static void __meminit __init_single_page(struct page *page, unsigned long pfn, @@ -931,6 +941,10 @@ void __meminit reserve_bootmem_region(unsigned long start, unsigned long end) struct page *page = pfn_to_page(start_pfn); init_reserved_page(start_pfn); + + /* Avoid false-positive PageTail() */ + INIT_LIST_HEAD(&page->lru); + SetPageReserved(page); } } diff --git a/mm/swap.c b/mm/swap.c index 983f692a47fd..39395fb549c0 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -201,7 +201,7 @@ out_put_single: __put_single_page(page); return; } - VM_BUG_ON_PAGE(page_head != page->first_page, page); + VM_BUG_ON_PAGE(page_head != compound_head(page), page); /* * We can release the refcount taken by * get_page_unless_zero() now that @@ -262,7 +262,7 @@ static void put_compound_page(struct page *page) * Case 3 is possible, as we may race with * __split_huge_page_refcount tearing down a THP page. */ - page_head = compound_head_by_tail(page); + page_head = compound_head(page); if (!__compound_tail_refcounted(page_head)) put_unrefcounted_compound_page(page_head, page); else -- cgit v1.2.3 From d00181b96eb86c914cb327d1de974a1b71366e1b Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 6 Nov 2015 16:29:57 -0800 Subject: mm: use 'unsigned int' for page order Let's try to be consistent about data type of page order. [sfr@canb.auug.org.au: fix build (type of pageblock_order)] [hughd@google.com: some configs end up with MAX_ORDER and pageblock_order having different types] Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Acked-by: Vlastimil Babka Reviewed-by: Andrea Arcangeli Cc: "Paul E. McKenney" Cc: Andi Kleen Cc: Aneesh Kumar K.V Cc: Christoph Lameter Cc: David Rientjes Cc: Joonsoo Kim Cc: Sergey Senozhatsky Signed-off-by: Stephen Rothwell Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 5 +++-- include/linux/pageblock-flags.h | 2 +- mm/hugetlb.c | 19 ++++++++++--------- mm/internal.h | 4 ++-- mm/page_alloc.c | 29 ++++++++++++++++------------- 5 files changed, 32 insertions(+), 27 deletions(-) (limited to 'include/linux/mm.h') diff --git a/include/linux/mm.h b/include/linux/mm.h index 9671b6f23eda..00bad7793788 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -550,7 +550,7 @@ static inline compound_page_dtor *get_compound_page_dtor(struct page *page) return compound_page_dtors[page[1].compound_dtor]; } -static inline int compound_order(struct page *page) +static inline unsigned int compound_order(struct page *page) { if (!PageHead(page)) return 0; @@ -1810,7 +1810,8 @@ extern void si_meminfo(struct sysinfo * val); extern void si_meminfo_node(struct sysinfo *val, int nid); extern __printf(3, 4) -void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...); +void warn_alloc_failed(gfp_t gfp_mask, unsigned int order, + const char *fmt, ...); extern void setup_per_cpu_pageset(void); diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index 2baeee12f48e..e942558b3585 100644 --- a/include/linux/pageblock-flags.h +++ b/include/linux/pageblock-flags.h @@ -44,7 +44,7 @@ enum pageblock_bits { #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE /* Huge page sizes are variable */ -extern int pageblock_order; +extern unsigned int pageblock_order; #else /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */ diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 4eb0f0964883..7ce07d681265 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -994,7 +994,7 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed) #if defined(CONFIG_CMA) && defined(CONFIG_X86_64) static void destroy_compound_gigantic_page(struct page *page, - unsigned long order) + unsigned int order) { int i; int nr_pages = 1 << order; @@ -1009,7 +1009,7 @@ static void destroy_compound_gigantic_page(struct page *page, __ClearPageHead(page); } -static void free_gigantic_page(struct page *page, unsigned order) +static void free_gigantic_page(struct page *page, unsigned int order) { free_contig_range(page_to_pfn(page), 1 << order); } @@ -1053,7 +1053,7 @@ static bool zone_spans_last_pfn(const struct zone *zone, return zone_spans_pfn(zone, last_pfn); } -static struct page *alloc_gigantic_page(int nid, unsigned order) +static struct page *alloc_gigantic_page(int nid, unsigned int order) { unsigned long nr_pages = 1 << order; unsigned long ret, pfn, flags; @@ -1089,7 +1089,7 @@ static struct page *alloc_gigantic_page(int nid, unsigned order) } static void prep_new_huge_page(struct hstate *h, struct page *page, int nid); -static void prep_compound_gigantic_page(struct page *page, unsigned long order); +static void prep_compound_gigantic_page(struct page *page, unsigned int order); static struct page *alloc_fresh_gigantic_page_node(struct hstate *h, int nid) { @@ -1122,9 +1122,9 @@ static int alloc_fresh_gigantic_page(struct hstate *h, static inline bool gigantic_page_supported(void) { return true; } #else static inline bool gigantic_page_supported(void) { return false; } -static inline void free_gigantic_page(struct page *page, unsigned order) { } +static inline void free_gigantic_page(struct page *page, unsigned int order) { } static inline void destroy_compound_gigantic_page(struct page *page, - unsigned long order) { } + unsigned int order) { } static inline int alloc_fresh_gigantic_page(struct hstate *h, nodemask_t *nodes_allowed) { return 0; } #endif @@ -1250,7 +1250,7 @@ static void prep_new_huge_page(struct hstate *h, struct page *page, int nid) put_page(page); /* free it into the hugepage allocator */ } -static void prep_compound_gigantic_page(struct page *page, unsigned long order) +static void prep_compound_gigantic_page(struct page *page, unsigned int order) { int i; int nr_pages = 1 << order; @@ -1968,7 +1968,8 @@ found: return 1; } -static void __init prep_compound_huge_page(struct page *page, int order) +static void __init prep_compound_huge_page(struct page *page, + unsigned int order) { if (unlikely(order > (MAX_ORDER - 1))) prep_compound_gigantic_page(page, order); @@ -2679,7 +2680,7 @@ static int __init hugetlb_init(void) module_init(hugetlb_init); /* Should be called on processing a hugepagesz=... option */ -void __init hugetlb_add_hstate(unsigned order) +void __init hugetlb_add_hstate(unsigned int order) { struct hstate *h; unsigned long i; diff --git a/mm/internal.h b/mm/internal.h index a7f5670fea23..38e24b89e4c4 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -177,7 +177,7 @@ __find_buddy_index(unsigned long page_idx, unsigned int order) extern int __isolate_free_page(struct page *page, unsigned int order); extern void __free_pages_bootmem(struct page *page, unsigned long pfn, unsigned int order); -extern void prep_compound_page(struct page *page, unsigned long order); +extern void prep_compound_page(struct page *page, unsigned int order); #ifdef CONFIG_MEMORY_FAILURE extern bool is_free_buddy_page(struct page *page); #endif @@ -235,7 +235,7 @@ int find_suitable_fallback(struct free_area *area, unsigned int order, * page cannot be allocated or merged in parallel. Alternatively, it must * handle invalid values gracefully, and use page_order_unsafe() below. */ -static inline unsigned long page_order(struct page *page) +static inline unsigned int page_order(struct page *page) { /* PageBuddy() must be checked by the caller */ return page_private(page); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index e361001519d3..208e4c7e771b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -181,7 +181,7 @@ bool pm_suspended_storage(void) #endif /* CONFIG_PM_SLEEP */ #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE -int pageblock_order __read_mostly; +unsigned int pageblock_order __read_mostly; #endif static void __free_pages_ok(struct page *page, unsigned int order); @@ -462,7 +462,7 @@ static void free_compound_page(struct page *page) __free_pages_ok(page, compound_order(page)); } -void prep_compound_page(struct page *page, unsigned long order) +void prep_compound_page(struct page *page, unsigned int order) { int i; int nr_pages = 1 << order; @@ -662,7 +662,7 @@ static inline void __free_one_page(struct page *page, unsigned long combined_idx; unsigned long uninitialized_var(buddy_idx); struct page *buddy; - int max_order = MAX_ORDER; + unsigned int max_order = MAX_ORDER; VM_BUG_ON(!zone_is_initialized(zone)); VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP, page); @@ -675,7 +675,7 @@ static inline void __free_one_page(struct page *page, * pageblock. Without this, pageblock isolation * could cause incorrect freepage accounting. */ - max_order = min(MAX_ORDER, pageblock_order + 1); + max_order = min_t(unsigned int, MAX_ORDER, pageblock_order + 1); } else { __mod_zone_freepage_state(zone, 1 << order, migratetype); } @@ -1471,7 +1471,7 @@ int move_freepages(struct zone *zone, int migratetype) { struct page *page; - unsigned long order; + unsigned int order; int pages_moved = 0; #ifndef CONFIG_HOLES_IN_ZONE @@ -1584,7 +1584,7 @@ static bool can_steal_fallback(unsigned int order, int start_mt) static void steal_suitable_fallback(struct zone *zone, struct page *page, int start_type) { - int current_order = page_order(page); + unsigned int current_order = page_order(page); int pages; /* Take ownership for orders >= pageblock_order */ @@ -2637,7 +2637,7 @@ static DEFINE_RATELIMIT_STATE(nopage_rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); -void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...) +void warn_alloc_failed(gfp_t gfp_mask, unsigned int order, const char *fmt, ...) { unsigned int filter = SHOW_MEM_FILTER_NODES; @@ -2671,7 +2671,7 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...) va_end(args); } - pr_warn("%s: page allocation failure: order:%d, mode:0x%x\n", + pr_warn("%s: page allocation failure: order:%u, mode:0x%x\n", current->comm, order, gfp_mask); dump_stack(); @@ -3449,7 +3449,8 @@ void free_kmem_pages(unsigned long addr, unsigned int order) } } -static void *make_alloc_exact(unsigned long addr, unsigned order, size_t size) +static void *make_alloc_exact(unsigned long addr, unsigned int order, + size_t size) { if (addr) { unsigned long alloc_end = addr + (PAGE_SIZE << order); @@ -3499,7 +3500,7 @@ EXPORT_SYMBOL(alloc_pages_exact); */ void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) { - unsigned order = get_order(size); + unsigned int order = get_order(size); struct page *p = alloc_pages_node(nid, gfp_mask, order); if (!p) return NULL; @@ -3800,7 +3801,8 @@ void show_free_areas(unsigned int filter) } for_each_populated_zone(zone) { - unsigned long nr[MAX_ORDER], flags, order, total = 0; + unsigned int order; + unsigned long nr[MAX_ORDER], flags, total = 0; unsigned char types[MAX_ORDER]; if (skip_free_areas_node(filter, zone_to_nid(zone))) @@ -4149,7 +4151,7 @@ static void build_zonelists(pg_data_t *pgdat) nodemask_t used_mask; int local_node, prev_node; struct zonelist *zonelist; - int order = current_zonelist_order; + unsigned int order = current_zonelist_order; /* initialize zonelists */ for (i = 0; i < MAX_ZONELISTS; i++) { @@ -6678,7 +6680,8 @@ int alloc_contig_range(unsigned long start, unsigned long end, unsigned migratetype) { unsigned long outer_start, outer_end; - int ret = 0, order; + unsigned int order; + int ret = 0; struct compact_control cc = { .nr_migratepages = 0, -- cgit v1.2.3