diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-12-13 19:29:45 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-12-13 19:29:45 -0800 |
commit | e2ca6ba6ba0152361aa4fcbf6067db71b2c7a770 (patch) | |
tree | f7ed7753a2e66486a4ffe0fbbf98404ec4ba2212 /mm/page_alloc.c | |
parent | 7e68dd7d07a28faa2e6574dd6b9dbd90cdeaae91 (diff) | |
parent | c45bc55a99957b20e4e0333bcd42e12d1833a7f5 (diff) |
Merge tag 'mm-stable-2022-12-13' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton:
- More userfaultfs work from Peter Xu
- Several convert-to-folios series from Sidhartha Kumar and Huang Ying
- Some filemap cleanups from Vishal Moola
- David Hildenbrand added the ability to selftest anon memory COW
handling
- Some cpuset simplifications from Liu Shixin
- Addition of vmalloc tracing support by Uladzislau Rezki
- Some pagecache folioifications and simplifications from Matthew
Wilcox
- A pagemap cleanup from Kefeng Wang: we have VM_ACCESS_FLAGS, so use
it
- Miguel Ojeda contributed some cleanups for our use of the
__no_sanitize_thread__ gcc keyword.
This series should have been in the non-MM tree, my bad
- Naoya Horiguchi improved the interaction between memory poisoning and
memory section removal for huge pages
- DAMON cleanups and tuneups from SeongJae Park
- Tony Luck fixed the handling of COW faults against poisoned pages
- Peter Xu utilized the PTE marker code for handling swapin errors
- Hugh Dickins reworked compound page mapcount handling, simplifying it
and making it more efficient
- Removal of the autonuma savedwrite infrastructure from Nadav Amit and
David Hildenbrand
- zram support for multiple compression streams from Sergey Senozhatsky
- David Hildenbrand reworked the GUP code's R/O long-term pinning so
that drivers no longer need to use the FOLL_FORCE workaround which
didn't work very well anyway
- Mel Gorman altered the page allocator so that local IRQs can remnain
enabled during per-cpu page allocations
- Vishal Moola removed the try_to_release_page() wrapper
- Stefan Roesch added some per-BDI sysfs tunables which are used to
prevent network block devices from dirtying excessive amounts of
pagecache
- David Hildenbrand did some cleanup and repair work on KSM COW
breaking
- Nhat Pham and Johannes Weiner have implemented writeback in zswap's
zsmalloc backend
- Brian Foster has fixed a longstanding corner-case oddity in
file[map]_write_and_wait_range()
- sparse-vmemmap changes for MIPS, LoongArch and NIOS2 from Feiyang
Chen
- Shiyang Ruan has done some work on fsdax, to make its reflink mode
work better under xfstests. Better, but still not perfect
- Christoph Hellwig has removed the .writepage() method from several
filesystems. They only need .writepages()
- Yosry Ahmed wrote a series which fixes the memcg reclaim target
beancounting
- David Hildenbrand has fixed some of our MM selftests for 32-bit
machines
- Many singleton patches, as usual
* tag 'mm-stable-2022-12-13' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (313 commits)
mm/hugetlb: set head flag before setting compound_order in __prep_compound_gigantic_folio
mm: mmu_gather: allow more than one batch of delayed rmaps
mm: fix typo in struct pglist_data code comment
kmsan: fix memcpy tests
mm: add cond_resched() in swapin_walk_pmd_entry()
mm: do not show fs mm pc for VM_LOCKONFAULT pages
selftests/vm: ksm_functional_tests: fixes for 32bit
selftests/vm: cow: fix compile warning on 32bit
selftests/vm: madv_populate: fix missing MADV_POPULATE_(READ|WRITE) definitions
mm/gup_test: fix PIN_LONGTERM_TEST_READ with highmem
mm,thp,rmap: fix races between updates of subpages_mapcount
mm: memcg: fix swapcached stat accounting
mm: add nodes= arg to memory.reclaim
mm: disable top-tier fallback to reclaim on proactive reclaim
selftests: cgroup: make sure reclaim target memcg is unprotected
selftests: cgroup: refactor proactive reclaim code to reclaim_until()
mm: memcg: fix stale protection of reclaim target memcg
mm/mmap: properly unaccount memory on mas_preallocate() failure
omfs: remove ->writepage
jfs: remove ->writepage
...
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r-- | mm/page_alloc.c | 167 |
1 files changed, 78 insertions, 89 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6e60657875d3..0745aedebb37 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -170,21 +170,12 @@ static DEFINE_MUTEX(pcp_batch_high_lock); _ret; \ }) -#define pcpu_spin_lock_irqsave(type, member, ptr, flags) \ +#define pcpu_spin_trylock(type, member, ptr) \ ({ \ type *_ret; \ pcpu_task_pin(); \ _ret = this_cpu_ptr(ptr); \ - spin_lock_irqsave(&_ret->member, flags); \ - _ret; \ -}) - -#define pcpu_spin_trylock_irqsave(type, member, ptr, flags) \ -({ \ - type *_ret; \ - pcpu_task_pin(); \ - _ret = this_cpu_ptr(ptr); \ - if (!spin_trylock_irqsave(&_ret->member, flags)) { \ + if (!spin_trylock(&_ret->member)) { \ pcpu_task_unpin(); \ _ret = NULL; \ } \ @@ -197,27 +188,16 @@ static DEFINE_MUTEX(pcp_batch_high_lock); pcpu_task_unpin(); \ }) -#define pcpu_spin_unlock_irqrestore(member, ptr, flags) \ -({ \ - spin_unlock_irqrestore(&ptr->member, flags); \ - pcpu_task_unpin(); \ -}) - /* struct per_cpu_pages specific helpers. */ #define pcp_spin_lock(ptr) \ pcpu_spin_lock(struct per_cpu_pages, lock, ptr) -#define pcp_spin_lock_irqsave(ptr, flags) \ - pcpu_spin_lock_irqsave(struct per_cpu_pages, lock, ptr, flags) - -#define pcp_spin_trylock_irqsave(ptr, flags) \ - pcpu_spin_trylock_irqsave(struct per_cpu_pages, lock, ptr, flags) +#define pcp_spin_trylock(ptr) \ + pcpu_spin_trylock(struct per_cpu_pages, lock, ptr) #define pcp_spin_unlock(ptr) \ pcpu_spin_unlock(lock, ptr) -#define pcp_spin_unlock_irqrestore(ptr, flags) \ - pcpu_spin_unlock_irqrestore(lock, ptr, flags) #ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID DEFINE_PER_CPU(int, numa_node); EXPORT_PER_CPU_SYMBOL(numa_node); @@ -798,6 +778,7 @@ static void prep_compound_head(struct page *page, unsigned int order) set_compound_page_dtor(page, COMPOUND_PAGE_DTOR); set_compound_order(page, order); atomic_set(compound_mapcount_ptr(page), -1); + atomic_set(subpages_mapcount_ptr(page), 0); atomic_set(compound_pincount_ptr(page), 0); } @@ -1324,11 +1305,19 @@ static int free_tail_pages_check(struct page *head_page, struct page *page) } switch (page - head_page) { case 1: - /* the first tail page: ->mapping may be compound_mapcount() */ - if (unlikely(compound_mapcount(page))) { + /* the first tail page: these may be in place of ->mapping */ + if (unlikely(head_compound_mapcount(head_page))) { bad_page(page, "nonzero compound_mapcount"); goto out; } + if (unlikely(atomic_read(subpages_mapcount_ptr(head_page)))) { + bad_page(page, "nonzero subpages_mapcount"); + goto out; + } + if (unlikely(head_compound_pincount(head_page))) { + bad_page(page, "nonzero compound_pincount"); + goto out; + } break; case 2: /* @@ -1431,10 +1420,8 @@ static __always_inline bool free_pages_prepare(struct page *page, VM_BUG_ON_PAGE(compound && compound_order(page) != order, page); - if (compound) { - ClearPageDoubleMap(page); + if (compound) ClearPageHasHWPoisoned(page); - } for (i = 1; i < (1 << order); i++) { if (compound) bad += free_tail_pages_check(page, page + i); @@ -1547,6 +1534,7 @@ static void free_pcppages_bulk(struct zone *zone, int count, struct per_cpu_pages *pcp, int pindex) { + unsigned long flags; int min_pindex = 0; int max_pindex = NR_PCP_LISTS - 1; unsigned int order; @@ -1562,8 +1550,7 @@ static void free_pcppages_bulk(struct zone *zone, int count, /* Ensure requested pindex is drained first. */ pindex = pindex - 1; - /* Caller must hold IRQ-safe pcp->lock so IRQs are disabled. */ - spin_lock(&zone->lock); + spin_lock_irqsave(&zone->lock, flags); isolated_pageblocks = has_isolate_pageblock(zone); while (count > 0) { @@ -1611,7 +1598,7 @@ static void free_pcppages_bulk(struct zone *zone, int count, } while (count > 0 && !list_empty(list)); } - spin_unlock(&zone->lock); + spin_unlock_irqrestore(&zone->lock, flags); } static void free_one_page(struct zone *zone, @@ -1715,6 +1702,11 @@ static void __free_pages_ok(struct page *page, unsigned int order, if (!free_pages_prepare(page, order, true, fpi_flags)) return; + /* + * Calling get_pfnblock_migratetype() without spin_lock_irqsave() here + * is used to avoid calling get_pfnblock_migratetype() under the lock. + * This will reduce the lock holding time. + */ migratetype = get_pfnblock_migratetype(page, pfn); spin_lock_irqsave(&zone->lock, flags); @@ -3125,10 +3117,10 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, unsigned long count, struct list_head *list, int migratetype, unsigned int alloc_flags) { + unsigned long flags; int i, allocated = 0; - /* Caller must hold IRQ-safe pcp->lock so IRQs are disabled. */ - spin_lock(&zone->lock); + spin_lock_irqsave(&zone->lock, flags); for (i = 0; i < count; ++i) { struct page *page = __rmqueue(zone, order, migratetype, alloc_flags); @@ -3162,7 +3154,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, * pages added to the pcp list. */ __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order)); - spin_unlock(&zone->lock); + spin_unlock_irqrestore(&zone->lock, flags); return allocated; } @@ -3179,16 +3171,9 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) batch = READ_ONCE(pcp->batch); to_drain = min(pcp->count, batch); if (to_drain > 0) { - unsigned long flags; - - /* - * free_pcppages_bulk expects IRQs disabled for zone->lock - * so even though pcp->lock is not intended to be IRQ-safe, - * it's needed in this context. - */ - spin_lock_irqsave(&pcp->lock, flags); + spin_lock(&pcp->lock); free_pcppages_bulk(zone, to_drain, pcp, 0); - spin_unlock_irqrestore(&pcp->lock, flags); + spin_unlock(&pcp->lock); } } #endif @@ -3202,12 +3187,9 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone) pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu); if (pcp->count) { - unsigned long flags; - - /* See drain_zone_pages on why this is disabling IRQs */ - spin_lock_irqsave(&pcp->lock, flags); + spin_lock(&pcp->lock); free_pcppages_bulk(zone, pcp->count, pcp, 0); - spin_unlock_irqrestore(&pcp->lock, flags); + spin_unlock(&pcp->lock); } } @@ -3473,7 +3455,6 @@ static void free_unref_page_commit(struct zone *zone, struct per_cpu_pages *pcp, */ void free_unref_page(struct page *page, unsigned int order) { - unsigned long flags; unsigned long __maybe_unused UP_flags; struct per_cpu_pages *pcp; struct zone *zone; @@ -3501,10 +3482,10 @@ void free_unref_page(struct page *page, unsigned int order) zone = page_zone(page); pcp_trylock_prepare(UP_flags); - pcp = pcp_spin_trylock_irqsave(zone->per_cpu_pageset, flags); + pcp = pcp_spin_trylock(zone->per_cpu_pageset); if (pcp) { free_unref_page_commit(zone, pcp, page, migratetype, order); - pcp_spin_unlock_irqrestore(pcp, flags); + pcp_spin_unlock(pcp); } else { free_one_page(zone, page, pfn, order, migratetype, FPI_NONE); } @@ -3516,10 +3497,10 @@ void free_unref_page(struct page *page, unsigned int order) */ void free_unref_page_list(struct list_head *list) { + unsigned long __maybe_unused UP_flags; struct page *page, *next; struct per_cpu_pages *pcp = NULL; struct zone *locked_zone = NULL; - unsigned long flags; int batch_count = 0; int migratetype; @@ -3546,39 +3527,54 @@ void free_unref_page_list(struct list_head *list) list_for_each_entry_safe(page, next, list, lru) { struct zone *zone = page_zone(page); - /* Different zone, different pcp lock. */ - if (zone != locked_zone) { - if (pcp) - pcp_spin_unlock_irqrestore(pcp, flags); + list_del(&page->lru); + migratetype = get_pcppage_migratetype(page); + + /* + * Either different zone requiring a different pcp lock or + * excessive lock hold times when freeing a large list of + * pages. + */ + if (zone != locked_zone || batch_count == SWAP_CLUSTER_MAX) { + if (pcp) { + pcp_spin_unlock(pcp); + pcp_trylock_finish(UP_flags); + } + + batch_count = 0; + /* + * trylock is necessary as pages may be getting freed + * from IRQ or SoftIRQ context after an IO completion. + */ + pcp_trylock_prepare(UP_flags); + pcp = pcp_spin_trylock(zone->per_cpu_pageset); + if (unlikely(!pcp)) { + pcp_trylock_finish(UP_flags); + free_one_page(zone, page, page_to_pfn(page), + 0, migratetype, FPI_NONE); + locked_zone = NULL; + continue; + } locked_zone = zone; - pcp = pcp_spin_lock_irqsave(locked_zone->per_cpu_pageset, flags); } /* * Non-isolated types over MIGRATE_PCPTYPES get added * to the MIGRATE_MOVABLE pcp list. */ - migratetype = get_pcppage_migratetype(page); if (unlikely(migratetype >= MIGRATE_PCPTYPES)) migratetype = MIGRATE_MOVABLE; trace_mm_page_free_batched(page); free_unref_page_commit(zone, pcp, page, migratetype, 0); - - /* - * Guard against excessive IRQ disabled times when we get - * a large list of pages to free. - */ - if (++batch_count == SWAP_CLUSTER_MAX) { - pcp_spin_unlock_irqrestore(pcp, flags); - batch_count = 0; - pcp = pcp_spin_lock_irqsave(locked_zone->per_cpu_pageset, flags); - } + batch_count++; } - if (pcp) - pcp_spin_unlock_irqrestore(pcp, flags); + if (pcp) { + pcp_spin_unlock(pcp); + pcp_trylock_finish(UP_flags); + } } /* @@ -3779,15 +3775,11 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone, struct per_cpu_pages *pcp; struct list_head *list; struct page *page; - unsigned long flags; unsigned long __maybe_unused UP_flags; - /* - * spin_trylock may fail due to a parallel drain. In the future, the - * trylock will also protect against IRQ reentrancy. - */ + /* spin_trylock may fail due to a parallel drain or IRQ reentrancy. */ pcp_trylock_prepare(UP_flags); - pcp = pcp_spin_trylock_irqsave(zone->per_cpu_pageset, flags); + pcp = pcp_spin_trylock(zone->per_cpu_pageset); if (!pcp) { pcp_trylock_finish(UP_flags); return NULL; @@ -3801,7 +3793,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone, pcp->free_factor >>= 1; list = &pcp->lists[order_to_pindex(migratetype, order)]; page = __rmqueue_pcplist(zone, order, migratetype, alloc_flags, pcp, list); - pcp_spin_unlock_irqrestore(pcp, flags); + pcp_spin_unlock(pcp); pcp_trylock_finish(UP_flags); if (page) { __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order); @@ -5372,7 +5364,6 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, struct page **page_array) { struct page *page; - unsigned long flags; unsigned long __maybe_unused UP_flags; struct zone *zone; struct zoneref *z; @@ -5454,9 +5445,9 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, if (unlikely(!zone)) goto failed; - /* Is a parallel drain in progress? */ + /* spin_trylock may fail due to a parallel drain or IRQ reentrancy. */ pcp_trylock_prepare(UP_flags); - pcp = pcp_spin_trylock_irqsave(zone->per_cpu_pageset, flags); + pcp = pcp_spin_trylock(zone->per_cpu_pageset); if (!pcp) goto failed_irq; @@ -5475,7 +5466,7 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, if (unlikely(!page)) { /* Try and allocate at least one page */ if (!nr_account) { - pcp_spin_unlock_irqrestore(pcp, flags); + pcp_spin_unlock(pcp); goto failed_irq; } break; @@ -5490,7 +5481,7 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, nr_populated++; } - pcp_spin_unlock_irqrestore(pcp, flags); + pcp_spin_unlock(pcp); pcp_trylock_finish(UP_flags); __count_zid_vm_events(PGALLOC, zone_idx(zone), nr_account); @@ -6874,13 +6865,11 @@ static void __ref memmap_init_compound(struct page *head, set_page_count(page, 0); /* - * The first tail page stores compound_mapcount_ptr() and - * compound_order() and the second tail page stores - * compound_pincount_ptr(). Call prep_compound_head() after - * the first and second tail pages have been initialized to - * not have the data overwritten. + * The first tail page stores important compound page info. + * Call prep_compound_head() after the first tail page has + * been initialized, to not have the data overwritten. */ - if (pfn == head_pfn + 2) + if (pfn == head_pfn + 1) prep_compound_head(head, order); } } |