summaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-08-05 16:32:45 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-08-05 16:32:45 -0700
commit6614a3c3164a5df2b54abb0b3559f51041cf705b (patch)
tree1c25c23d9efed988705287fc2ccb78e0e76e311d /include/linux
parent74cae210a335d159f2eb822e261adee905b6951a (diff)
parent360614c01f81f48a89d8b13f8fa69c3ae0a1f5c7 (diff)
Merge tag 'mm-stable-2022-08-03' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton: "Most of the MM queue. A few things are still pending. Liam's maple tree rework didn't make it. This has resulted in a few other minor patch series being held over for next time. Multi-gen LRU still isn't merged as we were waiting for mapletree to stabilize. The current plan is to merge MGLRU into -mm soon and to later reintroduce mapletree, with a view to hopefully getting both into 6.1-rc1. Summary: - The usual batches of cleanups from Baoquan He, Muchun Song, Miaohe Lin, Yang Shi, Anshuman Khandual and Mike Rapoport - Some kmemleak fixes from Patrick Wang and Waiman Long - DAMON updates from SeongJae Park - memcg debug/visibility work from Roman Gushchin - vmalloc speedup from Uladzislau Rezki - more folio conversion work from Matthew Wilcox - enhancements for coherent device memory mapping from Alex Sierra - addition of shared pages tracking and CoW support for fsdax, from Shiyang Ruan - hugetlb optimizations from Mike Kravetz - Mel Gorman has contributed some pagealloc changes to improve latency and realtime behaviour. - mprotect soft-dirty checking has been improved by Peter Xu - Many other singleton patches all over the place" [ XFS merge from hell as per Darrick Wong in https://lore.kernel.org/all/YshKnxb4VwXycPO8@magnolia/ ] * tag 'mm-stable-2022-08-03' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (282 commits) tools/testing/selftests/vm/hmm-tests.c: fix build mm: Kconfig: fix typo mm: memory-failure: convert to pr_fmt() mm: use is_zone_movable_page() helper hugetlbfs: fix inaccurate comment in hugetlbfs_statfs() hugetlbfs: cleanup some comments in inode.c hugetlbfs: remove unneeded header file hugetlbfs: remove unneeded hugetlbfs_ops forward declaration hugetlbfs: use helper macro SZ_1{K,M} mm: cleanup is_highmem() mm/hmm: add a test for cross device private faults selftests: add soft-dirty into run_vmtests.sh selftests: soft-dirty: add test for mprotect mm/mprotect: fix soft-dirty check in can_change_pte_writable() mm: memcontrol: fix potential oom_lock recursion deadlock mm/gup.c: fix formatting in check_and_migrate_movable_page() xfs: fail dax mount if reflink is enabled on a partition mm/memcontrol.c: remove the redundant updating of stats_flush_threshold userfaultfd: don't fail on unrecognized features hugetlb_cgroup: fix wrong hugetlb cgroup numa stat ...
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/backing-dev.h23
-rw-r--r--include/linux/damon.h25
-rw-r--r--include/linux/dax.h56
-rw-r--r--include/linux/fs.h12
-rw-r--r--include/linux/highmem.h23
-rw-r--r--include/linux/hmm.h4
-rw-r--r--include/linux/huge_mm.h94
-rw-r--r--include/linux/hugetlb.h28
-rw-r--r--include/linux/khugepaged.h30
-rw-r--r--include/linux/kmemleak.h8
-rw-r--r--include/linux/memcontrol.h74
-rw-r--r--include/linux/memory_hotplug.h9
-rw-r--r--include/linux/memremap.h35
-rw-r--r--include/linux/migrate.h1
-rw-r--r--include/linux/mm.h133
-rw-r--r--include/linux/mm_types.h7
-rw-r--r--include/linux/mmu_notifier.h2
-rw-r--r--include/linux/mmzone.h159
-rw-r--r--include/linux/page-flags.h23
-rw-r--r--include/linux/pagemap.h2
-rw-r--r--include/linux/pagevec.h1
-rw-r--r--include/linux/pgtable.h28
-rw-r--r--include/linux/rmap.h4
-rw-r--r--include/linux/sched/mm.h4
-rw-r--r--include/linux/shmem_fs.h11
-rw-r--r--include/linux/shrinker.h33
-rw-r--r--include/linux/swap.h12
-rw-r--r--include/linux/swapops.h12
28 files changed, 528 insertions, 325 deletions
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index d452071db572..439815cc1ab9 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -140,12 +140,6 @@ static inline bool mapping_can_writeback(struct address_space *mapping)
return inode_to_bdi(mapping->host)->capabilities & BDI_CAP_WRITEBACK;
}
-static inline int bdi_sched_wait(void *word)
-{
- schedule();
- return 0;
-}
-
#ifdef CONFIG_CGROUP_WRITEBACK
struct bdi_writeback *wb_get_lookup(struct backing_dev_info *bdi,
@@ -236,18 +230,6 @@ wb_get_create_current(struct backing_dev_info *bdi, gfp_t gfp)
}
/**
- * inode_to_wb_is_valid - test whether an inode has a wb associated
- * @inode: inode of interest
- *
- * Returns %true if @inode has a wb associated. May be called without any
- * locking.
- */
-static inline bool inode_to_wb_is_valid(struct inode *inode)
-{
- return inode->i_wb;
-}
-
-/**
* inode_to_wb - determine the wb of an inode
* @inode: inode of interest
*
@@ -345,11 +327,6 @@ wb_get_create_current(struct backing_dev_info *bdi, gfp_t gfp)
return &bdi->wb;
}
-static inline bool inode_to_wb_is_valid(struct inode *inode)
-{
- return true;
-}
-
static inline struct bdi_writeback *inode_to_wb(struct inode *inode)
{
return &inode_to_bdi(inode)->wb;
diff --git a/include/linux/damon.h b/include/linux/damon.h
index 7c62da31ce4b..7b1f4a488230 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -86,6 +86,8 @@ struct damon_target {
* @DAMOS_PAGEOUT: Call ``madvise()`` for the region with MADV_PAGEOUT.
* @DAMOS_HUGEPAGE: Call ``madvise()`` for the region with MADV_HUGEPAGE.
* @DAMOS_NOHUGEPAGE: Call ``madvise()`` for the region with MADV_NOHUGEPAGE.
+ * @DAMOS_LRU_PRIO: Prioritize the region on its LRU lists.
+ * @DAMOS_LRU_DEPRIO: Deprioritize the region on its LRU lists.
* @DAMOS_STAT: Do nothing but count the stat.
* @NR_DAMOS_ACTIONS: Total number of DAMOS actions
*/
@@ -95,6 +97,8 @@ enum damos_action {
DAMOS_PAGEOUT,
DAMOS_HUGEPAGE,
DAMOS_NOHUGEPAGE,
+ DAMOS_LRU_PRIO,
+ DAMOS_LRU_DEPRIO,
DAMOS_STAT, /* Do nothing but only record the stat */
NR_DAMOS_ACTIONS,
};
@@ -397,7 +401,6 @@ struct damon_callback {
* detail.
*
* @kdamond: Kernel thread who does the monitoring.
- * @kdamond_stop: Notifies whether kdamond should stop.
* @kdamond_lock: Mutex for the synchronizations with @kdamond.
*
* For each monitoring context, one kernel thread for the monitoring is
@@ -406,14 +409,14 @@ struct damon_callback {
* Once started, the monitoring thread runs until explicitly required to be
* terminated or every monitoring target is invalid. The validity of the
* targets is checked via the &damon_operations.target_valid of @ops. The
- * termination can also be explicitly requested by writing non-zero to
- * @kdamond_stop. The thread sets @kdamond to NULL when it terminates.
- * Therefore, users can know whether the monitoring is ongoing or terminated by
- * reading @kdamond. Reads and writes to @kdamond and @kdamond_stop from
- * outside of the monitoring thread must be protected by @kdamond_lock.
+ * termination can also be explicitly requested by calling damon_stop().
+ * The thread sets @kdamond to NULL when it terminates. Therefore, users can
+ * know whether the monitoring is ongoing or terminated by reading @kdamond.
+ * Reads and writes to @kdamond from outside of the monitoring thread must
+ * be protected by @kdamond_lock.
*
- * Note that the monitoring thread protects only @kdamond and @kdamond_stop via
- * @kdamond_lock. Accesses to other fields must be protected by themselves.
+ * Note that the monitoring thread protects only @kdamond via @kdamond_lock.
+ * Accesses to other fields must be protected by themselves.
*
* @ops: Set of monitoring operations for given use cases.
* @callback: Set of callbacks for monitoring events notifications.
@@ -526,6 +529,12 @@ bool damon_is_registered_ops(enum damon_ops_id id);
int damon_register_ops(struct damon_operations *ops);
int damon_select_ops(struct damon_ctx *ctx, enum damon_ops_id id);
+static inline bool damon_target_has_pid(const struct damon_ctx *ctx)
+{
+ return ctx->ops.id == DAMON_OPS_VADDR || ctx->ops.id == DAMON_OPS_FVADDR;
+}
+
+
int damon_start(struct damon_ctx **ctxs, int nr_ctxs, bool exclusive);
int damon_stop(struct damon_ctx **ctxs, int nr_ctxs);
diff --git a/include/linux/dax.h b/include/linux/dax.h
index e7b81634c52a..ba985333e26b 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -43,8 +43,21 @@ struct dax_operations {
void *addr, size_t bytes, struct iov_iter *iter);
};
+struct dax_holder_operations {
+ /*
+ * notify_failure - notify memory failure into inner holder device
+ * @dax_dev: the dax device which contains the holder
+ * @offset: offset on this dax device where memory failure occurs
+ * @len: length of this memory failure event
+ * @flags: action flags for memory failure handler
+ */
+ int (*notify_failure)(struct dax_device *dax_dev, u64 offset,
+ u64 len, int mf_flags);
+};
+
#if IS_ENABLED(CONFIG_DAX)
struct dax_device *alloc_dax(void *private, const struct dax_operations *ops);
+void *dax_holder(struct dax_device *dax_dev);
void put_dax(struct dax_device *dax_dev);
void kill_dax(struct dax_device *dax_dev);
void dax_write_cache(struct dax_device *dax_dev, bool wc);
@@ -66,6 +79,10 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma,
return dax_synchronous(dax_dev);
}
#else
+static inline void *dax_holder(struct dax_device *dax_dev)
+{
+ return NULL;
+}
static inline struct dax_device *alloc_dax(void *private,
const struct dax_operations *ops)
{
@@ -114,12 +131,9 @@ struct writeback_control;
#if defined(CONFIG_BLOCK) && defined(CONFIG_FS_DAX)
int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk);
void dax_remove_host(struct gendisk *disk);
-struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev,
- u64 *start_off);
-static inline void fs_put_dax(struct dax_device *dax_dev)
-{
- put_dax(dax_dev);
-}
+struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev, u64 *start_off,
+ void *holder, const struct dax_holder_operations *ops);
+void fs_put_dax(struct dax_device *dax_dev, void *holder);
#else
static inline int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk)
{
@@ -129,11 +143,12 @@ static inline void dax_remove_host(struct gendisk *disk)
{
}
static inline struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev,
- u64 *start_off)
+ u64 *start_off, void *holder,
+ const struct dax_holder_operations *ops)
{
return NULL;
}
-static inline void fs_put_dax(struct dax_device *dax_dev)
+static inline void fs_put_dax(struct dax_device *dax_dev, void *holder)
{
}
#endif /* CONFIG_BLOCK && CONFIG_FS_DAX */
@@ -146,6 +161,10 @@ struct page *dax_layout_busy_page(struct address_space *mapping);
struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t start, loff_t end);
dax_entry_t dax_lock_page(struct page *page);
void dax_unlock_page(struct page *page, dax_entry_t cookie);
+dax_entry_t dax_lock_mapping_entry(struct address_space *mapping,
+ unsigned long index, struct page **page);
+void dax_unlock_mapping_entry(struct address_space *mapping,
+ unsigned long index, dax_entry_t cookie);
#else
static inline struct page *dax_layout_busy_page(struct address_space *mapping)
{
@@ -173,6 +192,17 @@ static inline dax_entry_t dax_lock_page(struct page *page)
static inline void dax_unlock_page(struct page *page, dax_entry_t cookie)
{
}
+
+static inline dax_entry_t dax_lock_mapping_entry(struct address_space *mapping,
+ unsigned long index, struct page **page)
+{
+ return 0;
+}
+
+static inline void dax_unlock_mapping_entry(struct address_space *mapping,
+ unsigned long index, dax_entry_t cookie)
+{
+}
#endif
int dax_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
@@ -203,6 +233,8 @@ size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
size_t bytes, struct iov_iter *i);
int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
size_t nr_pages);
+int dax_holder_notify_failure(struct dax_device *dax_dev, u64 off, u64 len,
+ int mf_flags);
void dax_flush(struct dax_device *dax_dev, void *addr, size_t size);
ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
@@ -214,6 +246,14 @@ vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf,
int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
pgoff_t index);
+int dax_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
+ struct inode *dest, loff_t destoff,
+ loff_t len, bool *is_same,
+ const struct iomap_ops *ops);
+int dax_remap_file_range_prep(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ loff_t *len, unsigned int remap_flags,
+ const struct iomap_ops *ops);
static inline bool dax_mapping(struct address_space *mapping)
{
return mapping->host && IS_DAX(mapping->host);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index cc64873d76c5..a3522bd811f9 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -74,6 +74,7 @@ struct fsverity_operations;
struct fs_context;
struct fs_parameter_spec;
struct fileattr;
+struct iomap_ops;
extern void __init inode_init(void);
extern void __init inode_init_early(void);
@@ -2200,10 +2201,13 @@ extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *,
extern ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
size_t len, unsigned int flags);
-extern int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
- struct file *file_out, loff_t pos_out,
- loff_t *count,
- unsigned int remap_flags);
+int __generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ loff_t *len, unsigned int remap_flags,
+ const struct iomap_ops *dax_read_ops);
+int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ loff_t *count, unsigned int remap_flags);
extern loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
loff_t len, unsigned int remap_flags);
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 56d6a0196534..177b07944640 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -243,6 +243,16 @@ static inline void clear_highpage(struct page *page)
kunmap_local(kaddr);
}
+static inline void clear_highpage_kasan_tagged(struct page *page)
+{
+ u8 tag;
+
+ tag = page_kasan_tag(page);
+ page_kasan_tag_reset(page);
+ clear_highpage(page);
+ page_kasan_tag_set(page, tag);
+}
+
#ifndef __HAVE_ARCH_TAG_CLEAR_HIGHPAGE
static inline void tag_clear_highpage(struct page *page)
@@ -336,19 +346,6 @@ static inline void memcpy_page(struct page *dst_page, size_t dst_off,
kunmap_local(dst);
}
-static inline void memmove_page(struct page *dst_page, size_t dst_off,
- struct page *src_page, size_t src_off,
- size_t len)
-{
- char *dst = kmap_local_page(dst_page);
- char *src = kmap_local_page(src_page);
-
- VM_BUG_ON(dst_off + len > PAGE_SIZE || src_off + len > PAGE_SIZE);
- memmove(dst + dst_off, src + src_off, len);
- kunmap_local(src);
- kunmap_local(dst);
-}
-
static inline void memset_page(struct page *page, size_t offset, int val,
size_t len)
{
diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index d5a6f101f843..126a36571667 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -4,7 +4,7 @@
*
* Authors: Jérôme Glisse <jglisse@redhat.com>
*
- * See Documentation/vm/hmm.rst for reasons and overview of what HMM is.
+ * See Documentation/mm/hmm.rst for reasons and overview of what HMM is.
*/
#ifndef LINUX_HMM_H
#define LINUX_HMM_H
@@ -100,7 +100,7 @@ struct hmm_range {
};
/*
- * Please see Documentation/vm/hmm.rst for how to use the range API.
+ * Please see Documentation/mm/hmm.rst for how to use the range API.
*/
int hmm_range_fault(struct hmm_range *range);
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 4ddaf6ad73ef..768e5261fdae 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -116,9 +116,30 @@ extern struct kobj_attribute shmem_enabled_attr;
extern unsigned long transparent_hugepage_flags;
+#define hugepage_flags_enabled() \
+ (transparent_hugepage_flags & \
+ ((1<<TRANSPARENT_HUGEPAGE_FLAG) | \
+ (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG)))
+#define hugepage_flags_always() \
+ (transparent_hugepage_flags & \
+ (1<<TRANSPARENT_HUGEPAGE_FLAG))
+
+/*
+ * Do the below checks:
+ * - For file vma, check if the linear page offset of vma is
+ * HPAGE_PMD_NR aligned within the file. The hugepage is
+ * guaranteed to be hugepage-aligned within the file, but we must
+ * check that the PMD-aligned addresses in the VMA map to
+ * PMD-aligned offsets within the file, else the hugepage will
+ * not be PMD-mappable.
+ * - For all vmas, check if the haddr is in an aligned HPAGE_PMD_SIZE
+ * area.
+ */
static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
- unsigned long haddr)
+ unsigned long addr)
{
+ unsigned long haddr;
+
/* Don't have to check pgoff for anonymous vma */
if (!vma_is_anonymous(vma)) {
if (!IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff,
@@ -126,53 +147,13 @@ static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
return false;
}
- if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end)
- return false;
- return true;
-}
+ haddr = addr & HPAGE_PMD_MASK;
-static inline bool transhuge_vma_enabled(struct vm_area_struct *vma,
- unsigned long vm_flags)
-{
- /* Explicitly disabled through madvise. */
- if ((vm_flags & VM_NOHUGEPAGE) ||
- test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
+ if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end)
return false;
return true;
}
-/*
- * to be used on vmas which are known to support THP.
- * Use transparent_hugepage_active otherwise
- */
-static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma)
-{
-
- /*
- * If the hardware/firmware marked hugepage support disabled.
- */
- if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_NEVER_DAX))
- return false;
-
- if (!transhuge_vma_enabled(vma, vma->vm_flags))
- return false;
-
- if (vma_is_temporary_stack(vma))
- return false;
-
- if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_FLAG))
- return true;
-
- if (vma_is_dax(vma))
- return true;
-
- if (transparent_hugepage_flags &
- (1 << TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG))
- return !!(vma->vm_flags & VM_HUGEPAGE);
-
- return false;
-}
-
static inline bool file_thp_enabled(struct vm_area_struct *vma)
{
struct inode *inode;
@@ -187,7 +168,9 @@ static inline bool file_thp_enabled(struct vm_area_struct *vma)
!inode_is_open_for_write(inode) && S_ISREG(inode->i_mode);
}
-bool transparent_hugepage_active(struct vm_area_struct *vma);
+bool hugepage_vma_check(struct vm_area_struct *vma,
+ unsigned long vm_flags,
+ bool smaps, bool in_pf);
#define transparent_hugepage_use_zero_page() \
(transparent_hugepage_flags & \
@@ -290,7 +273,7 @@ static inline bool is_huge_zero_page(struct page *page)
static inline bool is_huge_zero_pmd(pmd_t pmd)
{
- return READ_ONCE(huge_zero_pfn) == pmd_pfn(pmd) && pmd_present(pmd);
+ return pmd_present(pmd) && READ_ONCE(huge_zero_pfn) == pmd_pfn(pmd);
}
static inline bool is_huge_zero_pud(pud_t pud)
@@ -311,8 +294,8 @@ static inline bool thp_migration_supported(void)
static inline struct list_head *page_deferred_list(struct page *page)
{
/*
- * Global or memcg deferred list in the second tail pages is
- * occupied by compound_head.
+ * See organization of tail pages of compound page in
+ * "struct page" definition.
*/
return &page[2].deferred_list;
}
@@ -331,24 +314,15 @@ static inline bool folio_test_pmd_mappable(struct folio *folio)
return false;
}
-static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma)
-{
- return false;
-}
-
-static inline bool transparent_hugepage_active(struct vm_area_struct *vma)
-{
- return false;
-}
-
static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
- unsigned long haddr)
+ unsigned long addr)
{
return false;
}
-static inline bool transhuge_vma_enabled(struct vm_area_struct *vma,
- unsigned long vm_flags)
+static inline bool hugepage_vma_check(struct vm_area_struct *vma,
+ unsigned long vm_flags,
+ bool smaps, bool in_pf)
{
return false;
}
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index e4cff27d1198..4cdfce976644 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -152,7 +152,7 @@ void __unmap_hugepage_range_final(struct mmu_gather *tlb,
struct page *ref_page, zap_flags_t zap_flags);
void hugetlb_report_meminfo(struct seq_file *);
int hugetlb_report_node_meminfo(char *buf, int len, int nid);
-void hugetlb_show_meminfo(void);
+void hugetlb_show_meminfo_node(int nid);
unsigned long hugetlb_total_pages(void);
vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, unsigned int flags);
@@ -170,7 +170,7 @@ bool hugetlb_reserve_pages(struct inode *inode, long from, long to,
vm_flags_t vm_flags);
long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
long freed);
-bool isolate_huge_page(struct page *page, struct list_head *list);
+int isolate_hugetlb(struct page *page, struct list_head *list);
int get_hwpoison_huge_page(struct page *page, bool *hugetlb);
int get_huge_page_for_hwpoison(unsigned long pfn, int flags);
void putback_active_hugepage(struct page *page);
@@ -194,8 +194,9 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr, unsigned long sz);
pte_t *huge_pte_offset(struct mm_struct *mm,
unsigned long addr, unsigned long sz);
+unsigned long hugetlb_mask_last_page(struct hstate *h);
int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long *addr, pte_t *ptep);
+ unsigned long addr, pte_t *ptep);
void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
unsigned long *start, unsigned long *end);
struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
@@ -242,7 +243,7 @@ static inline struct address_space *hugetlb_page_mapping_lock_write(
static inline int huge_pmd_unshare(struct mm_struct *mm,
struct vm_area_struct *vma,
- unsigned long *addr, pte_t *ptep)
+ unsigned long addr, pte_t *ptep)
{
return 0;
}
@@ -297,7 +298,7 @@ static inline int hugetlb_report_node_meminfo(char *buf, int len, int nid)
return 0;
}
-static inline void hugetlb_show_meminfo(void)
+static inline void hugetlb_show_meminfo_node(int nid)
{
}
@@ -376,9 +377,9 @@ static inline pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr,
return NULL;
}
-static inline bool isolate_huge_page(struct page *page, struct list_head *list)
+static inline int isolate_hugetlb(struct page *page, struct list_head *list)
{
- return false;
+ return -EBUSY;
}
static inline int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
@@ -903,14 +904,6 @@ static inline void hugetlb_count_sub(long l, struct mm_struct *mm)
atomic_long_sub(l, &mm->hugetlb_usage);
}
-#ifndef set_huge_swap_pte_at
-static inline void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pte, unsigned long sz)
-{
- set_huge_pte_at(mm, addr, ptep, pte);
-}
-#endif
-
#ifndef huge_ptep_modify_prot_start
#define huge_ptep_modify_prot_start huge_ptep_modify_prot_start
static inline pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
@@ -1094,11 +1087,6 @@ static inline void hugetlb_count_sub(long l, struct mm_struct *mm)
{
}
-static inline void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pte, unsigned long sz)
-{
-}
-
static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h
index 392d34c3c59a..384f034ae947 100644
--- a/include/linux/khugepaged.h
+++ b/include/linux/khugepaged.h
@@ -10,8 +10,6 @@ extern struct attribute_group khugepaged_attr_group;
extern int khugepaged_init(void);
extern void khugepaged_destroy(void);
extern int start_stop_khugepaged(void);
-extern bool hugepage_vma_check(struct vm_area_struct *vma,
- unsigned long vm_flags);
extern void __khugepaged_enter(struct mm_struct *mm);
extern void __khugepaged_exit(struct mm_struct *mm);
extern void khugepaged_enter_vma(struct vm_area_struct *vma,
@@ -26,20 +24,6 @@ static inline void collapse_pte_mapped_thp(struct mm_struct *mm,
}
#endif
-#define khugepaged_enabled() \
- (transparent_hugepage_flags & \
- ((1<<TRANSPARENT_HUGEPAGE_FLAG) | \
- (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG)))
-#define khugepaged_always() \
- (transparent_hugepage_flags & \
- (1<<TRANSPARENT_HUGEPAGE_FLAG))
-#define khugepaged_req_madv() \
- (transparent_hugepage_flags & \
- (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG))
-#define khugepaged_defrag() \
- (transparent_hugepage_flags & \
- (1<<TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG))
-
static inline void khugepaged_fork(struct mm_struct *mm, struct mm_struct *oldmm)
{
if (test_bit(MMF_VM_HUGEPAGE, &oldmm->flags))
@@ -51,16 +35,6 @@ static inline void khugepaged_exit(struct mm_struct *mm)
if (test_bit(MMF_VM_HUGEPAGE, &mm->flags))
__khugepaged_exit(mm);
}
-
-static inline void khugepaged_enter(struct vm_area_struct *vma,
- unsigned long vm_flags)
-{
- if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags) &&
- khugepaged_enabled()) {
- if (hugepage_vma_check(vma, vm_flags))
- __khugepaged_enter(vma->vm_mm);
- }
-}
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
static inline void khugepaged_fork(struct mm_struct *mm, struct mm_struct *oldmm)
{
@@ -68,10 +42,6 @@ static inline void khugepaged_fork(struct mm_struct *mm, struct mm_struct *oldmm
static inline void khugepaged_exit(struct mm_struct *mm)
{
}
-static inline void khugepaged_enter(struct vm_area_struct *vma,
- unsigned long vm_flags)
-{
-}
static inline void khugepaged_enter_vma(struct vm_area_struct *vma,
unsigned long vm_flags)
{
diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h
index 34684b2026ab..6a3cd1bf4680 100644
--- a/include/linux/kmemleak.h
+++ b/include/linux/kmemleak.h
@@ -29,10 +29,9 @@ extern void kmemleak_not_leak(const void *ptr) __ref;
extern void kmemleak_ignore(const void *ptr) __ref;
extern void kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp) __ref;
extern void kmemleak_no_scan(const void *ptr) __ref;
-extern void kmemleak_alloc_phys(phys_addr_t phys, size_t size, int min_count,
+extern void kmemleak_alloc_phys(phys_addr_t phys, size_t size,
gfp_t gfp) __ref;
extern void kmemleak_free_part_phys(phys_addr_t phys, size_t size) __ref;
-extern void kmemleak_not_leak_phys(phys_addr_t phys) __ref;
extern void kmemleak_ignore_phys(phys_addr_t phys) __ref;
static inline void kmemleak_alloc_recursive(const void *ptr, size_t size,
@@ -107,15 +106,12 @@ static inline void kmemleak_no_scan(const void *ptr)
{
}
static inline void kmemleak_alloc_phys(phys_addr_t phys, size_t size,
- int min_count, gfp_t gfp)
+ gfp_t gfp)
{
}
static inline void kmemleak_free_part_phys(phys_addr_t phys, size_t size)
{
}
-static inline void kmemleak_not_leak_phys(phys_addr_t phys)
-{
-}
static inline void kmemleak_ignore_phys(phys_addr_t phys)
{
}
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 9ecead1042b9..4d31ce55b1c0 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -837,6 +837,15 @@ static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
}
struct mem_cgroup *mem_cgroup_from_id(unsigned short id);
+#ifdef CONFIG_SHRINKER_DEBUG
+static inline unsigned long mem_cgroup_ino(struct mem_cgroup *memcg)
+{
+ return memcg ? cgroup_ino(memcg->css.cgroup) : 0;
+}
+
+struct mem_cgroup *mem_cgroup_get_from_ino(unsigned long ino);
+#endif
+
static inline struct mem_cgroup *mem_cgroup_from_seq(struct seq_file *m)
{
return mem_cgroup_from_css(seq_css(m));
@@ -1343,6 +1352,18 @@ static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
return NULL;
}
+#ifdef CONFIG_SHRINKER_DEBUG
+static inline unsigned long mem_cgroup_ino(struct mem_cgroup *memcg)
+{
+ return 0;
+}
+
+static inline struct mem_cgroup *mem_cgroup_get_from_ino(unsigned long ino)
+{
+ return NULL;
+}
+#endif
+
static inline struct mem_cgroup *mem_cgroup_from_seq(struct seq_file *m)
{
return NULL;
@@ -1740,6 +1761,7 @@ static inline int memcg_kmem_id(struct mem_cgroup *memcg)
}
struct mem_cgroup *mem_cgroup_from_obj(void *p);
+struct mem_cgroup *mem_cgroup_from_slab_obj(void *p);
static inline void count_objcg_event(struct obj_cgroup *objcg,
enum vm_event_item idx)
@@ -1755,6 +1777,42 @@ static inline void count_objcg_event(struct obj_cgroup *objcg,
rcu_read_unlock();
}
+/**
+ * get_mem_cgroup_from_obj - get a memcg associated with passed kernel object.
+ * @p: pointer to object from which memcg should be extracted. It can be NULL.
+ *
+ * Retrieves the memory group into which the memory of the pointed kernel
+ * object is accounted. If memcg is found, its reference is taken.
+ * If a passed kernel object is uncharged, or if proper memcg cannot be found,
+ * as well as if mem_cgroup is disabled, NULL is returned.
+ *
+ * Return: valid memcg pointer with taken reference or NULL.
+ */
+static inline struct mem_cgroup *get_mem_cgroup_from_obj(void *p)
+{
+ struct mem_cgroup *memcg;
+
+ rcu_read_lock();
+ do {
+ memcg = mem_cgroup_from_obj(p);
+ } while (memcg && !css_tryget(&memcg->css));
+ rcu_read_unlock();
+ return memcg;
+}
+
+/**
+ * mem_cgroup_or_root - always returns a pointer to a valid memory cgroup.
+ * @memcg: pointer to a valid memory cgroup or NULL.
+ *
+ * If passed argument is not NULL, returns it without any additional checks
+ * and changes. Otherwise, root_mem_cgroup is returned.
+ *
+ * NOTE: root_mem_cgroup can be NULL during early boot.
+ */
+static inline struct mem_cgroup *mem_cgroup_or_root(struct mem_cgroup *memcg)
+{
+ return memcg ? memcg : root_mem_cgroup;
+}
#else
static inline bool mem_cgroup_kmem_disabled(void)
{
@@ -1798,7 +1856,12 @@ static inline int memcg_kmem_id(struct mem_cgroup *memcg)
static inline struct mem_cgroup *mem_cgroup_from_obj(void *p)
{
- return NULL;
+ return NULL;
+}
+
+static inline struct mem_cgroup *mem_cgroup_from_slab_obj(void *p)
+{
+ return NULL;
}
static inline void count_objcg_event(struct obj_cgroup *objcg,
@@ -1806,6 +1869,15 @@ static inline void count_objcg_event(struct obj_cgroup *objcg,
{
}
+static inline struct mem_cgroup *get_mem_cgroup_from_obj(void *p)
+{
+ return NULL;
+}
+
+static inline struct mem_cgroup *mem_cgroup_or_root(struct mem_cgroup *memcg)
+{
+ return NULL;
+}
#endif /* CONFIG_MEMCG_KMEM */
#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_ZSWAP)
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 20d7edf62a6a..e0b2209ab71c 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -351,13 +351,4 @@ void arch_remove_linear_mapping(u64 start, u64 size);
extern bool mhp_supports_memmap_on_memory(unsigned long size);
#endif /* CONFIG_MEMORY_HOTPLUG */
-#ifdef CONFIG_MHP_MEMMAP_ON_MEMORY
-bool mhp_memmap_on_memory(void);
-#else
-static inline bool mhp_memmap_on_memory(void)
-{
- return false;
-}
-#endif
-
#endif /* __LINUX_MEMORY_HOTPLUG_H */
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 8af304f6b504..19010491a603 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -2,7 +2,7 @@
#ifndef _LINUX_MEMREMAP_H_
#define _LINUX_MEMREMAP_H_
-#include <linux/mm.h>
+#include <linux/mmzone.h>
#include <linux/range.h>
#include <linux/ioport.h>
#include <linux/percpu-refcount.h>
@@ -39,7 +39,14 @@ struct vmem_altmap {
* must be treated as an opaque object, rather than a "normal" struct page.
*
* A more complete discussion of unaddressable memory may be found in
- * include/linux/hmm.h and Documentation/vm/hmm.rst.
+ * include/linux/hmm.h and Documentation/mm/hmm.rst.
+ *
+ * MEMORY_DEVICE_COHERENT:
+ * Device memory that is cache coherent from device and CPU point of view. This
+ * is used on platforms that have an advanced system bus (like CAPI or CXL). A
+ * driver can hotplug the device memory using ZONE_DEVICE and with that memory
+ * type. Any page of a process can be migrated to such memory. However no one
+ * should be allowed to pin such memory so that it can always be evicted.
*
* MEMORY_DEVICE_FS_DAX:
* Host memory that has similar access semantics as System RAM i.e. DMA
@@ -61,6 +68,7 @@ struct vmem_altmap {
enum memory_type {
/* 0 is reserved to catch uninitialized type fields */
MEMORY_DEVICE_PRIVATE = 1,
+ MEMORY_DEVICE_COHERENT,
MEMORY_DEVICE_FS_DAX,
MEMORY_DEVICE_GENERIC,
MEMORY_DEVICE_PCI_P2PDMA,
@@ -79,6 +87,18 @@ struct dev_pagemap_ops {
* the page back to a CPU accessible page.
*/
vm_fault_t (*migrate_to_ram)(struct vm_fault *vmf);
+
+ /*
+ * Handle the memory failure happens on a range of pfns. Notify the
+ * processes who are using these pfns, and try to recover the data on
+ * them if necessary. The mf_flags is finally passed to the recover
+ * function through the whole notify routine.
+ *
+ * When this is not implemented, or it returns -EOPNOTSUPP, the caller
+ * will fall back to a common handler called mf_generic_kill_procs().
+ */
+ int (*memory_failure)(struct dev_pagemap *pgmap, unsigned long pfn,
+ unsigned long nr_pages, int mf_flags);
};
#define PGMAP_ALTMAP_VALID (1 << 0)
@@ -150,6 +170,17 @@ static inline bool is_pci_p2pdma_page(const struct page *page)
page->pgmap->type == MEMORY_DEVICE_PCI_P2PDMA;
}
+static inline bool is_device_coherent_page(const struct page *page)
+{
+ return is_zone_device_page(page) &&
+ page->pgmap->type == MEMORY_DEVICE_COHERENT;
+}
+
+static inline bool folio_is_device_coherent(const struct folio *folio)
+{
+ return is_device_coherent_page(&folio->page);
+}
+
#ifdef CONFIG_ZONE_DEVICE
void *memremap_pages(struct dev_pagemap *pgmap, int nid);
void memunmap_pages(struct dev_pagemap *pgmap);
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index ae5bb67a9ba1..22c0a0cf5e0c 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -182,6 +182,7 @@ static inline unsigned long migrate_pfn(unsigned long pfn)
enum migrate_vma_direction {
MIGRATE_VMA_SELECT_SYSTEM = 1 << 0,
MIGRATE_VMA_SELECT_DEVICE_PRIVATE = 1 << 1,
+ MIGRATE_VMA_SELECT_DEVICE_COHERENT = 1 << 2,
};
struct migrate_vma {
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a37b8c062daa..18e01474cf6b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -28,6 +28,7 @@
#include <linux/sched.h>
#include <linux/pgtable.h>
#include <linux/kasan.h>
+#include <linux/memremap.h>
struct mempolicy;
struct anon_vma;
@@ -427,7 +428,6 @@ extern unsigned int kobjsize(const void *objp);
* mapping from the currently active vm_flags protection bits (the
* low four bits) to a page protection mask..
*/
-extern pgprot_t protection_map[16];
/*
* The default fault flags that should be used by most of the
@@ -858,7 +858,7 @@ static inline struct folio *virt_to_folio(const void *x)
return page_folio(page);
}
-void __put_page(struct page *page);
+void __folio_put(struct folio *folio);
void put_pages_list(struct list_head *pages);
@@ -895,11 +895,7 @@ static inline void set_compound_page_dtor(struct page *page,
page[1].compound_dtor = compound_dtor;
}
-static inline void destroy_compound_page(struct page *page)
-{
- VM_BUG_ON_PAGE(page[1].compound_dtor >= NR_COMPOUND_DTORS, page);
- compound_page_dtors[page[1].compound_dtor](page);
-}
+void destroy_large_folio(struct folio *folio);
static inline int head_compound_pincount(struct page *head)
{
@@ -1052,84 +1048,6 @@ vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf);
* back into memory.
*/
-/*
- * The zone field is never updated after free_area_init_core()
- * sets it, so none of the operations on it need to be atomic.
- */
-
-/* Page flags: | [SECTION] | [NODE] | ZONE | [LAST_CPUPID] | ... | FLAGS | */
-#define SECTIONS_PGOFF ((sizeof(unsigned long)*8) - SECTIONS_WIDTH)
-#define NODES_PGOFF (SECTIONS_PGOFF - NODES_WIDTH)
-#define ZONES_PGOFF (NODES_PGOFF - ZONES_WIDTH)
-#define LAST_CPUPID_PGOFF (ZONES_PGOFF - LAST_CPUPID_WIDTH)
-#define KASAN_TAG_PGOFF (LAST_CPUPID_PGOFF - KASAN_TAG_WIDTH)
-
-/*
- * Define the bit shifts to access each section. For non-existent
- * sections we define the shift as 0; that plus a 0 mask ensures
- * the compiler will optimise away reference to them.
- */
-#define SECTIONS_PGSHIFT (SECTIONS_PGOFF * (SECTIONS_WIDTH != 0))
-#define NODES_PGSHIFT (NODES_PGOFF * (NODES_WIDTH != 0))
-#define ZONES_PGSHIFT (ZONES_PGOFF * (ZONES_WIDTH != 0))
-#define LAST_CPUPID_PGSHIFT (LAST_CPUPID_PGOFF * (LAST_CPUPID_WIDTH != 0))
-#define KASAN_TAG_PGSHIFT (KASAN_TAG_PGOFF * (KASAN_TAG_WIDTH != 0))
-
-/* NODE:ZONE or SECTION:ZONE is used to ID a zone for the buddy allocator */
-#ifdef NODE_NOT_IN_PAGE_FLAGS
-#define ZONEID_SHIFT (SECTIONS_SHIFT + ZONES_SHIFT)
-#define ZONEID_PGOFF ((SECTIONS_PGOFF < ZONES_PGOFF)? \
- SECTIONS_PGOFF : ZONES_PGOFF)
-#else
-#define ZONEID_SHIFT (NODES_SHIFT + ZONES_SHIFT)
-#define ZONEID_PGOFF ((NODES_PGOFF < ZONES_PGOFF)? \
- NODES_PGOFF : ZONES_PGOFF)
-#endif
-
-#define ZONEID_PGSHIFT (ZONEID_PGOFF * (ZONEID_SHIFT != 0))
-
-#define ZONES_MASK ((1UL << ZONES_WIDTH) - 1)
-#define NODES_MASK ((1UL << NODES_WIDTH) - 1)
-#define SECTIONS_MASK ((1UL << SECTIONS_WIDTH) - 1)
-#define LAST_CPUPID_MASK ((1UL << LAST_CPUPID_SHIFT) - 1)
-#define KASAN_TAG_MASK ((1UL << KASAN_TAG_WIDTH) - 1)
-#define ZONEID_MASK ((1UL << ZONEID_SHIFT) - 1)
-
-static inline enum zone_type page_zonenum(const struct page *page)
-{
- ASSERT_EXCLUSIVE_BITS(page->flags, ZONES_MASK << ZONES_PGSHIFT);
- return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK;
-}
-
-static inline enum zone_type folio_zonenum(const struct folio *folio)
-{
- return page_zonenum(&folio->page);
-}
-
-#ifdef CONFIG_ZONE_DEVICE
-static inline bool is_zone_device_page(const struct page *page)
-{
- return page_zonenum(page) == ZONE_DEVICE;
-}
-extern void memmap_init_zone_device(struct zone *, unsigned long,
- unsigned long, struct dev_pagemap *);
-#else
-static inline bool is_zone_device_page(const struct page *page)
-{
- return false;
-}
-#endif
-
-static inline bool folio_is_zone_device(const struct folio *folio)
-{
- return is_zone_device_page(&folio->page);
-}
-
-static inline bool is_zone_movable_page(const struct page *page)
-{
- return page_zonenum(page) == ZONE_MOVABLE;
-}
-
#if defined(CONFIG_ZONE_DEVICE) && defined(CONFIG_FS_DAX)
DECLARE_STATIC_KEY_FALSE(devmap_managed_key);
@@ -1204,7 +1122,7 @@ static inline __must_check bool try_get_page(struct page *page)
static inline void folio_put(struct folio *folio)
{
if (folio_put_testzero(folio))
- __put_page(&folio->page);
+ __folio_put(folio);
}
/**
@@ -1224,7 +1142,26 @@ static inline void folio_put(struct folio *folio)
static inline void folio_put_refs(struct folio *folio, int refs)
{
if (folio_ref_sub_and_test(folio, refs))
- __put_page(&folio->page);
+ __folio_put(folio);
+}
+
+void release_pages(struct page **pages, int nr);
+
+/**
+ * folios_put - Decrement the reference count on an array of folios.
+ * @folios: The folios.
+ * @nr: How many folios there are.
+ *
+ * Like folio_put(), but for an array of folios. This is more efficient
+ * than writing the loop yourself as it will optimise the locks which
+ * need to be taken if the folios are freed.
+ *
+ * Context: May be called in process or interrupt context, but not in NMI
+ * context. May be called while holding a spinlock.
+ */
+static inline void folios_put(struct folio **folios, unsigned int nr)
+{
+ release_pages((struct page **)folios, nr);
}
static inline void put_page(struct page *page)
@@ -1599,7 +1536,7 @@ static inline bool page_needs_cow_for_dma(struct vm_area_struct *vma,
/* MIGRATE_CMA and ZONE_MOVABLE do not allow pin pages */
#ifdef CONFIG_MIGRATION
-static inline bool is_pinnable_page(struct page *page)
+static inline bool is_longterm_pinnable_page(struct page *page)
{
#ifdef CONFIG_CMA
int mt = get_pageblock_migratetype(page);
@@ -1607,18 +1544,20 @@ static inline bool is_pinnable_page(struct page *page)
if (mt == MIGRATE_CMA || mt == MIGRATE_ISOLATE)
return false;
#endif
- return !is_zone_movable_page(page) || is_zero_pfn(page_to_pfn(page));
+ return !(is_device_coherent_page(page) ||
+ is_zone_movable_page(page) ||
+ is_zero_pfn(page_to_pfn(page)));
}
#else
-static inline bool is_pinnable_page(struct page *page)
+static inline bool is_longterm_pinnable_page(struct page *page)
{
return true;
}
#endif
-static inline bool folio_is_pinnable(struct folio *folio)
+static inline bool folio_is_longterm_pinnable(struct folio *folio)
{
- return is_pinnable_page(&folio->page);
+ return is_longterm_pinnable_page(&folio->page);
}
static inline void set_page_zone(struct page *page, enum zone_type zone)
@@ -1969,8 +1908,12 @@ extern unsigned long move_page_tables(struct vm_area_struct *vma,
* for now all the callers are only use one of the flags at the same
* time.
*/
-/* Whether we should allow dirty bit accounting */
-#define MM_CP_DIRTY_ACCT (1UL << 0)
+/*
+ * Whether we should manually check if we can map individual PTEs writable,
+ * because something (e.g., COW, uffd-wp) blocks that from happening for all
+ * PTEs automatically in a writable mapping.
+ */
+#define MM_CP_TRY_CHANGE_WRITABLE (1UL << 0)
/* Whether this protection change is for NUMA hints */
#define MM_CP_PROT_NUMA (1UL << 1)
/* Whether this change is for write protecting */
@@ -3241,6 +3184,8 @@ enum mf_flags {
MF_UNPOISON = 1 << 4,
MF_SW_SIMULATED = 1 << 5,
};
+int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index,
+ unsigned long count, int mf_flags);
extern int memory_failure(unsigned long pfn, int flags);
extern void memory_failure_queue(unsigned long pfn, int flags);
extern void memory_failure_queue_kick(int cpu);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index c29ab4c0cd5c..cf97f3884fda 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -87,6 +87,7 @@ struct page {
*/
union {
struct list_head lru;
+
/* Or, for the Unevictable "LRU list" slot */
struct {
/* Always even, to negate PageTail */
@@ -94,6 +95,10 @@ struct page {
/* Count page's or folio's mlocks */
unsigned int mlock_count;
};
+
+ /* Or, free page */
+ struct list_head buddy_list;
+ struct list_head pcp_list;
};
/* See page-flags.h for PAGE_MAPPING_FLAGS */
struct address_space *mapping;
@@ -729,6 +734,7 @@ typedef __bitwise unsigned int vm_fault_t;
* @VM_FAULT_NEEDDSYNC: ->fault did not modify page tables and needs
* fsync() to complete (for synchronous page faults
* in DAX)
+ * @VM_FAULT_COMPLETED: ->fault completed, meanwhile mmap lock released
* @VM_FAULT_HINDEX_MASK: mask HINDEX value
*
*/
@@ -746,6 +752,7 @@ enum vm_fault_reason {
VM_FAULT_FALLBACK = (__force vm_fault_t)0x000800,
VM_FAULT_DONE_COW = (__force vm_fault_t)0x001000,
VM_FAULT_NEEDDSYNC = (__force vm_fault_t)0x002000,
+ VM_FAULT_COMPLETED = (__force vm_fault_t)0x004000,
VM_FAULT_HINDEX_MASK = (__force vm_fault_t)0x0f0000,
};
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 45fc2c81e370..d6c06e140277 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -198,7 +198,7 @@ struct mmu_notifier_ops {
* invalidate_range_start()/end() notifiers, as
* invalidate_range() already catches the points in time when an
* external TLB range needs to be flushed. For more in depth
- * discussion on this see Documentation/vm/mmu_notifier.rst
+ * discussion on this see Documentation/mm/mmu_notifier.rst
*
* Note that this function might be called with just a sub-range
* of what was passed to invalidate_range_start()/end(), if
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index aab70355d64f..e24b40c52468 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -355,15 +355,18 @@ enum zone_watermarks {
};
/*
- * One per migratetype for each PAGE_ALLOC_COSTLY_ORDER plus one additional
- * for pageblock size for THP if configured.
+ * One per migratetype for each PAGE_ALLOC_COSTLY_ORDER. One additional list
+ * for THP which will usually be GFP_MOVABLE. Even if it is another type,
+ * it should not contribute to serious fragmentation causing THP allocation
+ * failures.
*/
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define NR_PCP_THP 1
#else
#define NR_PCP_THP 0
#endif
-#define NR_PCP_LISTS (MIGRATE_PCPTYPES * (PAGE_ALLOC_COSTLY_ORDER + 1 + NR_PCP_THP))
+#define NR_LOWORDER_PCP_LISTS (MIGRATE_PCPTYPES * (PAGE_ALLOC_COSTLY_ORDER + 1))
+#define NR_PCP_LISTS (NR_LOWORDER_PCP_LISTS + NR_PCP_THP)
/*
* Shift to encode migratetype and order in the same integer, with order
@@ -379,6 +382,7 @@ enum zone_watermarks {
/* Fields and list protected by pagesets local_lock in page_alloc.c */
struct per_cpu_pages {
+ spinlock_t lock; /* Protects lists field */
int count; /* number of pages in the list */
int high; /* high watermark, emptying needed */
int batch; /* chunk size for buddy add/remove */
@@ -389,7 +393,7 @@ struct per_cpu_pages {
/* Lists of pages, one per migrate type stored on the pcp-lists */
struct list_head lists[NR_PCP_LISTS];
-};
+} ____cacheline_aligned_in_smp;
struct per_cpu_zonestat {
#ifdef CONFIG_SMP
@@ -591,8 +595,8 @@ struct zone {
* give them a chance of being in the same cacheline.
*
* Write access to present_pages at runtime should be protected by
- * mem_hotplug_begin/end(). Any reader who can't tolerant drift of
- * present_pages should get_online_mems() to get a stable value.
+ * mem_hotplug_begin/done(). Any reader who can't tolerant drift of
+ * present_pages should use get_online_mems() to get a stable value.
*/
atomic_long_t managed_pages;
unsigned long spanned_pages;
@@ -730,6 +734,86 @@ static inline bool zone_is_empty(struct zone *zone)
return zone->spanned_pages == 0;
}
+#ifndef BUILD_VDSO32_64
+/*
+ * The zone field is never updated after free_area_init_core()
+ * sets it, so none of the operations on it need to be atomic.
+ */
+
+/* Page flags: | [SECTION] | [NODE] | ZONE | [LAST_CPUPID] | ... | FLAGS | */
+#define SECTIONS_PGOFF ((sizeof(unsigned long)*8) - SECTIONS_WIDTH)
+#define NODES_PGOFF (SECTIONS_PGOFF - NODES_WIDTH)
+#define ZONES_PGOFF (NODES_PGOFF - ZONES_WIDTH)
+#define LAST_CPUPID_PGOFF (ZONES_PGOFF - LAST_CPUPID_WIDTH)
+#define KASAN_TAG_PGOFF (LAST_CPUPID_PGOFF - KASAN_TAG_WIDTH)
+
+/*
+ * Define the bit shifts to access each section. For non-existent
+ * sections we define the shift as 0; that plus a 0 mask ensures
+ * the compiler will optimise away reference to them.
+ */
+#define SECTIONS_PGSHIFT (SECTIONS_PGOFF * (SECTIONS_WIDTH != 0))
+#define NODES_PGSHIFT (NODES_PGOFF * (NODES_WIDTH != 0))
+#define ZONES_PGSHIFT (ZONES_PGOFF * (ZONES_WIDTH != 0))
+#define LAST_CPUPID_PGSHIFT (LAST_CPUPID_PGOFF * (LAST_CPUPID_WIDTH != 0))
+#define KASAN_TAG_PGSHIFT (KASAN_TAG_PGOFF * (KASAN_TAG_WIDTH != 0))
+
+/* NODE:ZONE or SECTION:ZONE is used to ID a zone for the buddy allocator */
+#ifdef NODE_NOT_IN_PAGE_FLAGS
+#define ZONEID_SHIFT (SECTIONS_SHIFT + ZONES_SHIFT)
+#define ZONEID_PGOFF ((SECTIONS_PGOFF < ZONES_PGOFF) ? \
+ SECTIONS_PGOFF : ZONES_PGOFF)
+#else
+#define ZONEID_SHIFT (NODES_SHIFT + ZONES_SHIFT)
+#define ZONEID_PGOFF ((NODES_PGOFF < ZONES_PGOFF) ? \
+ NODES_PGOFF : ZONES_PGOFF)
+#endif
+
+#define ZONEID_PGSHIFT (ZONEID_PGOFF * (ZONEID_SHIFT != 0))
+
+#define ZONES_MASK ((1UL << ZONES_WIDTH) - 1)
+#define NODES_MASK ((1UL << NODES_WIDTH) - 1)
+#define SECTIONS_MASK ((1UL << SECTIONS_WIDTH) - 1)
+#define LAST_CPUPID_MASK ((1UL << LAST_CPUPID_SHIFT) - 1)
+#define KASAN_TAG_MASK ((1UL << KASAN_TAG_WIDTH) - 1)
+#define ZONEID_MASK ((1UL << ZONEID_SHIFT) - 1)
+
+static inline enum zone_type page_zonenum(const struct page *page)
+{
+ ASSERT_EXCLUSIVE_BITS(page->flags, ZONES_MASK << ZONES_PGSHIFT);
+ return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK;
+}
+
+static inline enum zone_type folio_zonenum(const struct folio *folio)
+{
+ return page_zonenum(&folio->page);
+}
+
+#ifdef CONFIG_ZONE_DEVICE
+static inline bool is_zone_device_page(const struct page *page)
+{
+ return page_zonenum(page) == ZONE_DEVICE;
+}
+extern void memmap_init_zone_device(struct zone *, unsigned long,
+ unsigned long, struct dev_pagemap *);
+#else
+static inline bool is_zone_device_page(const struct page *page)
+{
+ return false;
+}
+#endif
+
+static inline bool folio_is_zone_device(const struct folio *folio)
+{
+ return is_zone_device_page(&folio->page);
+}
+
+static inline bool is_zone_movable_page(const struct page *page)
+{
+ return page_zonenum(page) == ZONE_MOVABLE;
+}
+#endif
+
/*
* Return true if [start_pfn, start_pfn + nr_pages) range has a non-empty
* intersection with the given zone
@@ -870,7 +954,7 @@ typedef struct pglist_data {
unsigned long nr_reclaim_start; /* nr pages written while throttled
* when throttling started. */
struct task_struct *kswapd; /* Protected by
- mem_hotplug_begin/end() */
+ mem_hotplug_begin/done() */
int kswapd_order;
enum zone_type kswapd_highest_zoneidx;
@@ -1053,15 +1137,6 @@ static inline int is_highmem_idx(enum zone_type idx)
#endif
}
-#ifdef CONFIG_ZONE_DMA
-bool has_managed_dma(void);
-#else
-static inline bool has_managed_dma(void)
-{
- return false;
-}
-#endif
-
/**
* is_highmem - helper function to quickly check if a struct zone is a
* highmem zone or not. This is an attempt to keep references
@@ -1071,12 +1146,17 @@ static inline bool has_managed_dma(void)
*/
static inline int is_highmem(struct zone *zone)
{
-#ifdef CONFIG_HIGHMEM
return is_highmem_idx(zone_idx(zone));
+}
+
+#ifdef CONFIG_ZONE_DMA
+bool has_managed_dma(void);
#else
- return 0;
-#endif
+static inline bool has_managed_dma(void)
+{
+ return false;
}
+#endif
/* These two functions are used to setup the per zone pages min values */
struct ctl_table;
@@ -1418,16 +1498,32 @@ extern size_t mem_section_usage_size(void);
* (equal SECTION_SIZE_BITS - PAGE_SHIFT), and the
* worst combination is powerpc with 256k pages,
* which results in PFN_SECTION_SHIFT equal 6.
- * To sum it up, at least 6 bits are available.
+ * To sum it up, at least 6 bits are available on all architectures.
+ * However, we can exceed 6 bits on some other architectures except
+ * powerpc (e.g. 15 bits are available on x86_64, 13 bits are available
+ * with the worst case of 64K pages on arm64) if we make sure the
+ * exceeded bit is not applicable to powerpc.
*/
-#define SECTION_MARKED_PRESENT (1UL<<0)
-#define SECTION_HAS_MEM_MAP (1UL<<1)
-#define SECTION_IS_ONLINE (1UL<<2)
-#define SECTION_IS_EARLY (1UL<<3)
-#define SECTION_TAINT_ZONE_DEVICE (1UL<<4)
-#define SECTION_MAP_LAST_BIT (1UL<<5)
-#define SECTION_MAP_MASK (~(SECTION_MAP_LAST_BIT-1))
-#define SECTION_NID_SHIFT 6
+enum {
+ SECTION_MARKED_PRESENT_BIT,
+ SECTION_HAS_MEM_MAP_BIT,
+ SECTION_IS_ONLINE_BIT,
+ SECTION_IS_EARLY_BIT,
+#ifdef CONFIG_ZONE_DEVICE
+ SECTION_TAINT_ZONE_DEVICE_BIT,
+#endif
+ SECTION_MAP_LAST_BIT,
+};
+
+#define SECTION_MARKED_PRESENT BIT(SECTION_MARKED_PRESENT_BIT)
+#define SECTION_HAS_MEM_MAP BIT(SECTION_HAS_MEM_MAP_BIT)
+#define SECTION_IS_ONLINE BIT(SECTION_IS_ONLINE_BIT)
+#define SECTION_IS_EARLY BIT(SECTION_IS_EARLY_BIT)
+#ifdef CONFIG_ZONE_DEVICE
+#define SECTION_TAINT_ZONE_DEVICE BIT(SECTION_TAINT_ZONE_DEVICE_BIT)
+#endif
+#define SECTION_MAP_MASK (~(BIT(SECTION_MAP_LAST_BIT) - 1))
+#define SECTION_NID_SHIFT SECTION_MAP_LAST_BIT
static inline struct page *__section_mem_map_addr(struct mem_section *section)
{
@@ -1466,12 +1562,19 @@ static inline int online_section(struct mem_section *section)
return (section && (section->section_mem_map & SECTION_IS_ONLINE));
}
+#ifdef CONFIG_ZONE_DEVICE
static inline int online_device_section(struct mem_section *section)
{
unsigned long flags = SECTION_IS_ONLINE | SECTION_TAINT_ZONE_DEVICE;
return section && ((section->section_mem_map & flags) == flags);
}
+#else
+static inline int online_device_section(struct mem_section *section)
+{
+ return 0;
+}
+#endif
static inline int online_section_nr(unsigned long nr)
{
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 3f5490f6f038..ea19528564d1 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -193,6 +193,11 @@ enum pageflags {
/* Only valid for buddy pages. Used to track pages that are reported */
PG_reported = PG_uptodate,
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+ /* For self-hosted memmap pages */
+ PG_vmemmap_self_hosted = PG_owner_priv_1,
+#endif
};
#define PAGEFLAGS_MASK ((1UL << NR_PAGEFLAGS) - 1)
@@ -628,6 +633,12 @@ PAGEFLAG_FALSE(SkipKASanPoison, skip_kasan_poison)
*/
__PAGEFLAG(Reported, reported, PF_NO_COMPOUND)
+#ifdef CONFIG_MEMORY_HOTPLUG
+PAGEFLAG(VmemmapSelfHosted, vmemmap_self_hosted, PF_ANY)
+#else
+PAGEFLAG_FALSE(VmemmapSelfHosted, vmemmap_self_hosted)
+#endif
+
/*
* On an anonymous page mapped into a user virtual memory area,
* page->mapping points to its anon_vma, not to a struct address_space;
@@ -650,6 +661,12 @@ __PAGEFLAG(Reported, reported, PF_NO_COMPOUND)
#define PAGE_MAPPING_KSM (PAGE_MAPPING_ANON | PAGE_MAPPING_MOVABLE)
#define PAGE_MAPPING_FLAGS (PAGE_MAPPING_ANON | PAGE_MAPPING_MOVABLE)
+/*
+ * Different with flags above, this flag is used only for fsdax mode. It
+ * indicates that this page->mapping is now under reflink case.
+ */
+#define PAGE_MAPPING_DAX_COW 0x1
+
static __always_inline bool folio_mapping_flags(struct folio *folio)
{
return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) != 0;
@@ -670,6 +687,12 @@ static __always_inline bool PageAnon(struct page *page)
return folio_test_anon(page_folio(page));
}
+static __always_inline bool __folio_test_movable(const struct folio *folio)
+{
+ return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) ==
+ PAGE_MAPPING_MOVABLE;
+}
+
static __always_inline int __PageMovable(struct page *page)
{
return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) ==
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index cc9adbaddb59..0178b2040ea3 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -345,8 +345,6 @@ static inline void filemap_nr_thps_dec(struct address_space *mapping)
#endif
}
-void release_pages(struct page **pages, int nr);
-
struct address_space *page_mapping(struct page *);
struct address_space *folio_mapping(struct folio *);
struct address_space *swapcache_mapping(struct folio *);
diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
index 6649154a2115..215eb6c3bdc9 100644
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -26,7 +26,6 @@ struct pagevec {
};
void __pagevec_release(struct pagevec *pvec);
-void __pagevec_lru_add(struct pagevec *pvec);
unsigned pagevec_lookup_range_tag(struct pagevec *pvec,
struct address_space *mapping, pgoff_t *index, pgoff_t end,
xa_mark_t tag);
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 3cdc16cfd867..014ee8f0fbaa 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1689,4 +1689,32 @@ typedef unsigned int pgtbl_mod_mask;
#define MAX_PTRS_PER_P4D PTRS_PER_P4D
#endif
+/* description of effects of mapping type and prot in current implementation.
+ * this is due to the limited x86 page protection hardware. The expected
+ * behavior is in parens:
+ *
+ * map_type prot
+ * PROT_NONE PROT_READ PROT_WRITE PROT_EXEC
+ * MAP_SHARED r: (no) no r: (yes) yes r: (no) yes r: (no) yes
+ * w: (no) no w: (no) no w: (yes) yes w: (no) no
+ * x: (no) no x: (no) yes x: (no) yes x: (yes) yes
+ *
+ * MAP_PRIVATE r: (no) no r: (yes) yes r: (no) yes r: (no) yes
+ * w: (no) no w: (no) no w: (copy) copy w: (no) no
+ * x: (no) no x: (no) yes x: (no) yes x: (yes) yes
+ *
+ * On arm64, PROT_EXEC has the following behaviour for both MAP_SHARED and
+ * MAP_PRIVATE (with Enhanced PAN supported):
+ * r: (no) no
+ * w: (no) no
+ * x: (yes) yes
+ */
+#define DECLARE_VM_GET_PAGE_PROT \
+pgprot_t vm_get_page_prot(unsigned long vm_flags) \
+{ \
+ return protection_map[vm_flags & \
+ (VM_READ | VM_WRITE | VM_EXEC | VM_SHARED)]; \
+} \
+EXPORT_SYMBOL(vm_get_page_prot);
+
#endif /* _LINUX_PGTABLE_H */
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 9ec23138e410..bf80adca980b 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -325,8 +325,8 @@ struct page_vma_mapped_walk {
#define DEFINE_PAGE_VMA_WALK(name, _page, _vma, _address, _flags) \
struct page_vma_mapped_walk name = { \
.pfn = page_to_pfn(_page), \
- .nr_pages = compound_nr(page), \
- .pgoff = page_to_pgoff(page), \
+ .nr_pages = compound_nr(_page), \
+ .pgoff = page_to_pgoff(_page), \
.vma = _vma, \
.address = _address, \
.flags = _flags, \
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 8cd975a8bfeb..2a243616f222 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -29,7 +29,7 @@ extern struct mm_struct *mm_alloc(void);
*
* Use mmdrop() to release the reference acquired by mmgrab().
*
- * See also <Documentation/vm/active_mm.rst> for an in-depth explanation
+ * See also <Documentation/mm/active_mm.rst> for an in-depth explanation
* of &mm_struct.mm_count vs &mm_struct.mm_users.
*/
static inline void mmgrab(struct mm_struct *mm)
@@ -92,7 +92,7 @@ static inline void mmdrop_sched(struct mm_struct *mm)
*
* Use mmput() to release the reference acquired by mmget().
*
- * See also <Documentation/vm/active_mm.rst> for an in-depth explanation
+ * See also <Documentation/mm/active_mm.rst> for an in-depth explanation
* of &mm_struct.mm_count vs &mm_struct.mm_users.
*/
static inline void mmget(struct mm_struct *mm)
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index a68f982f22d1..1b6c4013f691 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -25,9 +25,20 @@ struct shmem_inode_info {
struct simple_xattrs xattrs; /* list of xattrs */
atomic_t stop_eviction; /* hold when working on inode */
struct timespec64 i_crtime; /* file creation time */
+ unsigned int fsflags; /* flags for FS_IOC_[SG]ETFLAGS */
struct inode vfs_inode;
};
+#define SHMEM_FL_USER_VISIBLE FS_FL_USER_VISIBLE
+#define SHMEM_FL_USER_MODIFIABLE FS_FL_USER_MODIFIABLE
+#define SHMEM_FL_INHERITED FS_FL_USER_MODIFIABLE
+
+/* Flags that are appropriate for regular files (all but dir-specific ones). */
+#define SHMEM_REG_FLMASK (~(FS_DIRSYNC_FL | FS_TOPDIR_FL))
+
+/* Flags that are appropriate for non-directories/regular files. */
+#define SHMEM_OTHER_FLMASK (FS_NODUMP_FL | FS_NOATIME_FL)
+
struct shmem_sb_info {
unsigned long max_blocks; /* How many blocks are allowed */
struct percpu_counter used_blocks; /* How many are allocated */
diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index 76fbf92b04d9..08e6054e061f 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -73,6 +73,11 @@ struct shrinker {
/* ID in shrinker_idr */
int id;
#endif
+#ifdef CONFIG_SHRINKER_DEBUG
+ int debugfs_id;
+ const char *name;
+ struct dentry *debugfs_entry;
+#endif
/* objs pending delete, per node */
atomic_long_t *nr_deferred;
};
@@ -88,10 +93,32 @@ struct shrinker {
*/
#define SHRINKER_NONSLAB (1 << 3)
-extern int prealloc_shrinker(struct shrinker *shrinker);
+extern int __printf(2, 3) prealloc_shrinker(struct shrinker *shrinker,
+ const char *fmt, ...);
extern void register_shrinker_prepared(struct shrinker *shrinker);
-extern int register_shrinker(struct shrinker *shrinker);
+extern int __printf(2, 3) register_shrinker(struct shrinker *shrinker,
+ const char *fmt, ...);
extern void unregister_shrinker(struct shrinker *shrinker);
extern void free_prealloced_shrinker(struct shrinker *shrinker);
extern void synchronize_shrinkers(void);
-#endif
+
+#ifdef CONFIG_SHRINKER_DEBUG
+extern int shrinker_debugfs_add(struct shrinker *shrinker);
+extern void shrinker_debugfs_remove(struct shrinker *shrinker);
+extern int __printf(2, 3) shrinker_debugfs_rename(struct shrinker *shrinker,
+ const char *fmt, ...);
+#else /* CONFIG_SHRINKER_DEBUG */
+static inline int shrinker_debugfs_add(struct shrinker *shrinker)
+{
+ return 0;
+}
+static inline void shrinker_debugfs_remove(struct shrinker *shrinker)
+{
+}
+static inline __printf(2, 3)
+int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...)
+{
+ return 0;
+}
+#endif /* CONFIG_SHRINKER_DEBUG */
+#endif /* _LINUX_SHRINKER_H */
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 8672a7123ccd..43150b9bbc5c 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -74,7 +74,7 @@ static inline int current_is_kswapd(void)
/*
* Unaddressable device memory support. See include/linux/hmm.h and
- * Documentation/vm/hmm.rst. Short description is we need struct pages for
+ * Documentation/mm/hmm.rst. Short description is we need struct pages for
* device memory that is unaddressable (inaccessible) by CPU, so that we can
* migrate part of a process memory to device memory.
*
@@ -411,10 +411,13 @@ extern void lru_cache_add_inactive_or_unevictable(struct page *page,
extern unsigned long zone_reclaimable_pages(struct zone *zone);
extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
gfp_t gfp_mask, nodemask_t *mask);
+
+#define MEMCG_RECLAIM_MAY_SWAP (1 << 1)
+#define MEMCG_RECLAIM_PROACTIVE (1 << 2)
extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
unsigned long nr_pages,
gfp_t gfp_mask,
- bool may_swap);
+ unsigned int reclaim_options);
extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem,
gfp_t gfp_mask, bool noswap,
pg_data_t *pgdat,
@@ -456,6 +459,7 @@ static inline unsigned long total_swapcache_pages(void)
return global_node_page_state(NR_SWAPCACHE);
}
+extern void free_swap_cache(struct page *page);
extern void free_page_and_swap_cache(struct page *);
extern void free_pages_and_swap_cache(struct page **, int);
/* linux/mm/swapfile.c */
@@ -540,6 +544,10 @@ static inline void put_swap_device(struct swap_info_struct *si)
/* used to sanity check ptes in zap_pte_range when CONFIG_SWAP=0 */
#define free_swap_and_cache(e) is_pfn_swap_entry(e)
+static inline void free_swap_cache(struct page *page)
+{
+}
+
static inline int add_swap_count_continuation(swp_entry_t swp, gfp_t gfp_mask)
{
return 0;
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index f24775b41880..bb7afd03a324 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -244,8 +244,10 @@ extern void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
spinlock_t *ptl);
extern void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
unsigned long address);
-extern void migration_entry_wait_huge(struct vm_area_struct *vma,
- struct mm_struct *mm, pte_t *pte);
+#ifdef CONFIG_HUGETLB_PAGE
+extern void __migration_entry_wait_huge(pte_t *ptep, spinlock_t *ptl);
+extern void migration_entry_wait_huge(struct vm_area_struct *vma, pte_t *pte);
+#endif
#else
static inline swp_entry_t make_readable_migration_entry(pgoff_t offset)
{
@@ -271,8 +273,10 @@ static inline void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
spinlock_t *ptl) { }
static inline void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
unsigned long address) { }
-static inline void migration_entry_wait_huge(struct vm_area_struct *vma,
- struct mm_struct *mm, pte_t *pte) { }
+#ifdef CONFIG_HUGETLB_PAGE
+static inline void __migration_entry_wait_huge(pte_t *ptep, spinlock_t *ptl) { }
+static inline void migration_entry_wait_huge(struct vm_area_struct *vma, pte_t *pte) { }
+#endif
static inline int is_writable_migration_entry(swp_entry_t entry)
{
return 0;