summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-12-15 12:53:37 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2020-12-15 12:53:37 -0800
commitac73e3dc8acd0a3be292755db30388c3580f5674 (patch)
tree5abef6cb82b205b5dbbb69dca950b8a5aae716de /include
parent148842c98a24e508aecb929718818fbf4c2a6ff3 (diff)
parentdfefd226b0bf7c435a58d75a0ce2f9273b9825f6 (diff)
Merge branch 'akpm' (patches from Andrew)
Merge misc updates from Andrew Morton: - a few random little subsystems - almost all of the MM patches which are staged ahead of linux-next material. I'll trickle the post-linux-next work in as the dependents get merged up. Subsystems affected by this patch series: kthread, kbuild, ide, ntfs, ocfs2, arch, and mm (slab-generic, slab, slub, dax, debug, pagecache, gup, swap, shmem, memcg, pagemap, mremap, hmm, vmalloc, documentation, kasan, pagealloc, memory-failure, hugetlb, vmscan, z3fold, compaction, oom-kill, migration, cma, page-poison, userfaultfd, zswap, zsmalloc, uaccess, zram, and cleanups). * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (200 commits) mm: cleanup kstrto*() usage mm: fix fall-through warnings for Clang mm: slub: convert sysfs sprintf family to sysfs_emit/sysfs_emit_at mm: shmem: convert shmem_enabled_show to use sysfs_emit_at mm:backing-dev: use sysfs_emit in macro defining functions mm: huge_memory: convert remaining use of sprintf to sysfs_emit and neatening mm: use sysfs_emit for struct kobject * uses mm: fix kernel-doc markups zram: break the strict dependency from lzo zram: add stat to gather incompressible pages since zram set up zram: support page writeback mm/process_vm_access: remove redundant initialization of iov_r mm/zsmalloc.c: rework the list_add code in insert_zspage() mm/zswap: move to use crypto_acomp API for hardware acceleration mm/zswap: fix passing zero to 'PTR_ERR' warning mm/zswap: make struct kernel_param_ops definitions const userfaultfd/selftests: hint the test runner on required privilege userfaultfd/selftests: fix retval check for userfaultfd_open() userfaultfd/selftests: always dump something in modes userfaultfd: selftests: make __{s,u}64 format specifiers portable ...
Diffstat (limited to 'include')
-rw-r--r--include/linux/cgroup-defs.h15
-rw-r--r--include/linux/compaction.h12
-rw-r--r--include/linux/fs.h2
-rw-r--r--include/linux/gfp.h2
-rw-r--r--include/linux/highmem.h19
-rw-r--r--include/linux/huge_mm.h93
-rw-r--r--include/linux/memcontrol.h148
-rw-r--r--include/linux/migrate.h4
-rw-r--r--include/linux/mm.h114
-rw-r--r--include/linux/mm_types.h8
-rw-r--r--include/linux/mmap_lock.h94
-rw-r--r--include/linux/mmzone.h50
-rw-r--r--include/linux/page-flags.h6
-rw-r--r--include/linux/page_ext.h8
-rw-r--r--include/linux/pagevec.h3
-rw-r--r--include/linux/poison.h4
-rw-r--r--include/linux/rmap.h1
-rw-r--r--include/linux/sched/mm.h16
-rw-r--r--include/linux/set_memory.h5
-rw-r--r--include/linux/shmem_fs.h6
-rw-r--r--include/linux/slab.h18
-rw-r--r--include/linux/vmalloc.h8
-rw-r--r--include/linux/vmstat.h104
-rw-r--r--include/trace/events/mmap_lock.h107
-rw-r--r--include/trace/events/sched.h84
-rw-r--r--include/uapi/linux/const.h5
-rw-r--r--include/uapi/linux/ethtool.h2
-rw-r--r--include/uapi/linux/kernel.h9
-rw-r--r--include/uapi/linux/lightnvm.h2
-rw-r--r--include/uapi/linux/mroute6.h2
-rw-r--r--include/uapi/linux/netfilter/x_tables.h2
-rw-r--r--include/uapi/linux/netlink.h2
-rw-r--r--include/uapi/linux/sysctl.h2
-rw-r--r--include/uapi/linux/userfaultfd.h9
34 files changed, 609 insertions, 357 deletions
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index fee0b5547cd0..559ee05f86b2 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -668,21 +668,6 @@ struct cgroup_subsys {
*/
bool threaded:1;
- /*
- * If %false, this subsystem is properly hierarchical -
- * configuration, resource accounting and restriction on a parent
- * cgroup cover those of its children. If %true, hierarchy support
- * is broken in some ways - some subsystems ignore hierarchy
- * completely while others are only implemented half-way.
- *
- * It's now disallowed to create nested cgroups if the subsystem is
- * broken and cgroup core will emit a warning message on such
- * cases. Eventually, all subsystems will be made properly
- * hierarchical and this will go away.
- */
- bool broken_hierarchy:1;
- bool warned_broken_hierarchy:1;
-
/* the following two fields are initialized automatically during boot */
int id;
const char *name;
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 1de5a1151ee7..ed4070ed41ef 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -98,11 +98,8 @@ extern void reset_isolation_suitable(pg_data_t *pgdat);
extern enum compact_result compaction_suitable(struct zone *zone, int order,
unsigned int alloc_flags, int highest_zoneidx);
-extern void defer_compaction(struct zone *zone, int order);
-extern bool compaction_deferred(struct zone *zone, int order);
extern void compaction_defer_reset(struct zone *zone, int order,
bool alloc_success);
-extern bool compaction_restarting(struct zone *zone, int order);
/* Compaction has made some progress and retrying makes sense */
static inline bool compaction_made_progress(enum compact_result result)
@@ -194,15 +191,6 @@ static inline enum compact_result compaction_suitable(struct zone *zone, int ord
return COMPACT_SKIPPED;
}
-static inline void defer_compaction(struct zone *zone, int order)
-{
-}
-
-static inline bool compaction_deferred(struct zone *zone, int order)
-{
- return true;
-}
-
static inline bool compaction_made_progress(enum compact_result result)
{
return false;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8667d0cdc71e..1fcc2b00582b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3230,7 +3230,7 @@ static inline bool vma_is_fsdax(struct vm_area_struct *vma)
{
struct inode *inode;
- if (!vma->vm_file)
+ if (!IS_ENABLED(CONFIG_FS_DAX) || !vma->vm_file)
return false;
if (!vma_is_dax(vma))
return false;
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index c603237e006c..6e479e9c48ce 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -580,8 +580,6 @@ void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask);
extern void __free_pages(struct page *page, unsigned int order);
extern void free_pages(unsigned long addr, unsigned int order);
-extern void free_unref_page(struct page *page);
-extern void free_unref_page_list(struct list_head *list);
struct page_frag_cache;
extern void __page_frag_cache_drain(struct page *page, unsigned int count);
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index f597830f26b4..d2c70d3772a3 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -204,13 +204,22 @@ static inline void clear_highpage(struct page *page)
kunmap_atomic(kaddr);
}
+/*
+ * If we pass in a base or tail page, we can zero up to PAGE_SIZE.
+ * If we pass in a head page, we can zero up to the size of the compound page.
+ */
+#if defined(CONFIG_HIGHMEM) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
+void zero_user_segments(struct page *page, unsigned start1, unsigned end1,
+ unsigned start2, unsigned end2);
+#else /* !HIGHMEM || !TRANSPARENT_HUGEPAGE */
static inline void zero_user_segments(struct page *page,
- unsigned start1, unsigned end1,
- unsigned start2, unsigned end2)
+ unsigned start1, unsigned end1,
+ unsigned start2, unsigned end2)
{
void *kaddr = kmap_atomic(page);
+ unsigned int i;
- BUG_ON(end1 > PAGE_SIZE || end2 > PAGE_SIZE);
+ BUG_ON(end1 > page_size(page) || end2 > page_size(page));
if (end1 > start1)
memset(kaddr + start1, 0, end1 - start1);
@@ -219,8 +228,10 @@ static inline void zero_user_segments(struct page *page,
memset(kaddr + start2, 0, end2 - start2);
kunmap_atomic(kaddr);
- flush_dcache_page(page);
+ for (i = 0; i < compound_nr(page); i++)
+ flush_dcache_page(page + i);
}
+#endif /* !HIGHMEM || !TRANSPARENT_HUGEPAGE */
static inline void zero_user_segment(struct page *page,
unsigned start, unsigned end)
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 0365aa97f8e7..6a19f35f836b 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -7,43 +7,37 @@
#include <linux/fs.h> /* only for vma_is_dax() */
-extern vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf);
-extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
- pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
- struct vm_area_struct *vma);
-extern void huge_pmd_set_accessed(struct vm_fault *vmf, pmd_t orig_pmd);
-extern int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm,
- pud_t *dst_pud, pud_t *src_pud, unsigned long addr,
- struct vm_area_struct *vma);
+vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf);
+int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+ pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
+ struct vm_area_struct *vma);
+void huge_pmd_set_accessed(struct vm_fault *vmf, pmd_t orig_pmd);
+int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+ pud_t *dst_pud, pud_t *src_pud, unsigned long addr,
+ struct vm_area_struct *vma);
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
-extern void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud);
+void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud);
#else
static inline void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud)
{
}
#endif
-extern vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd);
-extern struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
- unsigned long addr,
- pmd_t *pmd,
- unsigned int flags);
-extern bool madvise_free_huge_pmd(struct mmu_gather *tlb,
- struct vm_area_struct *vma,
- pmd_t *pmd, unsigned long addr, unsigned long next);
-extern int zap_huge_pmd(struct mmu_gather *tlb,
- struct vm_area_struct *vma,
- pmd_t *pmd, unsigned long addr);
-extern int zap_huge_pud(struct mmu_gather *tlb,
- struct vm_area_struct *vma,
- pud_t *pud, unsigned long addr);
-extern bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
- unsigned long new_addr,
- pmd_t *old_pmd, pmd_t *new_pmd);
-extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
- unsigned long addr, pgprot_t newprot,
- unsigned long cp_flags);
+vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd);
+struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
+ unsigned long addr, pmd_t *pmd,
+ unsigned int flags);
+bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
+ pmd_t *pmd, unsigned long addr, unsigned long next);
+int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd,
+ unsigned long addr);
+int zap_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma, pud_t *pud,
+ unsigned long addr);
+bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
+ unsigned long new_addr, pmd_t *old_pmd, pmd_t *new_pmd);
+int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr,
+ pgprot_t newprot, unsigned long cp_flags);
vm_fault_t vmf_insert_pfn_pmd_prot(struct vm_fault *vmf, pfn_t pfn,
pgprot_t pgprot, bool write);
@@ -100,13 +94,13 @@ enum transparent_hugepage_flag {
struct kobject;
struct kobj_attribute;
-extern ssize_t single_hugepage_flag_store(struct kobject *kobj,
- struct kobj_attribute *attr,
- const char *buf, size_t count,
- enum transparent_hugepage_flag flag);
-extern ssize_t single_hugepage_flag_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buf,
- enum transparent_hugepage_flag flag);
+ssize_t single_hugepage_flag_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count,
+ enum transparent_hugepage_flag flag);
+ssize_t single_hugepage_flag_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf,
+ enum transparent_hugepage_flag flag);
extern struct kobj_attribute shmem_enabled_attr;
#define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT)
@@ -179,12 +173,11 @@ static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
(transparent_hugepage_flags & \
(1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG))
-extern unsigned long thp_get_unmapped_area(struct file *filp,
- unsigned long addr, unsigned long len, unsigned long pgoff,
- unsigned long flags);
+unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr,
+ unsigned long len, unsigned long pgoff, unsigned long flags);
-extern void prep_transhuge_page(struct page *page);
-extern void free_transhuge_page(struct page *page);
+void prep_transhuge_page(struct page *page);
+void free_transhuge_page(struct page *page);
bool is_transparent_hugepage(struct page *page);
bool can_split_huge_page(struct page *page, int *pextra_pins);
@@ -222,16 +215,12 @@ void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud,
__split_huge_pud(__vma, __pud, __address); \
} while (0)
-extern int hugepage_madvise(struct vm_area_struct *vma,
- unsigned long *vm_flags, int advice);
-extern void vma_adjust_trans_huge(struct vm_area_struct *vma,
- unsigned long start,
- unsigned long end,
- long adjust_next);
-extern spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd,
- struct vm_area_struct *vma);
-extern spinlock_t *__pud_trans_huge_lock(pud_t *pud,
- struct vm_area_struct *vma);
+int hugepage_madvise(struct vm_area_struct *vma, unsigned long *vm_flags,
+ int advice);
+void vma_adjust_trans_huge(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end, long adjust_next);
+spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma);
+spinlock_t *__pud_trans_huge_lock(pud_t *pud, struct vm_area_struct *vma);
static inline int is_swap_pmd(pmd_t pmd)
{
@@ -294,7 +283,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
pud_t *pud, int flags, struct dev_pagemap **pgmap);
-extern vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd);
+vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd);
extern struct page *huge_zero_page;
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 922a7f600465..f530d634f055 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -235,11 +235,6 @@ struct mem_cgroup {
struct vmpressure vmpressure;
/*
- * Should the accounting and control be hierarchical, per subtree?
- */
- bool use_hierarchy;
-
- /*
* Should the OOM killer kill all belonging tasks, had it kill one?
*/
bool oom_group;
@@ -296,7 +291,6 @@ struct mem_cgroup {
int tcpmem_pressure;
#ifdef CONFIG_MEMCG_KMEM
- /* Index in the kmem_cache->memcg_params.memcg_caches array */
int kmemcg_id;
enum memcg_kmem_state kmem_state;
struct obj_cgroup __rcu *objcg;
@@ -589,8 +583,6 @@ static inline bool mem_cgroup_is_descendant(struct mem_cgroup *memcg,
{
if (root == memcg)
return true;
- if (!root->use_hierarchy)
- return false;
return cgroup_is_descendant(memcg->css.cgroup, root->css.cgroup);
}
@@ -794,19 +786,15 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
int val);
-void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
- int val);
-void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, int val);
-
-void mod_memcg_obj_state(void *p, int idx, int val);
+void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val);
-static inline void mod_lruvec_slab_state(void *p, enum node_stat_item idx,
+static inline void mod_lruvec_kmem_state(void *p, enum node_stat_item idx,
int val)
{
unsigned long flags;
local_irq_save(flags);
- __mod_lruvec_slab_state(p, idx, val);
+ __mod_lruvec_kmem_state(p, idx, val);
local_irq_restore(flags);
}
@@ -820,43 +808,6 @@ static inline void mod_memcg_lruvec_state(struct lruvec *lruvec,
local_irq_restore(flags);
}
-static inline void mod_lruvec_state(struct lruvec *lruvec,
- enum node_stat_item idx, int val)
-{
- unsigned long flags;
-
- local_irq_save(flags);
- __mod_lruvec_state(lruvec, idx, val);
- local_irq_restore(flags);
-}
-
-static inline void __mod_lruvec_page_state(struct page *page,
- enum node_stat_item idx, int val)
-{
- struct page *head = compound_head(page); /* rmap on tail pages */
- pg_data_t *pgdat = page_pgdat(page);
- struct lruvec *lruvec;
-
- /* Untracked pages have no memcg, no lruvec. Update only the node */
- if (!head->mem_cgroup) {
- __mod_node_page_state(pgdat, idx, val);
- return;
- }
-
- lruvec = mem_cgroup_lruvec(head->mem_cgroup, pgdat);
- __mod_lruvec_state(lruvec, idx, val);
-}
-
-static inline void mod_lruvec_page_state(struct page *page,
- enum node_stat_item idx, int val)
-{
- unsigned long flags;
-
- local_irq_save(flags);
- __mod_lruvec_page_state(page, idx, val);
- local_irq_restore(flags);
-}
-
unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
gfp_t gfp_mask,
unsigned long *total_scanned);
@@ -1215,31 +1166,7 @@ static inline void __mod_memcg_lruvec_state(struct lruvec *lruvec,
{
}
-static inline void __mod_lruvec_state(struct lruvec *lruvec,
- enum node_stat_item idx, int val)
-{
- __mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
-}
-
-static inline void mod_lruvec_state(struct lruvec *lruvec,
- enum node_stat_item idx, int val)
-{
- mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
-}
-
-static inline void __mod_lruvec_page_state(struct page *page,
- enum node_stat_item idx, int val)
-{
- __mod_node_page_state(page_pgdat(page), idx, val);
-}
-
-static inline void mod_lruvec_page_state(struct page *page,
- enum node_stat_item idx, int val)
-{
- mod_node_page_state(page_pgdat(page), idx, val);
-}
-
-static inline void __mod_lruvec_slab_state(void *p, enum node_stat_item idx,
+static inline void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx,
int val)
{
struct page *page = virt_to_head_page(p);
@@ -1247,7 +1174,7 @@ static inline void __mod_lruvec_slab_state(void *p, enum node_stat_item idx,
__mod_node_page_state(page_pgdat(page), idx, val);
}
-static inline void mod_lruvec_slab_state(void *p, enum node_stat_item idx,
+static inline void mod_lruvec_kmem_state(void *p, enum node_stat_item idx,
int val)
{
struct page *page = virt_to_head_page(p);
@@ -1255,10 +1182,6 @@ static inline void mod_lruvec_slab_state(void *p, enum node_stat_item idx,
mod_node_page_state(page_pgdat(page), idx, val);
}
-static inline void mod_memcg_obj_state(void *p, int idx, int val)
-{
-}
-
static inline
unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
gfp_t gfp_mask,
@@ -1322,38 +1245,14 @@ static inline void __dec_memcg_page_state(struct page *page,
__mod_memcg_page_state(page, idx, -1);
}
-static inline void __inc_lruvec_state(struct lruvec *lruvec,
- enum node_stat_item idx)
-{
- __mod_lruvec_state(lruvec, idx, 1);
-}
-
-static inline void __dec_lruvec_state(struct lruvec *lruvec,
- enum node_stat_item idx)
-{
- __mod_lruvec_state(lruvec, idx, -1);
-}
-
-static inline void __inc_lruvec_page_state(struct page *page,
- enum node_stat_item idx)
-{
- __mod_lruvec_page_state(page, idx, 1);
-}
-
-static inline void __dec_lruvec_page_state(struct page *page,
- enum node_stat_item idx)
-{
- __mod_lruvec_page_state(page, idx, -1);
-}
-
-static inline void __inc_lruvec_slab_state(void *p, enum node_stat_item idx)
+static inline void __inc_lruvec_kmem_state(void *p, enum node_stat_item idx)
{
- __mod_lruvec_slab_state(p, idx, 1);
+ __mod_lruvec_kmem_state(p, idx, 1);
}
-static inline void __dec_lruvec_slab_state(void *p, enum node_stat_item idx)
+static inline void __dec_lruvec_kmem_state(void *p, enum node_stat_item idx)
{
- __mod_lruvec_slab_state(p, idx, -1);
+ __mod_lruvec_kmem_state(p, idx, -1);
}
/* idx can be of type enum memcg_stat_item or node_stat_item */
@@ -1384,30 +1283,6 @@ static inline void dec_memcg_page_state(struct page *page,
mod_memcg_page_state(page, idx, -1);
}
-static inline void inc_lruvec_state(struct lruvec *lruvec,
- enum node_stat_item idx)
-{
- mod_lruvec_state(lruvec, idx, 1);
-}
-
-static inline void dec_lruvec_state(struct lruvec *lruvec,
- enum node_stat_item idx)
-{
- mod_lruvec_state(lruvec, idx, -1);
-}
-
-static inline void inc_lruvec_page_state(struct page *page,
- enum node_stat_item idx)
-{
- mod_lruvec_page_state(page, idx, 1);
-}
-
-static inline void dec_lruvec_page_state(struct page *page,
- enum node_stat_item idx)
-{
- mod_lruvec_page_state(page, idx, -1);
-}
-
static inline struct lruvec *parent_lruvec(struct lruvec *lruvec)
{
struct mem_cgroup *memcg;
@@ -1568,9 +1443,8 @@ static inline void memcg_kmem_uncharge(struct mem_cgroup *memcg,
}
/*
- * helper for accessing a memcg's index. It will be used as an index in the
- * child cache array in kmem_cache, and also to derive its name. This function
- * will return -1 when this is not a kmem-limited memcg.
+ * A helper for accessing a memcg's kmemcg_id, used for getting the
+ * corresponding LRU lists.
*/
static inline int memcg_cache_id(struct mem_cgroup *memcg)
{
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 0f8d1583fa8e..4594838a0f7c 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -45,8 +45,8 @@ extern struct page *alloc_migration_target(struct page *page, unsigned long priv
extern int isolate_movable_page(struct page *page, isolate_mode_t mode);
extern void putback_movable_page(struct page *page);
-extern int migrate_prep(void);
-extern int migrate_prep_local(void);
+extern void migrate_prep(void);
+extern void migrate_prep_local(void);
extern void migrate_page_states(struct page *newpage, struct page *page);
extern void migrate_page_copy(struct page *newpage, struct page *page);
extern int migrate_huge_page_move_mapping(struct address_space *mapping,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1813fa86b981..e189509323f8 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -557,8 +557,9 @@ enum page_entry_size {
struct vm_operations_struct {
void (*open)(struct vm_area_struct * area);
void (*close)(struct vm_area_struct * area);
- int (*split)(struct vm_area_struct * area, unsigned long addr);
- int (*mremap)(struct vm_area_struct * area);
+ /* Called any time before splitting to check if it's allowed */
+ int (*may_split)(struct vm_area_struct *area, unsigned long addr);
+ int (*mremap)(struct vm_area_struct *area, unsigned long flags);
/*
* Called by mprotect() to make driver-specific permission
* checks before mprotect() is finalised. The VMA must not
@@ -1723,8 +1724,8 @@ extern int access_process_vm(struct task_struct *tsk, unsigned long addr,
void *buf, int len, unsigned int gup_flags);
extern int access_remote_vm(struct mm_struct *mm, unsigned long addr,
void *buf, int len, unsigned int gup_flags);
-extern int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
- unsigned long addr, void *buf, int len, unsigned int gup_flags);
+extern int __access_remote_vm(struct mm_struct *mm, unsigned long addr,
+ void *buf, int len, unsigned int gup_flags);
long get_user_pages_remote(struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
@@ -2210,7 +2211,7 @@ static inline bool pgtable_pte_page_ctor(struct page *page)
if (!ptlock_init(page))
return false;
__SetPageTable(page);
- inc_zone_page_state(page, NR_PAGETABLE);
+ inc_lruvec_page_state(page, NR_PAGETABLE);
return true;
}
@@ -2218,7 +2219,7 @@ static inline void pgtable_pte_page_dtor(struct page *page)
{
ptlock_free(page);
__ClearPageTable(page);
- dec_zone_page_state(page, NR_PAGETABLE);
+ dec_lruvec_page_state(page, NR_PAGETABLE);
}
#define pte_offset_map_lock(mm, pmd, address, ptlp) \
@@ -2305,7 +2306,7 @@ static inline bool pgtable_pmd_page_ctor(struct page *page)
if (!pmd_ptlock_init(page))
return false;
__SetPageTable(page);
- inc_zone_page_state(page, NR_PAGETABLE);
+ inc_lruvec_page_state(page, NR_PAGETABLE);
return true;
}
@@ -2313,7 +2314,7 @@ static inline void pgtable_pmd_page_dtor(struct page *page)
{
pmd_ptlock_free(page);
__ClearPageTable(page);
- dec_zone_page_state(page, NR_PAGETABLE);
+ dec_lruvec_page_state(page, NR_PAGETABLE);
}
/*
@@ -2440,9 +2441,6 @@ static inline int early_pfn_to_nid(unsigned long pfn)
#else
/* please see mm/page_alloc.c */
extern int __meminit early_pfn_to_nid(unsigned long pfn);
-/* there is a per-arch backend function. */
-extern int __meminit __early_pfn_to_nid(unsigned long pfn,
- struct mminit_pfnnid_cache *state);
#endif
extern void set_dma_reserve(unsigned long new_dma_reserve);
@@ -2881,44 +2879,56 @@ extern int apply_to_existing_page_range(struct mm_struct *mm,
unsigned long address, unsigned long size,
pte_fn_t fn, void *data);
+extern void init_mem_debugging_and_hardening(void);
#ifdef CONFIG_PAGE_POISONING
-extern bool page_poisoning_enabled(void);
-extern void kernel_poison_pages(struct page *page, int numpages, int enable);
+extern void __kernel_poison_pages(struct page *page, int numpages);
+extern void __kernel_unpoison_pages(struct page *page, int numpages);
+extern bool _page_poisoning_enabled_early;
+DECLARE_STATIC_KEY_FALSE(_page_poisoning_enabled);
+static inline bool page_poisoning_enabled(void)
+{
+ return _page_poisoning_enabled_early;
+}
+/*
+ * For use in fast paths after init_mem_debugging_and_hardening() has run,
+ * or when a false negative result is not harmful when called too early.
+ */
+static inline bool page_poisoning_enabled_static(void)
+{
+ return static_branch_unlikely(&_page_poisoning_enabled);
+}
+static inline void kernel_poison_pages(struct page *page, int numpages)
+{
+ if (page_poisoning_enabled_static())
+ __kernel_poison_pages(page, numpages);
+}
+static inline void kernel_unpoison_pages(struct page *page, int numpages)
+{
+ if (page_poisoning_enabled_static())
+ __kernel_unpoison_pages(page, numpages);
+}
#else
static inline bool page_poisoning_enabled(void) { return false; }
-static inline void kernel_poison_pages(struct page *page, int numpages,
- int enable) { }
+static inline bool page_poisoning_enabled_static(void) { return false; }
+static inline void __kernel_poison_pages(struct page *page, int nunmpages) { }
+static inline void kernel_poison_pages(struct page *page, int numpages) { }
+static inline void kernel_unpoison_pages(struct page *page, int numpages) { }
#endif
-#ifdef CONFIG_INIT_ON_ALLOC_DEFAULT_ON
-DECLARE_STATIC_KEY_TRUE(init_on_alloc);
-#else
DECLARE_STATIC_KEY_FALSE(init_on_alloc);
-#endif
static inline bool want_init_on_alloc(gfp_t flags)
{
- if (static_branch_unlikely(&init_on_alloc) &&
- !page_poisoning_enabled())
+ if (static_branch_unlikely(&init_on_alloc))
return true;
return flags & __GFP_ZERO;
}
-#ifdef CONFIG_INIT_ON_FREE_DEFAULT_ON
-DECLARE_STATIC_KEY_TRUE(init_on_free);
-#else
DECLARE_STATIC_KEY_FALSE(init_on_free);
-#endif
static inline bool want_init_on_free(void)
{
- return static_branch_unlikely(&init_on_free) &&
- !page_poisoning_enabled();
+ return static_branch_unlikely(&init_on_free);
}
-#ifdef CONFIG_DEBUG_PAGEALLOC
-extern void init_debug_pagealloc(void);
-#else
-static inline void init_debug_pagealloc(void) {}
-#endif
extern bool _debug_pagealloc_enabled_early;
DECLARE_STATIC_KEY_FALSE(_debug_pagealloc_enabled);
@@ -2940,28 +2950,28 @@ static inline bool debug_pagealloc_enabled_static(void)
return static_branch_unlikely(&_debug_pagealloc_enabled);
}
-#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_ARCH_HAS_SET_DIRECT_MAP)
-extern void __kernel_map_pages(struct page *page, int numpages, int enable);
-
+#ifdef CONFIG_DEBUG_PAGEALLOC
/*
- * When called in DEBUG_PAGEALLOC context, the call should most likely be
- * guarded by debug_pagealloc_enabled() or debug_pagealloc_enabled_static()
+ * To support DEBUG_PAGEALLOC, an architecture must ensure that
+ * __kernel_map_pages() never fails.
*/
-static inline void
-kernel_map_pages(struct page *page, int numpages, int enable)
-{
- __kernel_map_pages(page, numpages, enable);
-}
-#ifdef CONFIG_HIBERNATION
-extern bool kernel_page_present(struct page *page);
-#endif /* CONFIG_HIBERNATION */
-#else /* CONFIG_DEBUG_PAGEALLOC || CONFIG_ARCH_HAS_SET_DIRECT_MAP */
-static inline void
-kernel_map_pages(struct page *page, int numpages, int enable) {}
-#ifdef CONFIG_HIBERNATION
-static inline bool kernel_page_present(struct page *page) { return true; }
-#endif /* CONFIG_HIBERNATION */
-#endif /* CONFIG_DEBUG_PAGEALLOC || CONFIG_ARCH_HAS_SET_DIRECT_MAP */
+extern void __kernel_map_pages(struct page *page, int numpages, int enable);
+
+static inline void debug_pagealloc_map_pages(struct page *page, int numpages)
+{
+ if (debug_pagealloc_enabled_static())
+ __kernel_map_pages(page, numpages, 1);
+}
+
+static inline void debug_pagealloc_unmap_pages(struct page *page, int numpages)
+{
+ if (debug_pagealloc_enabled_static())
+ __kernel_map_pages(page, numpages, 0);
+}
+#else /* CONFIG_DEBUG_PAGEALLOC */
+static inline void debug_pagealloc_map_pages(struct page *page, int numpages) {}
+static inline void debug_pagealloc_unmap_pages(struct page *page, int numpages) {}
+#endif /* CONFIG_DEBUG_PAGEALLOC */
#ifdef __HAVE_ARCH_GATE_AREA
extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 5a9238f6caad..915f4f100383 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -14,6 +14,7 @@
#include <linux/uprobes.h>
#include <linux/page-flags-layout.h>
#include <linux/workqueue.h>
+#include <linux/seqlock.h>
#include <asm/mmu.h>
@@ -446,6 +447,13 @@ struct mm_struct {
*/
atomic_t has_pinned;
+ /**
+ * @write_protect_seq: Locked when any thread is write
+ * protecting pages mapped by this mm to enforce a later COW,
+ * for instance during page table copying for fork().
+ */
+ seqcount_t write_protect_seq;
+
#ifdef CONFIG_MMU
atomic_long_t pgtables_bytes; /* PTE page table pages */
#endif
diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
index 18e7eae9b5ba..0540f0156f58 100644
--- a/include/linux/mmap_lock.h
+++ b/include/linux/mmap_lock.h
@@ -1,11 +1,65 @@
#ifndef _LINUX_MMAP_LOCK_H
#define _LINUX_MMAP_LOCK_H
+#include <linux/lockdep.h>
+#include <linux/mm_types.h>
#include <linux/mmdebug.h>
+#include <linux/rwsem.h>
+#include <linux/tracepoint-defs.h>
+#include <linux/types.h>
#define MMAP_LOCK_INITIALIZER(name) \
.mmap_lock = __RWSEM_INITIALIZER((name).mmap_lock),
+DECLARE_TRACEPOINT(mmap_lock_start_locking);
+DECLARE_TRACEPOINT(mmap_lock_acquire_returned);
+DECLARE_TRACEPOINT(mmap_lock_released);
+
+#ifdef CONFIG_TRACING
+
+void __mmap_lock_do_trace_start_locking(struct mm_struct *mm, bool write);
+void __mmap_lock_do_trace_acquire_returned(struct mm_struct *mm, bool write,
+ bool success);
+void __mmap_lock_do_trace_released(struct mm_struct *mm, bool write);
+
+static inline void __mmap_lock_trace_start_locking(struct mm_struct *mm,
+ bool write)
+{
+ if (tracepoint_enabled(mmap_lock_start_locking))
+ __mmap_lock_do_trace_start_locking(mm, write);
+}
+
+static inline void __mmap_lock_trace_acquire_returned(struct mm_struct *mm,
+ bool write, bool success)
+{
+ if (tracepoint_enabled(mmap_lock_acquire_returned))
+ __mmap_lock_do_trace_acquire_returned(mm, write, success);
+}
+
+static inline void __mmap_lock_trace_released(struct mm_struct *mm, bool write)
+{
+ if (tracepoint_enabled(mmap_lock_released))
+ __mmap_lock_do_trace_released(mm, write);
+}
+
+#else /* !CONFIG_TRACING */
+
+static inline void __mmap_lock_trace_start_locking(struct mm_struct *mm,
+ bool write)
+{
+}
+
+static inline void __mmap_lock_trace_acquire_returned(struct mm_struct *mm,
+ bool write, bool success)
+{
+}
+
+static inline void __mmap_lock_trace_released(struct mm_struct *mm, bool write)
+{
+}
+
+#endif /* CONFIG_TRACING */
+
static inline void mmap_init_lock(struct mm_struct *mm)
{
init_rwsem(&mm->mmap_lock);
@@ -13,57 +67,86 @@ static inline void mmap_init_lock(struct mm_struct *mm)
static inline void mmap_write_lock(struct mm_struct *mm)
{
+ __mmap_lock_trace_start_locking(mm, true);
down_write(&mm->mmap_lock);
+ __mmap_lock_trace_acquire_returned(mm, true, true);
}
static inline void mmap_write_lock_nested(struct mm_struct *mm, int subclass)
{
+ __mmap_lock_trace_start_locking(mm, true);
down_write_nested(&mm->mmap_lock, subclass);
+ __mmap_lock_trace_acquire_returned(mm, true, true);
}
static inline int mmap_write_lock_killable(struct mm_struct *mm)
{
- return down_write_killable(&mm->mmap_lock);
+ int ret;
+
+ __mmap_lock_trace_start_locking(mm, true);
+ ret = down_write_killable(&mm->mmap_lock);
+ __mmap_lock_trace_acquire_returned(mm, true, ret == 0);
+ return ret;
}
static inline bool mmap_write_trylock(struct mm_struct *mm)
{
- return down_write_trylock(&mm->mmap_lock) != 0;
+ bool ret;
+
+ __mmap_lock_trace_start_locking(mm, true);
+ ret = down_write_trylock(&mm->mmap_lock) != 0;
+ __mmap_lock_trace_acquire_returned(mm, true, ret);
+ return ret;
}
static inline void mmap_write_unlock(struct mm_struct *mm)
{
up_write(&mm->mmap_lock);
+ __mmap_lock_trace_released(mm, true);
}
static inline void mmap_write_downgrade(struct mm_struct *mm)
{
downgrade_write(&mm->mmap_lock);
+ __mmap_lock_trace_acquire_returned(mm, false, true);
}
static inline void mmap_read_lock(struct mm_struct *mm)
{
+ __mmap_lock_trace_start_locking(mm, false);
down_read(&mm->mmap_lock);
+ __mmap_lock_trace_acquire_returned(mm, false, true);
}
static inline int mmap_read_lock_killable(struct mm_struct *mm)
{
- return down_read_killable(&mm->mmap_lock);
+ int ret;
+
+ __mmap_lock_trace_start_locking(mm, false);
+ ret = down_read_killable(&mm->mmap_lock);
+ __mmap_lock_trace_acquire_returned(mm, false, ret == 0);
+ return ret;
}
static inline bool mmap_read_trylock(struct mm_struct *mm)
{
- return down_read_trylock(&mm->mmap_lock) != 0;
+ bool ret;
+
+ __mmap_lock_trace_start_locking(mm, false);
+ ret = down_read_trylock(&mm->mmap_lock) != 0;
+ __mmap_lock_trace_acquire_returned(mm, false, ret);
+ return ret;
}
static inline void mmap_read_unlock(struct mm_struct *mm)
{
up_read(&mm->mmap_lock);
+ __mmap_lock_trace_released(mm, false);
}
static inline bool mmap_read_trylock_non_owner(struct mm_struct *mm)
{
- if (down_read_trylock(&mm->mmap_lock)) {
+ if (mmap_read_trylock(mm)) {
rwsem_release(&mm->mmap_lock.dep_map, _RET_IP_);
return true;
}
@@ -73,6 +156,7 @@ static inline bool mmap_read_trylock_non_owner(struct mm_struct *mm)
static inline void mmap_read_unlock_non_owner(struct mm_struct *mm)
{
up_read_non_owner(&mm->mmap_lock);
+ __mmap_lock_trace_released(mm, false);
}
static inline void mmap_assert_locked(struct mm_struct *mm)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 9d0c454d23cd..98a80c01d150 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -152,7 +152,6 @@ enum zone_stat_item {
NR_ZONE_UNEVICTABLE,
NR_ZONE_WRITE_PENDING, /* Count of dirty, writeback and unstable pages */
NR_MLOCK, /* mlock()ed pages found and moved off LRU */
- NR_PAGETABLE, /* used for pagetables */
/* Second 128 byte cacheline */
NR_BOUNCE,
#if IS_ENABLED(CONFIG_ZSMALLOC)
@@ -207,6 +206,7 @@ enum node_stat_item {
#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
NR_KERNEL_SCS_KB, /* measured in KiB */
#endif
+ NR_PAGETABLE, /* used for pagetables */
NR_VM_NODE_STAT_ITEMS
};
@@ -450,6 +450,12 @@ struct zone {
#endif
struct pglist_data *zone_pgdat;
struct per_cpu_pageset __percpu *pageset;
+ /*
+ * the high and batch values are copied to individual pagesets for
+ * faster access
+ */
+ int pageset_high;
+ int pageset_batch;
#ifndef CONFIG_SPARSEMEM
/*
@@ -1409,17 +1415,6 @@ void sparse_init(void);
#endif /* CONFIG_SPARSEMEM */
/*
- * During memory init memblocks map pfns to nids. The search is expensive and
- * this caches recent lookups. The implementation of __early_pfn_to_nid
- * may treat start/end as pfns or sections.
- */
-struct mminit_pfnnid_cache {
- unsigned long last_start;
- unsigned long last_end;
- int last_nid;
-};
-
-/*
* If it is possible to have holes within a MAX_ORDER_NR_PAGES, then we
* need to check pfn validity within that MAX_ORDER_NR_PAGES block.
* pfn_valid_within() should be used in this case; we optimise this away
@@ -1431,37 +1426,6 @@ struct mminit_pfnnid_cache {
#define pfn_valid_within(pfn) (1)
#endif
-#ifdef CONFIG_ARCH_HAS_HOLES_MEMORYMODEL
-/*
- * pfn_valid() is meant to be able to tell if a given PFN has valid memmap
- * associated with it or not. This means that a struct page exists for this
- * pfn. The caller cannot assume the page is fully initialized in general.
- * Hotplugable pages might not have been onlined yet. pfn_to_online_page()
- * will ensure the struct page is fully online and initialized. Special pages
- * (e.g. ZONE_DEVICE) are never onlined and should be treated accordingly.
- *
- * In FLATMEM, it is expected that holes always have valid memmap as long as
- * there is valid PFNs either side of the hole. In SPARSEMEM, it is assumed
- * that a valid section has a memmap for the entire section.
- *
- * However, an ARM, and maybe other embedded architectures in the future
- * free memmap backing holes to save memory on the assumption the memmap is
- * never used. The page_zone linkages are then broken even though pfn_valid()
- * returns true. A walker of the full memmap must then do this additional
- * check to ensure the memmap they are looking at is sane by making sure
- * the zone and PFN linkages are still valid. This is expensive, but walkers
- * of the full memmap are extremely rare.
- */
-bool memmap_valid_within(unsigned long pfn,
- struct page *page, struct zone *zone);
-#else
-static inline bool memmap_valid_within(unsigned long pfn,
- struct page *page, struct zone *zone)
-{
- return true;
-}
-#endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */
-
#endif /* !__GENERATING_BOUNDS.H */
#endif /* !__ASSEMBLY__ */
#endif /* _LINUX_MMZONE_H */
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 4f6ba9379112..c1368af622c7 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -86,8 +86,7 @@
*/
/*
- * Don't use the *_dontuse flags. Use the macros. Otherwise you'll break
- * locked- and dirty-page accounting.
+ * Don't use the pageflags directly. Use the PageFoo macros.
*
* The page flags field is split into two parts, the main flags area
* which extends from the low bits upwards, and the fields area which
@@ -363,8 +362,7 @@ PAGEFLAG(SwapBacked, swapbacked, PF_NO_TAIL)
* for its own purposes.
* - PG_private and PG_private_2 cause releasepage() and co to be invoked
*/
-PAGEFLAG(Private, private, PF_ANY) __SETPAGEFLAG(Private, private, PF_ANY)
- __CLEARPAGEFLAG(Private, private, PF_ANY)
+PAGEFLAG(Private, private, PF_ANY)
PAGEFLAG(Private2, private_2, PF_ANY) TESTSCFLAG(Private2, private_2, PF_ANY)
PAGEFLAG(OwnerPriv1, owner_priv_1, PF_ANY)
TESTCLEARFLAG(OwnerPriv1, owner_priv_1, PF_ANY)
diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h
index cfce186f0c4e..aff81ba31bd8 100644
--- a/include/linux/page_ext.h
+++ b/include/linux/page_ext.h
@@ -44,8 +44,12 @@ static inline void page_ext_init_flatmem(void)
{
}
extern void page_ext_init(void);
+static inline void page_ext_init_flatmem_late(void)
+{
+}
#else
extern void page_ext_init_flatmem(void);
+extern void page_ext_init_flatmem_late(void);
static inline void page_ext_init(void)
{
}
@@ -76,6 +80,10 @@ static inline void page_ext_init(void)
{
}
+static inline void page_ext_init_flatmem_late(void)
+{
+}
+
static inline void page_ext_init_flatmem(void)
{
}
diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
index 081d934eda64..ad4ddc17d403 100644
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -43,9 +43,6 @@ static inline unsigned pagevec_lookup(struct pagevec *pvec,
unsigned pagevec_lookup_range_tag(struct pagevec *pvec,
struct address_space *mapping, pgoff_t *index, pgoff_t end,
xa_mark_t tag);
-unsigned pagevec_lookup_range_nr_tag(struct pagevec *pvec,
- struct address_space *mapping, pgoff_t *index, pgoff_t end,
- xa_mark_t tag, unsigned max_pages);
static inline unsigned pagevec_lookup_tag(struct pagevec *pvec,
struct address_space *mapping, pgoff_t *index, xa_mark_t tag)
{
diff --git a/include/linux/poison.h b/include/linux/poison.h
index dc8ae5d8db03..aff1c9250c82 100644
--- a/include/linux/poison.h
+++ b/include/linux/poison.h
@@ -27,11 +27,7 @@
#define TIMER_ENTRY_STATIC ((void *) 0x300 + POISON_POINTER_DELTA)
/********** mm/page_poison.c **********/
-#ifdef CONFIG_PAGE_POISONING_ZERO
-#define PAGE_POISON 0x00
-#else
#define PAGE_POISON 0xaa
-#endif
/********** mm/page_alloc.c ************/
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 3a6adfa70fb0..70085ca1a3fc 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -91,7 +91,6 @@ enum ttu_flags {
TTU_SPLIT_HUGE_PMD = 0x4, /* split huge PMD if any */
TTU_IGNORE_MLOCK = 0x8, /* ignore mlock */
- TTU_IGNORE_ACCESS = 0x10, /* don't age */
TTU_IGNORE_HWPOISON = 0x20, /* corrupted page is recoverable */
TTU_BATCH_FLUSH = 0x40, /* Batch TLB flushes where possible
* and caller guarantees they will
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index a91fb3ad9ec7..1ae08b8462a4 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -181,6 +181,22 @@ static inline void fs_reclaim_release(gfp_t gfp_mask) { }
#endif
/**
+ * might_alloc - Mark possible allocation sites
+ * @gfp_mask: gfp_t flags that would be used to allocate
+ *
+ * Similar to might_sleep() and other annotations, this can be used in functions
+ * that might allocate, but often don't. Compiles to nothing without
+ * CONFIG_LOCKDEP. Includes a conditional might_sleep() if @gfp_mask allows blocking.
+ */
+static inline void might_alloc(gfp_t gfp_mask)
+{
+ fs_reclaim_acquire(gfp_mask);
+ fs_reclaim_release(gfp_mask);
+
+ might_sleep_if(gfpflags_allow_blocking(gfp_mask));
+}
+
+/**
* memalloc_noio_save - Marks implicit GFP_NOIO allocation scope.
*
* This functions marks the beginning of the GFP_NOIO allocation scope.
diff --git a/include/linux/set_memory.h b/include/linux/set_memory.h
index 860e0f843c12..fe1aa4e54680 100644
--- a/include/linux/set_memory.h
+++ b/include/linux/set_memory.h
@@ -23,6 +23,11 @@ static inline int set_direct_map_default_noflush(struct page *page)
{
return 0;
}
+
+static inline bool kernel_page_present(struct page *page)
+{
+ return true;
+}
#endif
#ifndef set_mce_nospec
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index a5a5d1d4d7b1..d82b6f396588 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -67,7 +67,11 @@ extern unsigned long shmem_get_unmapped_area(struct file *, unsigned long addr,
unsigned long len, unsigned long pgoff, unsigned long flags);
extern int shmem_lock(struct file *file, int lock, struct user_struct *user);
#ifdef CONFIG_SHMEM
-extern bool shmem_mapping(struct address_space *mapping);
+extern const struct address_space_operations shmem_aops;
+static inline bool shmem_mapping(struct address_space *mapping)
+{
+ return mapping->a_ops == &shmem_aops;
+}
#else
static inline bool shmem_mapping(struct address_space *mapping)
{
diff --git a/include/linux/slab.h b/include/linux/slab.h
index dd6897f62010..be4ba5867ac5 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -593,6 +593,24 @@ static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags)
}
/**
+ * krealloc_array - reallocate memory for an array.
+ * @p: pointer to the memory chunk to reallocate
+ * @new_n: new number of elements to alloc
+ * @new_size: new size of a single member of the array
+ * @flags: the type of memory to allocate (see kmalloc)
+ */
+static __must_check inline void *
+krealloc_array(void *p, size_t new_n, size_t new_size, gfp_t flags)
+{
+ size_t bytes;
+
+ if (unlikely(check_mul_overflow(new_n, new_size, &bytes)))
+ return NULL;
+
+ return krealloc(p, bytes, flags);
+}
+
+/**
* kcalloc - allocate memory for an array. The memory is set to zero.
* @n: number of elements.
* @size: element size.
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 938eaf9517e2..80c0181c411d 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -72,16 +72,14 @@ struct vmap_area {
struct list_head list; /* address sorted list */
/*
- * The following three variables can be packed, because
- * a vmap_area object is always one of the three states:
+ * The following two variables can be packed, because
+ * a vmap_area object can be either:
* 1) in "free" tree (root is free_vmap_area_root)
- * 2) in "busy" tree (root is free_vmap_area_root)
- * 3) in purge list (head is vmap_purge_list)
+ * 2) or "busy" tree (root is vmap_area_root)
*/
union {
unsigned long subtree_max_size; /* in "free" tree */
struct vm_struct *vm; /* in "busy" tree */
- struct llist_node purge_list; /* in purge list */
};
};
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 322dcbfcc933..773135fc6e19 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -450,4 +450,108 @@ static inline const char *vm_event_name(enum vm_event_item item)
}
#endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */
+#ifdef CONFIG_MEMCG
+
+void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
+ int val);
+
+static inline void mod_lruvec_state(struct lruvec *lruvec,
+ enum node_stat_item idx, int val)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __mod_lruvec_state(lruvec, idx, val);
+ local_irq_restore(flags);
+}
+
+void __mod_lruvec_page_state(struct page *page,
+ enum node_stat_item idx, int val);
+
+static inline void mod_lruvec_page_state(struct page *page,
+ enum node_stat_item idx, int val)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __mod_lruvec_page_state(page, idx, val);
+ local_irq_restore(flags);
+}
+
+#else
+
+static inline void __mod_lruvec_state(struct lruvec *lruvec,
+ enum node_stat_item idx, int val)
+{
+ __mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
+}
+
+static inline void mod_lruvec_state(struct lruvec *lruvec,
+ enum node_stat_item idx, int val)
+{
+ mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
+}
+
+static inline void __mod_lruvec_page_state(struct page *page,
+ enum node_stat_item idx, int val)
+{
+ __mod_node_page_state(page_pgdat(page), idx, val);
+}
+
+static inline void mod_lruvec_page_state(struct page *page,
+ enum node_stat_item idx, int val)
+{
+ mod_node_page_state(page_pgdat(page), idx, val);
+}
+
+#endif /* CONFIG_MEMCG */
+
+static inline void __inc_lruvec_state(struct lruvec *lruvec,
+ enum node_stat_item idx)
+{
+ __mod_lruvec_state(lruvec, idx, 1);
+}
+
+static inline void __dec_lruvec_state(struct lruvec *lruvec,
+ enum node_stat_item idx)
+{
+ __mod_lruvec_state(lruvec, idx, -1);
+}
+
+static inline void __inc_lruvec_page_state(struct page *page,
+ enum node_stat_item idx)
+{
+ __mod_lruvec_page_state(page, idx, 1);
+}
+
+static inline void __dec_lruvec_page_state(struct page *page,
+ enum node_stat_item idx)
+{
+ __mod_lruvec_page_state(page, idx, -1);
+}
+
+static inline void inc_lruvec_state(struct lruvec *lruvec,
+ enum node_stat_item idx)
+{
+ mod_lruvec_state(lruvec, idx, 1);
+}
+
+static inline void dec_lruvec_state(struct lruvec *lruvec,
+ enum node_stat_item idx)
+{
+ mod_lruvec_state(lruvec, idx, -1);
+}
+
+static inline void inc_lruvec_page_state(struct page *page,
+ enum node_stat_item idx)
+{
+ mod_lruvec_page_state(page, idx, 1);
+}
+
+static inline void dec_lruvec_page_state(struct page *page,
+ enum node_stat_item idx)
+{
+ mod_lruvec_page_state(page, idx, -1);
+}
+
#endif /* _LINUX_VMSTAT_H */
diff --git a/include/trace/events/mmap_lock.h b/include/trace/events/mmap_lock.h
new file mode 100644
index 000000000000..0abff67b96f0
--- /dev/null
+++ b/include/trace/events/mmap_lock.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mmap_lock
+
+#if !defined(_TRACE_MMAP_LOCK_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MMAP_LOCK_H
+
+#include <linux/tracepoint.h>
+#include <linux/types.h>
+
+struct mm_struct;
+
+extern int trace_mmap_lock_reg(void);
+extern void trace_mmap_lock_unreg(void);
+
+TRACE_EVENT_FN(mmap_lock_start_locking,
+
+ TP_PROTO(struct mm_struct *mm, const char *memcg_path, bool write),
+
+ TP_ARGS(mm, memcg_path, write),
+
+ TP_STRUCT__entry(
+ __field(struct mm_struct *, mm)
+ __string(memcg_path, memcg_path)
+ __field(bool, write)
+ ),
+
+ TP_fast_assign(
+ __entry->mm = mm;
+ __assign_str(memcg_path, memcg_path);
+ __entry->write = write;
+ ),
+
+ TP_printk(
+ "mm=%p memcg_path=%s write=%s\n",
+ __entry->mm,
+ __get_str(memcg_path),
+ __entry->write ? "true" : "false"
+ ),
+
+ trace_mmap_lock_reg, trace_mmap_lock_unreg
+);
+
+TRACE_EVENT_FN(mmap_lock_acquire_returned,
+
+ TP_PROTO(struct mm_struct *mm, const char *memcg_path, bool write,
+ bool success),
+
+ TP_ARGS(mm, memcg_path, write, success),
+
+ TP_STRUCT__entry(
+ __field(struct mm_struct *, mm)
+ __string(memcg_path, memcg_path)
+ __field(bool, write)
+ __field(bool, success)
+ ),
+
+ TP_fast_assign(
+ __entry->mm = mm;
+ __assign_str(memcg_path, memcg_path);
+ __entry->write = write;
+ __entry->success = success;
+ ),
+
+ TP_printk(
+ "mm=%p memcg_path=%s write=%s success=%s\n",
+ __entry->mm,
+ __get_str(memcg_path),
+ __entry->write ? "true" : "false",
+ __entry->success ? "true" : "false"
+ ),
+
+ trace_mmap_lock_reg, trace_mmap_lock_unreg
+);
+
+TRACE_EVENT_FN(mmap_lock_released,
+
+ TP_PROTO(struct mm_struct *mm, const char *memcg_path, bool write),
+
+ TP_ARGS(mm, memcg_path, write),
+
+ TP_STRUCT__entry(
+ __field(struct mm_struct *, mm)
+ __string(memcg_path, memcg_path)
+ __field(bool, write)
+ ),
+
+ TP_fast_assign(
+ __entry->mm = mm;
+ __assign_str(memcg_path, memcg_path);
+ __entry->write = write;
+ ),
+
+ TP_printk(
+ "mm=%p memcg_path=%s write=%s\n",
+ __entry->mm,
+ __get_str(memcg_path),
+ __entry->write ? "true" : "false"
+ ),
+
+ trace_mmap_lock_reg, trace_mmap_lock_unreg
+);
+
+#endif /* _TRACE_MMAP_LOCK_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index c96a4337afe6..5039af667645 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -5,6 +5,7 @@
#if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_SCHED_H
+#include <linux/kthread.h>
#include <linux/sched/numa_balancing.h>
#include <linux/tracepoint.h>
#include <linux/binfmts.h>
@@ -51,6 +52,89 @@ TRACE_EVENT(sched_kthread_stop_ret,
TP_printk("ret=%d", __entry->ret)
);
+/**
+ * sched_kthread_work_queue_work - called when a work gets queued
+ * @worker: pointer to the kthread_worker
+ * @work: pointer to struct kthread_work
+ *
+ * This event occurs when a work is queued immediately or once a
+ * delayed work is actually queued (ie: once the delay has been
+ * reached).
+ */
+TRACE_EVENT(sched_kthread_work_queue_work,
+
+ TP_PROTO(struct kthread_worker *worker,
+ struct kthread_work *work),
+
+ TP_ARGS(worker, work),
+
+ TP_STRUCT__entry(
+ __field( void *, work )
+ __field( void *, function)
+ __field( void *, worker)
+ ),
+
+ TP_fast_assign(
+ __entry->work = work;
+ __entry->function = work->func;
+ __entry->worker = worker;
+ ),
+
+ TP_printk("work struct=%p function=%ps worker=%p",
+ __entry->work, __entry->function, __entry->worker)
+);
+
+/**
+ * sched_kthread_work_execute_start - called immediately before the work callback
+ * @work: pointer to struct kthread_work
+ *
+ * Allows to track kthread work execution.
+ */
+TRACE_EVENT(sched_kthread_work_execute_start,
+
+ TP_PROTO(struct kthread_work *work),
+
+ TP_ARGS(work),
+
+ TP_STRUCT__entry(
+ __field( void *, work )
+ __field( void *, function)
+ ),
+
+ TP_fast_assign(
+ __entry->work = work;
+ __entry->function = work->func;
+ ),
+
+ TP_printk("work struct %p: function %ps", __entry->work, __entry->function)
+);
+
+/**
+ * sched_kthread_work_execute_end - called immediately after the work callback
+ * @work: pointer to struct kthread_work
+ * @function: pointer to worker function
+ *
+ * Allows to track kthread work execution.
+ */
+TRACE_EVENT(sched_kthread_work_execute_end,
+
+ TP_PROTO(struct kthread_work *work, kthread_work_func_t function),
+
+ TP_ARGS(work, function),
+
+ TP_STRUCT__entry(
+ __field( void *, work )
+ __field( void *, function)
+ ),
+
+ TP_fast_assign(
+ __entry->work = work;
+ __entry->function = function;
+ ),
+
+ TP_printk("work struct %p: function %ps", __entry->work, __entry->function)
+);
+
/*
* Tracepoint for waking up a task:
*/
diff --git a/include/uapi/linux/const.h b/include/uapi/linux/const.h
index 5ed721ad5b19..af2a44c08683 100644
--- a/include/uapi/linux/const.h
+++ b/include/uapi/linux/const.h
@@ -28,4 +28,9 @@
#define _BITUL(x) (_UL(1) << (x))
#define _BITULL(x) (_ULL(1) << (x))
+#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1)
+#define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask))
+
+#define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
+
#endif /* _UAPI_LINUX_CONST_H */
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 9ca87bc73c44..cde753bb2093 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -14,7 +14,7 @@
#ifndef _UAPI_LINUX_ETHTOOL_H
#define _UAPI_LINUX_ETHTOOL_H
-#include <linux/kernel.h>
+#include <linux/const.h>
#include <linux/types.h>
#include <linux/if_ether.h>
diff --git a/include/uapi/linux/kernel.h b/include/uapi/linux/kernel.h
index 0ff8f7477847..fadf2db71fe8 100644
--- a/include/uapi/linux/kernel.h
+++ b/include/uapi/linux/kernel.h
@@ -3,13 +3,6 @@
#define _UAPI_LINUX_KERNEL_H
#include <linux/sysinfo.h>
-
-/*
- * 'kernel.h' contains some often-used function prototypes etc
- */
-#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1)
-#define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask))
-
-#define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
+#include <linux/const.h>
#endif /* _UAPI_LINUX_KERNEL_H */
diff --git a/include/uapi/linux/lightnvm.h b/include/uapi/linux/lightnvm.h
index f9a1be7fc696..ead2e72e5c88 100644
--- a/include/uapi/linux/lightnvm.h
+++ b/include/uapi/linux/lightnvm.h
@@ -21,7 +21,7 @@
#define _UAPI_LINUX_LIGHTNVM_H
#ifdef __KERNEL__
-#include <linux/kernel.h>
+#include <linux/const.h>
#include <linux/ioctl.h>
#else /* __KERNEL__ */
#include <stdio.h>
diff --git a/include/uapi/linux/mroute6.h b/include/uapi/linux/mroute6.h
index c36177a86516..a1fd6173e2db 100644
--- a/include/uapi/linux/mroute6.h
+++ b/include/uapi/linux/mroute6.h
@@ -2,7 +2,7 @@
#ifndef _UAPI__LINUX_MROUTE6_H
#define _UAPI__LINUX_MROUTE6_H
-#include <linux/kernel.h>
+#include <linux/const.h>
#include <linux/types.h>
#include <linux/sockios.h>
#include <linux/in6.h> /* For struct sockaddr_in6. */
diff --git a/include/uapi/linux/netfilter/x_tables.h b/include/uapi/linux/netfilter/x_tables.h
index a8283f7dbc51..b8c6bb233ac1 100644
--- a/include/uapi/linux/netfilter/x_tables.h
+++ b/include/uapi/linux/netfilter/x_tables.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _UAPI_X_TABLES_H
#define _UAPI_X_TABLES_H
-#include <linux/kernel.h>
+#include <linux/const.h>
#include <linux/types.h>
#define XT_FUNCTION_MAXNAMELEN 30
diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h
index c3816ff7bfc3..3d94269bbfa8 100644
--- a/include/uapi/linux/netlink.h
+++ b/include/uapi/linux/netlink.h
@@ -2,7 +2,7 @@
#ifndef _UAPI__LINUX_NETLINK_H
#define _UAPI__LINUX_NETLINK_H
-#include <linux/kernel.h>
+#include <linux/const.h>
#include <linux/socket.h> /* for __kernel_sa_family_t */
#include <linux/types.h>
diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h
index 27c1ed2822e6..458179df9b27 100644
--- a/include/uapi/linux/sysctl.h
+++ b/include/uapi/linux/sysctl.h
@@ -23,7 +23,7 @@
#ifndef _UAPI_LINUX_SYSCTL_H
#define _UAPI_LINUX_SYSCTL_H
-#include <linux/kernel.h>
+#include <linux/const.h>
#include <linux/types.h>
#include <linux/compiler.h>
diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h
index e7e98bde221f..5f2d88212f7c 100644
--- a/include/uapi/linux/userfaultfd.h
+++ b/include/uapi/linux/userfaultfd.h
@@ -257,4 +257,13 @@ struct uffdio_writeprotect {
__u64 mode;
};
+/*
+ * Flags for the userfaultfd(2) system call itself.
+ */
+
+/*
+ * Create a userfaultfd that can handle page faults only in user mode.
+ */
+#define UFFD_USER_MODE_ONLY 1
+
#endif /* _LINUX_USERFAULTFD_H */