author    Stephen Rothwell <sfr@canb.auug.org.au> 2018-01-11 16:16:10 +1100
committer Stephen Rothwell <sfr@canb.auug.org.au> 2018-01-11 16:16:12 +1100
commit    e130bc1d00a4a8597f18c31f2e75cd77a09fb7b1 (patch)
tree      c34476e5afc3d29e9b38373de097faa9597753b8 /include
parent    e2d7fe89e8ae9ee56c560095b4fe8042c3e347cd (diff)
parent    70286688e5ad172ebd9cd1fd468fd54361619479 (diff)
Merge branch 'akpm-current/current'
Diffstat (limited to 'include')
-rw-r--r--  include/asm-generic/bitops/find.h         20
-rw-r--r--  include/asm-generic/bug.h                  1
-rw-r--r--  include/asm-generic/pgtable.h             25
-rw-r--r--  include/linux/bitmap.h                    48
-rw-r--r--  include/linux/cgroup-defs.h                5
-rw-r--r--  include/linux/compiler-clang.h             8
-rw-r--r--  include/linux/compiler-gcc.h              15
-rw-r--r--  include/linux/compiler.h                   5
-rw-r--r--  include/linux/cpumask.h                    2
-rw-r--r--  include/linux/crash_core.h                 2
-rw-r--r--  include/linux/hugetlb.h                   20
-rw-r--r--  include/linux/kallsyms.h                   2
-rw-r--r--  include/linux/kasan.h                     13
-rw-r--r--  include/linux/list_lru.h                   3
-rw-r--r--  include/linux/memcontrol.h               199
-rw-r--r--  include/linux/memory_hotplug.h             3
-rw-r--r--  include/linux/migrate.h                    7
-rw-r--r--  include/linux/mm.h                        27
-rw-r--r--  include/linux/mm_types.h                 154
-rw-r--r--  include/linux/mmu_notifier.h              22
-rw-r--r--  include/linux/mmzone.h                     2
-rw-r--r--  include/linux/oom.h                       12
-rw-r--r--  include/linux/page-isolation.h             3
-rw-r--r--  include/linux/pagevec.h                    6
-rw-r--r--  include/linux/sched.h                      5
-rw-r--r--  include/linux/sched/mm.h                  24
-rw-r--r--  include/linux/sched/signal.h               3
-rw-r--r--  include/linux/shmem_fs.h                   6
-rw-r--r--  include/linux/shrinker.h                   2
-rw-r--r--  include/linux/swap.h                       4
-rw-r--r--  include/linux/uuid.h                       1
-rw-r--r--  include/linux/vmstat.h                    17
-rw-r--r--  include/trace/events/vmscan.h             23
-rw-r--r--  include/uapi/asm-generic/mman-common.h     3
-rw-r--r--  include/uapi/linux/prctl.h                 4
-rw-r--r--  include/uapi/linux/sysctl.h                4
-rw-r--r--  include/uapi/linux/uuid.h                  1
37 files changed, 437 insertions, 264 deletions
diff --git a/include/asm-generic/bitops/find.h b/include/asm-generic/bitops/find.h
index 1ba611e16fa0..8a1ee10014de 100644
--- a/include/asm-generic/bitops/find.h
+++ b/include/asm-generic/bitops/find.h
@@ -16,6 +16,22 @@ extern unsigned long find_next_bit(const unsigned long *addr, unsigned long
size, unsigned long offset);
#endif
+#ifndef find_next_and_bit
+/**
+ * find_next_and_bit - find the next set bit in both memory regions
+ * @addr1: The first address to base the search on
+ * @addr2: The second address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The bitmap size in bits
+ *
+ * Returns the bit number for the next set bit
+ * If no bits are set, returns @size.
+ */
+extern unsigned long find_next_and_bit(const unsigned long *addr1,
+ const unsigned long *addr2, unsigned long size,
+ unsigned long offset);
+#endif
+
#ifndef find_next_zero_bit
/**
* find_next_zero_bit - find the next cleared bit in a memory region
@@ -55,8 +71,12 @@ extern unsigned long find_first_zero_bit(const unsigned long *addr,
unsigned long size);
#else /* CONFIG_GENERIC_FIND_FIRST_BIT */
+#ifndef find_first_bit
#define find_first_bit(addr, size) find_next_bit((addr), (size), 0)
+#endif
+#ifndef find_first_zero_bit
#define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0)
+#endif
#endif /* CONFIG_GENERIC_FIND_FIRST_BIT */
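As with the other generic bitops, the contract is that the search starts at @offset and the return value saturates at @size. A naive userspace model of the semantics, for illustration only (the optimized kernel version lives in lib/find_bit.c, and the name here is invented):

	#include <limits.h>

	#define BITS_PER_LONG (sizeof(unsigned long) * CHAR_BIT)

	/* Bit-at-a-time model of find_next_and_bit()'s behaviour. */
	static unsigned long next_and_bit_model(const unsigned long *addr1,
						const unsigned long *addr2,
						unsigned long size,
						unsigned long offset)
	{
		for (; offset < size; offset++) {
			unsigned long word = addr1[offset / BITS_PER_LONG] &
					     addr2[offset / BITS_PER_LONG];

			if (word & (1UL << (offset % BITS_PER_LONG)))
				return offset;
		}
		return size;	/* no bit set in both bitmaps */
	}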
diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index 963b755d19b0..a7613e1b0c87 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -52,6 +52,7 @@ struct bug_entry {
#ifndef HAVE_ARCH_BUG
#define BUG() do { \
printk("BUG: failure at %s:%d/%s()!\n", __FILE__, __LINE__, __func__); \
+ barrier_before_unreachable(); \
panic("BUG!"); \
} while (0)
#endif
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 868e68561f91..2cfa3075d148 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -309,19 +309,26 @@ extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
#endif
-#ifndef __HAVE_ARCH_PMDP_INVALIDATE
-extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
- pmd_t *pmdp);
-#endif
-
-#ifndef __HAVE_ARCH_PMDP_HUGE_SPLIT_PREPARE
-static inline void pmdp_huge_split_prepare(struct vm_area_struct *vma,
- unsigned long address, pmd_t *pmdp)
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+/*
+ * This is an implementation of pmdp_establish() that is only suitable for an
+ * architecture that doesn't have hardware dirty/accessed bits. In this case we
+ * can't race with a CPU which sets these bits, and a non-atomic approach is fine.
+ */
+static inline pmd_t generic_pmdp_establish(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp, pmd_t pmd)
{
-
+ pmd_t old_pmd = *pmdp;
+ set_pmd_at(vma->vm_mm, address, pmdp, pmd);
+ return old_pmd;
}
#endif
+#ifndef __HAVE_ARCH_PMDP_INVALIDATE
+extern pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
+ pmd_t *pmdp);
+#endif
+
#ifndef __HAVE_ARCH_PTE_SAME
static inline int pte_same(pte_t pte_a, pte_t pte_b)
{
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 3489253e38fc..a17b5121d4f5 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -64,8 +64,8 @@
* bitmap_find_free_region(bitmap, bits, order) Find and allocate bit region
* bitmap_release_region(bitmap, pos, order) Free specified bit region
* bitmap_allocate_region(bitmap, pos, order) Allocate specified bit region
- * bitmap_from_u32array(dst, nbits, buf, nwords) *dst = *buf (nwords 32b words)
- * bitmap_to_u32array(buf, nwords, src, nbits) *buf = *dst (nwords 32b words)
+ * bitmap_from_arr32(dst, buf, nbits) Copy nbits from u32[] buf to dst
+ * bitmap_to_arr32(buf, src, nbits) Copy nbits from buf to u32[] dst
*
*/
@@ -83,8 +83,12 @@
* test_and_change_bit(bit, addr) Change bit and return old value
* find_first_zero_bit(addr, nbits) Position first zero bit in *addr
* find_first_bit(addr, nbits) Position first set bit in *addr
- * find_next_zero_bit(addr, nbits, bit) Position next zero bit in *addr >= bit
+ * find_next_zero_bit(addr, nbits, bit)
+ * Position next zero bit in *addr >= bit
* find_next_bit(addr, nbits, bit) Position next set bit in *addr >= bit
+ * find_next_and_bit(addr1, addr2, nbits, bit)
+ * Same as find_next_bit, but in
+ * (*addr1 & *addr2)
*
*/
@@ -174,14 +178,7 @@ extern void bitmap_fold(unsigned long *dst, const unsigned long *orig,
extern int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order);
extern void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order);
extern int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order);
-extern unsigned int bitmap_from_u32array(unsigned long *bitmap,
- unsigned int nbits,
- const u32 *buf,
- unsigned int nwords);
-extern unsigned int bitmap_to_u32array(u32 *buf,
- unsigned int nwords,
- const unsigned long *bitmap,
- unsigned int nbits);
+
#ifdef __BIG_ENDIAN
extern void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int nbits);
#else
@@ -228,6 +225,35 @@ static inline void bitmap_copy(unsigned long *dst, const unsigned long *src,
}
}
+/*
+ * Copy bitmap and clear tail bits in last word.
+ */
+static inline void bitmap_copy_safe(unsigned long *dst,
+ const unsigned long *src, unsigned int nbits)
+{
+ bitmap_copy(dst, src, nbits);
+ if (nbits % BITS_PER_LONG)
+ dst[nbits / BITS_PER_LONG] &= BITMAP_LAST_WORD_MASK(nbits);
+}
+
+/*
+ * On 32-bit systems bitmaps are represented as u32 arrays internally, and
+ * therefore conversion is not needed when copying data from/to arrays of u32.
+ */
+#if BITS_PER_LONG == 64
+extern void bitmap_from_arr32(unsigned long *bitmap, const u32 *buf,
+ unsigned int nbits);
+extern void bitmap_to_arr32(u32 *buf, const unsigned long *bitmap,
+ unsigned int nbits);
+#else
+#define bitmap_from_arr32(bitmap, buf, nbits) \
+ bitmap_copy_safe((unsigned long *) (bitmap), \
+ (const unsigned long *) (buf), (nbits))
+#define bitmap_to_arr32(buf, bitmap, nbits) \
+ bitmap_copy_safe((unsigned long *) (buf), \
+ (const unsigned long *) (bitmap), (nbits))
+#endif
+
static inline int bitmap_and(unsigned long *dst, const unsigned long *src1,
const unsigned long *src2, unsigned int nbits)
{
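On a 64-bit kernel the conversion packs two u32 halfwords into each unsigned long and clears the tail, mirroring bitmap_copy_safe() above. A rough userspace model of the 64-bit case, as a sketch only (the real implementation is in lib/bitmap.c):

	#include <stdint.h>

	/* Model of bitmap_from_arr32() assuming 64-bit unsigned long. */
	static void from_arr32_model(unsigned long *bitmap, const uint32_t *buf,
				     unsigned int nbits)
	{
		unsigned int i, halfwords = (nbits + 31) / 32;

		for (i = 0; i < halfwords; i++) {
			if (i % 2 == 0)
				bitmap[i / 2] = buf[i];	/* low 32 bits */
			else
				bitmap[i / 2] |= (unsigned long)buf[i] << 32;
		}
		if (nbits % 64)				/* clear tail bits */
			bitmap[(nbits - 1) / 64] &= ~0UL >> (64 - nbits % 64);
	}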
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 9f242b876fde..8e12d7c54308 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -81,6 +81,11 @@ enum {
* Enable cpuset controller in v1 cgroup to use v2 behavior.
*/
CGRP_ROOT_CPUSET_V2_MODE = (1 << 4),
+
+ /*
+ * Enable cgroup-aware OOM killer.
+ */
+ CGRP_GROUP_OOM = (1 << 5),
};
/* cftype->flags */
diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index 3b609edffa8f..d02a4df3f473 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -19,3 +19,11 @@
#define randomized_struct_fields_start struct {
#define randomized_struct_fields_end };
+
+/* all clang versions usable with the kernel support KASAN ABI version 5 */
+#define KASAN_ABI_VERSION 5
+
+/* emulate gcc's __SANITIZE_ADDRESS__ flag */
+#if __has_feature(address_sanitizer)
+#define __SANITIZE_ADDRESS__
+#endif
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 2272ded07496..e6aba21452ec 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -205,6 +205,15 @@
#endif
/*
+ * calling noreturn functions, __builtin_unreachable() and __builtin_trap()
+ * confuse the stack allocation in gcc, leading to overly large stack
+ * frames, see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82365
+ *
+ * Adding an empty inline assembly before it works around the problem
+ */
+#define barrier_before_unreachable() asm volatile("")
+
+/*
* Mark a position in code as unreachable. This can be used to
* suppress control flow warnings after asm blocks that transfer
* control elsewhere.
@@ -214,7 +223,11 @@
* unreleased. Really, we need to have autoconf for the kernel.
*/
#define unreachable() \
- do { annotate_unreachable(); __builtin_unreachable(); } while (0)
+ do { \
+ annotate_unreachable(); \
+ barrier_before_unreachable(); \
+ __builtin_unreachable(); \
+ } while (0)
/* Mark a function definition as prohibited from being cloned. */
#define __noclone __attribute__((__noclone__, __optimize__("no-tracer")))
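A sketch of the pattern the workaround targets, under the assumption stated in the comment above: gcc will not merge stack slots across the empty asm, so the oversized frame from PR82365 is avoided (the function names here are hypothetical, not from the patch):

	extern void __attribute__((noreturn)) trap_handler(void);

	static int check(int cond)
	{
		if (cond) {
			/* without this barrier, gcc may pre-allocate stack
			 * for the noreturn path and inflate the frame */
			barrier_before_unreachable();
			trap_handler();
		}
		return 0;
	}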
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 52e611ab9a6c..97847f2f86cf 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -86,6 +86,11 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
# define barrier_data(ptr) barrier()
#endif
+/* workaround for GCC PR82365 if needed */
+#ifndef barrier_before_unreachable
+# define barrier_before_unreachable() do { } while (0)
+#endif
+
/* Unreachable code */
#ifdef CONFIG_STACK_VALIDATION
/*
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 75b565194437..d4a2a7dcd72d 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -640,7 +640,7 @@ static inline int cpulist_parse(const char *buf, struct cpumask *dstp)
/**
* cpumask_size - size to allocate for a 'struct cpumask' in bytes
*/
-static inline size_t cpumask_size(void)
+static inline unsigned int cpumask_size(void)
{
return BITS_TO_LONGS(nr_cpumask_bits) * sizeof(long);
}
diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index 06097ef30449..8ad0a70f4b7d 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h
@@ -57,6 +57,8 @@ phys_addr_t paddr_vmcoreinfo_note(void);
vmcoreinfo_append_str("NUMBER(%s)=%ld\n", #name, (long)name)
#define VMCOREINFO_CONFIG(name) \
vmcoreinfo_append_str("CONFIG_%s=y\n", #name)
+#define VMCOREINFO_PHYS_BASE(value) \
+ vmcoreinfo_append_str("PHYS_BASE=%lx\n", (unsigned long)value)
extern u32 *vmcoreinfo_note;
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 82a25880714a..612a29b7f6c6 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -119,6 +119,7 @@ long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
long freed);
bool isolate_huge_page(struct page *page, struct list_head *list);
void putback_active_hugepage(struct page *page);
+void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason);
void free_huge_page(struct page *page);
void hugetlb_fix_reserve_counts(struct inode *inode);
extern struct mutex *hugetlb_fault_mutex_table;
@@ -129,7 +130,6 @@ u32 hugetlb_fault_mutex_hash(struct hstate *h, struct mm_struct *mm,
pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud);
-extern int hugepages_treat_as_movable;
extern int sysctl_hugetlb_shm_group;
extern struct list_head huge_boot_pages;
@@ -158,6 +158,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
unsigned long address, unsigned long end, pgprot_t newprot);
bool is_hugetlb_entry_migration(pte_t pte);
+
#else /* !CONFIG_HUGETLB_PAGE */
static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
@@ -198,6 +199,7 @@ static inline bool isolate_huge_page(struct page *page, struct list_head *list)
return false;
}
#define putback_active_hugepage(p) do {} while (0)
+#define move_hugetlb_state(old, new, reason) do {} while (0)
static inline unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
unsigned long address, unsigned long end, pgprot_t newprot)
@@ -271,6 +273,17 @@ static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb)
return sb->s_fs_info;
}
+struct hugetlbfs_inode_info {
+ struct shared_policy policy;
+ struct inode vfs_inode;
+ unsigned int seals;
+};
+
+static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode)
+{
+ return container_of(inode, struct hugetlbfs_inode_info, vfs_inode);
+}
+
extern const struct file_operations hugetlbfs_file_operations;
extern const struct vm_operations_struct hugetlb_vm_ops;
struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct,
@@ -343,10 +356,9 @@ struct huge_bootmem_page {
struct page *alloc_huge_page(struct vm_area_struct *vma,
unsigned long addr, int avoid_reserve);
struct page *alloc_huge_page_node(struct hstate *h, int nid);
-struct page *alloc_huge_page_noerr(struct vm_area_struct *vma,
- unsigned long addr, int avoid_reserve);
struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
nodemask_t *nmask);
+struct page *alloc_huge_page_vma(struct vm_area_struct *vma, unsigned long address);
int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
pgoff_t idx);
@@ -524,7 +536,7 @@ struct hstate {};
#define alloc_huge_page(v, a, r) NULL
#define alloc_huge_page_node(h, nid) NULL
#define alloc_huge_page_nodemask(h, preferred_nid, nmask) NULL
-#define alloc_huge_page_noerr(v, a, r) NULL
+#define alloc_huge_page_vma(vma, address) NULL
#define alloc_bootmem_huge_page(h) NULL
#define hstate_file(f) NULL
#define hstate_sizelog(s) NULL
diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h
index 7288f9c395b6..23190e5c940b 100644
--- a/include/linux/kallsyms.h
+++ b/include/linux/kallsyms.h
@@ -185,7 +185,7 @@ static inline void print_symbol(const char *fmt, unsigned long addr)
static inline void print_ip_sym(unsigned long ip)
{
- printk("[<%p>] %pS\n", (void *) ip, (void *) ip);
+ printk("[<%px>] %pS\n", (void *) ip, (void *) ip);
}
#endif /*_LINUX_KALLSYMS_H*/
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index e3eb834c9a35..fc45f8952d1e 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -56,14 +56,14 @@ void kasan_poison_object_data(struct kmem_cache *cache, void *object);
void kasan_init_slab_obj(struct kmem_cache *cache, const void *object);
void kasan_kmalloc_large(const void *ptr, size_t size, gfp_t flags);
-void kasan_kfree_large(const void *ptr);
-void kasan_poison_kfree(void *ptr);
+void kasan_kfree_large(void *ptr, unsigned long ip);
+void kasan_poison_kfree(void *ptr, unsigned long ip);
void kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size,
gfp_t flags);
void kasan_krealloc(const void *object, size_t new_size, gfp_t flags);
void kasan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags);
-bool kasan_slab_free(struct kmem_cache *s, void *object);
+bool kasan_slab_free(struct kmem_cache *s, void *object, unsigned long ip);
struct kasan_cache {
int alloc_meta_offset;
@@ -108,8 +108,8 @@ static inline void kasan_init_slab_obj(struct kmem_cache *cache,
const void *object) {}
static inline void kasan_kmalloc_large(void *ptr, size_t size, gfp_t flags) {}
-static inline void kasan_kfree_large(const void *ptr) {}
-static inline void kasan_poison_kfree(void *ptr) {}
+static inline void kasan_kfree_large(void *ptr, unsigned long ip) {}
+static inline void kasan_poison_kfree(void *ptr, unsigned long ip) {}
static inline void kasan_kmalloc(struct kmem_cache *s, const void *object,
size_t size, gfp_t flags) {}
static inline void kasan_krealloc(const void *object, size_t new_size,
@@ -117,7 +117,8 @@ static inline void kasan_krealloc(const void *object, size_t new_size,
static inline void kasan_slab_alloc(struct kmem_cache *s, void *object,
gfp_t flags) {}
-static inline bool kasan_slab_free(struct kmem_cache *s, void *object)
+static inline bool kasan_slab_free(struct kmem_cache *s, void *object,
+ unsigned long ip)
{
return false;
}
diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
index bb8129a3474d..96def9d15b1b 100644
--- a/include/linux/list_lru.h
+++ b/include/linux/list_lru.h
@@ -32,6 +32,7 @@ struct list_lru_one {
};
struct list_lru_memcg {
+ struct rcu_head rcu;
/* array of per cgroup lists, indexed by memcg_cache_id */
struct list_lru_one *lru[0];
};
@@ -43,7 +44,7 @@ struct list_lru_node {
struct list_lru_one lru;
#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
/* for cgroup aware lrus points to per cgroup lists, otherwise NULL */
- struct list_lru_memcg *memcg_lrus;
+ struct list_lru_memcg __rcu *memcg_lrus;
#endif
long nr_items;
} ____cacheline_aligned_in_smp;
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 69966c461d1c..7b8bcdf6571d 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -35,6 +35,7 @@ struct mem_cgroup;
struct page;
struct mm_struct;
struct kmem_cache;
+struct oom_control;
/* Cgroup-specific page state, on top of universal node page state */
enum memcg_stat_item {
@@ -108,7 +109,10 @@ struct lruvec_stat {
*/
struct mem_cgroup_per_node {
struct lruvec lruvec;
- struct lruvec_stat __percpu *lruvec_stat;
+
+ struct lruvec_stat __percpu *lruvec_stat_cpu;
+ atomic_long_t lruvec_stat[NR_VM_NODE_STAT_ITEMS];
+
unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];
struct mem_cgroup_reclaim_iter iter[DEF_PRIORITY + 1];
@@ -199,6 +203,13 @@ struct mem_cgroup {
/* OOM-Killer disable */
int oom_kill_disable;
+ /*
+ * Treat the sub-tree as an indivisible memory consumer,
+ * kill all belonging tasks if the memory cgroup selected
+ * as OOM victim.
+ */
+ bool oom_group;
+
/* handle for "memory.events" */
struct cgroup_file events_file;
@@ -227,10 +238,10 @@ struct mem_cgroup {
spinlock_t move_lock;
struct task_struct *move_lock_task;
unsigned long move_lock_flags;
- /*
- * percpu counter.
- */
- struct mem_cgroup_stat_cpu __percpu *stat;
+
+ struct mem_cgroup_stat_cpu __percpu *stat_cpu;
+ atomic_long_t stat[MEMCG_NR_STAT];
+ atomic_long_t events[MEMCG_NR_EVENTS];
unsigned long socket_pressure;
@@ -265,6 +276,12 @@ struct mem_cgroup {
/* WARNING: nodeinfo must be the last member here */
};
+/*
+ * size of first charge trial. "32" comes from vmscan.c's magic value.
+ * TODO: maybe necessary to use big numbers in big irons.
+ */
+#define MEMCG_CHARGE_BATCH 32U
+
extern struct mem_cgroup *root_mem_cgroup;
static inline bool mem_cgroup_disabled(void)
@@ -272,13 +289,6 @@ static inline bool mem_cgroup_disabled(void)
return !cgroup_subsys_enabled(memory_cgrp_subsys);
}
-static inline void mem_cgroup_event(struct mem_cgroup *memcg,
- enum memcg_event_item event)
-{
- this_cpu_inc(memcg->stat->events[event]);
- cgroup_file_notify(&memcg->events_file);
-}
-
bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg);
int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
@@ -342,6 +352,11 @@ struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
return css ? container_of(css, struct mem_cgroup, css) : NULL;
}
+static inline void mem_cgroup_put(struct mem_cgroup *memcg)
+{
+ css_put(&memcg->css);
+}
+
#define mem_cgroup_from_counter(counter, member) \
container_of(counter, struct mem_cgroup, member)
@@ -480,6 +495,13 @@ static inline bool task_in_memcg_oom(struct task_struct *p)
bool mem_cgroup_oom_synchronize(bool wait);
+bool mem_cgroup_select_oom_victim(struct oom_control *oc);
+
+static inline bool mem_cgroup_oom_group(struct mem_cgroup *memcg)
+{
+ return memcg->oom_group;
+}
+
#ifdef CONFIG_MEMCG_SWAP
extern int do_swap_account;
#endif
@@ -492,32 +514,38 @@ void unlock_page_memcg(struct page *page);
static inline unsigned long memcg_page_state(struct mem_cgroup *memcg,
int idx)
{
- long val = 0;
- int cpu;
-
- for_each_possible_cpu(cpu)
- val += per_cpu(memcg->stat->count[idx], cpu);
-
- if (val < 0)
- val = 0;
-
- return val;
+ long x = atomic_long_read(&memcg->stat[idx]);
+#ifdef CONFIG_SMP
+ if (x < 0)
+ x = 0;
+#endif
+ return x;
}
/* idx can be of type enum memcg_stat_item or node_stat_item */
static inline void __mod_memcg_state(struct mem_cgroup *memcg,
int idx, int val)
{
- if (!mem_cgroup_disabled())
- __this_cpu_add(memcg->stat->count[idx], val);
+ long x;
+
+ if (mem_cgroup_disabled())
+ return;
+
+ x = val + __this_cpu_read(memcg->stat_cpu->count[idx]);
+ if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) {
+ atomic_long_add(x, &memcg->stat[idx]);
+ x = 0;
+ }
+ __this_cpu_write(memcg->stat_cpu->count[idx], x);
}
/* idx can be of type enum memcg_stat_item or node_stat_item */
static inline void mod_memcg_state(struct mem_cgroup *memcg,
int idx, int val)
{
- if (!mem_cgroup_disabled())
- this_cpu_add(memcg->stat->count[idx], val);
+ preempt_disable();
+ __mod_memcg_state(memcg, idx, val);
+ preempt_enable();
}
/**
@@ -555,87 +583,108 @@ static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
enum node_stat_item idx)
{
struct mem_cgroup_per_node *pn;
- long val = 0;
- int cpu;
+ long x;
if (mem_cgroup_disabled())
return node_page_state(lruvec_pgdat(lruvec), idx);
pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
- for_each_possible_cpu(cpu)
- val += per_cpu(pn->lruvec_stat->count[idx], cpu);
-
- if (val < 0)
- val = 0;
-
- return val;
+ x = atomic_long_read(&pn->lruvec_stat[idx]);
+#ifdef CONFIG_SMP
+ if (x < 0)
+ x = 0;
+#endif
+ return x;
}
static inline void __mod_lruvec_state(struct lruvec *lruvec,
enum node_stat_item idx, int val)
{
struct mem_cgroup_per_node *pn;
+ long x;
+ /* Update node */
__mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
+
if (mem_cgroup_disabled())
return;
+
pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
+
+ /* Update memcg */
__mod_memcg_state(pn->memcg, idx, val);
- __this_cpu_add(pn->lruvec_stat->count[idx], val);
+
+ /* Update lruvec */
+ x = val + __this_cpu_read(pn->lruvec_stat_cpu->count[idx]);
+ if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) {
+ atomic_long_add(x, &pn->lruvec_stat[idx]);
+ x = 0;
+ }
+ __this_cpu_write(pn->lruvec_stat_cpu->count[idx], x);
}
static inline void mod_lruvec_state(struct lruvec *lruvec,
enum node_stat_item idx, int val)
{
- struct mem_cgroup_per_node *pn;
-
- mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
- if (mem_cgroup_disabled())
- return;
- pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
- mod_memcg_state(pn->memcg, idx, val);
- this_cpu_add(pn->lruvec_stat->count[idx], val);
+ preempt_disable();
+ __mod_lruvec_state(lruvec, idx, val);
+ preempt_enable();
}
static inline void __mod_lruvec_page_state(struct page *page,
enum node_stat_item idx, int val)
{
- struct mem_cgroup_per_node *pn;
+ pg_data_t *pgdat = page_pgdat(page);
+ struct lruvec *lruvec;
- __mod_node_page_state(page_pgdat(page), idx, val);
- if (mem_cgroup_disabled() || !page->mem_cgroup)
+ /* Untracked pages have no memcg, no lruvec. Update only the node */
+ if (!page->mem_cgroup) {
+ __mod_node_page_state(pgdat, idx, val);
return;
- __mod_memcg_state(page->mem_cgroup, idx, val);
- pn = page->mem_cgroup->nodeinfo[page_to_nid(page)];
- __this_cpu_add(pn->lruvec_stat->count[idx], val);
+ }
+
+ lruvec = mem_cgroup_lruvec(pgdat, page->mem_cgroup);
+ __mod_lruvec_state(lruvec, idx, val);
}
static inline void mod_lruvec_page_state(struct page *page,
enum node_stat_item idx, int val)
{
- struct mem_cgroup_per_node *pn;
-
- mod_node_page_state(page_pgdat(page), idx, val);
- if (mem_cgroup_disabled() || !page->mem_cgroup)
- return;
- mod_memcg_state(page->mem_cgroup, idx, val);
- pn = page->mem_cgroup->nodeinfo[page_to_nid(page)];
- this_cpu_add(pn->lruvec_stat->count[idx], val);
+ preempt_disable();
+ __mod_lruvec_page_state(page, idx, val);
+ preempt_enable();
}
unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
gfp_t gfp_mask,
unsigned long *total_scanned);
+/* idx can be of type enum memcg_event_item or vm_event_item */
+static inline void __count_memcg_events(struct mem_cgroup *memcg,
+ int idx, unsigned long count)
+{
+ unsigned long x;
+
+ if (mem_cgroup_disabled())
+ return;
+
+ x = count + __this_cpu_read(memcg->stat_cpu->events[idx]);
+ if (unlikely(x > MEMCG_CHARGE_BATCH)) {
+ atomic_long_add(x, &memcg->events[idx]);
+ x = 0;
+ }
+ __this_cpu_write(memcg->stat_cpu->events[idx], x);
+}
+
static inline void count_memcg_events(struct mem_cgroup *memcg,
- enum vm_event_item idx,
- unsigned long count)
+ int idx, unsigned long count)
{
- if (!mem_cgroup_disabled())
- this_cpu_add(memcg->stat->events[idx], count);
+ preempt_disable();
+ __count_memcg_events(memcg, idx, count);
+ preempt_enable();
}
-/* idx can be of type enum memcg_stat_item or node_stat_item */
+/* idx can be of type enum memcg_event_item or vm_event_item */
static inline void count_memcg_page_event(struct page *page,
int idx)
{
@@ -654,12 +703,20 @@ static inline void count_memcg_event_mm(struct mm_struct *mm,
rcu_read_lock();
memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
if (likely(memcg)) {
- this_cpu_inc(memcg->stat->events[idx]);
+ count_memcg_events(memcg, idx, 1);
if (idx == OOM_KILL)
cgroup_file_notify(&memcg->events_file);
}
rcu_read_unlock();
}
+
+static inline void mem_cgroup_event(struct mem_cgroup *memcg,
+ enum memcg_event_item event)
+{
+ count_memcg_events(memcg, event, 1);
+ cgroup_file_notify(&memcg->events_file);
+}
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void mem_cgroup_split_huge_fixup(struct page *head);
#endif
@@ -744,6 +801,10 @@ static inline bool task_in_mem_cgroup(struct task_struct *task,
return true;
}
+static inline void mem_cgroup_put(struct mem_cgroup *memcg)
+{
+}
+
static inline struct mem_cgroup *
mem_cgroup_iter(struct mem_cgroup *root,
struct mem_cgroup *prev,
@@ -936,6 +997,16 @@ static inline
void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx)
{
}
+
+static inline bool mem_cgroup_select_oom_victim(struct oom_control *oc)
+{
+ return false;
+}
+
+static inline bool mem_cgroup_oom_group(struct mem_cgroup *memcg)
+{
+ return false;
+}
#endif /* CONFIG_MEMCG */
/* idx can be of type enum memcg_stat_item or node_stat_item */
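The recurring pattern in __mod_memcg_state(), __mod_lruvec_state() and __count_memcg_events() above is a per-cpu slack counter that spills into a shared atomic only once it exceeds MEMCG_CHARGE_BATCH. A condensed sketch of just that scheme (type and names are hypothetical; read-side error is bounded by nr_cpus * MEMCG_CHARGE_BATCH):

	struct batched_count {
		long __percpu	*slack;		/* per-cpu accumulated delta */
		atomic_long_t	value;		/* shared, approximately current */
	};

	static inline void batched_add(struct batched_count *bc, int val)
	{
		long x = val + __this_cpu_read(*bc->slack);

		if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) {
			atomic_long_add(x, &bc->value);	/* flush the slack */
			x = 0;
		}
		__this_cpu_write(*bc->slack, x);
	}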
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 58e110aee7ab..703069a73c61 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -231,9 +231,6 @@ void put_online_mems(void);
void mem_hotplug_begin(void);
void mem_hotplug_done(void);
-extern void set_zone_contiguous(struct zone *zone);
-extern void clear_zone_contiguous(struct zone *zone);
-
#else /* ! CONFIG_MEMORY_HOTPLUG */
#define pfn_to_online_page(pfn) \
({ \
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index a2246cf670ba..0c6fe904bc97 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -7,8 +7,7 @@
#include <linux/migrate_mode.h>
#include <linux/hugetlb.h>
-typedef struct page *new_page_t(struct page *page, unsigned long private,
- int **reason);
+typedef struct page *new_page_t(struct page *page, unsigned long private);
typedef void free_page_t(struct page *page, unsigned long private);
/*
@@ -43,9 +42,9 @@ static inline struct page *new_page_nodemask(struct page *page,
return alloc_huge_page_nodemask(page_hstate(compound_head(page)),
preferred_nid, nodemask);
- if (thp_migration_supported() && PageTransHuge(page)) {
- order = HPAGE_PMD_ORDER;
+ if (PageTransHuge(page)) {
gfp_mask |= GFP_TRANSHUGE;
+ order = HPAGE_PMD_ORDER;
}
if (PageHighMem(page) || (zone_idx(page_zone(page)) == ZONE_MOVABLE))
diff --git a/include/linux/mm.h b/include/linux/mm.h
index ea818ff739cd..63f7ba111f64 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1312,8 +1312,6 @@ void free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
unsigned long end, unsigned long floor, unsigned long ceiling);
int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
struct vm_area_struct *vma);
-void unmap_mapping_range(struct address_space *mapping,
- loff_t const holebegin, loff_t const holelen, int even_cows);
int follow_pte_pmd(struct mm_struct *mm, unsigned long address,
unsigned long *start, unsigned long *end,
pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp);
@@ -1324,12 +1322,6 @@ int follow_phys(struct vm_area_struct *vma, unsigned long address,
int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
void *buf, int len, int write);
-static inline void unmap_shared_mapping_range(struct address_space *mapping,
- loff_t const holebegin, loff_t const holelen)
-{
- unmap_mapping_range(mapping, holebegin, holelen, 0);
-}
-
extern void truncate_pagecache(struct inode *inode, loff_t new);
extern void truncate_setsize(struct inode *inode, loff_t newsize);
void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to);
@@ -1344,6 +1336,10 @@ extern int handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
extern int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
unsigned long address, unsigned int fault_flags,
bool *unlocked);
+void unmap_mapping_pages(struct address_space *mapping,
+ pgoff_t start, pgoff_t nr, bool even_cows);
+void unmap_mapping_range(struct address_space *mapping,
+ loff_t const holebegin, loff_t const holelen, int even_cows);
#else
static inline int handle_mm_fault(struct vm_area_struct *vma,
unsigned long address, unsigned int flags)
@@ -1360,10 +1356,20 @@ static inline int fixup_user_fault(struct task_struct *tsk,
BUG();
return -EFAULT;
}
+static inline void unmap_mapping_pages(struct address_space *mapping,
+ pgoff_t start, pgoff_t nr, bool even_cows) { }
+static inline void unmap_mapping_range(struct address_space *mapping,
+ loff_t const holebegin, loff_t const holelen, int even_cows) { }
#endif
-extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len,
- unsigned int gup_flags);
+static inline void unmap_shared_mapping_range(struct address_space *mapping,
+ loff_t const holebegin, loff_t const holelen)
+{
+ unmap_mapping_range(mapping, holebegin, holelen, 0);
+}
+
+extern int access_process_vm(struct task_struct *tsk, unsigned long addr,
+ void *buf, int len, unsigned int gup_flags);
extern int access_remote_vm(struct mm_struct *mm, unsigned long addr,
void *buf, int len, unsigned int gup_flags);
extern int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
@@ -2090,6 +2096,7 @@ extern void setup_per_cpu_pageset(void);
extern void zone_pcp_update(struct zone *zone);
extern void zone_pcp_reset(struct zone *zone);
+extern void setup_zone_pageset(struct zone *zone);
/* page_alloc.c */
extern int min_free_kbytes;
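unmap_mapping_pages() takes a (start, nr) range in pages where unmap_mapping_range() takes byte offsets. A sketch of the correspondence, simplified (the real wrapper in mm/memory.c also treats holelen == 0 as "to end of file" and guards against overflow):

	static void unmap_bytes(struct address_space *mapping, loff_t holebegin,
				loff_t holelen, int even_cows)
	{
		pgoff_t start = holebegin >> PAGE_SHIFT;
		pgoff_t nr = (holelen + PAGE_SIZE - 1) >> PAGE_SHIFT;

		unmap_mapping_pages(mapping, start, nr, even_cows);
	}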
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index cfd0ac4e5e0e..fd1af6b9591d 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -31,28 +31,56 @@ struct hmm;
* it to keep track of whatever it is we are using the page for at the
* moment. Note that we have no way to track which tasks are using
* a page, though if it is a pagecache page, rmap structures can tell us
- * who is mapping it.
+ * who is mapping it. If you allocate the page using alloc_pages(), you
+ * can use some of the space in struct page for your own purposes.
*
- * The objects in struct page are organized in double word blocks in
- * order to allows us to use atomic double word operations on portions
- * of struct page. That is currently only used by slub but the arrangement
- * allows the use of atomic double word operations on the flags/mapping
- * and lru list pointers also.
+ * Pages that were once in the page cache may be found under the RCU lock
+ * even after they have been recycled to a different purpose. The page
+ * cache reads and writes some of the fields in struct page to pin the
+ * page before checking that it's still in the page cache. It is vital
+ * that all users of struct page:
+ * 1. Use the first word as PageFlags.
+ * 2. Clear or preserve bit 0 of page->compound_head. It is used as
+ * PageTail for compound pages, and the page cache must not see false
+ * positives. Some users put a pointer here (guaranteed to be at least
+ * 4-byte aligned), other users avoid using the field altogether.
+ * 3. page->_refcount must either not be used, or must be used in such a
+ * way that other CPUs temporarily incrementing and then decrementing the
+ * refcount does not cause problems. On receiving the page from
+ * alloc_pages(), the refcount will be positive.
+ * 4. Either preserve page->_mapcount or restore it to -1 before freeing it.
+ *
+ * If you allocate pages of order > 0, you can use the fields in the struct
+ * page associated with each page, but bear in mind that the pages may have
+ * been inserted individually into the page cache, so you must use the above
+ * four fields in a compatible way for each struct page.
+ *
+ * SLUB uses cmpxchg_double() to atomically update its freelist and
+ * counters. That requires that freelist & counters be adjacent and
+ * double-word aligned. We align all struct pages to double-word
+ * boundaries, and ensure that 'freelist' is aligned within the
+ * struct.
*/
+#ifdef CONFIG_HAVE_ALIGNED_STRUCT_PAGE
+#define _struct_page_alignment __aligned(2 * sizeof(unsigned long))
+#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE)
+#define _slub_counter_t unsigned long
+#else
+#define _slub_counter_t unsigned int
+#endif
+#else /* !CONFIG_HAVE_ALIGNED_STRUCT_PAGE */
+#define _struct_page_alignment
+#define _slub_counter_t unsigned int
+#endif /* !CONFIG_HAVE_ALIGNED_STRUCT_PAGE */
+
struct page {
/* First double word block */
unsigned long flags; /* Atomic flags, some possibly
* updated asynchronously */
union {
- struct address_space *mapping; /* If low bit clear, points to
- * inode address_space, or NULL.
- * If page mapped as anonymous
- * memory, low bit is set, and
- * it points to anon_vma object
- * or KSM private structure. See
- * PAGE_MAPPING_ANON and
- * PAGE_MAPPING_KSM.
- */
+ /* See page-flags.h for the definition of PAGE_MAPPING_FLAGS */
+ struct address_space *mapping;
+
void *s_mem; /* slab first object */
atomic_t compound_mapcount; /* first tail page */
/* page_deferred_list().next -- second tail page */
@@ -66,40 +94,27 @@ struct page {
};
union {
-#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
- defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
- /* Used for cmpxchg_double in slub */
- unsigned long counters;
-#else
- /*
- * Keep _refcount separate from slub cmpxchg_double data.
- * As the rest of the double word is protected by slab_lock
- * but _refcount is not.
- */
- unsigned counters;
-#endif
- struct {
+ _slub_counter_t counters;
+ unsigned int active; /* SLAB */
+ struct { /* SLUB */
+ unsigned inuse:16;
+ unsigned objects:15;
+ unsigned frozen:1;
+ };
+ int units; /* SLOB */
+
+ struct { /* Page cache */
+ /*
+ * Count of ptes mapped in mms, to show when
+ * page is mapped & limit reverse map searches.
+ *
+ * Extra information about page type may be
+ * stored here for pages that are never mapped,
+ * in which case the value MUST BE <= -2.
+ * See page-flags.h for more details.
+ */
+ atomic_t _mapcount;
- union {
- /*
- * Count of ptes mapped in mms, to show when
- * page is mapped & limit reverse map searches.
- *
- * Extra information about page type may be
- * stored here for pages that are never mapped,
- * in which case the value MUST BE <= -2.
- * See page-flags.h for more details.
- */
- atomic_t _mapcount;
-
- unsigned int active; /* SLAB */
- struct { /* SLUB */
- unsigned inuse:16;
- unsigned objects:15;
- unsigned frozen:1;
- };
- int units; /* SLOB */
- };
/*
* Usage count, *USE WRAPPER FUNCTION* when manual
* accounting. See page_ref.h
@@ -109,8 +124,6 @@ struct page {
};
/*
- * Third double word block
- *
* WARNING: bit 0 of the first word encode PageTail(). That means
* the rest users of the storage space MUST NOT use the bit to
* avoid collision and false-positive PageTail().
@@ -145,19 +158,9 @@ struct page {
unsigned long compound_head; /* If bit zero is set */
/* First tail page only */
-#ifdef CONFIG_64BIT
- /*
- * On 64 bit system we have enough space in struct page
- * to encode compound_dtor and compound_order with
- * unsigned int. It can help compiler generate better or
- * smaller code on some archtectures.
- */
- unsigned int compound_dtor;
- unsigned int compound_order;
-#else
- unsigned short int compound_dtor;
- unsigned short int compound_order;
-#endif
+ unsigned char compound_dtor;
+ unsigned char compound_order;
+ /* two/six bytes available here */
};
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS
@@ -171,15 +174,14 @@ struct page {
#endif
};
- /* Remainder is not double word aligned */
union {
- unsigned long private; /* Mapping-private opaque data:
- * usually used for buffer_heads
- * if PagePrivate set; used for
- * swp_entry_t if PageSwapCache;
- * indicates order in the buddy
- * system if PG_buddy is set.
- */
+ /*
+ * Mapping-private opaque data:
+ * Usually used for buffer_heads if PagePrivate
+ * Used for swp_entry_t if PageSwapCache
+ * Indicates order in the buddy system if PageBuddy
+ */
+ unsigned long private;
#if USE_SPLIT_PTE_PTLOCKS
#if ALLOC_SPLIT_PTLOCKS
spinlock_t *ptl;
@@ -212,15 +214,7 @@ struct page {
#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
int _last_cpupid;
#endif
-}
-/*
- * The struct page can be forced to be double word aligned so that atomic ops
- * on double words work. The SLUB allocator can make use of such a feature.
- */
-#ifdef CONFIG_HAVE_ALIGNED_STRUCT_PAGE
- __aligned(2 * sizeof(unsigned long))
-#endif
-;
+} _struct_page_alignment;
#define PAGE_FRAG_CACHE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK)
#define PAGE_FRAG_CACHE_MAX_ORDER get_order(PAGE_FRAG_CACHE_MAX_SIZE)
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index b25dc9db19fc..2c6de02a34ae 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -2,6 +2,7 @@
#ifndef _LINUX_MMU_NOTIFIER_H
#define _LINUX_MMU_NOTIFIER_H
+#include <linux/types.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/mm_types.h>
@@ -10,6 +11,9 @@
struct mmu_notifier;
struct mmu_notifier_ops;
+/* mmu_notifier_ops flags */
+#define MMU_INVALIDATE_DOES_NOT_BLOCK (0x01)
+
#ifdef CONFIG_MMU_NOTIFIER
/*
@@ -27,6 +31,15 @@ struct mmu_notifier_mm {
struct mmu_notifier_ops {
/*
+ * Flags to specify behavior of callbacks for this MMU notifier.
+ * Used to determine the context in which an operation may be called.
+ *
+ * MMU_INVALIDATE_DOES_NOT_BLOCK: invalidate_{start,end} does not
+ * block
+ */
+ int flags;
+
+ /*
* Called either by mmu_notifier_unregister or when the mm is
* being destroyed by exit_mmap, always before all pages are
* freed. This can run concurrently with other mmu notifier
@@ -137,6 +150,9 @@ struct mmu_notifier_ops {
* page. Pages will no longer be referenced by the linux
* address space but may still be referenced by sptes until
* the last refcount is dropped.
+ *
+ * If both of these callbacks cannot block, mmu_notifier_ops.flags
+ * should have MMU_INVALIDATE_DOES_NOT_BLOCK set.
*/
void (*invalidate_range_start)(struct mmu_notifier *mn,
struct mm_struct *mm,
@@ -218,6 +234,7 @@ extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
bool only_end);
extern void __mmu_notifier_invalidate_range(struct mm_struct *mm,
unsigned long start, unsigned long end);
+extern bool mm_has_blockable_invalidate_notifiers(struct mm_struct *mm);
static inline void mmu_notifier_release(struct mm_struct *mm)
{
@@ -457,6 +474,11 @@ static inline void mmu_notifier_invalidate_range(struct mm_struct *mm,
{
}
+static inline bool mm_has_blockable_invalidate_notifiers(struct mm_struct *mm)
+{
+ return false;
+}
+
static inline void mmu_notifier_mm_init(struct mm_struct *mm)
{
}
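A hedged sketch of a registration that advertises the new flag (callback names and bodies are hypothetical; the only requirement the patch adds is that flagged callbacks never block):

	static void my_inv_start(struct mmu_notifier *mn, struct mm_struct *mm,
				 unsigned long start, unsigned long end)
	{
		/* must not sleep: no mutexes, no GFP_KERNEL allocation */
	}

	static void my_inv_end(struct mmu_notifier *mn, struct mm_struct *mm,
			       unsigned long start, unsigned long end)
	{
	}

	static const struct mmu_notifier_ops my_ops = {
		.flags			= MMU_INVALIDATE_DOES_NOT_BLOCK,
		.invalidate_range_start	= my_inv_start,
		.invalidate_range_end	= my_inv_end,
	};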
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 67f2e3c38939..24fcbce6e267 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -886,7 +886,7 @@ int min_free_kbytes_sysctl_handler(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
int watermark_scale_factor_sysctl_handler(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
-extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1];
+extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES];
int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int,
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 5bad038ac012..d4d41c01a74d 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -10,6 +10,13 @@
#include <linux/sched/coredump.h> /* MMF_* */
#include <linux/mm.h> /* VM_FAULT* */
+
+/*
+ * Special value returned by victim selection functions to indicate
+ * that there are in-flight OOM victims.
+ */
+#define INFLIGHT_VICTIM ((void *)-1UL)
+
struct zonelist;
struct notifier_block;
struct mem_cgroup;
@@ -40,7 +47,8 @@ struct oom_control {
/* Used by oom implementation, do not set */
unsigned long totalpages;
- struct task_struct *chosen;
+ struct task_struct *chosen_task;
+ struct mem_cgroup *chosen_memcg;
unsigned long chosen_points;
};
@@ -111,6 +119,8 @@ extern void oom_killer_enable(void);
extern struct task_struct *find_lock_task_mm(struct task_struct *p);
+extern int oom_evaluate_task(struct task_struct *task, void *arg);
+
/* sysctls */
extern int sysctl_oom_dump_tasks;
extern int sysctl_oom_kill_allocating_task;
diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
index cdad58bbfd8b..4ae347cbc36d 100644
--- a/include/linux/page-isolation.h
+++ b/include/linux/page-isolation.h
@@ -63,7 +63,6 @@ undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
bool skip_hwpoisoned_pages);
-struct page *alloc_migrate_target(struct page *page, unsigned long private,
- int **resultp);
+struct page *alloc_migrate_target(struct page *page, unsigned long private);
#endif
diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
index 5fb6580f7f23..6dc456ac6136 100644
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -9,14 +9,14 @@
#ifndef _LINUX_PAGEVEC_H
#define _LINUX_PAGEVEC_H
-/* 14 pointers + two long's align the pagevec structure to a power of two */
-#define PAGEVEC_SIZE 14
+/* 15 pointers + header align the pagevec structure to a power of two */
+#define PAGEVEC_SIZE 15
struct page;
struct address_space;
struct pagevec {
- unsigned long nr;
+ unsigned char nr;
bool percpu_pvec_drained;
struct page *pages[PAGEVEC_SIZE];
};
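The size arithmetic behind the change, on common 64-bit ABIs: one byte for nr, one for percpu_pvec_drained, six bytes of padding to pointer alignment, then 15 eight-byte page pointers, for 8 + 120 = 128 bytes. A userspace mirror to check that assumption:

	#include <assert.h>
	#include <stdbool.h>

	struct pagevec_mirror {
		unsigned char nr;
		bool percpu_pvec_drained;
		void *pages[15];	/* PAGEVEC_SIZE */
	};

	/* 2 bytes of fields + 6 padding + 15 * 8 = 128, a power of two */
	static_assert(sizeof(struct pagevec_mirror) == 128, "layout assumption");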
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 166144c04ef6..ce5a27304b03 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1489,6 +1489,11 @@ static inline struct thread_info *task_thread_info(struct task_struct *task)
extern struct task_struct *find_task_by_vpid(pid_t nr);
extern struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns);
+/*
+ * find a task by its virtual pid and get the task struct
+ */
+extern struct task_struct *find_get_task_by_vpid(pid_t nr);
+
extern int wake_up_state(struct task_struct *tsk, unsigned int state);
extern int wake_up_process(struct task_struct *tsk);
extern void wake_up_new_task(struct task_struct *tsk);
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 3d49b91b674d..bd422561a75e 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -11,7 +11,7 @@
/*
* Routines for handling mm_structs
*/
-extern struct mm_struct * mm_alloc(void);
+extern struct mm_struct *mm_alloc(void);
/**
* mmgrab() - Pin a &struct mm_struct.
@@ -35,27 +35,7 @@ static inline void mmgrab(struct mm_struct *mm)
atomic_inc(&mm->mm_count);
}
-/* mmdrop drops the mm and the page tables */
-extern void __mmdrop(struct mm_struct *);
-static inline void mmdrop(struct mm_struct *mm)
-{
- if (unlikely(atomic_dec_and_test(&mm->mm_count)))
- __mmdrop(mm);
-}
-
-static inline void mmdrop_async_fn(struct work_struct *work)
-{
- struct mm_struct *mm = container_of(work, struct mm_struct, async_put_work);
- __mmdrop(mm);
-}
-
-static inline void mmdrop_async(struct mm_struct *mm)
-{
- if (unlikely(atomic_dec_and_test(&mm->mm_count))) {
- INIT_WORK(&mm->async_put_work, mmdrop_async_fn);
- schedule_work(&mm->async_put_work);
- }
-}
+extern void mmdrop(struct mm_struct *mm);
/**
* mmget() - Pin the address space associated with a &struct mm_struct.
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 0aa4548fb492..64d85fc65c41 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -104,6 +104,9 @@ struct signal_struct {
int group_stop_count;
unsigned int flags; /* see SIGNAL_* flags below */
+ /* The signal sent when the parent dies: */
+ int pdeath_signal_proc;
+
/*
* PR_SET_CHILD_SUBREAPER marks a process, like a service
* manager, to re-parent orphan (double-forking) child processes
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 06b295bec00d..73b5e655a76e 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -112,13 +112,11 @@ extern void shmem_uncharge(struct inode *inode, long pages);
#ifdef CONFIG_TMPFS
-extern int shmem_add_seals(struct file *file, unsigned int seals);
-extern int shmem_get_seals(struct file *file);
-extern long shmem_fcntl(struct file *file, unsigned int cmd, unsigned long arg);
+extern long memfd_fcntl(struct file *file, unsigned int cmd, unsigned long arg);
#else
-static inline long shmem_fcntl(struct file *f, unsigned int c, unsigned long a)
+static inline long memfd_fcntl(struct file *f, unsigned int c, unsigned long a)
{
return -EINVAL;
}
diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index 388ff2936a87..a3894918a436 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -75,6 +75,6 @@ struct shrinker {
#define SHRINKER_NUMA_AWARE (1 << 0)
#define SHRINKER_MEMCG_AWARE (1 << 1)
-extern int register_shrinker(struct shrinker *);
+extern __must_check int register_shrinker(struct shrinker *);
extern void unregister_shrinker(struct shrinker *);
#endif
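register_shrinker() can fail (for NUMA- or memcg-aware shrinkers it allocates per-node deferred counters), so the annotation forces callers to check. The typical shape, with hypothetical callbacks:

	static unsigned long my_count(struct shrinker *s, struct shrink_control *sc);
	static unsigned long my_scan(struct shrinker *s, struct shrink_control *sc);

	static struct shrinker my_shrinker = {
		.count_objects	= my_count,
		.scan_objects	= my_scan,
		.seeks		= DEFAULT_SEEKS,
	};

	static int __init my_cache_init(void)
	{
		return register_shrinker(&my_shrinker);	/* now __must_check */
	}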
diff --git a/include/linux/swap.h b/include/linux/swap.h
index c2b8128799c1..a1a3f4ed94ce 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -332,20 +332,16 @@ extern void mark_page_accessed(struct page *);
extern void lru_add_drain(void);
extern void lru_add_drain_cpu(int cpu);
extern void lru_add_drain_all(void);
-extern void lru_add_drain_all_cpuslocked(void);
extern void rotate_reclaimable_page(struct page *page);
extern void deactivate_file_page(struct page *page);
extern void mark_page_lazyfree(struct page *page);
extern void swap_setup(void);
-extern void add_page_to_unevictable_list(struct page *page);
-
extern void lru_cache_add_active_or_unevictable(struct page *page,
struct vm_area_struct *vma);
/* linux/mm/vmscan.c */
extern unsigned long zone_reclaimable_pages(struct zone *zone);
-extern unsigned long pgdat_reclaimable_pages(struct pglist_data *pgdat);
extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
gfp_t gfp_mask, nodemask_t *mask);
extern int __isolate_lru_page(struct page *page, isolate_mode_t mode);
diff --git a/include/linux/uuid.h b/include/linux/uuid.h
index 33b0bdbb613c..d9c4a6cce3c2 100644
--- a/include/linux/uuid.h
+++ b/include/linux/uuid.h
@@ -17,6 +17,7 @@
#define _LINUX_UUID_H_
#include <uapi/linux/uuid.h>
+#include <linux/string.h>
#define UUID_SIZE 16
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 1779c9817b39..a4c2317d8b9f 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -216,23 +216,6 @@ static inline unsigned long zone_page_state_snapshot(struct zone *zone,
return x;
}
-static inline unsigned long node_page_state_snapshot(pg_data_t *pgdat,
- enum node_stat_item item)
-{
- long x = atomic_long_read(&pgdat->vm_stat[item]);
-
-#ifdef CONFIG_SMP
- int cpu;
- for_each_online_cpu(cpu)
- x += per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->vm_node_stat_diff[item];
-
- if (x < 0)
- x = 0;
-#endif
- return x;
-}
-
-
#ifdef CONFIG_NUMA
extern void __inc_numa_state(struct zone *zone, enum numa_stat_item item);
extern unsigned long sum_zone_node_page_state(int node,
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index d70b53e65f43..e0b8b9173e1c 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -192,12 +192,12 @@ DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_memcg_softlimit_re
TRACE_EVENT(mm_shrink_slab_start,
TP_PROTO(struct shrinker *shr, struct shrink_control *sc,
- long nr_objects_to_shrink, unsigned long pgs_scanned,
- unsigned long lru_pgs, unsigned long cache_items,
- unsigned long long delta, unsigned long total_scan),
+ long nr_objects_to_shrink, unsigned long cache_items,
+ unsigned long long delta, unsigned long total_scan,
+ int priority),
- TP_ARGS(shr, sc, nr_objects_to_shrink, pgs_scanned, lru_pgs,
- cache_items, delta, total_scan),
+ TP_ARGS(shr, sc, nr_objects_to_shrink, cache_items, delta, total_scan,
+ priority),
TP_STRUCT__entry(
__field(struct shrinker *, shr)
@@ -205,11 +205,10 @@ TRACE_EVENT(mm_shrink_slab_start,
__field(int, nid)
__field(long, nr_objects_to_shrink)
__field(gfp_t, gfp_flags)
- __field(unsigned long, pgs_scanned)
- __field(unsigned long, lru_pgs)
__field(unsigned long, cache_items)
__field(unsigned long long, delta)
__field(unsigned long, total_scan)
+ __field(int, priority)
),
TP_fast_assign(
@@ -218,24 +217,22 @@ TRACE_EVENT(mm_shrink_slab_start,
__entry->nid = sc->nid;
__entry->nr_objects_to_shrink = nr_objects_to_shrink;
__entry->gfp_flags = sc->gfp_mask;
- __entry->pgs_scanned = pgs_scanned;
- __entry->lru_pgs = lru_pgs;
__entry->cache_items = cache_items;
__entry->delta = delta;
__entry->total_scan = total_scan;
+ __entry->priority = priority;
),
- TP_printk("%pF %p: nid: %d objects to shrink %ld gfp_flags %s pgs_scanned %ld lru_pgs %ld cache items %ld delta %lld total_scan %ld",
+ TP_printk("%pF %p: nid: %d objects to shrink %ld gfp_flags %s cache items %ld delta %lld total_scan %ld priority %d",
__entry->shrink,
__entry->shr,
__entry->nid,
__entry->nr_objects_to_shrink,
show_gfp_flags(__entry->gfp_flags),
- __entry->pgs_scanned,
- __entry->lru_pgs,
__entry->cache_items,
__entry->delta,
- __entry->total_scan)
+ __entry->total_scan,
+ __entry->priority)
);
TRACE_EVENT(mm_shrink_slab_end,
diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h
index f8b134f5608f..e7ee32861d51 100644
--- a/include/uapi/asm-generic/mman-common.h
+++ b/include/uapi/asm-generic/mman-common.h
@@ -27,6 +27,9 @@
# define MAP_UNINITIALIZED 0x0 /* Don't support this flag */
#endif
+/* 0x0100 - 0x80000 flags are defined in asm-generic/mman.h */
+#define MAP_FIXED_NOREPLACE 0x100000 /* MAP_FIXED which doesn't unmap underlying mapping */
+
/*
* Flags for mlock
*/
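A userspace sketch of the intended semantics (flag value taken from this patch, with a fallback define since older headers lack it): the mapping fails rather than silently clobbering whatever already lives at the hint.

	#include <stdio.h>
	#include <sys/mman.h>

	#ifndef MAP_FIXED_NOREPLACE
	#define MAP_FIXED_NOREPLACE 0x100000	/* value from this patch */
	#endif

	int main(void)
	{
		void *hint = (void *)0x700000000000UL;
		void *p = mmap(hint, 4096, PROT_READ | PROT_WRITE,
			       MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE,
			       -1, 0);

		if (p == MAP_FAILED)	/* something already mapped at hint */
			perror("mmap");
		return 0;
	}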
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index af5f8c2df87a..3165863aa187 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -207,4 +207,8 @@ struct prctl_mm_map {
# define PR_SVE_VL_LEN_MASK 0xffff
# define PR_SVE_VL_INHERIT (1 << 17) /* inherit across exec */
+/* Process-based variant of PDEATHSIG */
+#define PR_SET_PDEATHSIG_PROC 48
+#define PR_GET_PDEATHSIG_PROC 49
+
#endif /* _LINUX_PRCTL_H */
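A hedged userspace sketch of opting in (constant taken from this patch; a fallback define is needed because released headers do not carry it):

	#include <signal.h>
	#include <sys/prctl.h>

	#ifndef PR_SET_PDEATHSIG_PROC
	#define PR_SET_PDEATHSIG_PROC 48	/* value from this patch */
	#endif

	int main(void)
	{
		/* deliver SIGKILL to this process when its parent process
		 * exits; the older PR_SET_PDEATHSIG tracks the parent thread */
		return prctl(PR_SET_PDEATHSIG_PROC, SIGKILL, 0, 0, 0);
	}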
diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h
index 0f272818a4d2..be69135eb361 100644
--- a/include/uapi/linux/sysctl.h
+++ b/include/uapi/linux/sysctl.h
@@ -27,6 +27,10 @@
#include <linux/types.h>
#include <linux/compiler.h>
+#ifndef __KERNEL__
+#include <stddef.h> /* For size_t. */
+#endif
+
#define CTL_MAXNAME 10 /* how many path components do we allow in a
call to sysctl? In other words, what is
the largest acceptable value for the nlen
diff --git a/include/uapi/linux/uuid.h b/include/uapi/linux/uuid.h
index 5c04130bb524..e5a7eecef7c3 100644
--- a/include/uapi/linux/uuid.h
+++ b/include/uapi/linux/uuid.h
@@ -19,7 +19,6 @@
#define _UAPI_LINUX_UUID_H_
#include <linux/types.h>
-#include <linux/string.h>
typedef struct {
__u8 b[16];