diff options
author | Stephen Rothwell <sfr@canb.auug.org.au> | 2018-01-11 16:16:10 +1100 |
---|---|---|
committer | Stephen Rothwell <sfr@canb.auug.org.au> | 2018-01-11 16:16:12 +1100 |
commit | e130bc1d00a4a8597f18c31f2e75cd77a09fb7b1 (patch) | |
tree | c34476e5afc3d29e9b38373de097faa9597753b8 /include | |
parent | e2d7fe89e8ae9ee56c560095b4fe8042c3e347cd (diff) | |
parent | 70286688e5ad172ebd9cd1fd468fd54361619479 (diff) |
Merge branch 'akpm-current/current'
Diffstat (limited to 'include')
37 files changed, 437 insertions, 264 deletions
diff --git a/include/asm-generic/bitops/find.h b/include/asm-generic/bitops/find.h index 1ba611e16fa0..8a1ee10014de 100644 --- a/include/asm-generic/bitops/find.h +++ b/include/asm-generic/bitops/find.h @@ -16,6 +16,22 @@ extern unsigned long find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset); #endif +#ifndef find_next_and_bit +/** + * find_next_and_bit - find the next set bit in both memory regions + * @addr1: The first address to base the search on + * @addr2: The second address to base the search on + * @offset: The bitnumber to start searching at + * @size: The bitmap size in bits + * + * Returns the bit number for the next set bit + * If no bits are set, returns @size. + */ +extern unsigned long find_next_and_bit(const unsigned long *addr1, + const unsigned long *addr2, unsigned long size, + unsigned long offset); +#endif + #ifndef find_next_zero_bit /** * find_next_zero_bit - find the next cleared bit in a memory region @@ -55,8 +71,12 @@ extern unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size); #else /* CONFIG_GENERIC_FIND_FIRST_BIT */ +#ifndef find_first_bit #define find_first_bit(addr, size) find_next_bit((addr), (size), 0) +#endif +#ifndef find_first_zero_bit #define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0) +#endif #endif /* CONFIG_GENERIC_FIND_FIRST_BIT */ diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h index 963b755d19b0..a7613e1b0c87 100644 --- a/include/asm-generic/bug.h +++ b/include/asm-generic/bug.h @@ -52,6 +52,7 @@ struct bug_entry { #ifndef HAVE_ARCH_BUG #define BUG() do { \ printk("BUG: failure at %s:%d/%s()!\n", __FILE__, __LINE__, __func__); \ + barrier_before_unreachable(); \ panic("BUG!"); \ } while (0) #endif diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 868e68561f91..2cfa3075d148 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -309,19 +309,26 @@ extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); #endif -#ifndef __HAVE_ARCH_PMDP_INVALIDATE -extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, - pmd_t *pmdp); -#endif - -#ifndef __HAVE_ARCH_PMDP_HUGE_SPLIT_PREPARE -static inline void pmdp_huge_split_prepare(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp) +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +/* + * This is an implementation of pmdp_establish() that is only suitable for an + * architecture that doesn't have hardware dirty/accessed bits. In this case we + * can't race with CPU which sets these bits and non-atomic aproach is fine. + */ +static inline pmd_t generic_pmdp_establish(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp, pmd_t pmd) { - + pmd_t old_pmd = *pmdp; + set_pmd_at(vma->vm_mm, address, pmdp, pmd); + return old_pmd; } #endif +#ifndef __HAVE_ARCH_PMDP_INVALIDATE +extern pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, + pmd_t *pmdp); +#endif + #ifndef __HAVE_ARCH_PTE_SAME static inline int pte_same(pte_t pte_a, pte_t pte_b) { diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 3489253e38fc..a17b5121d4f5 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -64,8 +64,8 @@ * bitmap_find_free_region(bitmap, bits, order) Find and allocate bit region * bitmap_release_region(bitmap, pos, order) Free specified bit region * bitmap_allocate_region(bitmap, pos, order) Allocate specified bit region - * bitmap_from_u32array(dst, nbits, buf, nwords) *dst = *buf (nwords 32b words) - * bitmap_to_u32array(buf, nwords, src, nbits) *buf = *dst (nwords 32b words) + * bitmap_from_arr32(dst, buf, nbits) Copy nbits from u32[] buf to dst + * bitmap_to_arr32(buf, src, nbits) Copy nbits from buf to u32[] dst * */ @@ -83,8 +83,12 @@ * test_and_change_bit(bit, addr) Change bit and return old value * find_first_zero_bit(addr, nbits) Position first zero bit in *addr * find_first_bit(addr, nbits) Position first set bit in *addr - * find_next_zero_bit(addr, nbits, bit) Position next zero bit in *addr >= bit + * find_next_zero_bit(addr, nbits, bit) + * Position next zero bit in *addr >= bit * find_next_bit(addr, nbits, bit) Position next set bit in *addr >= bit + * find_next_and_bit(addr1, addr2, nbits, bit) + * Same as find_next_bit, but in + * (*addr1 & *addr2) * */ @@ -174,14 +178,7 @@ extern void bitmap_fold(unsigned long *dst, const unsigned long *orig, extern int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order); extern void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order); extern int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order); -extern unsigned int bitmap_from_u32array(unsigned long *bitmap, - unsigned int nbits, - const u32 *buf, - unsigned int nwords); -extern unsigned int bitmap_to_u32array(u32 *buf, - unsigned int nwords, - const unsigned long *bitmap, - unsigned int nbits); + #ifdef __BIG_ENDIAN extern void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int nbits); #else @@ -228,6 +225,35 @@ static inline void bitmap_copy(unsigned long *dst, const unsigned long *src, } } +/* + * Copy bitmap and clear tail bits in last word. + */ +static inline void bitmap_copy_safe(unsigned long *dst, + const unsigned long *src, unsigned int nbits) +{ + bitmap_copy(dst, src, nbits); + if (nbits % BITS_PER_LONG) + dst[nbits / BITS_PER_LONG] &= BITMAP_LAST_WORD_MASK(nbits); +} + +/* + * On 32-bit systems bitmaps are represented as u32 arrays internally, and + * therefore conversion is not needed when copying data from/to arrays of u32. + */ +#if BITS_PER_LONG == 64 +extern void bitmap_from_arr32(unsigned long *bitmap, const u32 *buf, + unsigned int nbits); +extern void bitmap_to_arr32(u32 *buf, const unsigned long *bitmap, + unsigned int nbits); +#else +#define bitmap_from_arr32(bitmap, buf, nbits) \ + bitmap_copy_safe((unsigned long *) (bitmap), \ + (const unsigned long *) (buf), (nbits)) +#define bitmap_to_arr32(buf, bitmap, nbits) \ + bitmap_copy_safe((unsigned long *) (buf), \ + (const unsigned long *) (bitmap), (nbits)) +#endif + static inline int bitmap_and(unsigned long *dst, const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 9f242b876fde..8e12d7c54308 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -81,6 +81,11 @@ enum { * Enable cpuset controller in v1 cgroup to use v2 behavior. */ CGRP_ROOT_CPUSET_V2_MODE = (1 << 4), + + /* + * Enable cgroup-aware OOM killer. + */ + CGRP_GROUP_OOM = (1 << 5), }; /* cftype->flags */ diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 3b609edffa8f..d02a4df3f473 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -19,3 +19,11 @@ #define randomized_struct_fields_start struct { #define randomized_struct_fields_end }; + +/* all clang versions usable with the kernel support KASAN ABI version 5 */ +#define KASAN_ABI_VERSION 5 + +/* emulate gcc's __SANITIZE_ADDRESS__ flag */ +#if __has_feature(address_sanitizer) +#define __SANITIZE_ADDRESS__ +#endif diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 2272ded07496..e6aba21452ec 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -205,6 +205,15 @@ #endif /* + * calling noreturn functions, __builtin_unreachable() and __builtin_trap() + * confuse the stack allocation in gcc, leading to overly large stack + * frames, see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82365 + * + * Adding an empty inline assembly before it works around the problem + */ +#define barrier_before_unreachable() asm volatile("") + +/* * Mark a position in code as unreachable. This can be used to * suppress control flow warnings after asm blocks that transfer * control elsewhere. @@ -214,7 +223,11 @@ * unreleased. Really, we need to have autoconf for the kernel. */ #define unreachable() \ - do { annotate_unreachable(); __builtin_unreachable(); } while (0) + do { \ + annotate_unreachable(); \ + barrier_before_unreachable(); \ + __builtin_unreachable(); \ + } while (0) /* Mark a function definition as prohibited from being cloned. */ #define __noclone __attribute__((__noclone__, __optimize__("no-tracer"))) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 52e611ab9a6c..97847f2f86cf 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -86,6 +86,11 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, # define barrier_data(ptr) barrier() #endif +/* workaround for GCC PR82365 if needed */ +#ifndef barrier_before_unreachable +# define barrier_before_unreachable() do { } while (0) +#endif + /* Unreachable code */ #ifdef CONFIG_STACK_VALIDATION /* diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 75b565194437..d4a2a7dcd72d 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -640,7 +640,7 @@ static inline int cpulist_parse(const char *buf, struct cpumask *dstp) /** * cpumask_size - size to allocate for a 'struct cpumask' in bytes */ -static inline size_t cpumask_size(void) +static inline unsigned int cpumask_size(void) { return BITS_TO_LONGS(nr_cpumask_bits) * sizeof(long); } diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index 06097ef30449..8ad0a70f4b7d 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -57,6 +57,8 @@ phys_addr_t paddr_vmcoreinfo_note(void); vmcoreinfo_append_str("NUMBER(%s)=%ld\n", #name, (long)name) #define VMCOREINFO_CONFIG(name) \ vmcoreinfo_append_str("CONFIG_%s=y\n", #name) +#define VMCOREINFO_PHYS_BASE(value) \ + vmcoreinfo_append_str("PHYS_BASE=%lx\n", (unsigned long)value) extern u32 *vmcoreinfo_note; diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 82a25880714a..612a29b7f6c6 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -119,6 +119,7 @@ long hugetlb_unreserve_pages(struct inode *inode, long start, long end, long freed); bool isolate_huge_page(struct page *page, struct list_head *list); void putback_active_hugepage(struct page *page); +void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason); void free_huge_page(struct page *page); void hugetlb_fix_reserve_counts(struct inode *inode); extern struct mutex *hugetlb_fault_mutex_table; @@ -129,7 +130,6 @@ u32 hugetlb_fault_mutex_hash(struct hstate *h, struct mm_struct *mm, pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud); -extern int hugepages_treat_as_movable; extern int sysctl_hugetlb_shm_group; extern struct list_head huge_boot_pages; @@ -158,6 +158,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, unsigned long address, unsigned long end, pgprot_t newprot); bool is_hugetlb_entry_migration(pte_t pte); + #else /* !CONFIG_HUGETLB_PAGE */ static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma) @@ -198,6 +199,7 @@ static inline bool isolate_huge_page(struct page *page, struct list_head *list) return false; } #define putback_active_hugepage(p) do {} while (0) +#define move_hugetlb_state(old, new, reason) do {} while (0) static inline unsigned long hugetlb_change_protection(struct vm_area_struct *vma, unsigned long address, unsigned long end, pgprot_t newprot) @@ -271,6 +273,17 @@ static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb) return sb->s_fs_info; } +struct hugetlbfs_inode_info { + struct shared_policy policy; + struct inode vfs_inode; + unsigned int seals; +}; + +static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode) +{ + return container_of(inode, struct hugetlbfs_inode_info, vfs_inode); +} + extern const struct file_operations hugetlbfs_file_operations; extern const struct vm_operations_struct hugetlb_vm_ops; struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct, @@ -343,10 +356,9 @@ struct huge_bootmem_page { struct page *alloc_huge_page(struct vm_area_struct *vma, unsigned long addr, int avoid_reserve); struct page *alloc_huge_page_node(struct hstate *h, int nid); -struct page *alloc_huge_page_noerr(struct vm_area_struct *vma, - unsigned long addr, int avoid_reserve); struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid, nodemask_t *nmask); +struct page *alloc_huge_page_vma(struct vm_area_struct *vma, unsigned long address); int huge_add_to_page_cache(struct page *page, struct address_space *mapping, pgoff_t idx); @@ -524,7 +536,7 @@ struct hstate {}; #define alloc_huge_page(v, a, r) NULL #define alloc_huge_page_node(h, nid) NULL #define alloc_huge_page_nodemask(h, preferred_nid, nmask) NULL -#define alloc_huge_page_noerr(v, a, r) NULL +#define alloc_huge_page_vma(vma, address) NULL #define alloc_bootmem_huge_page(h) NULL #define hstate_file(f) NULL #define hstate_sizelog(s) NULL diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index 7288f9c395b6..23190e5c940b 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -185,7 +185,7 @@ static inline void print_symbol(const char *fmt, unsigned long addr) static inline void print_ip_sym(unsigned long ip) { - printk("[<%p>] %pS\n", (void *) ip, (void *) ip); + printk("[<%px>] %pS\n", (void *) ip, (void *) ip); } #endif /*_LINUX_KALLSYMS_H*/ diff --git a/include/linux/kasan.h b/include/linux/kasan.h index e3eb834c9a35..fc45f8952d1e 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -56,14 +56,14 @@ void kasan_poison_object_data(struct kmem_cache *cache, void *object); void kasan_init_slab_obj(struct kmem_cache *cache, const void *object); void kasan_kmalloc_large(const void *ptr, size_t size, gfp_t flags); -void kasan_kfree_large(const void *ptr); -void kasan_poison_kfree(void *ptr); +void kasan_kfree_large(void *ptr, unsigned long ip); +void kasan_poison_kfree(void *ptr, unsigned long ip); void kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size, gfp_t flags); void kasan_krealloc(const void *object, size_t new_size, gfp_t flags); void kasan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags); -bool kasan_slab_free(struct kmem_cache *s, void *object); +bool kasan_slab_free(struct kmem_cache *s, void *object, unsigned long ip); struct kasan_cache { int alloc_meta_offset; @@ -108,8 +108,8 @@ static inline void kasan_init_slab_obj(struct kmem_cache *cache, const void *object) {} static inline void kasan_kmalloc_large(void *ptr, size_t size, gfp_t flags) {} -static inline void kasan_kfree_large(const void *ptr) {} -static inline void kasan_poison_kfree(void *ptr) {} +static inline void kasan_kfree_large(void *ptr, unsigned long ip) {} +static inline void kasan_poison_kfree(void *ptr, unsigned long ip) {} static inline void kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size, gfp_t flags) {} static inline void kasan_krealloc(const void *object, size_t new_size, @@ -117,7 +117,8 @@ static inline void kasan_krealloc(const void *object, size_t new_size, static inline void kasan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags) {} -static inline bool kasan_slab_free(struct kmem_cache *s, void *object) +static inline bool kasan_slab_free(struct kmem_cache *s, void *object, + unsigned long ip) { return false; } diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h index bb8129a3474d..96def9d15b1b 100644 --- a/include/linux/list_lru.h +++ b/include/linux/list_lru.h @@ -32,6 +32,7 @@ struct list_lru_one { }; struct list_lru_memcg { + struct rcu_head rcu; /* array of per cgroup lists, indexed by memcg_cache_id */ struct list_lru_one *lru[0]; }; @@ -43,7 +44,7 @@ struct list_lru_node { struct list_lru_one lru; #if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB) /* for cgroup aware lrus points to per cgroup lists, otherwise NULL */ - struct list_lru_memcg *memcg_lrus; + struct list_lru_memcg __rcu *memcg_lrus; #endif long nr_items; } ____cacheline_aligned_in_smp; diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 69966c461d1c..7b8bcdf6571d 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -35,6 +35,7 @@ struct mem_cgroup; struct page; struct mm_struct; struct kmem_cache; +struct oom_control; /* Cgroup-specific page state, on top of universal node page state */ enum memcg_stat_item { @@ -108,7 +109,10 @@ struct lruvec_stat { */ struct mem_cgroup_per_node { struct lruvec lruvec; - struct lruvec_stat __percpu *lruvec_stat; + + struct lruvec_stat __percpu *lruvec_stat_cpu; + atomic_long_t lruvec_stat[NR_VM_NODE_STAT_ITEMS]; + unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS]; struct mem_cgroup_reclaim_iter iter[DEF_PRIORITY + 1]; @@ -199,6 +203,13 @@ struct mem_cgroup { /* OOM-Killer disable */ int oom_kill_disable; + /* + * Treat the sub-tree as an indivisible memory consumer, + * kill all belonging tasks if the memory cgroup selected + * as OOM victim. + */ + bool oom_group; + /* handle for "memory.events" */ struct cgroup_file events_file; @@ -227,10 +238,10 @@ struct mem_cgroup { spinlock_t move_lock; struct task_struct *move_lock_task; unsigned long move_lock_flags; - /* - * percpu counter. - */ - struct mem_cgroup_stat_cpu __percpu *stat; + + struct mem_cgroup_stat_cpu __percpu *stat_cpu; + atomic_long_t stat[MEMCG_NR_STAT]; + atomic_long_t events[MEMCG_NR_EVENTS]; unsigned long socket_pressure; @@ -265,6 +276,12 @@ struct mem_cgroup { /* WARNING: nodeinfo must be the last member here */ }; +/* + * size of first charge trial. "32" comes from vmscan.c's magic value. + * TODO: maybe necessary to use big numbers in big irons. + */ +#define MEMCG_CHARGE_BATCH 32U + extern struct mem_cgroup *root_mem_cgroup; static inline bool mem_cgroup_disabled(void) @@ -272,13 +289,6 @@ static inline bool mem_cgroup_disabled(void) return !cgroup_subsys_enabled(memory_cgrp_subsys); } -static inline void mem_cgroup_event(struct mem_cgroup *memcg, - enum memcg_event_item event) -{ - this_cpu_inc(memcg->stat->events[event]); - cgroup_file_notify(&memcg->events_file); -} - bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg); int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm, @@ -342,6 +352,11 @@ struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){ return css ? container_of(css, struct mem_cgroup, css) : NULL; } +static inline void mem_cgroup_put(struct mem_cgroup *memcg) +{ + css_put(&memcg->css); +} + #define mem_cgroup_from_counter(counter, member) \ container_of(counter, struct mem_cgroup, member) @@ -480,6 +495,13 @@ static inline bool task_in_memcg_oom(struct task_struct *p) bool mem_cgroup_oom_synchronize(bool wait); +bool mem_cgroup_select_oom_victim(struct oom_control *oc); + +static inline bool mem_cgroup_oom_group(struct mem_cgroup *memcg) +{ + return memcg->oom_group; +} + #ifdef CONFIG_MEMCG_SWAP extern int do_swap_account; #endif @@ -492,32 +514,38 @@ void unlock_page_memcg(struct page *page); static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx) { - long val = 0; - int cpu; - - for_each_possible_cpu(cpu) - val += per_cpu(memcg->stat->count[idx], cpu); - - if (val < 0) - val = 0; - - return val; + long x = atomic_long_read(&memcg->stat[idx]); +#ifdef CONFIG_SMP + if (x < 0) + x = 0; +#endif + return x; } /* idx can be of type enum memcg_stat_item or node_stat_item */ static inline void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val) { - if (!mem_cgroup_disabled()) - __this_cpu_add(memcg->stat->count[idx], val); + long x; + + if (mem_cgroup_disabled()) + return; + + x = val + __this_cpu_read(memcg->stat_cpu->count[idx]); + if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) { + atomic_long_add(x, &memcg->stat[idx]); + x = 0; + } + __this_cpu_write(memcg->stat_cpu->count[idx], x); } /* idx can be of type enum memcg_stat_item or node_stat_item */ static inline void mod_memcg_state(struct mem_cgroup *memcg, int idx, int val) { - if (!mem_cgroup_disabled()) - this_cpu_add(memcg->stat->count[idx], val); + preempt_disable(); + __mod_memcg_state(memcg, idx, val); + preempt_enable(); } /** @@ -555,87 +583,108 @@ static inline unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx) { struct mem_cgroup_per_node *pn; - long val = 0; - int cpu; + long x; if (mem_cgroup_disabled()) return node_page_state(lruvec_pgdat(lruvec), idx); pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); - for_each_possible_cpu(cpu) - val += per_cpu(pn->lruvec_stat->count[idx], cpu); - - if (val < 0) - val = 0; - - return val; + x = atomic_long_read(&pn->lruvec_stat[idx]); +#ifdef CONFIG_SMP + if (x < 0) + x = 0; +#endif + return x; } static inline void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val) { struct mem_cgroup_per_node *pn; + long x; + /* Update node */ __mod_node_page_state(lruvec_pgdat(lruvec), idx, val); + if (mem_cgroup_disabled()) return; + pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); + + /* Update memcg */ __mod_memcg_state(pn->memcg, idx, val); - __this_cpu_add(pn->lruvec_stat->count[idx], val); + + /* Update lruvec */ + x = val + __this_cpu_read(pn->lruvec_stat_cpu->count[idx]); + if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) { + atomic_long_add(x, &pn->lruvec_stat[idx]); + x = 0; + } + __this_cpu_write(pn->lruvec_stat_cpu->count[idx], x); } static inline void mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val) { - struct mem_cgroup_per_node *pn; - - mod_node_page_state(lruvec_pgdat(lruvec), idx, val); - if (mem_cgroup_disabled()) - return; - pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); - mod_memcg_state(pn->memcg, idx, val); - this_cpu_add(pn->lruvec_stat->count[idx], val); + preempt_disable(); + __mod_lruvec_state(lruvec, idx, val); + preempt_enable(); } static inline void __mod_lruvec_page_state(struct page *page, enum node_stat_item idx, int val) { - struct mem_cgroup_per_node *pn; + pg_data_t *pgdat = page_pgdat(page); + struct lruvec *lruvec; - __mod_node_page_state(page_pgdat(page), idx, val); - if (mem_cgroup_disabled() || !page->mem_cgroup) + /* Untracked pages have no memcg, no lruvec. Update only the node */ + if (!page->mem_cgroup) { + __mod_node_page_state(pgdat, idx, val); return; - __mod_memcg_state(page->mem_cgroup, idx, val); - pn = page->mem_cgroup->nodeinfo[page_to_nid(page)]; - __this_cpu_add(pn->lruvec_stat->count[idx], val); + } + + lruvec = mem_cgroup_lruvec(pgdat, page->mem_cgroup); + __mod_lruvec_state(lruvec, idx, val); } static inline void mod_lruvec_page_state(struct page *page, enum node_stat_item idx, int val) { - struct mem_cgroup_per_node *pn; - - mod_node_page_state(page_pgdat(page), idx, val); - if (mem_cgroup_disabled() || !page->mem_cgroup) - return; - mod_memcg_state(page->mem_cgroup, idx, val); - pn = page->mem_cgroup->nodeinfo[page_to_nid(page)]; - this_cpu_add(pn->lruvec_stat->count[idx], val); + preempt_disable(); + __mod_lruvec_page_state(page, idx, val); + preempt_enable(); } unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, gfp_t gfp_mask, unsigned long *total_scanned); +/* idx can be of type enum memcg_event_item or vm_event_item */ +static inline void __count_memcg_events(struct mem_cgroup *memcg, + int idx, unsigned long count) +{ + unsigned long x; + + if (mem_cgroup_disabled()) + return; + + x = count + __this_cpu_read(memcg->stat_cpu->events[idx]); + if (unlikely(x > MEMCG_CHARGE_BATCH)) { + atomic_long_add(x, &memcg->events[idx]); + x = 0; + } + __this_cpu_write(memcg->stat_cpu->events[idx], x); +} + static inline void count_memcg_events(struct mem_cgroup *memcg, - enum vm_event_item idx, - unsigned long count) + int idx, unsigned long count) { - if (!mem_cgroup_disabled()) - this_cpu_add(memcg->stat->events[idx], count); + preempt_disable(); + __count_memcg_events(memcg, idx, count); + preempt_enable(); } -/* idx can be of type enum memcg_stat_item or node_stat_item */ +/* idx can be of type enum memcg_event_item or vm_event_item */ static inline void count_memcg_page_event(struct page *page, int idx) { @@ -654,12 +703,20 @@ static inline void count_memcg_event_mm(struct mm_struct *mm, rcu_read_lock(); memcg = mem_cgroup_from_task(rcu_dereference(mm->owner)); if (likely(memcg)) { - this_cpu_inc(memcg->stat->events[idx]); + count_memcg_events(memcg, idx, 1); if (idx == OOM_KILL) cgroup_file_notify(&memcg->events_file); } rcu_read_unlock(); } + +static inline void mem_cgroup_event(struct mem_cgroup *memcg, + enum memcg_event_item event) +{ + count_memcg_events(memcg, event, 1); + cgroup_file_notify(&memcg->events_file); +} + #ifdef CONFIG_TRANSPARENT_HUGEPAGE void mem_cgroup_split_huge_fixup(struct page *head); #endif @@ -744,6 +801,10 @@ static inline bool task_in_mem_cgroup(struct task_struct *task, return true; } +static inline void mem_cgroup_put(struct mem_cgroup *memcg) +{ +} + static inline struct mem_cgroup * mem_cgroup_iter(struct mem_cgroup *root, struct mem_cgroup *prev, @@ -936,6 +997,16 @@ static inline void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx) { } + +static inline bool mem_cgroup_select_oom_victim(struct oom_control *oc) +{ + return false; +} + +static inline bool mem_cgroup_oom_group(struct mem_cgroup *memcg) +{ + return false; +} #endif /* CONFIG_MEMCG */ /* idx can be of type enum memcg_stat_item or node_stat_item */ diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 58e110aee7ab..703069a73c61 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -231,9 +231,6 @@ void put_online_mems(void); void mem_hotplug_begin(void); void mem_hotplug_done(void); -extern void set_zone_contiguous(struct zone *zone); -extern void clear_zone_contiguous(struct zone *zone); - #else /* ! CONFIG_MEMORY_HOTPLUG */ #define pfn_to_online_page(pfn) \ ({ \ diff --git a/include/linux/migrate.h b/include/linux/migrate.h index a2246cf670ba..0c6fe904bc97 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -7,8 +7,7 @@ #include <linux/migrate_mode.h> #include <linux/hugetlb.h> -typedef struct page *new_page_t(struct page *page, unsigned long private, - int **reason); +typedef struct page *new_page_t(struct page *page, unsigned long private); typedef void free_page_t(struct page *page, unsigned long private); /* @@ -43,9 +42,9 @@ static inline struct page *new_page_nodemask(struct page *page, return alloc_huge_page_nodemask(page_hstate(compound_head(page)), preferred_nid, nodemask); - if (thp_migration_supported() && PageTransHuge(page)) { - order = HPAGE_PMD_ORDER; + if (PageTransHuge(page)) { gfp_mask |= GFP_TRANSHUGE; + order = HPAGE_PMD_ORDER; } if (PageHighMem(page) || (zone_idx(page_zone(page)) == ZONE_MOVABLE)) diff --git a/include/linux/mm.h b/include/linux/mm.h index ea818ff739cd..63f7ba111f64 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1312,8 +1312,6 @@ void free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling); int copy_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *vma); -void unmap_mapping_range(struct address_space *mapping, - loff_t const holebegin, loff_t const holelen, int even_cows); int follow_pte_pmd(struct mm_struct *mm, unsigned long address, unsigned long *start, unsigned long *end, pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp); @@ -1324,12 +1322,6 @@ int follow_phys(struct vm_area_struct *vma, unsigned long address, int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, void *buf, int len, int write); -static inline void unmap_shared_mapping_range(struct address_space *mapping, - loff_t const holebegin, loff_t const holelen) -{ - unmap_mapping_range(mapping, holebegin, holelen, 0); -} - extern void truncate_pagecache(struct inode *inode, loff_t new); extern void truncate_setsize(struct inode *inode, loff_t newsize); void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to); @@ -1344,6 +1336,10 @@ extern int handle_mm_fault(struct vm_area_struct *vma, unsigned long address, extern int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm, unsigned long address, unsigned int fault_flags, bool *unlocked); +void unmap_mapping_pages(struct address_space *mapping, + pgoff_t start, pgoff_t nr, bool even_cows); +void unmap_mapping_range(struct address_space *mapping, + loff_t const holebegin, loff_t const holelen, int even_cows); #else static inline int handle_mm_fault(struct vm_area_struct *vma, unsigned long address, unsigned int flags) @@ -1360,10 +1356,20 @@ static inline int fixup_user_fault(struct task_struct *tsk, BUG(); return -EFAULT; } +static inline void unmap_mapping_pages(struct address_space *mapping, + pgoff_t start, pgoff_t nr, bool even_cows) { } +static inline void unmap_mapping_range(struct address_space *mapping, + loff_t const holebegin, loff_t const holelen, int even_cows) { } #endif -extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, - unsigned int gup_flags); +static inline void unmap_shared_mapping_range(struct address_space *mapping, + loff_t const holebegin, loff_t const holelen) +{ + unmap_mapping_range(mapping, holebegin, holelen, 0); +} + +extern int access_process_vm(struct task_struct *tsk, unsigned long addr, + void *buf, int len, unsigned int gup_flags); extern int access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf, int len, unsigned int gup_flags); extern int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, @@ -2090,6 +2096,7 @@ extern void setup_per_cpu_pageset(void); extern void zone_pcp_update(struct zone *zone); extern void zone_pcp_reset(struct zone *zone); +extern void setup_zone_pageset(struct zone *zone); /* page_alloc.c */ extern int min_free_kbytes; diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index cfd0ac4e5e0e..fd1af6b9591d 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -31,28 +31,56 @@ struct hmm; * it to keep track of whatever it is we are using the page for at the * moment. Note that we have no way to track which tasks are using * a page, though if it is a pagecache page, rmap structures can tell us - * who is mapping it. + * who is mapping it. If you allocate the page using alloc_pages(), you + * can use some of the space in struct page for your own purposes. * - * The objects in struct page are organized in double word blocks in - * order to allows us to use atomic double word operations on portions - * of struct page. That is currently only used by slub but the arrangement - * allows the use of atomic double word operations on the flags/mapping - * and lru list pointers also. + * Pages that were once in the page cache may be found under the RCU lock + * even after they have been recycled to a different purpose. The page + * cache reads and writes some of the fields in struct page to pin the + * page before checking that it's still in the page cache. It is vital + * that all users of struct page: + * 1. Use the first word as PageFlags. + * 2. Clear or preserve bit 0 of page->compound_head. It is used as + * PageTail for compound pages, and the page cache must not see false + * positives. Some users put a pointer here (guaranteed to be at least + * 4-byte aligned), other users avoid using the field altogether. + * 3. page->_refcount must either not be used, or must be used in such a + * way that other CPUs temporarily incrementing and then decrementing the + * refcount does not cause problems. On receiving the page from + * alloc_pages(), the refcount will be positive. + * 4. Either preserve page->_mapcount or restore it to -1 before freeing it. + * + * If you allocate pages of order > 0, you can use the fields in the struct + * page associated with each page, but bear in mind that the pages may have + * been inserted individually into the page cache, so you must use the above + * four fields in a compatible way for each struct page. + * + * SLUB uses cmpxchg_double() to atomically update its freelist and + * counters. That requires that freelist & counters be adjacent and + * double-word aligned. We align all struct pages to double-word + * boundaries, and ensure that 'freelist' is aligned within the + * struct. */ +#ifdef CONFIG_HAVE_ALIGNED_STRUCT_PAGE +#define _struct_page_alignment __aligned(2 * sizeof(unsigned long)) +#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) +#define _slub_counter_t unsigned long +#else +#define _slub_counter_t unsigned int +#endif +#else /* !CONFIG_HAVE_ALIGNED_STRUCT_PAGE */ +#define _struct_page_alignment +#define _slub_counter_t unsigned int +#endif /* !CONFIG_HAVE_ALIGNED_STRUCT_PAGE */ + struct page { /* First double word block */ unsigned long flags; /* Atomic flags, some possibly * updated asynchronously */ union { - struct address_space *mapping; /* If low bit clear, points to - * inode address_space, or NULL. - * If page mapped as anonymous - * memory, low bit is set, and - * it points to anon_vma object - * or KSM private structure. See - * PAGE_MAPPING_ANON and - * PAGE_MAPPING_KSM. - */ + /* See page-flags.h for the definition of PAGE_MAPPING_FLAGS */ + struct address_space *mapping; + void *s_mem; /* slab first object */ atomic_t compound_mapcount; /* first tail page */ /* page_deferred_list().next -- second tail page */ @@ -66,40 +94,27 @@ struct page { }; union { -#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \ - defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE) - /* Used for cmpxchg_double in slub */ - unsigned long counters; -#else - /* - * Keep _refcount separate from slub cmpxchg_double data. - * As the rest of the double word is protected by slab_lock - * but _refcount is not. - */ - unsigned counters; -#endif - struct { + _slub_counter_t counters; + unsigned int active; /* SLAB */ + struct { /* SLUB */ + unsigned inuse:16; + unsigned objects:15; + unsigned frozen:1; + }; + int units; /* SLOB */ + + struct { /* Page cache */ + /* + * Count of ptes mapped in mms, to show when + * page is mapped & limit reverse map searches. + * + * Extra information about page type may be + * stored here for pages that are never mapped, + * in which case the value MUST BE <= -2. + * See page-flags.h for more details. + */ + atomic_t _mapcount; - union { - /* - * Count of ptes mapped in mms, to show when - * page is mapped & limit reverse map searches. - * - * Extra information about page type may be - * stored here for pages that are never mapped, - * in which case the value MUST BE <= -2. - * See page-flags.h for more details. - */ - atomic_t _mapcount; - - unsigned int active; /* SLAB */ - struct { /* SLUB */ - unsigned inuse:16; - unsigned objects:15; - unsigned frozen:1; - }; - int units; /* SLOB */ - }; /* * Usage count, *USE WRAPPER FUNCTION* when manual * accounting. See page_ref.h @@ -109,8 +124,6 @@ struct page { }; /* - * Third double word block - * * WARNING: bit 0 of the first word encode PageTail(). That means * the rest users of the storage space MUST NOT use the bit to * avoid collision and false-positive PageTail(). @@ -145,19 +158,9 @@ struct page { unsigned long compound_head; /* If bit zero is set */ /* First tail page only */ -#ifdef CONFIG_64BIT - /* - * On 64 bit system we have enough space in struct page - * to encode compound_dtor and compound_order with - * unsigned int. It can help compiler generate better or - * smaller code on some archtectures. - */ - unsigned int compound_dtor; - unsigned int compound_order; -#else - unsigned short int compound_dtor; - unsigned short int compound_order; -#endif + unsigned char compound_dtor; + unsigned char compound_order; + /* two/six bytes available here */ }; #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS @@ -171,15 +174,14 @@ struct page { #endif }; - /* Remainder is not double word aligned */ union { - unsigned long private; /* Mapping-private opaque data: - * usually used for buffer_heads - * if PagePrivate set; used for - * swp_entry_t if PageSwapCache; - * indicates order in the buddy - * system if PG_buddy is set. - */ + /* + * Mapping-private opaque data: + * Usually used for buffer_heads if PagePrivate + * Used for swp_entry_t if PageSwapCache + * Indicates order in the buddy system if PageBuddy + */ + unsigned long private; #if USE_SPLIT_PTE_PTLOCKS #if ALLOC_SPLIT_PTLOCKS spinlock_t *ptl; @@ -212,15 +214,7 @@ struct page { #ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS int _last_cpupid; #endif -} -/* - * The struct page can be forced to be double word aligned so that atomic ops - * on double words work. The SLUB allocator can make use of such a feature. - */ -#ifdef CONFIG_HAVE_ALIGNED_STRUCT_PAGE - __aligned(2 * sizeof(unsigned long)) -#endif -; +} _struct_page_alignment; #define PAGE_FRAG_CACHE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK) #define PAGE_FRAG_CACHE_MAX_ORDER get_order(PAGE_FRAG_CACHE_MAX_SIZE) diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index b25dc9db19fc..2c6de02a34ae 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -2,6 +2,7 @@ #ifndef _LINUX_MMU_NOTIFIER_H #define _LINUX_MMU_NOTIFIER_H +#include <linux/types.h> #include <linux/list.h> #include <linux/spinlock.h> #include <linux/mm_types.h> @@ -10,6 +11,9 @@ struct mmu_notifier; struct mmu_notifier_ops; +/* mmu_notifier_ops flags */ +#define MMU_INVALIDATE_DOES_NOT_BLOCK (0x01) + #ifdef CONFIG_MMU_NOTIFIER /* @@ -27,6 +31,15 @@ struct mmu_notifier_mm { struct mmu_notifier_ops { /* + * Flags to specify behavior of callbacks for this MMU notifier. + * Used to determine which context an operation may be called. + * + * MMU_INVALIDATE_DOES_NOT_BLOCK: invalidate_{start,end} does not + * block + */ + int flags; + + /* * Called either by mmu_notifier_unregister or when the mm is * being destroyed by exit_mmap, always before all pages are * freed. This can run concurrently with other mmu notifier @@ -137,6 +150,9 @@ struct mmu_notifier_ops { * page. Pages will no longer be referenced by the linux * address space but may still be referenced by sptes until * the last refcount is dropped. + * + * If both of these callbacks cannot block, mmu_notifier_ops.flags + * should have MMU_INVALIDATE_DOES_NOT_BLOCK set. */ void (*invalidate_range_start)(struct mmu_notifier *mn, struct mm_struct *mm, @@ -218,6 +234,7 @@ extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm, bool only_end); extern void __mmu_notifier_invalidate_range(struct mm_struct *mm, unsigned long start, unsigned long end); +extern bool mm_has_blockable_invalidate_notifiers(struct mm_struct *mm); static inline void mmu_notifier_release(struct mm_struct *mm) { @@ -457,6 +474,11 @@ static inline void mmu_notifier_invalidate_range(struct mm_struct *mm, { } +static inline bool mm_has_blockable_invalidate_notifiers(struct mm_struct *mm) +{ + return false; +} + static inline void mmu_notifier_mm_init(struct mm_struct *mm) { } diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 67f2e3c38939..24fcbce6e267 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -886,7 +886,7 @@ int min_free_kbytes_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); int watermark_scale_factor_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); -extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1]; +extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES]; int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, diff --git a/include/linux/oom.h b/include/linux/oom.h index 5bad038ac012..d4d41c01a74d 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -10,6 +10,13 @@ #include <linux/sched/coredump.h> /* MMF_* */ #include <linux/mm.h> /* VM_FAULT* */ + +/* + * Special value returned by victim selection functions to indicate + * that are inflight OOM victims. + */ +#define INFLIGHT_VICTIM ((void *)-1UL) + struct zonelist; struct notifier_block; struct mem_cgroup; @@ -40,7 +47,8 @@ struct oom_control { /* Used by oom implementation, do not set */ unsigned long totalpages; - struct task_struct *chosen; + struct task_struct *chosen_task; + struct mem_cgroup *chosen_memcg; unsigned long chosen_points; }; @@ -111,6 +119,8 @@ extern void oom_killer_enable(void); extern struct task_struct *find_lock_task_mm(struct task_struct *p); +extern int oom_evaluate_task(struct task_struct *task, void *arg); + /* sysctls */ extern int sysctl_oom_dump_tasks; extern int sysctl_oom_kill_allocating_task; diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h index cdad58bbfd8b..4ae347cbc36d 100644 --- a/include/linux/page-isolation.h +++ b/include/linux/page-isolation.h @@ -63,7 +63,6 @@ undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn, bool skip_hwpoisoned_pages); -struct page *alloc_migrate_target(struct page *page, unsigned long private, - int **resultp); +struct page *alloc_migrate_target(struct page *page, unsigned long private); #endif diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index 5fb6580f7f23..6dc456ac6136 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -9,14 +9,14 @@ #ifndef _LINUX_PAGEVEC_H #define _LINUX_PAGEVEC_H -/* 14 pointers + two long's align the pagevec structure to a power of two */ -#define PAGEVEC_SIZE 14 +/* 15 pointers + header align the pagevec structure to a power of two */ +#define PAGEVEC_SIZE 15 struct page; struct address_space; struct pagevec { - unsigned long nr; + unsigned char nr; bool percpu_pvec_drained; struct page *pages[PAGEVEC_SIZE]; }; diff --git a/include/linux/sched.h b/include/linux/sched.h index 166144c04ef6..ce5a27304b03 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1489,6 +1489,11 @@ static inline struct thread_info *task_thread_info(struct task_struct *task) extern struct task_struct *find_task_by_vpid(pid_t nr); extern struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns); +/* + * find a task by its virtual pid and get the task struct + */ +extern struct task_struct *find_get_task_by_vpid(pid_t nr); + extern int wake_up_state(struct task_struct *tsk, unsigned int state); extern int wake_up_process(struct task_struct *tsk); extern void wake_up_new_task(struct task_struct *tsk); diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 3d49b91b674d..bd422561a75e 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -11,7 +11,7 @@ /* * Routines for handling mm_structs */ -extern struct mm_struct * mm_alloc(void); +extern struct mm_struct *mm_alloc(void); /** * mmgrab() - Pin a &struct mm_struct. @@ -35,27 +35,7 @@ static inline void mmgrab(struct mm_struct *mm) atomic_inc(&mm->mm_count); } -/* mmdrop drops the mm and the page tables */ -extern void __mmdrop(struct mm_struct *); -static inline void mmdrop(struct mm_struct *mm) -{ - if (unlikely(atomic_dec_and_test(&mm->mm_count))) - __mmdrop(mm); -} - -static inline void mmdrop_async_fn(struct work_struct *work) -{ - struct mm_struct *mm = container_of(work, struct mm_struct, async_put_work); - __mmdrop(mm); -} - -static inline void mmdrop_async(struct mm_struct *mm) -{ - if (unlikely(atomic_dec_and_test(&mm->mm_count))) { - INIT_WORK(&mm->async_put_work, mmdrop_async_fn); - schedule_work(&mm->async_put_work); - } -} +extern void mmdrop(struct mm_struct *mm); /** * mmget() - Pin the address space associated with a &struct mm_struct. diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 0aa4548fb492..64d85fc65c41 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -104,6 +104,9 @@ struct signal_struct { int group_stop_count; unsigned int flags; /* see SIGNAL_* flags below */ + /* The signal sent when the parent dies: */ + int pdeath_signal_proc; + /* * PR_SET_CHILD_SUBREAPER marks a process, like a service * manager, to re-parent orphan (double-forking) child processes diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 06b295bec00d..73b5e655a76e 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -112,13 +112,11 @@ extern void shmem_uncharge(struct inode *inode, long pages); #ifdef CONFIG_TMPFS -extern int shmem_add_seals(struct file *file, unsigned int seals); -extern int shmem_get_seals(struct file *file); -extern long shmem_fcntl(struct file *file, unsigned int cmd, unsigned long arg); +extern long memfd_fcntl(struct file *file, unsigned int cmd, unsigned long arg); #else -static inline long shmem_fcntl(struct file *f, unsigned int c, unsigned long a) +static inline long memfd_fcntl(struct file *f, unsigned int c, unsigned long a) { return -EINVAL; } diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index 388ff2936a87..a3894918a436 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -75,6 +75,6 @@ struct shrinker { #define SHRINKER_NUMA_AWARE (1 << 0) #define SHRINKER_MEMCG_AWARE (1 << 1) -extern int register_shrinker(struct shrinker *); +extern __must_check int register_shrinker(struct shrinker *); extern void unregister_shrinker(struct shrinker *); #endif diff --git a/include/linux/swap.h b/include/linux/swap.h index c2b8128799c1..a1a3f4ed94ce 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -332,20 +332,16 @@ extern void mark_page_accessed(struct page *); extern void lru_add_drain(void); extern void lru_add_drain_cpu(int cpu); extern void lru_add_drain_all(void); -extern void lru_add_drain_all_cpuslocked(void); extern void rotate_reclaimable_page(struct page *page); extern void deactivate_file_page(struct page *page); extern void mark_page_lazyfree(struct page *page); extern void swap_setup(void); -extern void add_page_to_unevictable_list(struct page *page); - extern void lru_cache_add_active_or_unevictable(struct page *page, struct vm_area_struct *vma); /* linux/mm/vmscan.c */ extern unsigned long zone_reclaimable_pages(struct zone *zone); -extern unsigned long pgdat_reclaimable_pages(struct pglist_data *pgdat); extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *mask); extern int __isolate_lru_page(struct page *page, isolate_mode_t mode); diff --git a/include/linux/uuid.h b/include/linux/uuid.h index 33b0bdbb613c..d9c4a6cce3c2 100644 --- a/include/linux/uuid.h +++ b/include/linux/uuid.h @@ -17,6 +17,7 @@ #define _LINUX_UUID_H_ #include <uapi/linux/uuid.h> +#include <linux/string.h> #define UUID_SIZE 16 diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 1779c9817b39..a4c2317d8b9f 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -216,23 +216,6 @@ static inline unsigned long zone_page_state_snapshot(struct zone *zone, return x; } -static inline unsigned long node_page_state_snapshot(pg_data_t *pgdat, - enum node_stat_item item) -{ - long x = atomic_long_read(&pgdat->vm_stat[item]); - -#ifdef CONFIG_SMP - int cpu; - for_each_online_cpu(cpu) - x += per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->vm_node_stat_diff[item]; - - if (x < 0) - x = 0; -#endif - return x; -} - - #ifdef CONFIG_NUMA extern void __inc_numa_state(struct zone *zone, enum numa_stat_item item); extern unsigned long sum_zone_node_page_state(int node, diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index d70b53e65f43..e0b8b9173e1c 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -192,12 +192,12 @@ DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_memcg_softlimit_re TRACE_EVENT(mm_shrink_slab_start, TP_PROTO(struct shrinker *shr, struct shrink_control *sc, - long nr_objects_to_shrink, unsigned long pgs_scanned, - unsigned long lru_pgs, unsigned long cache_items, - unsigned long long delta, unsigned long total_scan), + long nr_objects_to_shrink, unsigned long cache_items, + unsigned long long delta, unsigned long total_scan, + int priority), - TP_ARGS(shr, sc, nr_objects_to_shrink, pgs_scanned, lru_pgs, - cache_items, delta, total_scan), + TP_ARGS(shr, sc, nr_objects_to_shrink, cache_items, delta, total_scan, + priority), TP_STRUCT__entry( __field(struct shrinker *, shr) @@ -205,11 +205,10 @@ TRACE_EVENT(mm_shrink_slab_start, __field(int, nid) __field(long, nr_objects_to_shrink) __field(gfp_t, gfp_flags) - __field(unsigned long, pgs_scanned) - __field(unsigned long, lru_pgs) __field(unsigned long, cache_items) __field(unsigned long long, delta) __field(unsigned long, total_scan) + __field(int, priority) ), TP_fast_assign( @@ -218,24 +217,22 @@ TRACE_EVENT(mm_shrink_slab_start, __entry->nid = sc->nid; __entry->nr_objects_to_shrink = nr_objects_to_shrink; __entry->gfp_flags = sc->gfp_mask; - __entry->pgs_scanned = pgs_scanned; - __entry->lru_pgs = lru_pgs; __entry->cache_items = cache_items; __entry->delta = delta; __entry->total_scan = total_scan; + __entry->priority = priority; ), - TP_printk("%pF %p: nid: %d objects to shrink %ld gfp_flags %s pgs_scanned %ld lru_pgs %ld cache items %ld delta %lld total_scan %ld", + TP_printk("%pF %p: nid: %d objects to shrink %ld gfp_flags %s cache items %ld delta %lld total_scan %ld priority %d", __entry->shrink, __entry->shr, __entry->nid, __entry->nr_objects_to_shrink, show_gfp_flags(__entry->gfp_flags), - __entry->pgs_scanned, - __entry->lru_pgs, __entry->cache_items, __entry->delta, - __entry->total_scan) + __entry->total_scan, + __entry->priority) ); TRACE_EVENT(mm_shrink_slab_end, diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h index f8b134f5608f..e7ee32861d51 100644 --- a/include/uapi/asm-generic/mman-common.h +++ b/include/uapi/asm-generic/mman-common.h @@ -27,6 +27,9 @@ # define MAP_UNINITIALIZED 0x0 /* Don't support this flag */ #endif +/* 0x0100 - 0x80000 flags are defined in asm-generic/mman.h */ +#define MAP_FIXED_NOREPLACE 0x100000 /* MAP_FIXED which doesn't unmap underlying mapping */ + /* * Flags for mlock */ diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index af5f8c2df87a..3165863aa187 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -207,4 +207,8 @@ struct prctl_mm_map { # define PR_SVE_VL_LEN_MASK 0xffff # define PR_SVE_VL_INHERIT (1 << 17) /* inherit across exec */ +/* Process-based variant of PDEATHSIG */ +#define PR_SET_PDEATHSIG_PROC 48 +#define PR_GET_PDEATHSIG_PROC 49 + #endif /* _LINUX_PRCTL_H */ diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h index 0f272818a4d2..be69135eb361 100644 --- a/include/uapi/linux/sysctl.h +++ b/include/uapi/linux/sysctl.h @@ -27,6 +27,10 @@ #include <linux/types.h> #include <linux/compiler.h> +#ifndef __KERNEL__ +#include <stddef.h> /* For size_t. */ +#endif + #define CTL_MAXNAME 10 /* how many path components do we allow in a call to sysctl? In other words, what is the largest acceptable value for the nlen diff --git a/include/uapi/linux/uuid.h b/include/uapi/linux/uuid.h index 5c04130bb524..e5a7eecef7c3 100644 --- a/include/uapi/linux/uuid.h +++ b/include/uapi/linux/uuid.h @@ -19,7 +19,6 @@ #define _UAPI_LINUX_UUID_H_ #include <linux/types.h> -#include <linux/string.h> typedef struct { __u8 b[16]; |