summaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/backing-dev.h2
-rw-r--r--include/linux/blkdev.h6
-rw-r--r--include/linux/compaction.h33
-rw-r--r--include/linux/compiler.h2
-rw-r--r--include/linux/dcache.h41
-rw-r--r--include/linux/device-mapper.h2
-rw-r--r--include/linux/fs.h8
-rw-r--r--include/linux/fsnotify.h12
-rw-r--r--include/linux/fsnotify_backend.h20
-rw-r--r--include/linux/gfp.h14
-rw-r--r--include/linux/huge_mm.h2
-rw-r--r--include/linux/kasan.h2
-rw-r--r--include/linux/kdb.h2
-rw-r--r--include/linux/libnvdimm.h24
-rw-r--r--include/linux/memblock.h1
-rw-r--r--include/linux/memcontrol.h70
-rw-r--r--include/linux/memremap.h2
-rw-r--r--include/linux/mm.h17
-rw-r--r--include/linux/mm_inline.h19
-rw-r--r--include/linux/mm_types.h2
-rw-r--r--include/linux/mmzone.h170
-rw-r--r--include/linux/nd.h3
-rw-r--r--include/linux/oom.h26
-rw-r--r--include/linux/pfn_t.h5
-rw-r--r--include/linux/pinctrl/pinconf-generic.h2
-rw-r--r--include/linux/pmem.h117
-rw-r--r--include/linux/quota.h4
-rw-r--r--include/linux/sched.h27
-rw-r--r--include/linux/slab_def.h3
-rw-r--r--include/linux/slub_def.h14
-rw-r--r--include/linux/stringhash.h12
-rw-r--r--include/linux/sunrpc/svcauth.h4
-rw-r--r--include/linux/swap.h23
-rw-r--r--include/linux/topology.h2
-rw-r--r--include/linux/vm_event_item.h14
-rw-r--r--include/linux/vmstat.h111
-rw-r--r--include/linux/writeback.h2
37 files changed, 457 insertions, 363 deletions
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index c82794f20110..491a91717788 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -197,7 +197,7 @@ static inline int wb_congested(struct bdi_writeback *wb, int cong_bits)
}
long congestion_wait(int sync, long timeout);
-long wait_iff_congested(struct zone *zone, int sync, long timeout);
+long wait_iff_congested(struct pglist_data *pgdat, int sync, long timeout);
int pdflush_proc_obsolete(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c96db9c22d10..adf33079771e 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1665,7 +1665,7 @@ static inline bool integrity_req_gap_front_merge(struct request *req,
*/
struct blk_dax_ctl {
sector_t sector;
- void __pmem *addr;
+ void *addr;
long size;
pfn_t pfn;
};
@@ -1676,8 +1676,8 @@ struct block_device_operations {
int (*rw_page)(struct block_device *, sector_t, struct page *, int rw);
int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
- long (*direct_access)(struct block_device *, sector_t, void __pmem **,
- pfn_t *, long);
+ long (*direct_access)(struct block_device *, sector_t, void **, pfn_t *,
+ long);
unsigned int (*check_events) (struct gendisk *disk,
unsigned int clearing);
/* ->media_changed() is DEPRECATED, use ->check_events() instead */
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 1a02dab16646..d4e106b5dc27 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -1,6 +1,18 @@
#ifndef _LINUX_COMPACTION_H
#define _LINUX_COMPACTION_H
+/*
+ * Determines how hard direct compaction should try to succeed.
+ * Lower value means higher priority, analogically to reclaim priority.
+ */
+enum compact_priority {
+ COMPACT_PRIO_SYNC_LIGHT,
+ MIN_COMPACT_PRIORITY = COMPACT_PRIO_SYNC_LIGHT,
+ DEF_COMPACT_PRIORITY = COMPACT_PRIO_SYNC_LIGHT,
+ COMPACT_PRIO_ASYNC,
+ INIT_COMPACT_PRIORITY = COMPACT_PRIO_ASYNC
+};
+
/* Return values for compact_zone() and try_to_compact_pages() */
/* When adding new states, please adjust include/trace/events/compaction.h */
enum compact_result {
@@ -43,14 +55,6 @@ enum compact_result {
COMPACT_PARTIAL,
};
-/* Used to signal whether compaction detected need_sched() or lock contention */
-/* No contention detected */
-#define COMPACT_CONTENDED_NONE 0
-/* Either need_sched() was true or fatal signal pending */
-#define COMPACT_CONTENDED_SCHED 1
-/* Zone lock or lru_lock was contended in async compaction */
-#define COMPACT_CONTENDED_LOCK 2
-
struct alloc_context; /* in mm/internal.h */
#ifdef CONFIG_COMPACTION
@@ -64,9 +68,8 @@ extern int sysctl_compact_unevictable_allowed;
extern int fragmentation_index(struct zone *zone, unsigned int order);
extern enum compact_result try_to_compact_pages(gfp_t gfp_mask,
- unsigned int order,
- unsigned int alloc_flags, const struct alloc_context *ac,
- enum migrate_mode mode, int *contended);
+ unsigned int order, unsigned int alloc_flags,
+ const struct alloc_context *ac, enum compact_priority prio);
extern void compact_pgdat(pg_data_t *pgdat, int order);
extern void reset_isolation_suitable(pg_data_t *pgdat);
extern enum compact_result compaction_suitable(struct zone *zone, int order,
@@ -151,14 +154,6 @@ extern void kcompactd_stop(int nid);
extern void wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_idx);
#else
-static inline enum compact_result try_to_compact_pages(gfp_t gfp_mask,
- unsigned int order, int alloc_flags,
- const struct alloc_context *ac,
- enum migrate_mode mode, int *contended)
-{
- return COMPACT_CONTINUE;
-}
-
static inline void compact_pgdat(pg_data_t *pgdat, int order)
{
}
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 2e853b679a5d..1bb954842725 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -17,7 +17,6 @@
# define __release(x) __context__(x,-1)
# define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0)
# define __percpu __attribute__((noderef, address_space(3)))
-# define __pmem __attribute__((noderef, address_space(5)))
#ifdef CONFIG_SPARSE_RCU_POINTER
# define __rcu __attribute__((noderef, address_space(4)))
#else /* CONFIG_SPARSE_RCU_POINTER */
@@ -45,7 +44,6 @@ extern void __chk_io_ptr(const volatile void __iomem *);
# define __cond_lock(x,c) (c)
# define __percpu
# define __rcu
-# define __pmem
# define __private
# define ACCESS_PRIVATE(p, member) ((p)->member)
#endif /* __CHECKER__ */
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index f53fa055021a..98044a8d1487 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -133,14 +133,15 @@ struct dentry_operations {
int (*d_compare)(const struct dentry *, const struct dentry *,
unsigned int, const char *, const struct qstr *);
int (*d_delete)(const struct dentry *);
+ int (*d_init)(struct dentry *);
void (*d_release)(struct dentry *);
void (*d_prune)(struct dentry *);
void (*d_iput)(struct dentry *, struct inode *);
char *(*d_dname)(struct dentry *, char *, int);
struct vfsmount *(*d_automount)(struct path *);
int (*d_manage)(struct dentry *, bool);
- struct inode *(*d_select_inode)(struct dentry *, unsigned);
- struct dentry *(*d_real)(struct dentry *, struct inode *);
+ struct dentry *(*d_real)(struct dentry *, const struct inode *,
+ unsigned int);
} ____cacheline_aligned;
/*
@@ -206,10 +207,8 @@ struct dentry_operations {
#define DCACHE_MAY_FREE 0x00800000
#define DCACHE_FALLTHRU 0x01000000 /* Fall through to lower layer */
-#define DCACHE_OP_SELECT_INODE 0x02000000 /* Unioned entry: dcache op selects inode */
-
-#define DCACHE_ENCRYPTED_WITH_KEY 0x04000000 /* dir is encrypted with a valid key */
-#define DCACHE_OP_REAL 0x08000000
+#define DCACHE_ENCRYPTED_WITH_KEY 0x02000000 /* dir is encrypted with a valid key */
+#define DCACHE_OP_REAL 0x04000000
#define DCACHE_PAR_LOOKUP 0x10000000 /* being looked up (with parent locked shared) */
#define DCACHE_DENTRY_CURSOR 0x20000000
@@ -557,25 +556,27 @@ static inline struct dentry *d_backing_dentry(struct dentry *upper)
return upper;
}
-static inline struct dentry *d_real(struct dentry *dentry)
+/**
+ * d_real - Return the real dentry
+ * @dentry: the dentry to query
+ * @inode: inode to select the dentry from multiple layers (can be NULL)
+ * @flags: open flags to control copy-up behavior
+ *
+ * If dentry is on an union/overlay, then return the underlying, real dentry.
+ * Otherwise return the dentry itself.
+ *
+ * See also: Documentation/filesystems/vfs.txt
+ */
+static inline struct dentry *d_real(struct dentry *dentry,
+ const struct inode *inode,
+ unsigned int flags)
{
if (unlikely(dentry->d_flags & DCACHE_OP_REAL))
- return dentry->d_op->d_real(dentry, NULL);
+ return dentry->d_op->d_real(dentry, inode, flags);
else
return dentry;
}
-static inline struct inode *vfs_select_inode(struct dentry *dentry,
- unsigned open_flags)
-{
- struct inode *inode = d_inode(dentry);
-
- if (inode && unlikely(dentry->d_flags & DCACHE_OP_SELECT_INODE))
- inode = dentry->d_op->d_select_inode(dentry, open_flags);
-
- return inode;
-}
-
/**
* d_real_inode - Return the real inode
* @dentry: The dentry to query
@@ -585,7 +586,7 @@ static inline struct inode *vfs_select_inode(struct dentry *dentry,
*/
static inline struct inode *d_real_inode(struct dentry *dentry)
{
- return d_backing_inode(d_real(dentry));
+ return d_backing_inode(d_real(dentry, NULL, 0));
}
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index b0db857f334b..91acfce74a22 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -131,7 +131,7 @@ typedef int (*dm_busy_fn) (struct dm_target *ti);
* >= 0 : the number of bytes accessible at the address
*/
typedef long (*dm_direct_access_fn) (struct dm_target *ti, sector_t sector,
- void __pmem **kaddr, pfn_t *pfn, long size);
+ void **kaddr, pfn_t *pfn, long size);
void dm_error(const char *message);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index f2a69f20926f..50ccf845b56c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -457,7 +457,6 @@ struct block_device {
struct inode * bd_inode; /* will die */
struct super_block * bd_super;
struct mutex bd_mutex; /* open/close mutex */
- struct list_head bd_inodes;
void * bd_claiming;
void * bd_holder;
int bd_holders;
@@ -1271,12 +1270,7 @@ static inline struct inode *file_inode(const struct file *f)
static inline struct dentry *file_dentry(const struct file *file)
{
- struct dentry *dentry = file->f_path.dentry;
-
- if (unlikely(dentry->d_flags & DCACHE_OP_REAL))
- return dentry->d_op->d_real(dentry, file_inode(file));
- else
- return dentry;
+ return d_real(file->f_path.dentry, file_inode(file), 0);
}
static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl)
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index 0141f257d67b..eed9e853a06f 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -52,18 +52,6 @@ static inline int fsnotify_perm(struct file *file, int mask)
}
/*
- * fsnotify_d_move - dentry has been moved
- */
-static inline void fsnotify_d_move(struct dentry *dentry)
-{
- /*
- * On move we need to update dentry->d_flags to indicate if the new parent
- * cares about events from this dentry.
- */
- __fsnotify_update_dcache_flags(dentry);
-}
-
-/*
* fsnotify_link_count - inode's link count changed
*/
static inline void fsnotify_link_count(struct inode *inode)
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 29f917517299..58205f33af02 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -267,10 +267,8 @@ static inline int fsnotify_inode_watches_children(struct inode *inode)
* Update the dentry with a flag indicating the interest of its parent to receive
* filesystem events when those events happens to this dentry->d_inode.
*/
-static inline void __fsnotify_update_dcache_flags(struct dentry *dentry)
+static inline void fsnotify_update_flags(struct dentry *dentry)
{
- struct dentry *parent;
-
assert_spin_locked(&dentry->d_lock);
/*
@@ -280,21 +278,12 @@ static inline void __fsnotify_update_dcache_flags(struct dentry *dentry)
* find our entry, so it will spin until we complete here, and update
* us with the new state.
*/
- parent = dentry->d_parent;
- if (parent->d_inode && fsnotify_inode_watches_children(parent->d_inode))
+ if (fsnotify_inode_watches_children(dentry->d_parent->d_inode))
dentry->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED;
else
dentry->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED;
}
-/*
- * fsnotify_d_instantiate - instantiate a dentry for inode
- */
-static inline void __fsnotify_d_instantiate(struct dentry *dentry)
-{
- __fsnotify_update_dcache_flags(dentry);
-}
-
/* called from fsnotify listeners, such as fanotify or dnotify */
/* create a new group */
@@ -386,10 +375,7 @@ static inline void __fsnotify_inode_delete(struct inode *inode)
static inline void __fsnotify_vfsmount_delete(struct vfsmount *mnt)
{}
-static inline void __fsnotify_update_dcache_flags(struct dentry *dentry)
-{}
-
-static inline void __fsnotify_d_instantiate(struct dentry *dentry)
+static inline void fsnotify_update_flags(struct dentry *dentry)
{}
static inline u32 fsnotify_get_cookie(void)
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index c29e9d347bc6..f8041f9de31e 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -237,9 +237,11 @@ struct vm_area_struct;
* are expected to be movable via page reclaim or page migration. Typically,
* pages on the LRU would also be allocated with GFP_HIGHUSER_MOVABLE.
*
- * GFP_TRANSHUGE is used for THP allocations. They are compound allocations
- * that will fail quickly if memory is not available and will not wake
- * kswapd on failure.
+ * GFP_TRANSHUGE and GFP_TRANSHUGE_LIGHT are used for THP allocations. They are
+ * compound allocations that will generally fail quickly if memory is not
+ * available and will not wake kswapd/kcompactd on failure. The _LIGHT
+ * version does not attempt reclaim/compaction at all and is by default used
+ * in page fault path, while the non-light is used by khugepaged.
*/
#define GFP_ATOMIC (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM)
#define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS)
@@ -254,9 +256,9 @@ struct vm_area_struct;
#define GFP_DMA32 __GFP_DMA32
#define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM)
#define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE)
-#define GFP_TRANSHUGE ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
- __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN) & \
- ~__GFP_RECLAIM)
+#define GFP_TRANSHUGE_LIGHT ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
+ __GFP_NOMEMALLOC | __GFP_NOWARN) & ~__GFP_RECLAIM)
+#define GFP_TRANSHUGE (GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM)
/* Convert GFP flags to their corresponding migrate type */
#define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 92ce91c03cd0..6f14de45b5ce 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -11,7 +11,7 @@ extern struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
unsigned long addr,
pmd_t *pmd,
unsigned int flags);
-extern int madvise_free_huge_pmd(struct mmu_gather *tlb,
+extern bool madvise_free_huge_pmd(struct mmu_gather *tlb,
struct vm_area_struct *vma,
pmd_t *pmd, unsigned long addr, unsigned long next);
extern int zap_huge_pmd(struct mmu_gather *tlb,
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index ac4b3c46a84d..c9cf374445d8 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -77,6 +77,7 @@ void kasan_free_shadow(const struct vm_struct *vm);
size_t ksize(const void *);
static inline void kasan_unpoison_slab(const void *ptr) { ksize(ptr); }
+size_t kasan_metadata_size(struct kmem_cache *cache);
#else /* CONFIG_KASAN */
@@ -121,6 +122,7 @@ static inline int kasan_module_alloc(void *addr, size_t size) { return 0; }
static inline void kasan_free_shadow(const struct vm_struct *vm) {}
static inline void kasan_unpoison_slab(const void *ptr) { }
+static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; }
#endif /* CONFIG_KASAN */
diff --git a/include/linux/kdb.h b/include/linux/kdb.h
index a19bcf9e762e..410decacff8f 100644
--- a/include/linux/kdb.h
+++ b/include/linux/kdb.h
@@ -177,7 +177,7 @@ extern int kdb_get_kbd_char(void);
static inline
int kdb_process_cpu(const struct task_struct *p)
{
- unsigned int cpu = task_thread_info(p)->cpu;
+ unsigned int cpu = task_cpu(p);
if (cpu > num_possible_cpus())
cpu = 0;
return cpu;
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index 0c3c30cbbea5..b519e137b9b7 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -52,6 +52,7 @@ typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc,
struct nd_namespace_label;
struct nvdimm_drvdata;
+
struct nd_mapping {
struct nvdimm *nvdimm;
struct nd_namespace_label **labels;
@@ -69,6 +70,7 @@ struct nd_mapping {
struct nvdimm_bus_descriptor {
const struct attribute_group **attr_groups;
unsigned long cmd_mask;
+ struct module *module;
char *provider_name;
ndctl_fn ndctl;
int (*flush_probe)(struct nvdimm_bus_descriptor *nd_desc);
@@ -99,13 +101,21 @@ struct nd_region_desc {
unsigned long flags;
};
+struct device;
+void *devm_nvdimm_memremap(struct device *dev, resource_size_t offset,
+ size_t size, unsigned long flags);
+static inline void __iomem *devm_nvdimm_ioremap(struct device *dev,
+ resource_size_t offset, size_t size)
+{
+ return (void __iomem *) devm_nvdimm_memremap(dev, offset, size, 0);
+}
+
struct nvdimm_bus;
struct module;
struct device;
struct nd_blk_region;
struct nd_blk_region_desc {
int (*enable)(struct nvdimm_bus *nvdimm_bus, struct device *dev);
- void (*disable)(struct nvdimm_bus *nvdimm_bus, struct device *dev);
int (*do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
void *iobuf, u64 len, int rw);
struct nd_region_desc ndr_desc;
@@ -119,22 +129,22 @@ static inline struct nd_blk_region_desc *to_blk_region_desc(
}
int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length);
-struct nvdimm_bus *__nvdimm_bus_register(struct device *parent,
- struct nvdimm_bus_descriptor *nfit_desc, struct module *module);
-#define nvdimm_bus_register(parent, desc) \
- __nvdimm_bus_register(parent, desc, THIS_MODULE)
+struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
+ struct nvdimm_bus_descriptor *nfit_desc);
void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus);
struct nvdimm_bus *to_nvdimm_bus(struct device *dev);
struct nvdimm *to_nvdimm(struct device *dev);
struct nd_region *to_nd_region(struct device *dev);
struct nd_blk_region *to_nd_blk_region(struct device *dev);
struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus);
+struct device *to_nvdimm_bus_dev(struct nvdimm_bus *nvdimm_bus);
const char *nvdimm_name(struct nvdimm *nvdimm);
unsigned long nvdimm_cmd_mask(struct nvdimm *nvdimm);
void *nvdimm_provider_data(struct nvdimm *nvdimm);
struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
const struct attribute_group **groups, unsigned long flags,
- unsigned long cmd_mask);
+ unsigned long cmd_mask, int num_flush,
+ struct resource *flush_wpq);
const struct nd_cmd_desc *nd_cmd_dimm_desc(int cmd);
const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd);
u32 nd_cmd_in_size(struct nvdimm *nvdimm, int cmd,
@@ -156,4 +166,6 @@ struct nvdimm *nd_blk_region_to_dimm(struct nd_blk_region *ndbr);
unsigned int nd_region_acquire_lane(struct nd_region *nd_region);
void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane);
u64 nd_fletcher64(void *addr, size_t len, bool le);
+void nvdimm_flush(struct nd_region *nd_region);
+int nvdimm_has_flush(struct nd_region *nd_region);
#endif /* __LIBNVDIMM_H__ */
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 6c14b6179727..2925da23505d 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -332,6 +332,7 @@ phys_addr_t memblock_mem_size(unsigned long limit_pfn);
phys_addr_t memblock_start_of_DRAM(void);
phys_addr_t memblock_end_of_DRAM(void);
void memblock_enforce_memory_limit(phys_addr_t memory_limit);
+void memblock_mem_limit_remove_map(phys_addr_t limit);
bool memblock_is_memory(phys_addr_t addr);
int memblock_is_map_memory(phys_addr_t addr);
int memblock_is_region_memory(phys_addr_t base, phys_addr_t size);
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 71aff733a497..5d8ca6e02e39 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -52,7 +52,7 @@ enum mem_cgroup_stat_index {
MEM_CGROUP_STAT_SWAP, /* # of pages, swapped out */
MEM_CGROUP_STAT_NSTATS,
/* default hierarchy stats */
- MEMCG_KERNEL_STACK = MEM_CGROUP_STAT_NSTATS,
+ MEMCG_KERNEL_STACK_KB = MEM_CGROUP_STAT_NSTATS,
MEMCG_SLAB_RECLAIMABLE,
MEMCG_SLAB_UNRECLAIMABLE,
MEMCG_SOCK,
@@ -60,7 +60,7 @@ enum mem_cgroup_stat_index {
};
struct mem_cgroup_reclaim_cookie {
- struct zone *zone;
+ pg_data_t *pgdat;
int priority;
unsigned int generation;
};
@@ -118,7 +118,7 @@ struct mem_cgroup_reclaim_iter {
/*
* per-zone information in memory controller.
*/
-struct mem_cgroup_per_zone {
+struct mem_cgroup_per_node {
struct lruvec lruvec;
unsigned long lru_size[NR_LRU_LISTS];
@@ -132,10 +132,6 @@ struct mem_cgroup_per_zone {
/* use container_of */
};
-struct mem_cgroup_per_node {
- struct mem_cgroup_per_zone zoneinfo[MAX_NR_ZONES];
-};
-
struct mem_cgroup_threshold {
struct eventfd_ctx *eventfd;
unsigned long threshold;
@@ -314,8 +310,46 @@ void mem_cgroup_uncharge_list(struct list_head *page_list);
void mem_cgroup_migrate(struct page *oldpage, struct page *newpage);
-struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *);
-struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *);
+static struct mem_cgroup_per_node *
+mem_cgroup_nodeinfo(struct mem_cgroup *memcg, int nid)
+{
+ return memcg->nodeinfo[nid];
+}
+
+/**
+ * mem_cgroup_lruvec - get the lru list vector for a node or a memcg zone
+ * @node: node of the wanted lruvec
+ * @memcg: memcg of the wanted lruvec
+ *
+ * Returns the lru list vector holding pages for a given @node or a given
+ * @memcg and @zone. This can be the node lruvec, if the memory controller
+ * is disabled.
+ */
+static inline struct lruvec *mem_cgroup_lruvec(struct pglist_data *pgdat,
+ struct mem_cgroup *memcg)
+{
+ struct mem_cgroup_per_node *mz;
+ struct lruvec *lruvec;
+
+ if (mem_cgroup_disabled()) {
+ lruvec = node_lruvec(pgdat);
+ goto out;
+ }
+
+ mz = mem_cgroup_nodeinfo(memcg, pgdat->node_id);
+ lruvec = &mz->lruvec;
+out:
+ /*
+ * Since a node can be onlined after the mem_cgroup was created,
+ * we have to be prepared to initialize lruvec->pgdat here;
+ * and if offlined then reonlined, we need to reinitialize it.
+ */
+ if (unlikely(lruvec->pgdat != pgdat))
+ lruvec->pgdat = pgdat;
+ return lruvec;
+}
+
+struct lruvec *mem_cgroup_page_lruvec(struct page *, struct pglist_data *);
bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg);
struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
@@ -404,9 +438,9 @@ unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
static inline
unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
{
- struct mem_cgroup_per_zone *mz;
+ struct mem_cgroup_per_node *mz;
- mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec);
+ mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
return mz->lru_size[lru];
}
@@ -477,7 +511,7 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
mem_cgroup_update_page_stat(page, idx, -1);
}
-unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
+unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
gfp_t gfp_mask,
unsigned long *total_scanned);
@@ -568,16 +602,16 @@ static inline void mem_cgroup_migrate(struct page *old, struct page *new)
{
}
-static inline struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone,
- struct mem_cgroup *memcg)
+static inline struct lruvec *mem_cgroup_lruvec(struct pglist_data *pgdat,
+ struct mem_cgroup *memcg)
{
- return &zone->lruvec;
+ return node_lruvec(pgdat);
}
static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page,
- struct zone *zone)
+ struct pglist_data *pgdat)
{
- return &zone->lruvec;
+ return &pgdat->lruvec;
}
static inline bool mm_match_cgroup(struct mm_struct *mm,
@@ -681,7 +715,7 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
}
static inline
-unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
+unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
gfp_t gfp_mask,
unsigned long *total_scanned)
{
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index bcaa634139a9..93416196ba64 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -26,7 +26,7 @@ struct vmem_altmap {
unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns);
-#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_ZONE_DEVICE)
+#ifdef CONFIG_ZONE_DEVICE
struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start);
#else
static inline struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 192c1bbe5fcd..08ed53eeedd5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -933,6 +933,11 @@ static inline struct zone *page_zone(const struct page *page)
return &NODE_DATA(page_to_nid(page))->node_zones[page_zonenum(page)];
}
+static inline pg_data_t *page_pgdat(const struct page *page)
+{
+ return NODE_DATA(page_to_nid(page));
+}
+
#ifdef SECTION_IN_PAGE_FLAGS
static inline void set_page_section(struct page *page, unsigned long section)
{
@@ -973,11 +978,21 @@ static inline struct mem_cgroup *page_memcg(struct page *page)
{
return page->mem_cgroup;
}
+static inline struct mem_cgroup *page_memcg_rcu(struct page *page)
+{
+ WARN_ON_ONCE(!rcu_read_lock_held());
+ return READ_ONCE(page->mem_cgroup);
+}
#else
static inline struct mem_cgroup *page_memcg(struct page *page)
{
return NULL;
}
+static inline struct mem_cgroup *page_memcg_rcu(struct page *page)
+{
+ WARN_ON_ONCE(!rcu_read_lock_held());
+ return NULL;
+}
#endif
/*
@@ -2284,6 +2299,8 @@ static inline int in_gate_area(struct mm_struct *mm, unsigned long addr)
}
#endif /* __HAVE_ARCH_GATE_AREA */
+extern bool process_shares_mm(struct task_struct *p, struct mm_struct *mm);
+
#ifdef CONFIG_SYSCTL
extern int sysctl_drop_caches;
int drop_caches_sysctl_handler(struct ctl_table *, int,
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 5bd29ba4f174..71613e8a720f 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -23,25 +23,30 @@ static inline int page_is_file_cache(struct page *page)
}
static __always_inline void __update_lru_size(struct lruvec *lruvec,
- enum lru_list lru, int nr_pages)
+ enum lru_list lru, enum zone_type zid,
+ int nr_pages)
{
- __mod_zone_page_state(lruvec_zone(lruvec), NR_LRU_BASE + lru, nr_pages);
+ struct pglist_data *pgdat = lruvec_pgdat(lruvec);
+
+ __mod_node_page_state(pgdat, NR_LRU_BASE + lru, nr_pages);
+ __mod_zone_page_state(&pgdat->node_zones[zid],
+ NR_ZONE_LRU_BASE + lru, nr_pages);
}
static __always_inline void update_lru_size(struct lruvec *lruvec,
- enum lru_list lru, int nr_pages)
+ enum lru_list lru, enum zone_type zid,
+ int nr_pages)
{
+ __update_lru_size(lruvec, lru, zid, nr_pages);
#ifdef CONFIG_MEMCG
mem_cgroup_update_lru_size(lruvec, lru, nr_pages);
-#else
- __update_lru_size(lruvec, lru, nr_pages);
#endif
}
static __always_inline void add_page_to_lru_list(struct page *page,
struct lruvec *lruvec, enum lru_list lru)
{
- update_lru_size(lruvec, lru, hpage_nr_pages(page));
+ update_lru_size(lruvec, lru, page_zonenum(page), hpage_nr_pages(page));
list_add(&page->lru, &lruvec->lists[lru]);
}
@@ -49,7 +54,7 @@ static __always_inline void del_page_from_lru_list(struct page *page,
struct lruvec *lruvec, enum lru_list lru)
{
list_del(&page->lru);
- update_lru_size(lruvec, lru, -hpage_nr_pages(page));
+ update_lru_size(lruvec, lru, page_zonenum(page), -hpage_nr_pages(page));
}
/**
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 79472b22d23f..903200f4ec41 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -118,7 +118,7 @@ struct page {
*/
union {
struct list_head lru; /* Pageout list, eg. active_list
- * protected by zone->lru_lock !
+ * protected by zone_lru_lock !
* Can be used as a generic list
* by the page owner.
*/
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 19425e988bdc..f2e4e90621ec 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -93,7 +93,7 @@ struct free_area {
struct pglist_data;
/*
- * zone->lock and zone->lru_lock are two of the hottest locks in the kernel.
+ * zone->lock and the zone lru_lock are two of the hottest locks in the kernel.
* So add a wild amount of padding here to ensure that they fall into separate
* cachelines. There are very few zone structures in the machine, so space
* consumption is not a concern here.
@@ -110,36 +110,20 @@ struct zone_padding {
enum zone_stat_item {
/* First 128 byte cacheline (assuming 64 bit words) */
NR_FREE_PAGES,
- NR_ALLOC_BATCH,
- NR_LRU_BASE,
- NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */
- NR_ACTIVE_ANON, /* " " " " " */
- NR_INACTIVE_FILE, /* " " " " " */
- NR_ACTIVE_FILE, /* " " " " " */
- NR_UNEVICTABLE, /* " " " " " */
+ NR_ZONE_LRU_BASE, /* Used only for compaction and reclaim retry */
+ NR_ZONE_INACTIVE_ANON = NR_ZONE_LRU_BASE,
+ NR_ZONE_ACTIVE_ANON,
+ NR_ZONE_INACTIVE_FILE,
+ NR_ZONE_ACTIVE_FILE,
+ NR_ZONE_UNEVICTABLE,
+ NR_ZONE_WRITE_PENDING, /* Count of dirty, writeback and unstable pages */
NR_MLOCK, /* mlock()ed pages found and moved off LRU */
- NR_ANON_PAGES, /* Mapped anonymous pages */
- NR_FILE_MAPPED, /* pagecache pages mapped into pagetables.
- only modified from process context */
- NR_FILE_PAGES,
- NR_FILE_DIRTY,
- NR_WRITEBACK,
NR_SLAB_RECLAIMABLE,
NR_SLAB_UNRECLAIMABLE,
NR_PAGETABLE, /* used for pagetables */
- NR_KERNEL_STACK,
+ NR_KERNEL_STACK_KB, /* measured in KiB */
/* Second 128 byte cacheline */
- NR_UNSTABLE_NFS, /* NFS unstable pages */
NR_BOUNCE,
- NR_VMSCAN_WRITE,
- NR_VMSCAN_IMMEDIATE, /* Prioritise for reclaim when writeback ends */
- NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */
- NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */
- NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */
- NR_SHMEM, /* shmem pages (included tmpfs/GEM pages) */
- NR_DIRTIED, /* page dirtyings since bootup */
- NR_WRITTEN, /* page writings since bootup */
- NR_PAGES_SCANNED, /* pages scanned since last reclaim */
#if IS_ENABLED(CONFIG_ZSMALLOC)
NR_ZSPAGES, /* allocated in zsmalloc */
#endif
@@ -151,14 +135,40 @@ enum zone_stat_item {
NUMA_LOCAL, /* allocation from local node */
NUMA_OTHER, /* allocation from other node */
#endif
+ NR_FREE_CMA_PAGES,
+ NR_VM_ZONE_STAT_ITEMS };
+
+enum node_stat_item {
+ NR_LRU_BASE,
+ NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */
+ NR_ACTIVE_ANON, /* " " " " " */
+ NR_INACTIVE_FILE, /* " " " " " */
+ NR_ACTIVE_FILE, /* " " " " " */
+ NR_UNEVICTABLE, /* " " " " " */
+ NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */
+ NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */
+ NR_PAGES_SCANNED, /* pages scanned since last reclaim */
WORKINGSET_REFAULT,
WORKINGSET_ACTIVATE,
WORKINGSET_NODERECLAIM,
- NR_ANON_THPS,
+ NR_ANON_MAPPED, /* Mapped anonymous pages */
+ NR_FILE_MAPPED, /* pagecache pages mapped into pagetables.
+ only modified from process context */
+ NR_FILE_PAGES,
+ NR_FILE_DIRTY,
+ NR_WRITEBACK,
+ NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */
+ NR_SHMEM, /* shmem pages (included tmpfs/GEM pages) */
NR_SHMEM_THPS,
NR_SHMEM_PMDMAPPED,
- NR_FREE_CMA_PAGES,
- NR_VM_ZONE_STAT_ITEMS };
+ NR_ANON_THPS,
+ NR_UNSTABLE_NFS, /* NFS unstable pages */
+ NR_VMSCAN_WRITE,
+ NR_VMSCAN_IMMEDIATE, /* Prioritise for reclaim when writeback ends */
+ NR_DIRTIED, /* page dirtyings since bootup */
+ NR_WRITTEN, /* page writings since bootup */
+ NR_VM_NODE_STAT_ITEMS
+};
/*
* We do arithmetic on the LRU lists in various places in the code,
@@ -215,7 +225,7 @@ struct lruvec {
/* Evictions & activations on the inactive file list */
atomic_long_t inactive_age;
#ifdef CONFIG_MEMCG
- struct zone *zone;
+ struct pglist_data *pgdat;
#endif
};
@@ -267,6 +277,11 @@ struct per_cpu_pageset {
#endif
};
+struct per_cpu_nodestat {
+ s8 stat_threshold;
+ s8 vm_node_stat_diff[NR_VM_NODE_STAT_ITEMS];
+};
+
#endif /* !__GENERATING_BOUNDS.H */
enum zone_type {
@@ -348,22 +363,9 @@ struct zone {
#ifdef CONFIG_NUMA
int node;
#endif
-
- /*
- * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
- * this zone's LRU. Maintained by the pageout code.
- */
- unsigned int inactive_ratio;
-
struct pglist_data *zone_pgdat;
struct per_cpu_pageset __percpu *pageset;
- /*
- * This is a per-zone reserve of pages that are not available
- * to userspace allocations.
- */
- unsigned long totalreserve_pages;
-
#ifndef CONFIG_SPARSEMEM
/*
* Flags for a pageblock_nr_pages block. See pageblock-flags.h.
@@ -372,14 +374,6 @@ struct zone {
unsigned long *pageblock_flags;
#endif /* CONFIG_SPARSEMEM */
-#ifdef CONFIG_NUMA
- /*
- * zone reclaim becomes active if more unmapped pages exist.
- */
- unsigned long min_unmapped_pages;
- unsigned long min_slab_pages;
-#endif /* CONFIG_NUMA */
-
/* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
unsigned long zone_start_pfn;
@@ -472,24 +466,21 @@ struct zone {
unsigned long wait_table_hash_nr_entries;
unsigned long wait_table_bits;
+ /* Write-intensive fields used from the page allocator */
ZONE_PADDING(_pad1_)
+
/* free areas of different sizes */
struct free_area free_area[MAX_ORDER];
/* zone flags, see below */
unsigned long flags;
- /* Write-intensive fields used from the page allocator */
+ /* Primarily protects free_area */
spinlock_t lock;
+ /* Write-intensive fields used by compaction and vmstats. */
ZONE_PADDING(_pad2_)
- /* Write-intensive fields used by page reclaim */
-
- /* Fields commonly accessed by the page reclaim scanner */
- spinlock_t lru_lock;
- struct lruvec lruvec;
-
/*
* When free pages are below this point, additional steps are taken
* when reading the number of free pages to avoid per-cpu counter
@@ -527,19 +518,18 @@ struct zone {
atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
} ____cacheline_internodealigned_in_smp;
-enum zone_flags {
- ZONE_RECLAIM_LOCKED, /* prevents concurrent reclaim */
- ZONE_CONGESTED, /* zone has many dirty pages backed by
+enum pgdat_flags {
+ PGDAT_CONGESTED, /* pgdat has many dirty pages backed by
* a congested BDI
*/
- ZONE_DIRTY, /* reclaim scanning has recently found
+ PGDAT_DIRTY, /* reclaim scanning has recently found
* many dirty file pages at the tail
* of the LRU.
*/
- ZONE_WRITEBACK, /* reclaim scanning has recently found
+ PGDAT_WRITEBACK, /* reclaim scanning has recently found
* many pages under writeback
*/
- ZONE_FAIR_DEPLETED, /* fair zone policy batch depleted */
+ PGDAT_RECLAIM_LOCKED, /* prevents concurrent reclaim */
};
static inline unsigned long zone_end_pfn(const struct zone *zone)
@@ -663,8 +653,9 @@ typedef struct pglist_data {
wait_queue_head_t pfmemalloc_wait;
struct task_struct *kswapd; /* Protected by
mem_hotplug_begin/end() */
- int kswapd_max_order;
- enum zone_type classzone_idx;
+ int kswapd_order;
+ enum zone_type kswapd_classzone_idx;
+
#ifdef CONFIG_COMPACTION
int kcompactd_max_order;
enum zone_type kcompactd_classzone_idx;
@@ -681,6 +672,23 @@ typedef struct pglist_data {
/* Number of pages migrated during the rate limiting time interval */
unsigned long numabalancing_migrate_nr_pages;
#endif
+ /*
+ * This is a per-node reserve of pages that are not available
+ * to userspace allocations.
+ */
+ unsigned long totalreserve_pages;
+
+#ifdef CONFIG_NUMA
+ /*
+ * zone reclaim becomes active if more unmapped pages exist.
+ */
+ unsigned long min_unmapped_pages;
+ unsigned long min_slab_pages;
+#endif /* CONFIG_NUMA */
+
+ /* Write-intensive fields used by page reclaim */
+ ZONE_PADDING(_pad1_)
+ spinlock_t lru_lock;
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
/*
@@ -695,6 +703,23 @@ typedef struct pglist_data {
struct list_head split_queue;
unsigned long split_queue_len;
#endif
+
+ /* Fields commonly accessed by the page reclaim scanner */
+ struct lruvec lruvec;
+
+ /*
+ * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
+ * this node's LRU. Maintained by the pageout code.
+ */
+ unsigned int inactive_ratio;
+
+ unsigned long flags;
+
+ ZONE_PADDING(_pad2_)
+
+ /* Per-node vmstats */
+ struct per_cpu_nodestat __percpu *per_cpu_nodestats;
+ atomic_long_t vm_stat[NR_VM_NODE_STAT_ITEMS];
} pg_data_t;
#define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages)
@@ -708,6 +733,15 @@ typedef struct pglist_data {
#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn)
#define node_end_pfn(nid) pgdat_end_pfn(NODE_DATA(nid))
+static inline spinlock_t *zone_lru_lock(struct zone *zone)
+{
+ return &zone->zone_pgdat->lru_lock;
+}
+
+static inline struct lruvec *node_lruvec(struct pglist_data *pgdat)
+{
+ return &pgdat->lruvec;
+}
static inline unsigned long pgdat_end_pfn(pg_data_t *pgdat)
{
@@ -760,12 +794,12 @@ extern int init_currently_empty_zone(struct zone *zone, unsigned long start_pfn,
extern void lruvec_init(struct lruvec *lruvec);
-static inline struct zone *lruvec_zone(struct lruvec *lruvec)
+static inline struct pglist_data *lruvec_pgdat(struct lruvec *lruvec)
{
#ifdef CONFIG_MEMCG
- return lruvec->zone;
+ return lruvec->pgdat;
#else
- return container_of(lruvec, struct zone, lruvec);
+ return container_of(lruvec, struct pglist_data, lruvec);
#endif
}
diff --git a/include/linux/nd.h b/include/linux/nd.h
index aee2761d294c..f1ea426d6a5e 100644
--- a/include/linux/nd.h
+++ b/include/linux/nd.h
@@ -26,6 +26,7 @@ struct nd_device_driver {
unsigned long type;
int (*probe)(struct device *dev);
int (*remove)(struct device *dev);
+ void (*shutdown)(struct device *dev);
void (*notify)(struct device *dev, enum nvdimm_event event);
};
@@ -67,7 +68,7 @@ struct nd_namespace_io {
struct nd_namespace_common common;
struct resource res;
resource_size_t size;
- void __pmem *addr;
+ void *addr;
struct badblocks bb;
};
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 606137b3b778..5bc0457ee3a8 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -73,9 +73,9 @@ static inline bool oom_task_origin(const struct task_struct *p)
extern void mark_oom_victim(struct task_struct *tsk);
#ifdef CONFIG_MMU
-extern void try_oom_reaper(struct task_struct *tsk);
+extern void wake_oom_reaper(struct task_struct *tsk);
#else
-static inline void try_oom_reaper(struct task_struct *tsk)
+static inline void wake_oom_reaper(struct task_struct *tsk)
{
}
#endif
@@ -107,27 +107,7 @@ extern void oom_killer_enable(void);
extern struct task_struct *find_lock_task_mm(struct task_struct *p);
-static inline bool task_will_free_mem(struct task_struct *task)
-{
- struct signal_struct *sig = task->signal;
-
- /*
- * A coredumping process may sleep for an extended period in exit_mm(),
- * so the oom killer cannot assume that the process will promptly exit
- * and release memory.
- */
- if (sig->flags & SIGNAL_GROUP_COREDUMP)
- return false;
-
- if (!(task->flags & PF_EXITING))
- return false;
-
- /* Make sure that the whole thread group is going down */
- if (!thread_group_empty(task) && !(sig->flags & SIGNAL_GROUP_EXIT))
- return false;
-
- return true;
-}
+bool task_will_free_mem(struct task_struct *task);
/* sysctls */
extern int sysctl_oom_dump_tasks;
diff --git a/include/linux/pfn_t.h b/include/linux/pfn_t.h
index 94994810c7c0..a3d90b9da18d 100644
--- a/include/linux/pfn_t.h
+++ b/include/linux/pfn_t.h
@@ -28,7 +28,10 @@ static inline pfn_t pfn_to_pfn_t(unsigned long pfn)
return __pfn_to_pfn_t(pfn, 0);
}
-extern pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags);
+static inline pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags)
+{
+ return __pfn_to_pfn_t(addr >> PAGE_SHIFT, flags);
+}
static inline bool pfn_t_has_page(pfn_t pfn)
{
diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h
index d921afd5f109..12343caa114e 100644
--- a/include/linux/pinctrl/pinconf-generic.h
+++ b/include/linux/pinctrl/pinconf-generic.h
@@ -175,6 +175,8 @@ int pinconf_generic_dt_subnode_to_map(struct pinctrl_dev *pctldev,
int pinconf_generic_dt_node_to_map(struct pinctrl_dev *pctldev,
struct device_node *np_config, struct pinctrl_map **map,
unsigned *num_maps, enum pinctrl_map_type type);
+void pinconf_generic_dt_free_map(struct pinctrl_dev *pctldev,
+ struct pinctrl_map *map, unsigned num_maps);
static inline int pinconf_generic_dt_node_to_map_group(
struct pinctrl_dev *pctldev, struct device_node *np_config,
diff --git a/include/linux/pmem.h b/include/linux/pmem.h
index 57d146fe44dd..e856c2cb0fe8 100644
--- a/include/linux/pmem.h
+++ b/include/linux/pmem.h
@@ -26,47 +26,35 @@
* calling these symbols with arch_has_pmem_api() and redirect to the
* implementation in asm/pmem.h.
*/
-static inline bool __arch_has_wmb_pmem(void)
-{
- return false;
-}
-
-static inline void arch_wmb_pmem(void)
-{
- BUG();
-}
-
-static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src,
- size_t n)
+static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n)
{
BUG();
}
-static inline int arch_memcpy_from_pmem(void *dst, const void __pmem *src,
- size_t n)
+static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n)
{
BUG();
return -EFAULT;
}
-static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes,
+static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes,
struct iov_iter *i)
{
BUG();
return 0;
}
-static inline void arch_clear_pmem(void __pmem *addr, size_t size)
+static inline void arch_clear_pmem(void *addr, size_t size)
{
BUG();
}
-static inline void arch_wb_cache_pmem(void __pmem *addr, size_t size)
+static inline void arch_wb_cache_pmem(void *addr, size_t size)
{
BUG();
}
-static inline void arch_invalidate_pmem(void __pmem *addr, size_t size)
+static inline void arch_invalidate_pmem(void *addr, size_t size)
{
BUG();
}
@@ -77,13 +65,6 @@ static inline bool arch_has_pmem_api(void)
return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API);
}
-static inline int default_memcpy_from_pmem(void *dst, void __pmem const *src,
- size_t size)
-{
- memcpy(dst, (void __force *) src, size);
- return 0;
-}
-
/*
* memcpy_from_pmem - read from persistent memory with error handling
* @dst: destination buffer
@@ -92,54 +73,13 @@ static inline int default_memcpy_from_pmem(void *dst, void __pmem const *src,
*
* Returns 0 on success negative error code on failure.
*/
-static inline int memcpy_from_pmem(void *dst, void __pmem const *src,
- size_t size)
+static inline int memcpy_from_pmem(void *dst, void const *src, size_t size)
{
if (arch_has_pmem_api())
return arch_memcpy_from_pmem(dst, src, size);
else
- return default_memcpy_from_pmem(dst, src, size);
-}
-
-/**
- * arch_has_wmb_pmem - true if wmb_pmem() ensures durability
- *
- * For a given cpu implementation within an architecture it is possible
- * that wmb_pmem() resolves to a nop. In the case this returns
- * false, pmem api users are unable to ensure durability and may want to
- * fall back to a different data consistency model, or otherwise notify
- * the user.
- */
-static inline bool arch_has_wmb_pmem(void)
-{
- return arch_has_pmem_api() && __arch_has_wmb_pmem();
-}
-
-/*
- * These defaults seek to offer decent performance and minimize the
- * window between i/o completion and writes being durable on media.
- * However, it is undefined / architecture specific whether
- * ARCH_MEMREMAP_PMEM + default_memcpy_to_pmem is sufficient for
- * making data durable relative to i/o completion.
- */
-static inline void default_memcpy_to_pmem(void __pmem *dst, const void *src,
- size_t size)
-{
- memcpy((void __force *) dst, src, size);
-}
-
-static inline size_t default_copy_from_iter_pmem(void __pmem *addr,
- size_t bytes, struct iov_iter *i)
-{
- return copy_from_iter_nocache((void __force *)addr, bytes, i);
-}
-
-static inline void default_clear_pmem(void __pmem *addr, size_t size)
-{
- if (size == PAGE_SIZE && ((unsigned long)addr & ~PAGE_MASK) == 0)
- clear_page((void __force *)addr);
- else
- memset((void __force *)addr, 0, size);
+ memcpy(dst, src, size);
+ return 0;
}
/**
@@ -152,29 +92,14 @@ static inline void default_clear_pmem(void __pmem *addr, size_t size)
* being effectively evicted from, or never written to, the processor
* cache hierarchy after the copy completes. After memcpy_to_pmem()
* data may still reside in cpu or platform buffers, so this operation
- * must be followed by a wmb_pmem().
+ * must be followed by a blkdev_issue_flush() on the pmem block device.
*/
-static inline void memcpy_to_pmem(void __pmem *dst, const void *src, size_t n)
+static inline void memcpy_to_pmem(void *dst, const void *src, size_t n)
{
if (arch_has_pmem_api())
arch_memcpy_to_pmem(dst, src, n);
else
- default_memcpy_to_pmem(dst, src, n);
-}
-
-/**
- * wmb_pmem - synchronize writes to persistent memory
- *
- * After a series of memcpy_to_pmem() operations this drains data from
- * cpu write buffers and any platform (memory controller) buffers to
- * ensure that written data is durable on persistent memory media.
- */
-static inline void wmb_pmem(void)
-{
- if (arch_has_wmb_pmem())
- arch_wmb_pmem();
- else
- wmb();
+ memcpy(dst, src, n);
}
/**
@@ -184,14 +109,14 @@ static inline void wmb_pmem(void)
* @i: iterator with source data
*
* Copy data from the iterator 'i' to the PMEM buffer starting at 'addr'.
- * This function requires explicit ordering with a wmb_pmem() call.
+ * See blkdev_issue_flush() note for memcpy_to_pmem().
*/
-static inline size_t copy_from_iter_pmem(void __pmem *addr, size_t bytes,
+static inline size_t copy_from_iter_pmem(void *addr, size_t bytes,
struct iov_iter *i)
{
if (arch_has_pmem_api())
return arch_copy_from_iter_pmem(addr, bytes, i);
- return default_copy_from_iter_pmem(addr, bytes, i);
+ return copy_from_iter_nocache(addr, bytes, i);
}
/**
@@ -200,14 +125,14 @@ static inline size_t copy_from_iter_pmem(void __pmem *addr, size_t bytes,
* @size: number of bytes to zero
*
* Write zeros into the memory range starting at 'addr' for 'size' bytes.
- * This function requires explicit ordering with a wmb_pmem() call.
+ * See blkdev_issue_flush() note for memcpy_to_pmem().
*/
-static inline void clear_pmem(void __pmem *addr, size_t size)
+static inline void clear_pmem(void *addr, size_t size)
{
if (arch_has_pmem_api())
arch_clear_pmem(addr, size);
else
- default_clear_pmem(addr, size);
+ memset(addr, 0, size);
}
/**
@@ -218,7 +143,7 @@ static inline void clear_pmem(void __pmem *addr, size_t size)
* For platforms that support clearing poison this flushes any poisoned
* ranges out of the cache
*/
-static inline void invalidate_pmem(void __pmem *addr, size_t size)
+static inline void invalidate_pmem(void *addr, size_t size)
{
if (arch_has_pmem_api())
arch_invalidate_pmem(addr, size);
@@ -230,9 +155,9 @@ static inline void invalidate_pmem(void __pmem *addr, size_t size)
* @size: number of bytes to write back
*
* Write back the processor cache range starting at 'addr' for 'size' bytes.
- * This function requires explicit ordering with a wmb_pmem() call.
+ * See blkdev_issue_flush() note for memcpy_to_pmem().
*/
-static inline void wb_cache_pmem(void __pmem *addr, size_t size)
+static inline void wb_cache_pmem(void *addr, size_t size)
{
if (arch_has_pmem_api())
arch_wb_cache_pmem(addr, size);
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 9dfb6bce8c9e..8486d27cf360 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -200,8 +200,8 @@ struct mem_dqblk {
qsize_t dqb_ihardlimit; /* absolute limit on allocated inodes */
qsize_t dqb_isoftlimit; /* preferred inode limit */
qsize_t dqb_curinodes; /* current # allocated inodes */
- time_t dqb_btime; /* time limit for excessive disk use */
- time_t dqb_itime; /* time limit for excessive inode use */
+ time64_t dqb_btime; /* time limit for excessive disk use */
+ time64_t dqb_itime; /* time limit for excessive inode use */
};
/*
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d99218a1e043..553af2923824 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -523,6 +523,7 @@ static inline int get_dumpable(struct mm_struct *mm)
#define MMF_HAS_UPROBES 19 /* has uprobes */
#define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */
#define MMF_OOM_REAPED 21 /* mm has been already reaped */
+#define MMF_OOM_NOT_REAPABLE 22 /* mm couldn't be reaped */
#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK)
@@ -1949,6 +1950,32 @@ static inline int tsk_nr_cpus_allowed(struct task_struct *p)
#define TNF_FAULT_LOCAL 0x08
#define TNF_MIGRATE_FAIL 0x10
+static inline bool in_vfork(struct task_struct *tsk)
+{
+ bool ret;
+
+ /*
+ * need RCU to access ->real_parent if CLONE_VM was used along with
+ * CLONE_PARENT.
+ *
+ * We check real_parent->mm == tsk->mm because CLONE_VFORK does not
+ * imply CLONE_VM
+ *
+ * CLONE_VFORK can be used with CLONE_PARENT/CLONE_THREAD and thus
+ * ->real_parent is not necessarily the task doing vfork(), so in
+ * theory we can't rely on task_lock() if we want to dereference it.
+ *
+ * And in this case we can't trust the real_parent->mm == tsk->mm
+ * check, it can be false negative. But we do not care, if init or
+ * another oom-unkillable task does this it should blame itself.
+ */
+ rcu_read_lock();
+ ret = tsk->vfork_done && tsk->real_parent->mm == tsk->mm;
+ rcu_read_unlock();
+
+ return ret;
+}
+
#ifdef CONFIG_NUMA_BALANCING
extern void task_numa_fault(int last_node, int node, int pages, int flags);
extern pid_t task_numa_group_id(struct task_struct *p);
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 339ba027ade9..4ad2c5a26399 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -88,7 +88,8 @@ struct kmem_cache {
};
static inline void *nearest_obj(struct kmem_cache *cache, struct page *page,
- void *x) {
+ void *x)
+{
void *object = x - (x - page->s_mem) % cache->size;
void *last_object = page->s_mem + (cache->num - 1) * cache->size;
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 5624c1f3eb0a..75f56c2ef2d4 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -104,6 +104,10 @@ struct kmem_cache {
unsigned int *random_seq;
#endif
+#ifdef CONFIG_KASAN
+ struct kasan_cache kasan_info;
+#endif
+
struct kmem_cache_node *node[MAX_NUMNODES];
};
@@ -119,15 +123,17 @@ static inline void sysfs_slab_remove(struct kmem_cache *s)
void object_err(struct kmem_cache *s, struct page *page,
u8 *object, char *reason);
+void *fixup_red_left(struct kmem_cache *s, void *p);
+
static inline void *nearest_obj(struct kmem_cache *cache, struct page *page,
void *x) {
void *object = x - (x - page_address(page)) % cache->size;
void *last_object = page_address(page) +
(page->objects - 1) * cache->size;
- if (unlikely(object > last_object))
- return last_object;
- else
- return object;
+ void *result = (unlikely(object > last_object)) ? last_object : object;
+
+ result = fixup_red_left(cache, result);
+ return result;
}
#endif /* _LINUX_SLUB_DEF_H */
diff --git a/include/linux/stringhash.h b/include/linux/stringhash.h
index 451771d9b9c0..7c2d95170d01 100644
--- a/include/linux/stringhash.h
+++ b/include/linux/stringhash.h
@@ -3,6 +3,7 @@
#include <linux/compiler.h> /* For __pure */
#include <linux/types.h> /* For u32, u64 */
+#include <linux/hash.h>
/*
* Routines for hashing strings of bytes to a 32-bit hash value.
@@ -34,7 +35,7 @@
*/
/* Hash courtesy of the R5 hash in reiserfs modulo sign bits */
-#define init_name_hash() 0
+#define init_name_hash(salt) (unsigned long)(salt)
/* partial hash update function. Assume roughly 4 bits per character */
static inline unsigned long
@@ -45,11 +46,12 @@ partial_name_hash(unsigned long c, unsigned long prevhash)
/*
* Finally: cut down the number of bits to a int value (and try to avoid
- * losing bits)
+ * losing bits). This also has the property (wanted by the dcache)
+ * that the msbits make a good hash table index.
*/
static inline unsigned long end_name_hash(unsigned long hash)
{
- return (unsigned int)hash;
+ return __hash_32((unsigned int)hash);
}
/*
@@ -60,7 +62,7 @@ static inline unsigned long end_name_hash(unsigned long hash)
*
* If not set, this falls back to a wrapper around the preceding.
*/
-extern unsigned int __pure full_name_hash(const char *, unsigned int);
+extern unsigned int __pure full_name_hash(const void *salt, const char *, unsigned int);
/*
* A hash_len is a u64 with the hash of a string in the low
@@ -71,6 +73,6 @@ extern unsigned int __pure full_name_hash(const char *, unsigned int);
#define hashlen_create(hash, len) ((u64)(len)<<32 | (u32)(hash))
/* Return the "hash_len" (hash and length) of a null-terminated string */
-extern u64 __pure hashlen_string(const char *name);
+extern u64 __pure hashlen_string(const void *salt, const char *name);
#endif /* __LINUX_STRINGHASH_H */
diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h
index 91d5a5d6f52b..d03932055328 100644
--- a/include/linux/sunrpc/svcauth.h
+++ b/include/linux/sunrpc/svcauth.h
@@ -172,12 +172,12 @@ extern void unix_gid_cache_destroy(struct net *net);
*/
static inline unsigned long hash_str(char const *name, int bits)
{
- return hashlen_hash(hashlen_string(name)) >> (32 - bits);
+ return hashlen_hash(hashlen_string(NULL, name)) >> (32 - bits);
}
static inline unsigned long hash_mem(char const *buf, int length, int bits)
{
- return full_name_hash(buf, length) >> (32 - bits);
+ return full_name_hash(NULL, buf, length) >> (32 - bits);
}
#endif /* __KERNEL__ */
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 0af2bb2028fd..b17cc4830fa6 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -157,15 +157,6 @@ enum {
#define SWAP_CLUSTER_MAX 32UL
#define COMPACT_CLUSTER_MAX SWAP_CLUSTER_MAX
-/*
- * Ratio between zone->managed_pages and the "gap" that above the per-zone
- * "high_wmark". While balancing nodes, We allow kswapd to shrink zones that
- * do not meet the (high_wmark + gap) watermark, even which already met the
- * high_wmark, in order to provide better per-zone lru behavior. We are ok to
- * spend not more than 1% of the memory for this zone balancing "gap".
- */
-#define KSWAPD_ZONE_BALANCE_GAP_RATIO 100
-
#define SWAP_MAP_MAX 0x3e /* Max duplication count, in first swap_map */
#define SWAP_MAP_BAD 0x3f /* Note pageblock is bad, in first swap_map */
#define SWAP_HAS_CACHE 0x40 /* Flag page is cached, in first swap_map */
@@ -317,6 +308,7 @@ extern void lru_cache_add_active_or_unevictable(struct page *page,
/* linux/mm/vmscan.c */
extern unsigned long zone_reclaimable_pages(struct zone *zone);
+extern unsigned long pgdat_reclaimable_pages(struct pglist_data *pgdat);
extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
gfp_t gfp_mask, nodemask_t *mask);
extern int __isolate_lru_page(struct page *page, isolate_mode_t mode);
@@ -324,9 +316,9 @@ extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
unsigned long nr_pages,
gfp_t gfp_mask,
bool may_swap);
-extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
+extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem,
gfp_t gfp_mask, bool noswap,
- struct zone *zone,
+ pg_data_t *pgdat,
unsigned long *nr_scanned);
extern unsigned long shrink_all_memory(unsigned long nr_pages);
extern int vm_swappiness;
@@ -334,13 +326,14 @@ extern int remove_mapping(struct address_space *mapping, struct page *page);
extern unsigned long vm_total_pages;
#ifdef CONFIG_NUMA
-extern int zone_reclaim_mode;
+extern int node_reclaim_mode;
extern int sysctl_min_unmapped_ratio;
extern int sysctl_min_slab_ratio;
-extern int zone_reclaim(struct zone *, gfp_t, unsigned int);
+extern int node_reclaim(struct pglist_data *, gfp_t, unsigned int);
#else
-#define zone_reclaim_mode 0
-static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order)
+#define node_reclaim_mode 0
+static inline int node_reclaim(struct pglist_data *pgdat, gfp_t mask,
+ unsigned int order)
{
return 0;
}
diff --git a/include/linux/topology.h b/include/linux/topology.h
index afce69296ac0..cb0775e1ee4b 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -54,7 +54,7 @@ int arch_update_cpu_topology(void);
/*
* If the distance between nodes in a system is larger than RECLAIM_DISTANCE
* (in whatever arch specific measurement units returned by node_distance())
- * and zone_reclaim_mode is enabled then the VM will only call zone_reclaim()
+ * and node_reclaim_mode is enabled then the VM will only call node_reclaim()
* on nodes within this distance.
*/
#define RECLAIM_DISTANCE 30
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 42604173f122..4d6ec58a8d45 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -23,21 +23,23 @@
enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
FOR_ALL_ZONES(PGALLOC),
+ FOR_ALL_ZONES(ALLOCSTALL),
+ FOR_ALL_ZONES(PGSCAN_SKIP),
PGFREE, PGACTIVATE, PGDEACTIVATE,
PGFAULT, PGMAJFAULT,
PGLAZYFREED,
- FOR_ALL_ZONES(PGREFILL),
- FOR_ALL_ZONES(PGSTEAL_KSWAPD),
- FOR_ALL_ZONES(PGSTEAL_DIRECT),
- FOR_ALL_ZONES(PGSCAN_KSWAPD),
- FOR_ALL_ZONES(PGSCAN_DIRECT),
+ PGREFILL,
+ PGSTEAL_KSWAPD,
+ PGSTEAL_DIRECT,
+ PGSCAN_KSWAPD,
+ PGSCAN_DIRECT,
PGSCAN_DIRECT_THROTTLE,
#ifdef CONFIG_NUMA
PGSCAN_ZONE_RECLAIM_FAILED,
#endif
PGINODESTEAL, SLABS_SCANNED, KSWAPD_INODESTEAL,
KSWAPD_LOW_WMARK_HIT_QUICKLY, KSWAPD_HIGH_WMARK_HIT_QUICKLY,
- PAGEOUTRUN, ALLOCSTALL, PGROTATED,
+ PAGEOUTRUN, PGROTATED,
DROP_PAGECACHE, DROP_SLAB,
#ifdef CONFIG_NUMA_BALANCING
NUMA_PTE_UPDATES,
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index d2da8e053210..613771909b6e 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -101,25 +101,42 @@ static inline void vm_events_fold_cpu(int cpu)
#define count_vm_vmacache_event(x) do {} while (0)
#endif
-#define __count_zone_vm_events(item, zone, delta) \
- __count_vm_events(item##_NORMAL - ZONE_NORMAL + \
- zone_idx(zone), delta)
+#define __count_zid_vm_events(item, zid, delta) \
+ __count_vm_events(item##_NORMAL - ZONE_NORMAL + zid, delta)
/*
- * Zone based page accounting with per cpu differentials.
+ * Zone and node-based page accounting with per cpu differentials.
*/
-extern atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
+extern atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS];
+extern atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS];
static inline void zone_page_state_add(long x, struct zone *zone,
enum zone_stat_item item)
{
atomic_long_add(x, &zone->vm_stat[item]);
- atomic_long_add(x, &vm_stat[item]);
+ atomic_long_add(x, &vm_zone_stat[item]);
+}
+
+static inline void node_page_state_add(long x, struct pglist_data *pgdat,
+ enum node_stat_item item)
+{
+ atomic_long_add(x, &pgdat->vm_stat[item]);
+ atomic_long_add(x, &vm_node_stat[item]);
}
static inline unsigned long global_page_state(enum zone_stat_item item)
{
- long x = atomic_long_read(&vm_stat[item]);
+ long x = atomic_long_read(&vm_zone_stat[item]);
+#ifdef CONFIG_SMP
+ if (x < 0)
+ x = 0;
+#endif
+ return x;
+}
+
+static inline unsigned long global_node_page_state(enum node_stat_item item)
+{
+ long x = atomic_long_read(&vm_node_stat[item]);
#ifdef CONFIG_SMP
if (x < 0)
x = 0;
@@ -160,32 +177,61 @@ static inline unsigned long zone_page_state_snapshot(struct zone *zone,
return x;
}
-#ifdef CONFIG_NUMA
+static inline unsigned long node_page_state_snapshot(pg_data_t *pgdat,
+ enum node_stat_item item)
+{
+ long x = atomic_long_read(&pgdat->vm_stat[item]);
-extern unsigned long node_page_state(int node, enum zone_stat_item item);
+#ifdef CONFIG_SMP
+ int cpu;
+ for_each_online_cpu(cpu)
+ x += per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->vm_node_stat_diff[item];
-#else
+ if (x < 0)
+ x = 0;
+#endif
+ return x;
+}
-#define node_page_state(node, item) global_page_state(item)
+#ifdef CONFIG_NUMA
+extern unsigned long sum_zone_node_page_state(int node,
+ enum zone_stat_item item);
+extern unsigned long node_page_state(struct pglist_data *pgdat,
+ enum node_stat_item item);
+#else
+#define sum_zone_node_page_state(node, item) global_page_state(item)
+#define node_page_state(node, item) global_node_page_state(item)
#endif /* CONFIG_NUMA */
#define add_zone_page_state(__z, __i, __d) mod_zone_page_state(__z, __i, __d)
#define sub_zone_page_state(__z, __i, __d) mod_zone_page_state(__z, __i, -(__d))
+#define add_node_page_state(__p, __i, __d) mod_node_page_state(__p, __i, __d)
+#define sub_node_page_state(__p, __i, __d) mod_node_page_state(__p, __i, -(__d))
#ifdef CONFIG_SMP
void __mod_zone_page_state(struct zone *, enum zone_stat_item item, long);
void __inc_zone_page_state(struct page *, enum zone_stat_item);
void __dec_zone_page_state(struct page *, enum zone_stat_item);
+void __mod_node_page_state(struct pglist_data *, enum node_stat_item item, long);
+void __inc_node_page_state(struct page *, enum node_stat_item);
+void __dec_node_page_state(struct page *, enum node_stat_item);
+
void mod_zone_page_state(struct zone *, enum zone_stat_item, long);
void inc_zone_page_state(struct page *, enum zone_stat_item);
void dec_zone_page_state(struct page *, enum zone_stat_item);
-extern void inc_zone_state(struct zone *, enum zone_stat_item);
+void mod_node_page_state(struct pglist_data *, enum node_stat_item, long);
+void inc_node_page_state(struct page *, enum node_stat_item);
+void dec_node_page_state(struct page *, enum node_stat_item);
+
+extern void inc_node_state(struct pglist_data *, enum node_stat_item);
extern void __inc_zone_state(struct zone *, enum zone_stat_item);
+extern void __inc_node_state(struct pglist_data *, enum node_stat_item);
extern void dec_zone_state(struct zone *, enum zone_stat_item);
extern void __dec_zone_state(struct zone *, enum zone_stat_item);
+extern void __dec_node_state(struct pglist_data *, enum node_stat_item);
void quiet_vmstat(void);
void cpu_vm_stats_fold(int cpu);
@@ -213,16 +259,34 @@ static inline void __mod_zone_page_state(struct zone *zone,
zone_page_state_add(delta, zone, item);
}
+static inline void __mod_node_page_state(struct pglist_data *pgdat,
+ enum node_stat_item item, int delta)
+{
+ node_page_state_add(delta, pgdat, item);
+}
+
static inline void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
atomic_long_inc(&zone->vm_stat[item]);
- atomic_long_inc(&vm_stat[item]);
+ atomic_long_inc(&vm_zone_stat[item]);
+}
+
+static inline void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
+{
+ atomic_long_inc(&pgdat->vm_stat[item]);
+ atomic_long_inc(&vm_node_stat[item]);
}
static inline void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
{
atomic_long_dec(&zone->vm_stat[item]);
- atomic_long_dec(&vm_stat[item]);
+ atomic_long_dec(&vm_zone_stat[item]);
+}
+
+static inline void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
+{
+ atomic_long_dec(&pgdat->vm_stat[item]);
+ atomic_long_dec(&vm_node_stat[item]);
}
static inline void __inc_zone_page_state(struct page *page,
@@ -231,12 +295,26 @@ static inline void __inc_zone_page_state(struct page *page,
__inc_zone_state(page_zone(page), item);
}
+static inline void __inc_node_page_state(struct page *page,
+ enum node_stat_item item)
+{
+ __inc_node_state(page_pgdat(page), item);
+}
+
+
static inline void __dec_zone_page_state(struct page *page,
enum zone_stat_item item)
{
__dec_zone_state(page_zone(page), item);
}
+static inline void __dec_node_page_state(struct page *page,
+ enum node_stat_item item)
+{
+ __dec_node_state(page_pgdat(page), item);
+}
+
+
/*
* We only use atomic operations to update counters. So there is no need to
* disable interrupts.
@@ -245,7 +323,12 @@ static inline void __dec_zone_page_state(struct page *page,
#define dec_zone_page_state __dec_zone_page_state
#define mod_zone_page_state __mod_zone_page_state
+#define inc_node_page_state __inc_node_page_state
+#define dec_node_page_state __dec_node_page_state
+#define mod_node_page_state __mod_node_page_state
+
#define inc_zone_state __inc_zone_state
+#define inc_node_state __inc_node_state
#define dec_zone_state __dec_zone_state
#define set_pgdat_percpu_threshold(pgdat, callback) { }
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 717e6149e753..fc1e16c25a29 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -320,7 +320,7 @@ void laptop_mode_timer_fn(unsigned long data);
static inline void laptop_sync_completion(void) { }
#endif
void throttle_vm_writeout(gfp_t gfp_mask);
-bool zone_dirty_ok(struct zone *zone);
+bool node_dirty_ok(struct pglist_data *pgdat);
int wb_domain_init(struct wb_domain *dom, gfp_t gfp);
#ifdef CONFIG_CGROUP_WRITEBACK
void wb_domain_exit(struct wb_domain *dom);