Diffstat (limited to 'include/linux/mmzone.h')
 include/linux/mmzone.h | 106 ++++++++++++++++++++++++++++++++++----------
 1 file changed, 79 insertions(+), 27 deletions(-)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index ef6a13b7bd3e..c8f89417740b 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -114,6 +114,20 @@ struct zone_padding {
#define ZONE_PADDING(name)
#endif
+#ifdef CONFIG_NUMA
+enum numa_stat_item {
+ NUMA_HIT, /* allocated in intended node */
+ NUMA_MISS, /* allocated in non intended node */
+ NUMA_FOREIGN, /* was intended here, hit elsewhere */
+ NUMA_INTERLEAVE_HIT, /* interleaver preferred this zone */
+ NUMA_LOCAL, /* allocation from local node */
+ NUMA_OTHER, /* allocation from other node */
+ NR_VM_NUMA_STAT_ITEMS
+};
+#else
+#define NR_VM_NUMA_STAT_ITEMS 0
+#endif
+
enum zone_stat_item {
/* First 128 byte cacheline (assuming 64 bit words) */
NR_FREE_PAGES,
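Note how the !CONFIG_NUMA branch defines NR_VM_NUMA_STAT_ITEMS as 0: every array sized by it (the per-CPU diffs and zone counters added in the hunks below) then occupies no storage at all. A minimal sketch of that trick, relying on GNU C zero-length arrays as the kernel does; struct and field names here are illustrative stand-ins, not the kernel's definitions:

#include <stdio.h>

#define NR_VM_NUMA_STAT_ITEMS 0   /* models the !CONFIG_NUMA case */

struct zone_model {
        long vm_stat[4];                          /* always present */
        long vm_numa_stat[NR_VM_NUMA_STAT_ITEMS]; /* zero-length: adds no size */
};

int main(void)
{
        /* Prints 32 on LP64: the NUMA counters cost nothing when disabled. */
        printf("sizeof(struct zone_model) = %zu\n", sizeof(struct zone_model));
        return 0;
}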
@@ -125,8 +139,6 @@ enum zone_stat_item {
NR_ZONE_UNEVICTABLE,
NR_ZONE_WRITE_PENDING, /* Count of dirty, writeback and unstable pages */
NR_MLOCK, /* mlock()ed pages found and moved off LRU */
- NR_SLAB_RECLAIMABLE,
- NR_SLAB_UNRECLAIMABLE,
NR_PAGETABLE, /* used for pagetables */
NR_KERNEL_STACK_KB, /* measured in KiB */
/* Second 128 byte cacheline */
@@ -134,14 +146,6 @@ enum zone_stat_item {
#if IS_ENABLED(CONFIG_ZSMALLOC)
NR_ZSPAGES, /* allocated in zsmalloc */
#endif
-#ifdef CONFIG_NUMA
- NUMA_HIT, /* allocated in intended node */
- NUMA_MISS, /* allocated in non intended node */
- NUMA_FOREIGN, /* was intended here, hit elsewhere */
- NUMA_INTERLEAVE_HIT, /* interleaver preferred this zone */
- NUMA_LOCAL, /* allocation from local node */
- NUMA_OTHER, /* allocation from other node */
-#endif
NR_FREE_CMA_PAGES,
NR_VM_ZONE_STAT_ITEMS };
@@ -152,6 +156,8 @@ enum node_stat_item {
NR_INACTIVE_FILE, /* " " " " " */
NR_ACTIVE_FILE, /* " " " " " */
NR_UNEVICTABLE, /* " " " " " */
+ NR_SLAB_RECLAIMABLE,
+ NR_SLAB_UNRECLAIMABLE,
NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */
NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */
WORKINGSET_REFAULT,
@@ -276,6 +282,7 @@ struct per_cpu_pageset {
struct per_cpu_pages pcp;
#ifdef CONFIG_NUMA
s8 expire;
+ u16 vm_numa_stat_diff[NR_VM_NUMA_STAT_ITEMS];
#endif
#ifdef CONFIG_SMP
s8 stat_threshold;
@@ -496,6 +503,7 @@ struct zone {
ZONE_PADDING(_pad3_)
/* Zone statistics */
atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
+ atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS];
} ____cacheline_internodealigned_in_smp;
enum pgdat_flags {
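The new u16 vm_numa_stat_diff array and the zone-level vm_numa_stat atomics work as a pair: each CPU batches NUMA events locally and folds them into the shared atomic only when the per-CPU count crosses a threshold, while readers sum both halves. A single-threaded userspace model of that scheme; the threshold value and the inc/snapshot helper names are assumptions for illustration, not the kernel implementation:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define NCPUS 4
#define NR_VM_NUMA_STAT_ITEMS 6
#define NUMA_HIT 0
#define NUMA_STATS_THRESHOLD 32765   /* assumed batching threshold */

static atomic_long vm_numa_stat[NR_VM_NUMA_STAT_ITEMS];          /* zone counters */
static uint16_t vm_numa_stat_diff[NCPUS][NR_VM_NUMA_STAT_ITEMS]; /* per-CPU diffs */

/* Count an event cheaply in the per-CPU diff; fold into the shared
 * atomic only when the batch grows past the threshold. */
static void inc_numa_stat(int cpu, int item)
{
        uint16_t v = ++vm_numa_stat_diff[cpu][item];

        if (v > NUMA_STATS_THRESHOLD) {
                atomic_fetch_add(&vm_numa_stat[item], (long)v);
                vm_numa_stat_diff[cpu][item] = 0;
        }
}

/* A reader sees the folded total plus whatever is still batched per CPU. */
static long snapshot_numa_stat(int item)
{
        long x = atomic_load(&vm_numa_stat[item]);

        for (int cpu = 0; cpu < NCPUS; cpu++)
                x += vm_numa_stat_diff[cpu][item];
        return x;
}

int main(void)
{
        for (long i = 0; i < 200000; i++)
                inc_numa_stat(i % NCPUS, NUMA_HIT);

        printf("NUMA_HIT = %ld\n", snapshot_numa_stat(NUMA_HIT)); /* 200000 */
        return 0;
}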
@@ -533,6 +541,22 @@ static inline bool zone_is_empty(struct zone *zone)
}
/*
+ * Return true if the [start_pfn, start_pfn + nr_pages) range has a non-empty
+ * intersection with the given zone
+ */
+static inline bool zone_intersects(struct zone *zone,
+ unsigned long start_pfn, unsigned long nr_pages)
+{
+ if (zone_is_empty(zone))
+ return false;
+ if (start_pfn >= zone_end_pfn(zone) ||
+ start_pfn + nr_pages <= zone->zone_start_pfn)
+ return false;
+
+ return true;
+}
+
+/*
* The "priority" of VM scanning is how much of the queues we will scan in one
* go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the
* queues ("queue_length >> 12") during an aging round.
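zone_intersects() is a standard half-open interval overlap test: [start_pfn, start_pfn + nr_pages) misses the zone exactly when it begins at or past the zone's end, or ends at or before the zone's start. A small standalone rendition with the boundary cases spelled out; struct zone is reduced here to just a pfn range:

#include <stdbool.h>
#include <stdio.h>

struct zone {
        unsigned long zone_start_pfn;
        unsigned long spanned_pages;
};

static unsigned long zone_end_pfn(const struct zone *zone)
{
        return zone->zone_start_pfn + zone->spanned_pages;
}

static bool zone_is_empty(const struct zone *zone)
{
        return zone->spanned_pages == 0;
}

/* Same logic as the new helper: half-open ranges overlap iff neither
 * starts at or beyond the other's end. */
static bool zone_intersects(struct zone *zone,
                            unsigned long start_pfn, unsigned long nr_pages)
{
        if (zone_is_empty(zone))
                return false;
        if (start_pfn >= zone_end_pfn(zone) ||
            start_pfn + nr_pages <= zone->zone_start_pfn)
                return false;
        return true;
}

int main(void)
{
        struct zone z = { .zone_start_pfn = 0x1000, .spanned_pages = 0x1000 };

        printf("%d\n", zone_intersects(&z, 0x0800, 0x0800)); /* 0: ends at zone start */
        printf("%d\n", zone_intersects(&z, 0x0800, 0x0801)); /* 1: one-page overlap */
        printf("%d\n", zone_intersects(&z, 0x2000, 0x0100)); /* 0: starts at zone end */
        return 0;
}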
@@ -587,12 +611,9 @@ extern struct page *mem_map;
#endif
/*
- * The pg_data_t structure is used in machines with CONFIG_DISCONTIGMEM
- * (mostly NUMA machines?) to denote a higher-level memory zone than the
- * zone denotes.
- *
* On NUMA machines, each NUMA node would have a pg_data_t to describe
- * it's memory layout.
+ * its memory layout. On UMA machines there is a single pglist_data which
+ * describes the whole of memory.
*
* Memory statistics and page replacement data structures are maintained on a
* per-zone basis.
@@ -757,8 +778,7 @@ static inline bool is_dev_zone(const struct zone *zone)
#include <linux/memory_hotplug.h>
-extern struct mutex zonelists_mutex;
-void build_all_zonelists(pg_data_t *pgdat, struct zone *zone);
+void build_all_zonelists(pg_data_t *pgdat);
void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx);
bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
int classzone_idx, unsigned int alloc_flags,
@@ -772,7 +792,7 @@ enum memmap_context {
MEMMAP_EARLY,
MEMMAP_HOTPLUG,
};
-extern int init_currently_empty_zone(struct zone *zone, unsigned long start_pfn,
+extern void init_currently_empty_zone(struct zone *zone, unsigned long start_pfn,
unsigned long size);
extern void lruvec_init(struct lruvec *lruvec);
@@ -883,7 +903,7 @@ int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
extern int numa_zonelist_order_handler(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
extern char numa_zonelist_order[];
-#define NUMA_ZONELIST_ORDER_LEN 16 /* string buffer size */
+#define NUMA_ZONELIST_ORDER_LEN 16
#ifndef CONFIG_NEED_MULTIPLE_NODES
@@ -1042,6 +1062,7 @@ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist,
!defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
static inline unsigned long early_pfn_to_nid(unsigned long pfn)
{
+ BUILD_BUG_ON(IS_ENABLED(CONFIG_NUMA));
return 0;
}
#endif
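The added BUILD_BUG_ON(IS_ENABLED(CONFIG_NUMA)) turns a silently wrong answer into a build failure: the "always node 0" fallback is only correct on single-node kernels, so a NUMA configuration must never compile this stub. A rough C11 equivalent using _Static_assert; BUILD_BUG_ON itself is a kernel macro with more machinery, and CONFIG_NUMA_ENABLED below is an illustrative stand-in for IS_ENABLED(CONFIG_NUMA):

#include <stdio.h>

#define CONFIG_NUMA_ENABLED 0   /* flip to 1 and the build breaks */

static inline unsigned long early_pfn_to_nid(unsigned long pfn)
{
        /* Compilation fails if the condition is violated, so this
         * "node 0" stub can never be built into a NUMA kernel. */
        _Static_assert(!CONFIG_NUMA_ENABLED,
                       "early_pfn_to_nid() stub must not be used with NUMA");
        (void)pfn;
        return 0;
}

int main(void)
{
        printf("nid = %lu\n", early_pfn_to_nid(42));
        return 0;
}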
@@ -1073,8 +1094,14 @@ static inline unsigned long early_pfn_to_nid(unsigned long pfn)
#error Allocator MAX_ORDER exceeds SECTION_SIZE
#endif
-#define pfn_to_section_nr(pfn) ((pfn) >> PFN_SECTION_SHIFT)
-#define section_nr_to_pfn(sec) ((sec) << PFN_SECTION_SHIFT)
+static inline unsigned long pfn_to_section_nr(unsigned long pfn)
+{
+ return pfn >> PFN_SECTION_SHIFT;
+}
+static inline unsigned long section_nr_to_pfn(unsigned long sec)
+{
+ return sec << PFN_SECTION_SHIFT;
+}
#define SECTION_ALIGN_UP(pfn) (((pfn) + PAGES_PER_SECTION - 1) & PAGE_SECTION_MASK)
#define SECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SECTION_MASK)
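Converting the pfn/section conversions from macros to inline functions keeps the generated code identical (a single shift) while pinning the operand type to unsigned long, so narrower or signed arguments are converted predictably instead of being shifted at whatever type the caller supplied. A sketch of the round trip, assuming the x86_64 values of the day (128 MiB sections with 4 KiB pages, hence PFN_SECTION_SHIFT = 15; treat the constant as illustrative):

#include <assert.h>
#include <stdio.h>

#define PFN_SECTION_SHIFT 15                       /* 27 - 12 on x86_64 */
#define PAGES_PER_SECTION (1UL << PFN_SECTION_SHIFT)

static inline unsigned long pfn_to_section_nr(unsigned long pfn)
{
        return pfn >> PFN_SECTION_SHIFT;
}

static inline unsigned long section_nr_to_pfn(unsigned long sec)
{
        return sec << PFN_SECTION_SHIFT;
}

int main(void)
{
        unsigned long pfn = 0x123456;
        unsigned long sec = pfn_to_section_nr(pfn);

        printf("pfn 0x%lx -> section %lu (first pfn 0x%lx)\n",
               pfn, sec, section_nr_to_pfn(sec));

        /* Every pfn falls inside the section it maps to. */
        assert(section_nr_to_pfn(sec) <= pfn &&
               pfn < section_nr_to_pfn(sec) + PAGES_PER_SECTION);
        return 0;
}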
@@ -1144,9 +1171,10 @@ extern unsigned long usemap_size(void);
*/
#define SECTION_MARKED_PRESENT (1UL<<0)
#define SECTION_HAS_MEM_MAP (1UL<<1)
-#define SECTION_MAP_LAST_BIT (1UL<<2)
+#define SECTION_IS_ONLINE (1UL<<2)
+#define SECTION_MAP_LAST_BIT (1UL<<3)
#define SECTION_MAP_MASK (~(SECTION_MAP_LAST_BIT-1))
-#define SECTION_NID_SHIFT 2
+#define SECTION_NID_SHIFT 3
static inline struct page *__section_mem_map_addr(struct mem_section *section)
{
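With SECTION_IS_ONLINE claiming bit 2, the flag field grows from two bits to three, which is why SECTION_MAP_LAST_BIT and SECTION_NID_SHIFT both move up: for sections that do not yet carry a memmap pointer, the node id lives in the same word above the flag bits. A standalone illustration of the packing, using the new values from this hunk:

#include <stdio.h>

#define SECTION_MARKED_PRESENT  (1UL << 0)
#define SECTION_HAS_MEM_MAP     (1UL << 1)
#define SECTION_IS_ONLINE       (1UL << 2)
#define SECTION_MAP_LAST_BIT    (1UL << 3)
#define SECTION_MAP_MASK        (~(SECTION_MAP_LAST_BIT - 1))
#define SECTION_NID_SHIFT       3

int main(void)
{
        /* Node id above the flag bits, flags in the low three bits. */
        unsigned long map = (42UL << SECTION_NID_SHIFT)
                          | SECTION_MARKED_PRESENT
                          | SECTION_IS_ONLINE;

        printf("nid   = %lu\n", map >> SECTION_NID_SHIFT);  /* 42 */
        printf("flags = 0x%lx\n", map & ~SECTION_MAP_MASK); /* 0x5 */
        printf("online: %s\n", (map & SECTION_IS_ONLINE) ? "yes" : "no");
        return 0;
}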
@@ -1175,11 +1203,30 @@ static inline int valid_section_nr(unsigned long nr)
return valid_section(__nr_to_section(nr));
}
+static inline int online_section(struct mem_section *section)
+{
+ return (section && (section->section_mem_map & SECTION_IS_ONLINE));
+}
+
+static inline int online_section_nr(unsigned long nr)
+{
+ return online_section(__nr_to_section(nr));
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+void online_mem_sections(unsigned long start_pfn, unsigned long end_pfn);
+#ifdef CONFIG_MEMORY_HOTREMOVE
+void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn);
+#endif
+#endif
+
static inline struct mem_section *__pfn_to_section(unsigned long pfn)
{
return __nr_to_section(pfn_to_section_nr(pfn));
}
+extern int __highest_present_section_nr;
+
#ifndef CONFIG_HAVE_ARCH_PFN_VALID
static inline int pfn_valid(unsigned long pfn)
{
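online_section()/online_section_nr() let walkers distinguish "a struct page exists" from "that struct page is initialized and safe to use". A toy model of the pfn_to_online_page() pattern that the comment in the following hunk refers to; the section geometry and array sizes are arbitrary demo values, and the real kernel helper lives elsewhere (memory_hotplug.h):

#include <stdio.h>

#define PFN_SECTION_SHIFT 15         /* illustrative, see earlier sketch */
#define NR_MEM_SECTIONS   1024
#define SECTION_IS_ONLINE (1UL << 2)

struct mem_section { unsigned long section_mem_map; };
struct page { unsigned long flags; };

static struct mem_section mem_sections[NR_MEM_SECTIONS];
static struct page fake_memmap[1UL << 20];  /* covers the pfns used below */

static int online_section_nr(unsigned long nr)
{
        return nr < NR_MEM_SECTIONS &&
               (mem_sections[nr].section_mem_map & SECTION_IS_ONLINE);
}

/* A pfn may be valid (struct page exists) yet not online; only an
 * online section guarantees a fully initialized page. */
static struct page *pfn_to_online_page(unsigned long pfn)
{
        unsigned long nr = pfn >> PFN_SECTION_SHIFT;

        return online_section_nr(nr) ? &fake_memmap[pfn] : NULL;
}

int main(void)
{
        mem_sections[0].section_mem_map |= SECTION_IS_ONLINE;

        printf("pfn 0x100:  %s\n", pfn_to_online_page(0x100)  ? "online" : "not online");
        printf("pfn 0x8000: %s\n", pfn_to_online_page(0x8000) ? "online" : "not online");
        return 0;
}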
@@ -1251,10 +1298,15 @@ unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long);
#ifdef CONFIG_ARCH_HAS_HOLES_MEMORYMODEL
/*
* pfn_valid() is meant to be able to tell if a given PFN has valid memmap
- * associated with it or not. In FLATMEM, it is expected that holes always
- * have valid memmap as long as there is valid PFNs either side of the hole.
- * In SPARSEMEM, it is assumed that a valid section has a memmap for the
- * entire section.
+ * associated with it or not. This means that a struct page exists for this
+ * pfn. The caller cannot assume the page is fully initialized in general.
+ * Hotpluggable pages might not have been onlined yet. pfn_to_online_page()
+ * will ensure the struct page is fully online and initialized. Special pages
+ * (e.g. ZONE_DEVICE) are never onlined and should be treated accordingly.
+ *
+ * In FLATMEM, it is expected that holes always have valid memmap as long as
+ * there are valid PFNs on either side of the hole. In SPARSEMEM, it is assumed
+ * that a valid section has a memmap for the entire section.
*
* However, ARM, and maybe other embedded architectures in the future,
* free memmap backing holes to save memory on the assumption the memmap is