diff options
Diffstat (limited to 'include/linux/mmzone.h')
-rw-r--r-- | include/linux/mmzone.h | 106 |
1 files changed, 79 insertions, 27 deletions
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index ef6a13b7bd3e..c8f89417740b 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -114,6 +114,20 @@ struct zone_padding { #define ZONE_PADDING(name) #endif +#ifdef CONFIG_NUMA +enum numa_stat_item { + NUMA_HIT, /* allocated in intended node */ + NUMA_MISS, /* allocated in non intended node */ + NUMA_FOREIGN, /* was intended here, hit elsewhere */ + NUMA_INTERLEAVE_HIT, /* interleaver preferred this zone */ + NUMA_LOCAL, /* allocation from local node */ + NUMA_OTHER, /* allocation from other node */ + NR_VM_NUMA_STAT_ITEMS +}; +#else +#define NR_VM_NUMA_STAT_ITEMS 0 +#endif + enum zone_stat_item { /* First 128 byte cacheline (assuming 64 bit words) */ NR_FREE_PAGES, @@ -125,8 +139,6 @@ enum zone_stat_item { NR_ZONE_UNEVICTABLE, NR_ZONE_WRITE_PENDING, /* Count of dirty, writeback and unstable pages */ NR_MLOCK, /* mlock()ed pages found and moved off LRU */ - NR_SLAB_RECLAIMABLE, - NR_SLAB_UNRECLAIMABLE, NR_PAGETABLE, /* used for pagetables */ NR_KERNEL_STACK_KB, /* measured in KiB */ /* Second 128 byte cacheline */ @@ -134,14 +146,6 @@ enum zone_stat_item { #if IS_ENABLED(CONFIG_ZSMALLOC) NR_ZSPAGES, /* allocated in zsmalloc */ #endif -#ifdef CONFIG_NUMA - NUMA_HIT, /* allocated in intended node */ - NUMA_MISS, /* allocated in non intended node */ - NUMA_FOREIGN, /* was intended here, hit elsewhere */ - NUMA_INTERLEAVE_HIT, /* interleaver preferred this zone */ - NUMA_LOCAL, /* allocation from local node */ - NUMA_OTHER, /* allocation from other node */ -#endif NR_FREE_CMA_PAGES, NR_VM_ZONE_STAT_ITEMS }; @@ -152,6 +156,8 @@ enum node_stat_item { NR_INACTIVE_FILE, /* " " " " " */ NR_ACTIVE_FILE, /* " " " " " */ NR_UNEVICTABLE, /* " " " " " */ + NR_SLAB_RECLAIMABLE, + NR_SLAB_UNRECLAIMABLE, NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */ NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */ WORKINGSET_REFAULT, @@ -276,6 +282,7 @@ struct per_cpu_pageset { struct per_cpu_pages pcp; #ifdef CONFIG_NUMA s8 expire; + u16 vm_numa_stat_diff[NR_VM_NUMA_STAT_ITEMS]; #endif #ifdef CONFIG_SMP s8 stat_threshold; @@ -496,6 +503,7 @@ struct zone { ZONE_PADDING(_pad3_) /* Zone statistics */ atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS]; + atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS]; } ____cacheline_internodealigned_in_smp; enum pgdat_flags { @@ -533,6 +541,22 @@ static inline bool zone_is_empty(struct zone *zone) } /* + * Return true if [start_pfn, start_pfn + nr_pages) range has a non-empty + * intersection with the given zone + */ +static inline bool zone_intersects(struct zone *zone, + unsigned long start_pfn, unsigned long nr_pages) +{ + if (zone_is_empty(zone)) + return false; + if (start_pfn >= zone_end_pfn(zone) || + start_pfn + nr_pages <= zone->zone_start_pfn) + return false; + + return true; +} + +/* * The "priority" of VM scanning is how much of the queues we will scan in one * go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the * queues ("queue_length >> 12") during an aging round. @@ -587,12 +611,9 @@ extern struct page *mem_map; #endif /* - * The pg_data_t structure is used in machines with CONFIG_DISCONTIGMEM - * (mostly NUMA machines?) to denote a higher-level memory zone than the - * zone denotes. - * * On NUMA machines, each NUMA node would have a pg_data_t to describe - * it's memory layout. + * it's memory layout. On UMA machines there is a single pglist_data which + * describes the whole memory. * * Memory statistics and page replacement data structures are maintained on a * per-zone basis. @@ -757,8 +778,7 @@ static inline bool is_dev_zone(const struct zone *zone) #include <linux/memory_hotplug.h> -extern struct mutex zonelists_mutex; -void build_all_zonelists(pg_data_t *pgdat, struct zone *zone); +void build_all_zonelists(pg_data_t *pgdat); void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx); bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, int classzone_idx, unsigned int alloc_flags, @@ -772,7 +792,7 @@ enum memmap_context { MEMMAP_EARLY, MEMMAP_HOTPLUG, }; -extern int init_currently_empty_zone(struct zone *zone, unsigned long start_pfn, +extern void init_currently_empty_zone(struct zone *zone, unsigned long start_pfn, unsigned long size); extern void lruvec_init(struct lruvec *lruvec); @@ -883,7 +903,7 @@ int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int, extern int numa_zonelist_order_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); extern char numa_zonelist_order[]; -#define NUMA_ZONELIST_ORDER_LEN 16 /* string buffer size */ +#define NUMA_ZONELIST_ORDER_LEN 16 #ifndef CONFIG_NEED_MULTIPLE_NODES @@ -1042,6 +1062,7 @@ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist, !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) static inline unsigned long early_pfn_to_nid(unsigned long pfn) { + BUILD_BUG_ON(IS_ENABLED(CONFIG_NUMA)); return 0; } #endif @@ -1073,8 +1094,14 @@ static inline unsigned long early_pfn_to_nid(unsigned long pfn) #error Allocator MAX_ORDER exceeds SECTION_SIZE #endif -#define pfn_to_section_nr(pfn) ((pfn) >> PFN_SECTION_SHIFT) -#define section_nr_to_pfn(sec) ((sec) << PFN_SECTION_SHIFT) +static inline unsigned long pfn_to_section_nr(unsigned long pfn) +{ + return pfn >> PFN_SECTION_SHIFT; +} +static inline unsigned long section_nr_to_pfn(unsigned long sec) +{ + return sec << PFN_SECTION_SHIFT; +} #define SECTION_ALIGN_UP(pfn) (((pfn) + PAGES_PER_SECTION - 1) & PAGE_SECTION_MASK) #define SECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SECTION_MASK) @@ -1144,9 +1171,10 @@ extern unsigned long usemap_size(void); */ #define SECTION_MARKED_PRESENT (1UL<<0) #define SECTION_HAS_MEM_MAP (1UL<<1) -#define SECTION_MAP_LAST_BIT (1UL<<2) +#define SECTION_IS_ONLINE (1UL<<2) +#define SECTION_MAP_LAST_BIT (1UL<<3) #define SECTION_MAP_MASK (~(SECTION_MAP_LAST_BIT-1)) -#define SECTION_NID_SHIFT 2 +#define SECTION_NID_SHIFT 3 static inline struct page *__section_mem_map_addr(struct mem_section *section) { @@ -1175,11 +1203,30 @@ static inline int valid_section_nr(unsigned long nr) return valid_section(__nr_to_section(nr)); } +static inline int online_section(struct mem_section *section) +{ + return (section && (section->section_mem_map & SECTION_IS_ONLINE)); +} + +static inline int online_section_nr(unsigned long nr) +{ + return online_section(__nr_to_section(nr)); +} + +#ifdef CONFIG_MEMORY_HOTPLUG +void online_mem_sections(unsigned long start_pfn, unsigned long end_pfn); +#ifdef CONFIG_MEMORY_HOTREMOVE +void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn); +#endif +#endif + static inline struct mem_section *__pfn_to_section(unsigned long pfn) { return __nr_to_section(pfn_to_section_nr(pfn)); } +extern int __highest_present_section_nr; + #ifndef CONFIG_HAVE_ARCH_PFN_VALID static inline int pfn_valid(unsigned long pfn) { @@ -1251,10 +1298,15 @@ unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long); #ifdef CONFIG_ARCH_HAS_HOLES_MEMORYMODEL /* * pfn_valid() is meant to be able to tell if a given PFN has valid memmap - * associated with it or not. In FLATMEM, it is expected that holes always - * have valid memmap as long as there is valid PFNs either side of the hole. - * In SPARSEMEM, it is assumed that a valid section has a memmap for the - * entire section. + * associated with it or not. This means that a struct page exists for this + * pfn. The caller cannot assume the page is fully initialized in general. + * Hotplugable pages might not have been onlined yet. pfn_to_online_page() + * will ensure the struct page is fully online and initialized. Special pages + * (e.g. ZONE_DEVICE) are never onlined and should be treated accordingly. + * + * In FLATMEM, it is expected that holes always have valid memmap as long as + * there is valid PFNs either side of the hole. In SPARSEMEM, it is assumed + * that a valid section has a memmap for the entire section. * * However, an ARM, and maybe other embedded architectures in the future * free memmap backing holes to save memory on the assumption the memmap is |