diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-07-12 11:40:28 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-07-12 11:40:28 -0700 |
commit | ef8f3d48afd6a17a0dae8c277c2f539c2f19fd16 (patch) | |
tree | 5c28f9f287a552ad1a655b9e29e5330966652e89 /mm/page_alloc.c | |
parent | d7d170a8e357bd9926cc6bfea5c2385c2eac65b2 (diff) | |
parent | 2c207985f354dfb549e5a543102a3e084eea81f6 (diff) |
Merge branch 'akpm' (patches from Andrew)
Merge updates from Andrew Morton:
"Am experimenting with splitting MM up into identifiable subsystems
perhaps with a view to gitifying it in complex ways. Also with more
verbose "incoming" emails.
Most of MM is here and a few other trees.
Subsystems affected by this patch series:
- hotfixes
- iommu
- scripts
- arch/sh
- ocfs2
- mm:slab-generic
- mm:slub
- mm:kmemleak
- mm:kasan
- mm:cleanups
- mm:debug
- mm:pagecache
- mm:swap
- mm:memcg
- mm:gup
- mm:pagemap
- mm:infrastructure
- mm:vmalloc
- mm:initialization
- mm:pagealloc
- mm:vmscan
- mm:tools
- mm:proc
- mm:ras
- mm:oom-kill
hotfixes:
mm: vmscan: scan anonymous pages on file refaults
mm/nvdimm: add is_ioremap_addr and use that to check ioremap address
mm/memcontrol: fix wrong statistics in memory.stat
mm/z3fold.c: lock z3fold page before __SetPageMovable()
nilfs2: do not use unexported cpu_to_le32()/le32_to_cpu() in uapi header
MAINTAINERS: nilfs2: update email address
iommu:
include/linux/dmar.h: replace single-char identifiers in macros
scripts:
scripts/decode_stacktrace: match basepath using shell prefix operator, not regex
scripts/decode_stacktrace: look for modules with .ko.debug extension
scripts/spelling.txt: drop "sepc" from the misspelling list
scripts/spelling.txt: add spelling fix for prohibited
scripts/decode_stacktrace: Accept dash/underscore in modules
scripts/spelling.txt: add more spellings to spelling.txt
arch/sh:
arch/sh/configs/sdk7786_defconfig: remove CONFIG_LOGFS
sh: config: remove left-over BACKLIGHT_LCD_SUPPORT
sh: prevent warnings when using iounmap
ocfs2:
fs: ocfs: fix spelling mistake "hearbeating" -> "heartbeat"
ocfs2/dlm: use struct_size() helper
ocfs2: add last unlock times in locking_state
ocfs2: add locking filter debugfs file
ocfs2: add first lock wait time in locking_state
ocfs: no need to check return value of debugfs_create functions
fs/ocfs2/dlmglue.c: unneeded variable: "status"
ocfs2: use kmemdup rather than duplicating its implementation
mm:slab-generic:
Patch series "mm/slab: Improved sanity checking":
mm/slab: validate cache membership under freelist hardening
mm/slab: sanity-check page type when looking up cache
lkdtm/heap: add tests for freelist hardening
mm:slub:
mm/slub.c: avoid double string traverse in kmem_cache_flags()
slub: don't panic for memcg kmem cache creation failure
mm:kmemleak:
mm/kmemleak.c: fix check for softirq context
mm/kmemleak.c: change error at _write when kmemleak is disabled
docs: kmemleak: add more documentation details
mm:kasan:
mm/kasan: print frame description for stack bugs
Patch series "Bitops instrumentation for KASAN", v5:
lib/test_kasan: add bitops tests
x86: use static_cpu_has in uaccess region to avoid instrumentation
asm-generic, x86: add bitops instrumentation for KASAN
Patch series "mm/kasan: Add object validation in ksize()", v3:
mm/kasan: introduce __kasan_check_{read,write}
mm/kasan: change kasan_check_{read,write} to return boolean
lib/test_kasan: Add test for double-kzfree detection
mm/slab: refactor common ksize KASAN logic into slab_common.c
mm/kasan: add object validation in ksize()
mm:cleanups:
include/linux/pfn_t.h: remove pfn_t_to_virt()
Patch series "remove ARCH_SELECT_MEMORY_MODEL where it has no effect":
arm: remove ARCH_SELECT_MEMORY_MODEL
s390: remove ARCH_SELECT_MEMORY_MODEL
sparc: remove ARCH_SELECT_MEMORY_MODEL
mm/gup.c: make follow_page_mask() static
mm/memory.c: trivial clean up in insert_page()
mm: make !CONFIG_HUGE_PAGE wrappers into static inlines
include/linux/mm_types.h: ifdef struct vm_area_struct::swap_readahead_info
mm: remove the account_page_dirtied export
mm/page_isolation.c: change the prototype of undo_isolate_page_range()
include/linux/vmpressure.h: use spinlock_t instead of struct spinlock
mm: remove the exporting of totalram_pages
include/linux/pagemap.h: document trylock_page() return value
mm:debug:
mm/failslab.c: by default, do not fail allocations with direct reclaim only
Patch series "debug_pagealloc improvements":
mm, debug_pagelloc: use static keys to enable debugging
mm, page_alloc: more extensive free page checking with debug_pagealloc
mm, debug_pagealloc: use a page type instead of page_ext flag
mm:pagecache:
Patch series "fix filler_t callback type mismatches", v2:
mm/filemap.c: fix an overly long line in read_cache_page
mm/filemap: don't cast ->readpage to filler_t for do_read_cache_page
jffs2: pass the correct prototype to read_cache_page
9p: pass the correct prototype to read_cache_page
mm/filemap.c: correct the comment about VM_FAULT_RETRY
mm:swap:
mm, swap: fix race between swapoff and some swap operations
mm/swap_state.c: simplify total_swapcache_pages() with get_swap_device()
mm, swap: use rbtree for swap_extent
mm/mincore.c: fix race between swapoff and mincore
mm:memcg:
memcg, oom: no oom-kill for __GFP_RETRY_MAYFAIL
memcg, fsnotify: no oom-kill for remote memcg charging
mm, memcg: introduce memory.events.local
mm: memcontrol: dump memory.stat during cgroup OOM
Patch series "mm: reparent slab memory on cgroup removal", v7:
mm: memcg/slab: postpone kmem_cache memcg pointer initialization to memcg_link_cache()
mm: memcg/slab: rename slab delayed deactivation functions and fields
mm: memcg/slab: generalize postponed non-root kmem_cache deactivation
mm: memcg/slab: introduce __memcg_kmem_uncharge_memcg()
mm: memcg/slab: unify SLAB and SLUB page accounting
mm: memcg/slab: don't check the dying flag on kmem_cache creation
mm: memcg/slab: synchronize access to kmem_cache dying flag using a spinlock
mm: memcg/slab: rework non-root kmem_cache lifecycle management
mm: memcg/slab: stop setting page->mem_cgroup pointer for slab pages
mm: memcg/slab: reparent memcg kmem_caches on cgroup removal
mm, memcg: add a memcg_slabinfo debugfs file
mm:gup:
Patch series "switch the remaining architectures to use generic GUP", v4:
mm: use untagged_addr() for get_user_pages_fast addresses
mm: simplify gup_fast_permitted
mm: lift the x86_32 PAE version of gup_get_pte to common code
MIPS: use the generic get_user_pages_fast code
sh: add the missing pud_page definition
sh: use the generic get_user_pages_fast code
sparc64: add the missing pgd_page definition
sparc64: define untagged_addr()
sparc64: use the generic get_user_pages_fast code
mm: rename CONFIG_HAVE_GENERIC_GUP to CONFIG_HAVE_FAST_GUP
mm: reorder code blocks in gup.c
mm: consolidate the get_user_pages* implementations
mm: validate get_user_pages_fast flags
mm: move the powerpc hugepd code to mm/gup.c
mm: switch gup_hugepte to use try_get_compound_head
mm: mark the page referenced in gup_hugepte
mm/gup: speed up check_and_migrate_cma_pages() on huge page
mm/gup.c: remove some BUG_ONs from get_gate_page()
mm/gup.c: mark undo_dev_pagemap as __maybe_unused
mm:pagemap:
asm-generic, x86: introduce generic pte_{alloc,free}_one[_kernel]
alpha: switch to generic version of pte allocation
arm: switch to generic version of pte allocation
arm64: switch to generic version of pte allocation
csky: switch to generic version of pte allocation
m68k: sun3: switch to generic version of pte allocation
mips: switch to generic version of pte allocation
nds32: switch to generic version of pte allocation
nios2: switch to generic version of pte allocation
parisc: switch to generic version of pte allocation
riscv: switch to generic version of pte allocation
um: switch to generic version of pte allocation
unicore32: switch to generic version of pte allocation
mm/pgtable: drop pgtable_t variable from pte_fn_t functions
mm/memory.c: fail when offset == num in first check of __vm_map_pages()
mm:infrastructure:
mm/mmu_notifier: use hlist_add_head_rcu()
mm:vmalloc:
Patch series "Some cleanups for the KVA/vmalloc", v5:
mm/vmalloc.c: remove "node" argument
mm/vmalloc.c: preload a CPU with one object for split purpose
mm/vmalloc.c: get rid of one single unlink_va() when merge
mm/vmalloc.c: switch to WARN_ON() and move it under unlink_va()
mm/vmalloc.c: spelling> s/informaion/information/
mm:initialization:
mm/large system hash: use vmalloc for size > MAX_ORDER when !hashdist
mm/large system hash: clear hashdist when only one node with memory is booted
mm:pagealloc:
arm64: move jump_label_init() before parse_early_param()
Patch series "add init_on_alloc/init_on_free boot options", v10:
mm: security: introduce init_on_alloc=1 and init_on_free=1 boot options
mm: init: report memory auto-initialization features at boot time
mm:vmscan:
mm: vmscan: remove double slab pressure by inc'ing sc->nr_scanned
mm: vmscan: correct some vmscan counters for THP swapout
mm:tools:
tools/vm/slabinfo: order command line options
tools/vm/slabinfo: add partial slab listing to -X
tools/vm/slabinfo: add option to sort by partial slabs
tools/vm/slabinfo: add sorting info to help menu
mm:proc:
proc: use down_read_killable mmap_sem for /proc/pid/maps
proc: use down_read_killable mmap_sem for /proc/pid/smaps_rollup
proc: use down_read_killable mmap_sem for /proc/pid/pagemap
proc: use down_read_killable mmap_sem for /proc/pid/clear_refs
proc: use down_read_killable mmap_sem for /proc/pid/map_files
mm: use down_read_killable for locking mmap_sem in access_remote_vm
mm: smaps: split PSS into components
mm: vmalloc: show number of vmalloc pages in /proc/meminfo
mm:ras:
mm/memory-failure.c: clarify error message
mm:oom-kill:
mm: memcontrol: use CSS_TASK_ITER_PROCS at mem_cgroup_scan_tasks()
mm, oom: refactor dump_tasks for memcg OOMs
mm, oom: remove redundant task_in_mem_cgroup() check
oom: decouple mems_allowed from oom_unkillable_task
mm/oom_kill.c: remove redundant OOM score normalization in select_bad_process()"
* akpm: (147 commits)
mm/oom_kill.c: remove redundant OOM score normalization in select_bad_process()
oom: decouple mems_allowed from oom_unkillable_task
mm, oom: remove redundant task_in_mem_cgroup() check
mm, oom: refactor dump_tasks for memcg OOMs
mm: memcontrol: use CSS_TASK_ITER_PROCS at mem_cgroup_scan_tasks()
mm/memory-failure.c: clarify error message
mm: vmalloc: show number of vmalloc pages in /proc/meminfo
mm: smaps: split PSS into components
mm: use down_read_killable for locking mmap_sem in access_remote_vm
proc: use down_read_killable mmap_sem for /proc/pid/map_files
proc: use down_read_killable mmap_sem for /proc/pid/clear_refs
proc: use down_read_killable mmap_sem for /proc/pid/pagemap
proc: use down_read_killable mmap_sem for /proc/pid/smaps_rollup
proc: use down_read_killable mmap_sem for /proc/pid/maps
tools/vm/slabinfo: add sorting info to help menu
tools/vm/slabinfo: add option to sort by partial slabs
tools/vm/slabinfo: add partial slab listing to -X
tools/vm/slabinfo: order command line options
mm: vmscan: correct some vmscan counters for THP swapout
mm: vmscan: remove double slab pressure by inc'ing sc->nr_scanned
...
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r-- | mm/page_alloc.c | 234 |
1 files changed, 156 insertions, 78 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8e3bc949ebcc..dbd0d5cbbcbb 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -50,7 +50,6 @@ #include <linux/backing-dev.h> #include <linux/fault-inject.h> #include <linux/page-isolation.h> -#include <linux/page_ext.h> #include <linux/debugobjects.h> #include <linux/kmemleak.h> #include <linux/compaction.h> @@ -136,6 +135,55 @@ unsigned long totalcma_pages __read_mostly; int percpu_pagelist_fraction; gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK; +#ifdef CONFIG_INIT_ON_ALLOC_DEFAULT_ON +DEFINE_STATIC_KEY_TRUE(init_on_alloc); +#else +DEFINE_STATIC_KEY_FALSE(init_on_alloc); +#endif +EXPORT_SYMBOL(init_on_alloc); + +#ifdef CONFIG_INIT_ON_FREE_DEFAULT_ON +DEFINE_STATIC_KEY_TRUE(init_on_free); +#else +DEFINE_STATIC_KEY_FALSE(init_on_free); +#endif +EXPORT_SYMBOL(init_on_free); + +static int __init early_init_on_alloc(char *buf) +{ + int ret; + bool bool_result; + + if (!buf) + return -EINVAL; + ret = kstrtobool(buf, &bool_result); + if (bool_result && page_poisoning_enabled()) + pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, will take precedence over init_on_alloc\n"); + if (bool_result) + static_branch_enable(&init_on_alloc); + else + static_branch_disable(&init_on_alloc); + return ret; +} +early_param("init_on_alloc", early_init_on_alloc); + +static int __init early_init_on_free(char *buf) +{ + int ret; + bool bool_result; + + if (!buf) + return -EINVAL; + ret = kstrtobool(buf, &bool_result); + if (bool_result && page_poisoning_enabled()) + pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, will take precedence over init_on_free\n"); + if (bool_result) + static_branch_enable(&init_on_free); + else + static_branch_disable(&init_on_free); + return ret; +} +early_param("init_on_free", early_init_on_free); /* * A cached value of the page's pageblock's migratetype, used when the page is @@ -224,8 +272,6 @@ int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES] = { [ZONE_MOVABLE] = 0, }; -EXPORT_SYMBOL(totalram_pages); - static char * const zone_names[MAX_NR_ZONES] = { #ifdef CONFIG_ZONE_DMA "DMA", @@ -646,30 +692,29 @@ void prep_compound_page(struct page *page, unsigned int order) #ifdef CONFIG_DEBUG_PAGEALLOC unsigned int _debug_guardpage_minorder; -bool _debug_pagealloc_enabled __read_mostly - = IS_ENABLED(CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT); + +#ifdef CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT +DEFINE_STATIC_KEY_TRUE(_debug_pagealloc_enabled); +#else +DEFINE_STATIC_KEY_FALSE(_debug_pagealloc_enabled); +#endif EXPORT_SYMBOL(_debug_pagealloc_enabled); -bool _debug_guardpage_enabled __read_mostly; + +DEFINE_STATIC_KEY_FALSE(_debug_guardpage_enabled); static int __init early_debug_pagealloc(char *buf) { - if (!buf) + bool enable = false; + + if (kstrtobool(buf, &enable)) return -EINVAL; - return kstrtobool(buf, &_debug_pagealloc_enabled); -} -early_param("debug_pagealloc", early_debug_pagealloc); -static bool need_debug_guardpage(void) -{ - /* If we don't use debug_pagealloc, we don't need guard page */ - if (!debug_pagealloc_enabled()) - return false; + if (enable) + static_branch_enable(&_debug_pagealloc_enabled); - if (!debug_guardpage_minorder()) - return false; - - return true; + return 0; } +early_param("debug_pagealloc", early_debug_pagealloc); static void init_debug_guardpage(void) { @@ -679,14 +724,9 @@ static void init_debug_guardpage(void) if (!debug_guardpage_minorder()) return; - _debug_guardpage_enabled = true; + static_branch_enable(&_debug_guardpage_enabled); } -struct page_ext_operations debug_guardpage_ops = { - .need = need_debug_guardpage, - .init = init_debug_guardpage, -}; - static int __init debug_guardpage_minorder_setup(char *buf) { unsigned long res; @@ -704,20 +744,13 @@ early_param("debug_guardpage_minorder", debug_guardpage_minorder_setup); static inline bool set_page_guard(struct zone *zone, struct page *page, unsigned int order, int migratetype) { - struct page_ext *page_ext; - if (!debug_guardpage_enabled()) return false; if (order >= debug_guardpage_minorder()) return false; - page_ext = lookup_page_ext(page); - if (unlikely(!page_ext)) - return false; - - __set_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags); - + __SetPageGuard(page); INIT_LIST_HEAD(&page->lru); set_page_private(page, order); /* Guard pages are not available for any usage */ @@ -729,23 +762,16 @@ static inline bool set_page_guard(struct zone *zone, struct page *page, static inline void clear_page_guard(struct zone *zone, struct page *page, unsigned int order, int migratetype) { - struct page_ext *page_ext; - if (!debug_guardpage_enabled()) return; - page_ext = lookup_page_ext(page); - if (unlikely(!page_ext)) - return; - - __clear_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags); + __ClearPageGuard(page); set_page_private(page, 0); if (!is_migrate_isolate(migratetype)) __mod_zone_freepage_state(zone, (1 << order), migratetype); } #else -struct page_ext_operations debug_guardpage_ops; static inline bool set_page_guard(struct zone *zone, struct page *page, unsigned int order, int migratetype) { return false; } static inline void clear_page_guard(struct zone *zone, struct page *page, @@ -1090,6 +1116,14 @@ out: return ret; } +static void kernel_init_free_pages(struct page *page, int numpages) +{ + int i; + + for (i = 0; i < numpages; i++) + clear_highpage(page + i); +} + static __always_inline bool free_pages_prepare(struct page *page, unsigned int order, bool check_free) { @@ -1141,6 +1175,9 @@ static __always_inline bool free_pages_prepare(struct page *page, PAGE_SIZE << order); } arch_free_page(page, order); + if (want_init_on_free()) + kernel_init_free_pages(page, 1 << order); + kernel_poison_pages(page, 1 << order, 0); if (debug_pagealloc_enabled()) kernel_map_pages(page, 1 << order, 0); @@ -1151,19 +1188,36 @@ static __always_inline bool free_pages_prepare(struct page *page, } #ifdef CONFIG_DEBUG_VM -static inline bool free_pcp_prepare(struct page *page) +/* + * With DEBUG_VM enabled, order-0 pages are checked immediately when being freed + * to pcp lists. With debug_pagealloc also enabled, they are also rechecked when + * moved from pcp lists to free lists. + */ +static bool free_pcp_prepare(struct page *page) { return free_pages_prepare(page, 0, true); } -static inline bool bulkfree_pcp_prepare(struct page *page) +static bool bulkfree_pcp_prepare(struct page *page) { - return false; + if (debug_pagealloc_enabled()) + return free_pages_check(page); + else + return false; } #else +/* + * With DEBUG_VM disabled, order-0 pages being freed are checked only when + * moving from pcp lists to free list in order to reduce overhead. With + * debug_pagealloc enabled, they are checked also immediately when being freed + * to the pcp lists. + */ static bool free_pcp_prepare(struct page *page) { - return free_pages_prepare(page, 0, false); + if (debug_pagealloc_enabled()) + return free_pages_prepare(page, 0, true); + else + return free_pages_prepare(page, 0, false); } static bool bulkfree_pcp_prepare(struct page *page) @@ -1904,6 +1958,10 @@ void __init page_alloc_init_late(void) for_each_populated_zone(zone) set_zone_contiguous(zone); + +#ifdef CONFIG_DEBUG_PAGEALLOC + init_debug_guardpage(); +#endif } #ifdef CONFIG_CMA @@ -2021,28 +2079,44 @@ static inline int check_new_page(struct page *page) static inline bool free_pages_prezeroed(void) { - return IS_ENABLED(CONFIG_PAGE_POISONING_ZERO) && - page_poisoning_enabled(); + return (IS_ENABLED(CONFIG_PAGE_POISONING_ZERO) && + page_poisoning_enabled()) || want_init_on_free(); } #ifdef CONFIG_DEBUG_VM -static bool check_pcp_refill(struct page *page) +/* + * With DEBUG_VM enabled, order-0 pages are checked for expected state when + * being allocated from pcp lists. With debug_pagealloc also enabled, they are + * also checked when pcp lists are refilled from the free lists. + */ +static inline bool check_pcp_refill(struct page *page) { - return false; + if (debug_pagealloc_enabled()) + return check_new_page(page); + else + return false; } -static bool check_new_pcp(struct page *page) +static inline bool check_new_pcp(struct page *page) { return check_new_page(page); } #else -static bool check_pcp_refill(struct page *page) +/* + * With DEBUG_VM disabled, free order-0 pages are checked for expected state + * when pcp lists are being refilled from the free lists. With debug_pagealloc + * enabled, they are also checked when being allocated from the pcp lists. + */ +static inline bool check_pcp_refill(struct page *page) { return check_new_page(page); } -static bool check_new_pcp(struct page *page) +static inline bool check_new_pcp(struct page *page) { - return false; + if (debug_pagealloc_enabled()) + return check_new_page(page); + else + return false; } #endif /* CONFIG_DEBUG_VM */ @@ -2076,13 +2150,10 @@ inline void post_alloc_hook(struct page *page, unsigned int order, static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags, unsigned int alloc_flags) { - int i; - post_alloc_hook(page, order, gfp_flags); - if (!free_pages_prezeroed() && (gfp_flags & __GFP_ZERO)) - for (i = 0; i < (1 << order); i++) - clear_highpage(page + i); + if (!free_pages_prezeroed() && want_init_on_alloc(gfp_flags)) + kernel_init_free_pages(page, 1 << order); if (order && (gfp_flags & __GFP_COMP)) prep_compound_page(page, order); @@ -7520,10 +7591,28 @@ static int page_alloc_cpu_dead(unsigned int cpu) return 0; } +#ifdef CONFIG_NUMA +int hashdist = HASHDIST_DEFAULT; + +static int __init set_hashdist(char *str) +{ + if (!str) + return 0; + hashdist = simple_strtoul(str, &str, 0); + return 1; +} +__setup("hashdist=", set_hashdist); +#endif + void __init page_alloc_init(void) { int ret; +#ifdef CONFIG_NUMA + if (num_node_state(N_MEMORY) == 1) + hashdist = 0; +#endif + ret = cpuhp_setup_state_nocalls(CPUHP_PAGE_ALLOC_DEAD, "mm/page_alloc:dead", NULL, page_alloc_cpu_dead); @@ -7908,19 +7997,6 @@ out: return ret; } -#ifdef CONFIG_NUMA -int hashdist = HASHDIST_DEFAULT; - -static int __init set_hashdist(char *str) -{ - if (!str) - return 0; - hashdist = simple_strtoul(str, &str, 0); - return 1; -} -__setup("hashdist=", set_hashdist); -#endif - #ifndef __HAVE_ARCH_RESERVED_KERNEL_PAGES /* * Returns the number of pages that arch has reserved but @@ -7967,6 +8043,7 @@ void *__init alloc_large_system_hash(const char *tablename, unsigned long log2qty, size; void *table = NULL; gfp_t gfp_flags; + bool virt; /* allow the kernel cmdline to have a say */ if (!numentries) { @@ -8023,6 +8100,7 @@ void *__init alloc_large_system_hash(const char *tablename, gfp_flags = (flags & HASH_ZERO) ? GFP_ATOMIC | __GFP_ZERO : GFP_ATOMIC; do { + virt = false; size = bucketsize << log2qty; if (flags & HASH_EARLY) { if (flags & HASH_ZERO) @@ -8030,26 +8108,26 @@ void *__init alloc_large_system_hash(const char *tablename, else table = memblock_alloc_raw(size, SMP_CACHE_BYTES); - } else if (hashdist) { + } else if (get_order(size) >= MAX_ORDER || hashdist) { table = __vmalloc(size, gfp_flags, PAGE_KERNEL); + virt = true; } else { /* * If bucketsize is not a power-of-two, we may free * some pages at the end of hash table which * alloc_pages_exact() automatically does */ - if (get_order(size) < MAX_ORDER) { - table = alloc_pages_exact(size, gfp_flags); - kmemleak_alloc(table, size, 1, gfp_flags); - } + table = alloc_pages_exact(size, gfp_flags); + kmemleak_alloc(table, size, 1, gfp_flags); } } while (!table && size > PAGE_SIZE && --log2qty); if (!table) panic("Failed to allocate %s hash table\n", tablename); - pr_info("%s hash table entries: %ld (order: %d, %lu bytes)\n", - tablename, 1UL << log2qty, ilog2(size) - PAGE_SHIFT, size); + pr_info("%s hash table entries: %ld (order: %d, %lu bytes, %s)\n", + tablename, 1UL << log2qty, ilog2(size) - PAGE_SHIFT, size, + virt ? "vmalloc" : "linear"); if (_hash_shift) *_hash_shift = log2qty; |