summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/filemap.c96
-rw-r--r--mm/gup.c4
-rw-r--r--mm/internal.h27
-rw-r--r--mm/kasan/common.c2
-rw-r--r--mm/kasan/hw_tags.c66
-rw-r--r--mm/kasan/kasan.h42
-rw-r--r--mm/kasan/report.c22
-rw-r--r--mm/kasan/report_generic.c2
-rw-r--r--mm/mapping_dirty_helpers.c2
-rw-r--r--mm/mmap.c6
-rw-r--r--mm/mmu_gather.c29
-rw-r--r--mm/oom_kill.c2
-rw-r--r--mm/page_alloc.c4
-rw-r--r--mm/page_poison.c4
-rw-r--r--mm/percpu-internal.h2
-rw-r--r--mm/percpu-stats.c9
-rw-r--r--mm/percpu.c14
-rw-r--r--mm/ptdump.c2
-rw-r--r--mm/readahead.c101
-rw-r--r--mm/shuffle.c4
-rw-r--r--mm/slab.h6
-rw-r--r--mm/slub.c13
22 files changed, 379 insertions, 80 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index 43700480d897..151090fdcf29 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1433,6 +1433,67 @@ void unlock_page(struct page *page)
EXPORT_SYMBOL(unlock_page);
/**
+ * end_page_private_2 - Clear PG_private_2 and release any waiters
+ * @page: The page
+ *
+ * Clear the PG_private_2 bit on a page and wake up any sleepers waiting for
+ * this. The page ref held for PG_private_2 being set is released.
+ *
+ * This is, for example, used when a netfs page is being written to a local
+ * disk cache, thereby allowing writes to the cache for the same page to be
+ * serialised.
+ */
+void end_page_private_2(struct page *page)
+{
+ page = compound_head(page);
+ VM_BUG_ON_PAGE(!PagePrivate2(page), page);
+ clear_bit_unlock(PG_private_2, &page->flags);
+ wake_up_page_bit(page, PG_private_2);
+ put_page(page);
+}
+EXPORT_SYMBOL(end_page_private_2);
+
+/**
+ * wait_on_page_private_2 - Wait for PG_private_2 to be cleared on a page
+ * @page: The page to wait on
+ *
+ * Wait for PG_private_2 (aka PG_fscache) to be cleared on a page.
+ */
+void wait_on_page_private_2(struct page *page)
+{
+ page = compound_head(page);
+ while (PagePrivate2(page))
+ wait_on_page_bit(page, PG_private_2);
+}
+EXPORT_SYMBOL(wait_on_page_private_2);
+
+/**
+ * wait_on_page_private_2_killable - Wait for PG_private_2 to be cleared on a page
+ * @page: The page to wait on
+ *
+ * Wait for PG_private_2 (aka PG_fscache) to be cleared on a page or until a
+ * fatal signal is received by the calling task.
+ *
+ * Return:
+ * - 0 if successful.
+ * - -EINTR if a fatal signal was encountered.
+ */
+int wait_on_page_private_2_killable(struct page *page)
+{
+ int ret = 0;
+
+ page = compound_head(page);
+ while (PagePrivate2(page)) {
+ ret = wait_on_page_bit_killable(page, PG_private_2);
+ if (ret < 0)
+ break;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL(wait_on_page_private_2_killable);
+
+/**
* end_page_writeback - end writeback against a page
* @page: the page
*/
@@ -1969,8 +2030,14 @@ unlock:
put:
put_page(page);
next:
- if (!xa_is_value(page) && PageTransHuge(page))
- xas_set(&xas, page->index + thp_nr_pages(page));
+ if (!xa_is_value(page) && PageTransHuge(page)) {
+ unsigned int nr_pages = thp_nr_pages(page);
+
+ /* Final THP may cross MAX_LFS_FILESIZE on 32-bit */
+ xas_set(&xas, page->index + nr_pages);
+ if (xas.xa_index < nr_pages)
+ break;
+ }
}
rcu_read_unlock();
@@ -2672,7 +2739,7 @@ loff_t mapping_seek_hole_data(struct address_space *mapping, loff_t start,
loff_t end, int whence)
{
XA_STATE(xas, &mapping->i_pages, start >> PAGE_SHIFT);
- pgoff_t max = (end - 1) / PAGE_SIZE;
+ pgoff_t max = (end - 1) >> PAGE_SHIFT;
bool seek_data = (whence == SEEK_DATA);
struct page *page;
@@ -2681,7 +2748,8 @@ loff_t mapping_seek_hole_data(struct address_space *mapping, loff_t start,
rcu_read_lock();
while ((page = find_get_entry(&xas, max, XA_PRESENT))) {
- loff_t pos = xas.xa_index * PAGE_SIZE;
+ loff_t pos = (u64)xas.xa_index << PAGE_SHIFT;
+ unsigned int seek_size;
if (start < pos) {
if (!seek_data)
@@ -2689,25 +2757,25 @@ loff_t mapping_seek_hole_data(struct address_space *mapping, loff_t start,
start = pos;
}
- pos += seek_page_size(&xas, page);
+ seek_size = seek_page_size(&xas, page);
+ pos = round_up(pos + 1, seek_size);
start = page_seek_hole_data(&xas, mapping, page, start, pos,
seek_data);
if (start < pos)
goto unlock;
+ if (start >= end)
+ break;
+ if (seek_size > PAGE_SIZE)
+ xas_set(&xas, pos >> PAGE_SHIFT);
if (!xa_is_value(page))
put_page(page);
}
- rcu_read_unlock();
-
if (seek_data)
- return -ENXIO;
- goto out;
-
+ start = -ENXIO;
unlock:
rcu_read_unlock();
- if (!xa_is_value(page))
+ if (page && !xa_is_value(page))
put_page(page);
-out:
if (start > end)
return end;
return start;
@@ -2771,7 +2839,7 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
struct file *file = vmf->vma->vm_file;
struct file_ra_state *ra = &file->f_ra;
struct address_space *mapping = file->f_mapping;
- DEFINE_READAHEAD(ractl, file, mapping, vmf->pgoff);
+ DEFINE_READAHEAD(ractl, file, ra, mapping, vmf->pgoff);
struct file *fpin = NULL;
unsigned int mmap_miss;
@@ -2783,7 +2851,7 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
if (vmf->vma->vm_flags & VM_SEQ_READ) {
fpin = maybe_unlock_mmap_for_io(vmf, fpin);
- page_cache_sync_ra(&ractl, ra, ra->ra_pages);
+ page_cache_sync_ra(&ractl, ra->ra_pages);
return fpin;
}
diff --git a/mm/gup.c b/mm/gup.c
index e40579624f10..ef7d2da9f03f 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1535,6 +1535,10 @@ struct page *get_dump_page(unsigned long addr)
FOLL_FORCE | FOLL_DUMP | FOLL_GET);
if (locked)
mmap_read_unlock(mm);
+
+ if (ret == 1 && is_page_poisoned(page))
+ return NULL;
+
return (ret == 1) ? page : NULL;
}
#endif /* CONFIG_ELF_CORE */
diff --git a/mm/internal.h b/mm/internal.h
index 1432feec62df..bbe900f9f095 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -51,13 +51,12 @@ void unmap_page_range(struct mmu_gather *tlb,
void do_page_cache_ra(struct readahead_control *, unsigned long nr_to_read,
unsigned long lookahead_size);
-void force_page_cache_ra(struct readahead_control *, struct file_ra_state *,
- unsigned long nr);
+void force_page_cache_ra(struct readahead_control *, unsigned long nr);
static inline void force_page_cache_readahead(struct address_space *mapping,
struct file *file, pgoff_t index, unsigned long nr_to_read)
{
- DEFINE_READAHEAD(ractl, file, mapping, index);
- force_page_cache_ra(&ractl, &file->f_ra, nr_to_read);
+ DEFINE_READAHEAD(ractl, file, &file->f_ra, mapping, index);
+ force_page_cache_ra(&ractl, nr_to_read);
}
unsigned find_lock_entries(struct address_space *mapping, pgoff_t start,
@@ -97,6 +96,26 @@ static inline void set_page_refcounted(struct page *page)
set_page_count(page, 1);
}
+/*
+ * When kernel touch the user page, the user page may be have been marked
+ * poison but still mapped in user space, if without this page, the kernel
+ * can guarantee the data integrity and operation success, the kernel is
+ * better to check the posion status and avoid touching it, be good not to
+ * panic, coredump for process fatal signal is a sample case matching this
+ * scenario. Or if kernel can't guarantee the data integrity, it's better
+ * not to call this function, let kernel touch the poison page and get to
+ * panic.
+ */
+static inline bool is_page_poisoned(struct page *page)
+{
+ if (PageHWPoison(page))
+ return true;
+ else if (PageHuge(page) && PageHWPoison(compound_head(page)))
+ return true;
+
+ return false;
+}
+
extern unsigned long highest_memmap_pfn;
/*
diff --git a/mm/kasan/common.c b/mm/kasan/common.c
index b5e08d4cefec..7b53291dafa1 100644
--- a/mm/kasan/common.c
+++ b/mm/kasan/common.c
@@ -63,7 +63,7 @@ void __kasan_unpoison_range(const void *address, size_t size)
kasan_unpoison(address, size);
}
-#if CONFIG_KASAN_STACK
+#ifdef CONFIG_KASAN_STACK
/* Unpoison the entire stack for a task. */
void kasan_unpoison_task_stack(struct task_struct *task)
{
diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c
index 2aad21fda156..4004388b4e4b 100644
--- a/mm/kasan/hw_tags.c
+++ b/mm/kasan/hw_tags.c
@@ -25,6 +25,12 @@ enum kasan_arg {
KASAN_ARG_ON,
};
+enum kasan_arg_mode {
+ KASAN_ARG_MODE_DEFAULT,
+ KASAN_ARG_MODE_SYNC,
+ KASAN_ARG_MODE_ASYNC,
+};
+
enum kasan_arg_stacktrace {
KASAN_ARG_STACKTRACE_DEFAULT,
KASAN_ARG_STACKTRACE_OFF,
@@ -38,6 +44,7 @@ enum kasan_arg_fault {
};
static enum kasan_arg kasan_arg __ro_after_init;
+static enum kasan_arg_mode kasan_arg_mode __ro_after_init;
static enum kasan_arg_stacktrace kasan_arg_stacktrace __ro_after_init;
static enum kasan_arg_fault kasan_arg_fault __ro_after_init;
@@ -45,6 +52,10 @@ static enum kasan_arg_fault kasan_arg_fault __ro_after_init;
DEFINE_STATIC_KEY_FALSE(kasan_flag_enabled);
EXPORT_SYMBOL(kasan_flag_enabled);
+/* Whether the asynchronous mode is enabled. */
+bool kasan_flag_async __ro_after_init;
+EXPORT_SYMBOL_GPL(kasan_flag_async);
+
/* Whether to collect alloc/free stack traces. */
DEFINE_STATIC_KEY_FALSE(kasan_flag_stacktrace);
@@ -68,6 +79,23 @@ static int __init early_kasan_flag(char *arg)
}
early_param("kasan", early_kasan_flag);
+/* kasan.mode=sync/async */
+static int __init early_kasan_mode(char *arg)
+{
+ if (!arg)
+ return -EINVAL;
+
+ if (!strcmp(arg, "sync"))
+ kasan_arg_mode = KASAN_ARG_MODE_SYNC;
+ else if (!strcmp(arg, "async"))
+ kasan_arg_mode = KASAN_ARG_MODE_ASYNC;
+ else
+ return -EINVAL;
+
+ return 0;
+}
+early_param("kasan.mode", early_kasan_mode);
+
/* kasan.stacktrace=off/on */
static int __init early_kasan_flag_stacktrace(char *arg)
{
@@ -115,7 +143,15 @@ void kasan_init_hw_tags_cpu(void)
return;
hw_init_tags(KASAN_TAG_MAX);
- hw_enable_tagging();
+
+ /*
+ * Enable async mode only when explicitly requested through
+ * the command line.
+ */
+ if (kasan_arg_mode == KASAN_ARG_MODE_ASYNC)
+ hw_enable_tagging_async();
+ else
+ hw_enable_tagging_sync();
}
/* kasan_init_hw_tags() is called once on boot CPU. */
@@ -132,6 +168,22 @@ void __init kasan_init_hw_tags(void)
/* Enable KASAN. */
static_branch_enable(&kasan_flag_enabled);
+ switch (kasan_arg_mode) {
+ case KASAN_ARG_MODE_DEFAULT:
+ /*
+ * Default to sync mode.
+ * Do nothing, kasan_flag_async keeps its default value.
+ */
+ break;
+ case KASAN_ARG_MODE_SYNC:
+ /* Do nothing, kasan_flag_async keeps its default value. */
+ break;
+ case KASAN_ARG_MODE_ASYNC:
+ /* Async mode enabled. */
+ kasan_flag_async = true;
+ break;
+ }
+
switch (kasan_arg_stacktrace) {
case KASAN_ARG_STACKTRACE_DEFAULT:
/* Default to enabling stack trace collection. */
@@ -194,10 +246,16 @@ void kasan_set_tagging_report_once(bool state)
}
EXPORT_SYMBOL_GPL(kasan_set_tagging_report_once);
-void kasan_enable_tagging(void)
+void kasan_enable_tagging_sync(void)
+{
+ hw_enable_tagging_sync();
+}
+EXPORT_SYMBOL_GPL(kasan_enable_tagging_sync);
+
+void kasan_force_async_fault(void)
{
- hw_enable_tagging();
+ hw_force_async_tag_fault();
}
-EXPORT_SYMBOL_GPL(kasan_enable_tagging);
+EXPORT_SYMBOL_GPL(kasan_force_async_fault);
#endif
diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
index 8c55634d6edd..c1581e8a9b8e 100644
--- a/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h
@@ -7,20 +7,37 @@
#include <linux/stackdepot.h>
#ifdef CONFIG_KASAN_HW_TAGS
+
#include <linux/static_key.h>
+
DECLARE_STATIC_KEY_FALSE(kasan_flag_stacktrace);
+extern bool kasan_flag_async __ro_after_init;
+
static inline bool kasan_stack_collection_enabled(void)
{
return static_branch_unlikely(&kasan_flag_stacktrace);
}
+
+static inline bool kasan_async_mode_enabled(void)
+{
+ return kasan_flag_async;
+}
#else
+
static inline bool kasan_stack_collection_enabled(void)
{
return true;
}
+
+static inline bool kasan_async_mode_enabled(void)
+{
+ return false;
+}
+
#endif
extern bool kasan_flag_panic __ro_after_init;
+extern bool kasan_flag_async __ro_after_init;
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
#define KASAN_GRANULE_SIZE (1UL << KASAN_SHADOW_SCALE_SHIFT)
@@ -231,7 +248,7 @@ void *kasan_find_first_bad_addr(void *addr, size_t size);
const char *kasan_get_bug_type(struct kasan_access_info *info);
void kasan_metadata_fetch_row(char *buffer, void *row);
-#if defined(CONFIG_KASAN_GENERIC) && CONFIG_KASAN_STACK
+#if defined(CONFIG_KASAN_GENERIC) && defined(CONFIG_KASAN_STACK)
void kasan_print_address_stack_frame(const void *addr);
#else
static inline void kasan_print_address_stack_frame(const void *addr) { }
@@ -275,8 +292,11 @@ static inline const void *arch_kasan_set_tag(const void *addr, u8 tag)
#ifdef CONFIG_KASAN_HW_TAGS
-#ifndef arch_enable_tagging
-#define arch_enable_tagging()
+#ifndef arch_enable_tagging_sync
+#define arch_enable_tagging_sync()
+#endif
+#ifndef arch_enable_tagging_async
+#define arch_enable_tagging_async()
#endif
#ifndef arch_init_tags
#define arch_init_tags(max_tag)
@@ -284,6 +304,9 @@ static inline const void *arch_kasan_set_tag(const void *addr, u8 tag)
#ifndef arch_set_tagging_report_once
#define arch_set_tagging_report_once(state)
#endif
+#ifndef arch_force_async_tag_fault
+#define arch_force_async_tag_fault()
+#endif
#ifndef arch_get_random_tag
#define arch_get_random_tag() (0xFF)
#endif
@@ -294,16 +317,19 @@ static inline const void *arch_kasan_set_tag(const void *addr, u8 tag)
#define arch_set_mem_tag_range(addr, size, tag) ((void *)(addr))
#endif
-#define hw_enable_tagging() arch_enable_tagging()
+#define hw_enable_tagging_sync() arch_enable_tagging_sync()
+#define hw_enable_tagging_async() arch_enable_tagging_async()
#define hw_init_tags(max_tag) arch_init_tags(max_tag)
#define hw_set_tagging_report_once(state) arch_set_tagging_report_once(state)
+#define hw_force_async_tag_fault() arch_force_async_tag_fault()
#define hw_get_random_tag() arch_get_random_tag()
#define hw_get_mem_tag(addr) arch_get_mem_tag(addr)
#define hw_set_mem_tag_range(addr, size, tag) arch_set_mem_tag_range((addr), (size), (tag))
#else /* CONFIG_KASAN_HW_TAGS */
-#define hw_enable_tagging()
+#define hw_enable_tagging_sync()
+#define hw_enable_tagging_async()
#define hw_set_tagging_report_once(state)
#endif /* CONFIG_KASAN_HW_TAGS */
@@ -311,12 +337,14 @@ static inline const void *arch_kasan_set_tag(const void *addr, u8 tag)
#if defined(CONFIG_KASAN_HW_TAGS) && IS_ENABLED(CONFIG_KASAN_KUNIT_TEST)
void kasan_set_tagging_report_once(bool state);
-void kasan_enable_tagging(void);
+void kasan_enable_tagging_sync(void);
+void kasan_force_async_fault(void);
#else /* CONFIG_KASAN_HW_TAGS || CONFIG_KASAN_KUNIT_TEST */
static inline void kasan_set_tagging_report_once(bool state) { }
-static inline void kasan_enable_tagging(void) { }
+static inline void kasan_enable_tagging_sync(void) { }
+static inline void kasan_force_async_fault(void) { }
#endif /* CONFIG_KASAN_HW_TAGS || CONFIG_KASAN_KUNIT_TEST */
diff --git a/mm/kasan/report.c b/mm/kasan/report.c
index 87b271206163..14bd51ea2348 100644
--- a/mm/kasan/report.c
+++ b/mm/kasan/report.c
@@ -87,7 +87,8 @@ static void start_report(unsigned long *flags)
static void end_report(unsigned long *flags, unsigned long addr)
{
- trace_error_report_end(ERROR_DETECTOR_KASAN, addr);
+ if (!kasan_async_mode_enabled())
+ trace_error_report_end(ERROR_DETECTOR_KASAN, addr);
pr_err("==================================================================\n");
add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
spin_unlock_irqrestore(&report_lock, *flags);
@@ -360,6 +361,25 @@ void kasan_report_invalid_free(void *object, unsigned long ip)
end_report(&flags, (unsigned long)object);
}
+#ifdef CONFIG_KASAN_HW_TAGS
+void kasan_report_async(void)
+{
+ unsigned long flags;
+
+#if IS_ENABLED(CONFIG_KUNIT)
+ if (current->kunit_test)
+ kasan_update_kunit_status(current->kunit_test);
+#endif /* IS_ENABLED(CONFIG_KUNIT) */
+
+ start_report(&flags);
+ pr_err("BUG: KASAN: invalid-access\n");
+ pr_err("Asynchronous mode enabled: no access details available\n");
+ pr_err("\n");
+ dump_stack();
+ end_report(&flags, 0);
+}
+#endif /* CONFIG_KASAN_HW_TAGS */
+
static void __kasan_report(unsigned long addr, size_t size, bool is_write,
unsigned long ip)
{
diff --git a/mm/kasan/report_generic.c b/mm/kasan/report_generic.c
index 41f374585144..de732bc341c5 100644
--- a/mm/kasan/report_generic.c
+++ b/mm/kasan/report_generic.c
@@ -128,7 +128,7 @@ void kasan_metadata_fetch_row(char *buffer, void *row)
memcpy(buffer, kasan_mem_to_shadow(row), META_BYTES_PER_ROW);
}
-#if CONFIG_KASAN_STACK
+#ifdef CONFIG_KASAN_STACK
static bool __must_check tokenize_frame_descr(const char **frame_descr,
char *token, size_t max_tok_len,
unsigned long *value)
diff --git a/mm/mapping_dirty_helpers.c b/mm/mapping_dirty_helpers.c
index b59054ef2e10..b890854ec761 100644
--- a/mm/mapping_dirty_helpers.c
+++ b/mm/mapping_dirty_helpers.c
@@ -165,10 +165,12 @@ static int wp_clean_pud_entry(pud_t *pud, unsigned long addr, unsigned long end,
return 0;
}
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
/* Huge pud */
walk->action = ACTION_CONTINUE;
if (pud_trans_huge(pudval) || pud_devmap(pudval))
WARN_ON(pud_write(pudval) || pud_dirty(pudval));
+#endif
return 0;
}
diff --git a/mm/mmap.c b/mm/mmap.c
index 3f287599a7a3..1d96a21acb2f 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -93,6 +93,12 @@ static void unmap_region(struct mm_struct *mm,
* MAP_PRIVATE r: (no) no r: (yes) yes r: (no) yes r: (no) yes
* w: (no) no w: (no) no w: (copy) copy w: (no) no
* x: (no) no x: (no) yes x: (no) yes x: (yes) yes
+ *
+ * On arm64, PROT_EXEC has the following behaviour for both MAP_SHARED and
+ * MAP_PRIVATE (with Enhanced PAN supported):
+ * r: (no) no
+ * w: (no) no
+ * x: (yes) yes
*/
pgprot_t protection_map[16] __ro_after_init = {
__P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c
index 0dc7149b0c61..1b9837419bf9 100644
--- a/mm/mmu_gather.c
+++ b/mm/mmu_gather.c
@@ -249,16 +249,6 @@ void tlb_flush_mmu(struct mmu_gather *tlb)
tlb_flush_mmu_free(tlb);
}
-/**
- * tlb_gather_mmu - initialize an mmu_gather structure for page-table tear-down
- * @tlb: the mmu_gather structure to initialize
- * @mm: the mm_struct of the target address space
- * @fullmm: @mm is without users and we're going to destroy the full address
- * space (exit/execve)
- *
- * Called to initialize an (on-stack) mmu_gather structure for page-table
- * tear-down from @mm.
- */
static void __tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
bool fullmm)
{
@@ -283,11 +273,30 @@ static void __tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
inc_tlb_flush_pending(tlb->mm);
}
+/**
+ * tlb_gather_mmu - initialize an mmu_gather structure for page-table tear-down
+ * @tlb: the mmu_gather structure to initialize
+ * @mm: the mm_struct of the target address space
+ *
+ * Called to initialize an (on-stack) mmu_gather structure for page-table
+ * tear-down from @mm.
+ */
void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm)
{
__tlb_gather_mmu(tlb, mm, false);
}
+/**
+ * tlb_gather_mmu_fullmm - initialize an mmu_gather structure for page-table tear-down
+ * @tlb: the mmu_gather structure to initialize
+ * @mm: the mm_struct of the target address space
+ *
+ * In this case, @mm is without users and we're going to destroy the
+ * full address space (exit/execve).
+ *
+ * Called to initialize an (on-stack) mmu_gather structure for page-table
+ * tear-down from @mm.
+ */
void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm)
{
__tlb_gather_mmu(tlb, mm, true);
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 9efaf430cfd3..fa1cf18bac97 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -170,7 +170,7 @@ static bool oom_unkillable_task(struct task_struct *p)
return false;
}
-/**
+/*
* Check whether unreclaimable slab amount is greater than
* all user memory(LRU pages).
* dump_unreclaimable_slab() could help in the case that
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index cfc72873961d..e2f19bf948db 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -167,10 +167,10 @@ unsigned long totalcma_pages __read_mostly;
int percpu_pagelist_fraction;
gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;
-DEFINE_STATIC_KEY_FALSE(init_on_alloc);
+DEFINE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_ALLOC_DEFAULT_ON, init_on_alloc);
EXPORT_SYMBOL(init_on_alloc);
-DEFINE_STATIC_KEY_FALSE(init_on_free);
+DEFINE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_FREE_DEFAULT_ON, init_on_free);
EXPORT_SYMBOL(init_on_free);
static bool _init_on_alloc_enabled_early __read_mostly
diff --git a/mm/page_poison.c b/mm/page_poison.c
index 65cdf844c8ad..655dc5895604 100644
--- a/mm/page_poison.c
+++ b/mm/page_poison.c
@@ -77,12 +77,14 @@ static void unpoison_page(struct page *page)
void *addr;
addr = kmap_atomic(page);
+ kasan_disable_current();
/*
* Page poisoning when enabled poisons each and every page
* that is freed to buddy. Thus no extra check is done to
* see if a page was poisoned.
*/
- check_poison_mem(addr, PAGE_SIZE);
+ check_poison_mem(kasan_reset_tag(addr), PAGE_SIZE);
+ kasan_enable_current();
kunmap_atomic(addr);
}
diff --git a/mm/percpu-internal.h b/mm/percpu-internal.h
index 18b768ac7dca..095d7eaa0db4 100644
--- a/mm/percpu-internal.h
+++ b/mm/percpu-internal.h
@@ -87,7 +87,7 @@ extern spinlock_t pcpu_lock;
extern struct list_head *pcpu_chunk_lists;
extern int pcpu_nr_slots;
-extern int pcpu_nr_empty_pop_pages;
+extern int pcpu_nr_empty_pop_pages[];
extern struct pcpu_chunk *pcpu_first_chunk;
extern struct pcpu_chunk *pcpu_reserved_chunk;
diff --git a/mm/percpu-stats.c b/mm/percpu-stats.c
index c8400a2adbc2..f6026dbcdf6b 100644
--- a/mm/percpu-stats.c
+++ b/mm/percpu-stats.c
@@ -145,6 +145,7 @@ static int percpu_stats_show(struct seq_file *m, void *v)
int slot, max_nr_alloc;
int *buffer;
enum pcpu_chunk_type type;
+ int nr_empty_pop_pages;
alloc_buffer:
spin_lock_irq(&pcpu_lock);
@@ -165,7 +166,11 @@ alloc_buffer:
goto alloc_buffer;
}
-#define PL(X) \
+ nr_empty_pop_pages = 0;
+ for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++)
+ nr_empty_pop_pages += pcpu_nr_empty_pop_pages[type];
+
+#define PL(X) \
seq_printf(m, " %-20s: %12lld\n", #X, (long long int)pcpu_stats_ai.X)
seq_printf(m,
@@ -196,7 +201,7 @@ alloc_buffer:
PU(nr_max_chunks);
PU(min_alloc_size);
PU(max_alloc_size);
- P("empty_pop_pages", pcpu_nr_empty_pop_pages);
+ P("empty_pop_pages", nr_empty_pop_pages);
seq_putc(m, '\n');
#undef PU
diff --git a/mm/percpu.c b/mm/percpu.c
index 6596a0a4286e..23308113a5ff 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -173,10 +173,10 @@ struct list_head *pcpu_chunk_lists __ro_after_init; /* chunk list slots */
static LIST_HEAD(pcpu_map_extend_chunks);
/*
- * The number of empty populated pages, protected by pcpu_lock. The
- * reserved chunk doesn't contribute to the count.
+ * The number of empty populated pages by chunk type, protected by pcpu_lock.
+ * The reserved chunk doesn't contribute to the count.
*/
-int pcpu_nr_empty_pop_pages;
+int pcpu_nr_empty_pop_pages[PCPU_NR_CHUNK_TYPES];
/*
* The number of populated pages in use by the allocator, protected by
@@ -556,7 +556,7 @@ static inline void pcpu_update_empty_pages(struct pcpu_chunk *chunk, int nr)
{
chunk->nr_empty_pop_pages += nr;
if (chunk != pcpu_reserved_chunk)
- pcpu_nr_empty_pop_pages += nr;
+ pcpu_nr_empty_pop_pages[pcpu_chunk_type(chunk)] += nr;
}
/*
@@ -1832,7 +1832,7 @@ area_found:
mutex_unlock(&pcpu_alloc_mutex);
}
- if (pcpu_nr_empty_pop_pages < PCPU_EMPTY_POP_PAGES_LOW)
+ if (pcpu_nr_empty_pop_pages[type] < PCPU_EMPTY_POP_PAGES_LOW)
pcpu_schedule_balance_work();
/* clear the areas and return address relative to base address */
@@ -2000,7 +2000,7 @@ retry_pop:
pcpu_atomic_alloc_failed = false;
} else {
nr_to_pop = clamp(PCPU_EMPTY_POP_PAGES_HIGH -
- pcpu_nr_empty_pop_pages,
+ pcpu_nr_empty_pop_pages[type],
0, PCPU_EMPTY_POP_PAGES_HIGH);
}
@@ -2580,7 +2580,7 @@ void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
/* link the first chunk in */
pcpu_first_chunk = chunk;
- pcpu_nr_empty_pop_pages = pcpu_first_chunk->nr_empty_pop_pages;
+ pcpu_nr_empty_pop_pages[PCPU_CHUNK_ROOT] = pcpu_first_chunk->nr_empty_pop_pages;
pcpu_chunk_relocate(pcpu_first_chunk, -1);
/* include all regions of the first chunk */
diff --git a/mm/ptdump.c b/mm/ptdump.c
index 4354c1422d57..da751448d0e4 100644
--- a/mm/ptdump.c
+++ b/mm/ptdump.c
@@ -111,7 +111,7 @@ static int ptdump_pte_entry(pte_t *pte, unsigned long addr,
unsigned long next, struct mm_walk *walk)
{
struct ptdump_state *st = walk->private;
- pte_t val = READ_ONCE(*pte);
+ pte_t val = ptep_get(pte);
if (st->effective_prot)
st->effective_prot(st, 4, pte_val(val));
diff --git a/mm/readahead.c b/mm/readahead.c
index c5b0457415be..d589f147f4c2 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -198,8 +198,6 @@ void page_cache_ra_unbounded(struct readahead_control *ractl,
for (i = 0; i < nr_to_read; i++) {
struct page *page = xa_load(&mapping->i_pages, index + i);
- BUG_ON(index + i != ractl->_index + ractl->_nr_pages);
-
if (page && !xa_is_value(page)) {
/*
* Page already present? Kick off the current batch
@@ -210,6 +208,7 @@ void page_cache_ra_unbounded(struct readahead_control *ractl,
* not worth getting one just for that.
*/
read_pages(ractl, &page_pool, true);
+ i = ractl->_index + ractl->_nr_pages - index - 1;
continue;
}
@@ -223,6 +222,7 @@ void page_cache_ra_unbounded(struct readahead_control *ractl,
gfp_mask) < 0) {
put_page(page);
read_pages(ractl, &page_pool, true);
+ i = ractl->_index + ractl->_nr_pages - index - 1;
continue;
}
if (i == nr_to_read - lookahead_size)
@@ -272,9 +272,10 @@ void do_page_cache_ra(struct readahead_control *ractl,
* memory at once.
*/
void force_page_cache_ra(struct readahead_control *ractl,
- struct file_ra_state *ra, unsigned long nr_to_read)
+ unsigned long nr_to_read)
{
struct address_space *mapping = ractl->mapping;
+ struct file_ra_state *ra = ractl->ra;
struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
unsigned long max_pages, index;
@@ -433,10 +434,10 @@ static int try_context_readahead(struct address_space *mapping,
* A minimal readahead algorithm for trivial sequential/random reads.
*/
static void ondemand_readahead(struct readahead_control *ractl,
- struct file_ra_state *ra, bool hit_readahead_marker,
- unsigned long req_size)
+ bool hit_readahead_marker, unsigned long req_size)
{
struct backing_dev_info *bdi = inode_to_bdi(ractl->mapping->host);
+ struct file_ra_state *ra = ractl->ra;
unsigned long max_pages = ra->ra_pages;
unsigned long add_pages;
unsigned long index = readahead_index(ractl);
@@ -550,7 +551,7 @@ readit:
}
void page_cache_sync_ra(struct readahead_control *ractl,
- struct file_ra_state *ra, unsigned long req_count)
+ unsigned long req_count)
{
bool do_forced_ra = ractl->file && (ractl->file->f_mode & FMODE_RANDOM);
@@ -560,7 +561,7 @@ void page_cache_sync_ra(struct readahead_control *ractl,
* read-ahead will do the right thing and limit the read to just the
* requested range, which we'll set to 1 page for this case.
*/
- if (!ra->ra_pages || blk_cgroup_congested()) {
+ if (!ractl->ra->ra_pages || blk_cgroup_congested()) {
if (!ractl->file)
return;
req_count = 1;
@@ -569,21 +570,20 @@ void page_cache_sync_ra(struct readahead_control *ractl,
/* be dumb */
if (do_forced_ra) {
- force_page_cache_ra(ractl, ra, req_count);
+ force_page_cache_ra(ractl, req_count);
return;
}
/* do read-ahead */
- ondemand_readahead(ractl, ra, false, req_count);
+ ondemand_readahead(ractl, false, req_count);
}
EXPORT_SYMBOL_GPL(page_cache_sync_ra);
void page_cache_async_ra(struct readahead_control *ractl,
- struct file_ra_state *ra, struct page *page,
- unsigned long req_count)
+ struct page *page, unsigned long req_count)
{
/* no read-ahead */
- if (!ra->ra_pages)
+ if (!ractl->ra->ra_pages)
return;
/*
@@ -604,7 +604,7 @@ void page_cache_async_ra(struct readahead_control *ractl,
return;
/* do read-ahead */
- ondemand_readahead(ractl, ra, true, req_count);
+ ondemand_readahead(ractl, true, req_count);
}
EXPORT_SYMBOL_GPL(page_cache_async_ra);
@@ -638,3 +638,78 @@ SYSCALL_DEFINE3(readahead, int, fd, loff_t, offset, size_t, count)
{
return ksys_readahead(fd, offset, count);
}
+
+/**
+ * readahead_expand - Expand a readahead request
+ * @ractl: The request to be expanded
+ * @new_start: The revised start
+ * @new_len: The revised size of the request
+ *
+ * Attempt to expand a readahead request outwards from the current size to the
+ * specified size by inserting locked pages before and after the current window
+ * to increase the size to the new window. This may involve the insertion of
+ * THPs, in which case the window may get expanded even beyond what was
+ * requested.
+ *
+ * The algorithm will stop if it encounters a conflicting page already in the
+ * pagecache and leave a smaller expansion than requested.
+ *
+ * The caller must check for this by examining the revised @ractl object for a
+ * different expansion than was requested.
+ */
+void readahead_expand(struct readahead_control *ractl,
+ loff_t new_start, size_t new_len)
+{
+ struct address_space *mapping = ractl->mapping;
+ struct file_ra_state *ra = ractl->ra;
+ pgoff_t new_index, new_nr_pages;
+ gfp_t gfp_mask = readahead_gfp_mask(mapping);
+
+ new_index = new_start / PAGE_SIZE;
+
+ /* Expand the leading edge downwards */
+ while (ractl->_index > new_index) {
+ unsigned long index = ractl->_index - 1;
+ struct page *page = xa_load(&mapping->i_pages, index);
+
+ if (page && !xa_is_value(page))
+ return; /* Page apparently present */
+
+ page = __page_cache_alloc(gfp_mask);
+ if (!page)
+ return;
+ if (add_to_page_cache_lru(page, mapping, index, gfp_mask) < 0) {
+ put_page(page);
+ return;
+ }
+
+ ractl->_nr_pages++;
+ ractl->_index = page->index;
+ }
+
+ new_len += new_start - readahead_pos(ractl);
+ new_nr_pages = DIV_ROUND_UP(new_len, PAGE_SIZE);
+
+ /* Expand the trailing edge upwards */
+ while (ractl->_nr_pages < new_nr_pages) {
+ unsigned long index = ractl->_index + ractl->_nr_pages;
+ struct page *page = xa_load(&mapping->i_pages, index);
+
+ if (page && !xa_is_value(page))
+ return; /* Page apparently present */
+
+ page = __page_cache_alloc(gfp_mask);
+ if (!page)
+ return;
+ if (add_to_page_cache_lru(page, mapping, index, gfp_mask) < 0) {
+ put_page(page);
+ return;
+ }
+ ractl->_nr_pages++;
+ if (ra) {
+ ra->size++;
+ ra->async_size++;
+ }
+ }
+}
+EXPORT_SYMBOL(readahead_expand);
diff --git a/mm/shuffle.c b/mm/shuffle.c
index 9c2e145a747a..c13c33b247e8 100644
--- a/mm/shuffle.c
+++ b/mm/shuffle.c
@@ -147,8 +147,8 @@ void __meminit __shuffle_zone(struct zone *z)
spin_unlock_irqrestore(&z->lock, flags);
}
-/**
- * shuffle_free_memory - reduce the predictability of the page allocator
+/*
+ * __shuffle_free_memory - reduce the predictability of the page allocator
* @pgdat: node page data
*/
void __meminit __shuffle_free_memory(pg_data_t *pgdat)
diff --git a/mm/slab.h b/mm/slab.h
index 120b1d0dfb6d..c30ed35b3d5d 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -601,7 +601,8 @@ static inline void cache_random_seq_destroy(struct kmem_cache *cachep) { }
static inline bool slab_want_init_on_alloc(gfp_t flags, struct kmem_cache *c)
{
- if (static_branch_unlikely(&init_on_alloc)) {
+ if (static_branch_maybe(CONFIG_INIT_ON_ALLOC_DEFAULT_ON,
+ &init_on_alloc)) {
if (c->ctor)
return false;
if (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON))
@@ -613,7 +614,8 @@ static inline bool slab_want_init_on_alloc(gfp_t flags, struct kmem_cache *c)
static inline bool slab_want_init_on_free(struct kmem_cache *c)
{
- if (static_branch_unlikely(&init_on_free))
+ if (static_branch_maybe(CONFIG_INIT_ON_FREE_DEFAULT_ON,
+ &init_on_free))
return !(c->ctor ||
(c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)));
return false;
diff --git a/mm/slub.c b/mm/slub.c
index cc4dd18c3548..722f95e1ea0b 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -624,7 +624,7 @@ static void print_track(const char *s, struct track *t, unsigned long pr_time)
if (!t->addr)
return;
- pr_err("INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
+ pr_err("%s in %pS age=%lu cpu=%u pid=%d\n",
s, (void *)t->addr, pr_time - t->when, t->cpu, t->pid);
#ifdef CONFIG_STACKTRACE
{
@@ -650,8 +650,9 @@ void print_tracking(struct kmem_cache *s, void *object)
static void print_page_info(struct page *page)
{
- pr_err("INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=0x%04lx\n",
- page, page->objects, page->inuse, page->freelist, page->flags);
+ pr_err("Slab 0x%p objects=%u used=%u fp=0x%p flags=%#lx(%pGp)\n",
+ page, page->objects, page->inuse, page->freelist,
+ page->flags, &page->flags);
}
@@ -706,7 +707,7 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
print_page_info(page);
- pr_err("INFO: Object 0x%p @offset=%tu fp=0x%p\n\n",
+ pr_err("Object 0x%p @offset=%tu fp=0x%p\n\n",
p, p - addr, get_freepointer(s, p));
if (s->flags & SLAB_RED_ZONE)
@@ -799,7 +800,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
end--;
slab_bug(s, "%s overwritten", what);
- pr_err("INFO: 0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n",
+ pr_err("0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n",
fault, end - 1, fault - addr,
fault[0], value);
print_trailer(s, page, object);
@@ -3898,7 +3899,7 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page,
for_each_object(p, s, addr, page->objects) {
if (!test_bit(__obj_to_index(s, addr, p), map)) {
- pr_err("INFO: Object 0x%p @offset=%tu\n", p, p - addr);
+ pr_err("Object 0x%p @offset=%tu\n", p, p - addr);
print_tracking(s, p);
}
}