summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/Kconfig16
-rw-r--r--mm/damon/core.c3
-rw-r--r--mm/damon/sysfs-schemes.c54
-rw-r--r--mm/damon/sysfs.c6
-rw-r--r--mm/filemap.c4
-rw-r--r--mm/huge_memory.c16
-rw-r--r--mm/hugetlb.c7
-rw-r--r--mm/kmemleak.c40
-rw-r--r--mm/ksm.c2
-rw-r--r--mm/madvise.c11
-rw-r--r--mm/memcontrol.c5
-rw-r--r--mm/memory.c1
-rw-r--r--mm/memory_hotplug.c15
-rw-r--r--mm/page-writeback.c2
-rw-r--r--mm/userfaultfd.c2
-rw-r--r--mm/util.c10
16 files changed, 138 insertions, 56 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index 89971a894b60..57cd378c73d6 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -1201,13 +1201,6 @@ config ANON_VMA_NAME
area from being merged with adjacent virtual memory areas due to the
difference in their name.
-config USERFAULTFD
- bool "Enable userfaultfd() system call"
- depends on MMU
- help
- Enable the userfaultfd() system call that allows to intercept and
- handle page faults in userland.
-
config HAVE_ARCH_USERFAULTFD_WP
bool
help
@@ -1218,6 +1211,14 @@ config HAVE_ARCH_USERFAULTFD_MINOR
help
Arch has userfaultfd minor fault support
+menuconfig USERFAULTFD
+ bool "Enable userfaultfd() system call"
+ depends on MMU
+ help
+ Enable the userfaultfd() system call that allows to intercept and
+ handle page faults in userland.
+
+if USERFAULTFD
config PTE_MARKER_UFFD_WP
bool "Userfaultfd write protection support for shmem/hugetlbfs"
default y
@@ -1227,6 +1228,7 @@ config PTE_MARKER_UFFD_WP
Allows to create marker PTEs for userfaultfd write protection
purposes. It is required to enable userfaultfd write protection on
file-backed memory types like shmem and hugetlbfs.
+endif # USERFAULTFD
# multi-gen LRU {
config LRU_GEN
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 630077d95dc6..ce1562783e7e 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -924,7 +924,7 @@ static bool __damos_filter_out(struct damon_ctx *ctx, struct damon_target *t,
matched = true;
break;
default:
- break;
+ return false;
}
return matched == filter->matching;
@@ -1225,6 +1225,7 @@ static void damon_split_region_at(struct damon_target *t,
new->age = r->age;
new->last_nr_accesses = r->last_nr_accesses;
new->nr_accesses_bp = r->nr_accesses_bp;
+ new->nr_accesses = r->nr_accesses;
damon_insert_region(new, r, damon_next_region(r), t);
}
diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c
index 45bd0fd4a8b1..fe0fe2562000 100644
--- a/mm/damon/sysfs-schemes.c
+++ b/mm/damon/sysfs-schemes.c
@@ -139,6 +139,13 @@ static const struct kobj_type damon_sysfs_scheme_region_ktype = {
* damon_sysfs_before_damos_apply() understands the situation by showing the
* 'finished' status and do nothing.
*
+ * If DAMOS is not applied to any region due to any reasons including the
+ * access pattern, the watermarks, the quotas, and the filters,
+ * ->before_damos_apply() will not be called back. Until the situation is
+ * changed, the update will not be finished. To avoid this,
+ * damon_sysfs_after_sampling() set the status as 'finished' if more than two
+ * apply intervals of the scheme is passed while the state is 'idle'.
+ *
* Finally, the tried regions request handling finisher function
* (damon_sysfs_schemes_update_regions_stop()) unregisters the callbacks.
*/
@@ -154,6 +161,7 @@ struct damon_sysfs_scheme_regions {
int nr_regions;
unsigned long total_bytes;
enum damos_sysfs_regions_upd_status upd_status;
+ unsigned long upd_timeout_jiffies;
};
static struct damon_sysfs_scheme_regions *
@@ -162,6 +170,9 @@ damon_sysfs_scheme_regions_alloc(void)
struct damon_sysfs_scheme_regions *regions = kmalloc(sizeof(*regions),
GFP_KERNEL);
+ if (!regions)
+ return NULL;
+
regions->kobj = (struct kobject){};
INIT_LIST_HEAD(&regions->regions_list);
regions->nr_regions = 0;
@@ -1823,6 +1834,8 @@ static int damon_sysfs_before_damos_apply(struct damon_ctx *ctx,
return 0;
region = damon_sysfs_scheme_region_alloc(r);
+ if (!region)
+ return 0;
list_add_tail(&region->list, &sysfs_regions->regions_list);
sysfs_regions->nr_regions++;
if (kobject_init_and_add(&region->kobj,
@@ -1849,7 +1862,9 @@ static int damon_sysfs_after_sampling(struct damon_ctx *ctx)
for (i = 0; i < sysfs_schemes->nr; i++) {
sysfs_regions = sysfs_schemes->schemes_arr[i]->tried_regions;
if (sysfs_regions->upd_status ==
- DAMOS_TRIED_REGIONS_UPD_STARTED)
+ DAMOS_TRIED_REGIONS_UPD_STARTED ||
+ time_after(jiffies,
+ sysfs_regions->upd_timeout_jiffies))
sysfs_regions->upd_status =
DAMOS_TRIED_REGIONS_UPD_FINISHED;
}
@@ -1880,14 +1895,41 @@ int damon_sysfs_schemes_clear_regions(
return 0;
}
+static struct damos *damos_sysfs_nth_scheme(int n, struct damon_ctx *ctx)
+{
+ struct damos *scheme;
+ int i = 0;
+
+ damon_for_each_scheme(scheme, ctx) {
+ if (i == n)
+ return scheme;
+ i++;
+ }
+ return NULL;
+}
+
static void damos_tried_regions_init_upd_status(
- struct damon_sysfs_schemes *sysfs_schemes)
+ struct damon_sysfs_schemes *sysfs_schemes,
+ struct damon_ctx *ctx)
{
int i;
+ struct damos *scheme;
+ struct damon_sysfs_scheme_regions *sysfs_regions;
- for (i = 0; i < sysfs_schemes->nr; i++)
- sysfs_schemes->schemes_arr[i]->tried_regions->upd_status =
- DAMOS_TRIED_REGIONS_UPD_IDLE;
+ for (i = 0; i < sysfs_schemes->nr; i++) {
+ sysfs_regions = sysfs_schemes->schemes_arr[i]->tried_regions;
+ scheme = damos_sysfs_nth_scheme(i, ctx);
+ if (!scheme) {
+ sysfs_regions->upd_status =
+ DAMOS_TRIED_REGIONS_UPD_FINISHED;
+ continue;
+ }
+ sysfs_regions->upd_status = DAMOS_TRIED_REGIONS_UPD_IDLE;
+ sysfs_regions->upd_timeout_jiffies = jiffies +
+ 2 * usecs_to_jiffies(scheme->apply_interval_us ?
+ scheme->apply_interval_us :
+ ctx->attrs.sample_interval);
+ }
}
/* Called from damon_sysfs_cmd_request_callback under damon_sysfs_lock */
@@ -1897,7 +1939,7 @@ int damon_sysfs_schemes_update_regions_start(
{
damon_sysfs_schemes_clear_regions(sysfs_schemes, ctx);
damon_sysfs_schemes_for_damos_callback = sysfs_schemes;
- damos_tried_regions_init_upd_status(sysfs_schemes);
+ damos_tried_regions_init_upd_status(sysfs_schemes, ctx);
damos_regions_upd_total_bytes_only = total_bytes_only;
ctx->callback.before_damos_apply = damon_sysfs_before_damos_apply;
ctx->callback.after_sampling = damon_sysfs_after_sampling;
diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c
index e27846708b5a..7472404456aa 100644
--- a/mm/damon/sysfs.c
+++ b/mm/damon/sysfs.c
@@ -1172,7 +1172,7 @@ static int damon_sysfs_update_target(struct damon_target *target,
struct damon_ctx *ctx,
struct damon_sysfs_target *sys_target)
{
- int err;
+ int err = 0;
if (damon_target_has_pid(ctx)) {
err = damon_sysfs_update_target_pid(target, sys_target->pid);
@@ -1203,8 +1203,10 @@ static int damon_sysfs_set_targets(struct damon_ctx *ctx,
damon_for_each_target_safe(t, next, ctx) {
if (i < sysfs_targets->nr) {
- damon_sysfs_update_target(t, ctx,
+ err = damon_sysfs_update_target(t, ctx,
sysfs_targets->targets_arr[i]);
+ if (err)
+ return err;
} else {
if (damon_target_has_pid(ctx))
put_pid(t->pid);
diff --git a/mm/filemap.c b/mm/filemap.c
index 9710f43a89ac..f1c8c278310f 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3371,7 +3371,7 @@ static bool filemap_map_pmd(struct vm_fault *vmf, struct folio *folio,
}
}
- if (pmd_none(*vmf->pmd))
+ if (pmd_none(*vmf->pmd) && vmf->prealloc_pte)
pmd_install(mm, vmf->pmd, &vmf->prealloc_pte);
return false;
@@ -3443,7 +3443,7 @@ static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf,
* handled in the specific fault path, and it'll prohibit the
* fault-around logic.
*/
- if (!pte_none(vmf->pte[count]))
+ if (!pte_none(ptep_get(&vmf->pte[count])))
goto skip;
count++;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f31f02472396..4f542444a91f 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2769,13 +2769,15 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
int nr = folio_nr_pages(folio);
xas_split(&xas, folio, folio_order(folio));
- if (folio_test_swapbacked(folio)) {
- __lruvec_stat_mod_folio(folio, NR_SHMEM_THPS,
- -nr);
- } else {
- __lruvec_stat_mod_folio(folio, NR_FILE_THPS,
- -nr);
- filemap_nr_thps_dec(mapping);
+ if (folio_test_pmd_mappable(folio)) {
+ if (folio_test_swapbacked(folio)) {
+ __lruvec_stat_mod_folio(folio,
+ NR_SHMEM_THPS, -nr);
+ } else {
+ __lruvec_stat_mod_folio(folio,
+ NR_FILE_THPS, -nr);
+ filemap_nr_thps_dec(mapping);
+ }
}
}
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 1169ef2f2176..6feb3e0630d1 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1182,6 +1182,13 @@ static int is_vma_resv_set(struct vm_area_struct *vma, unsigned long flag)
return (get_vma_private_data(vma) & flag) != 0;
}
+bool __vma_private_lock(struct vm_area_struct *vma)
+{
+ return !(vma->vm_flags & VM_MAYSHARE) &&
+ get_vma_private_data(vma) & ~HPAGE_RESV_MASK &&
+ is_vma_resv_set(vma, HPAGE_RESV_OWNER);
+}
+
void hugetlb_dup_vma_private(struct vm_area_struct *vma)
{
VM_BUG_ON_VMA(!is_vm_hugetlb_page(vma), vma);
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 1eacca03bedd..5501363d6b31 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -642,32 +642,16 @@ static struct kmemleak_object *__alloc_object(gfp_t gfp)
if (!object) {
pr_warn("Cannot allocate a kmemleak_object structure\n");
kmemleak_disable();
+ return NULL;
}
- return object;
-}
-
-static int __link_object(struct kmemleak_object *object, unsigned long ptr,
- size_t size, int min_count, bool is_phys)
-{
-
- struct kmemleak_object *parent;
- struct rb_node **link, *rb_parent;
- unsigned long untagged_ptr;
- unsigned long untagged_objp;
-
INIT_LIST_HEAD(&object->object_list);
INIT_LIST_HEAD(&object->gray_list);
INIT_HLIST_HEAD(&object->area_list);
raw_spin_lock_init(&object->lock);
atomic_set(&object->use_count, 1);
- object->flags = OBJECT_ALLOCATED | (is_phys ? OBJECT_PHYS : 0);
- object->pointer = ptr;
- object->size = kfence_ksize((void *)ptr) ?: size;
object->excess_ref = 0;
- object->min_count = min_count;
object->count = 0; /* white color initially */
- object->jiffies = jiffies;
object->checksum = 0;
object->del_state = 0;
@@ -692,6 +676,24 @@ static int __link_object(struct kmemleak_object *object, unsigned long ptr,
/* kernel backtrace */
object->trace_handle = set_track_prepare();
+ return object;
+}
+
+static int __link_object(struct kmemleak_object *object, unsigned long ptr,
+ size_t size, int min_count, bool is_phys)
+{
+
+ struct kmemleak_object *parent;
+ struct rb_node **link, *rb_parent;
+ unsigned long untagged_ptr;
+ unsigned long untagged_objp;
+
+ object->flags = OBJECT_ALLOCATED | (is_phys ? OBJECT_PHYS : 0);
+ object->pointer = ptr;
+ object->size = kfence_ksize((void *)ptr) ?: size;
+ object->min_count = min_count;
+ object->jiffies = jiffies;
+
untagged_ptr = (unsigned long)kasan_reset_tag((void *)ptr);
/*
* Only update min_addr and max_addr with object
@@ -1150,6 +1152,7 @@ EXPORT_SYMBOL_GPL(kmemleak_free_percpu);
void __ref kmemleak_update_trace(const void *ptr)
{
struct kmemleak_object *object;
+ depot_stack_handle_t trace_handle;
unsigned long flags;
pr_debug("%s(0x%px)\n", __func__, ptr);
@@ -1166,8 +1169,9 @@ void __ref kmemleak_update_trace(const void *ptr)
return;
}
+ trace_handle = set_track_prepare();
raw_spin_lock_irqsave(&object->lock, flags);
- object->trace_handle = set_track_prepare();
+ object->trace_handle = trace_handle;
raw_spin_unlock_irqrestore(&object->lock, flags);
put_object(object);
diff --git a/mm/ksm.c b/mm/ksm.c
index 7efcc68ccc6e..6a831009b4cb 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -468,7 +468,7 @@ static int break_ksm_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long nex
page = pfn_swap_entry_to_page(entry);
}
/* return 1 if the page is an normal ksm page or KSM-placed zero page */
- ret = (page && PageKsm(page)) || is_ksm_zero_pte(*pte);
+ ret = (page && PageKsm(page)) || is_ksm_zero_pte(ptent);
pte_unmap_unlock(pte, ptl);
return ret;
}
diff --git a/mm/madvise.c b/mm/madvise.c
index cf4d694280e9..6214a1ab5654 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -335,6 +335,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
struct folio *folio = NULL;
LIST_HEAD(folio_list);
bool pageout_anon_only_filter;
+ unsigned int batch_count = 0;
if (fatal_signal_pending(current))
return -EINTR;
@@ -416,6 +417,7 @@ huge_unlock:
regular_folio:
#endif
tlb_change_page_size(tlb, PAGE_SIZE);
+restart:
start_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
if (!start_pte)
return 0;
@@ -424,6 +426,15 @@ regular_folio:
for (; addr < end; pte++, addr += PAGE_SIZE) {
ptent = ptep_get(pte);
+ if (++batch_count == SWAP_CLUSTER_MAX) {
+ batch_count = 0;
+ if (need_resched()) {
+ pte_unmap_unlock(start_pte, ptl);
+ cond_resched();
+ goto restart;
+ }
+ }
+
if (pte_none(ptent))
continue;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 774bd6e21e27..b226090fd906 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2936,7 +2936,8 @@ void mem_cgroup_commit_charge(struct folio *folio, struct mem_cgroup *memcg)
* Moreover, it should not come from DMA buffer and is not readily
* reclaimable. So those GFP bits should be masked off.
*/
-#define OBJCGS_CLEAR_MASK (__GFP_DMA | __GFP_RECLAIMABLE | __GFP_ACCOUNT)
+#define OBJCGS_CLEAR_MASK (__GFP_DMA | __GFP_RECLAIMABLE | \
+ __GFP_ACCOUNT | __GFP_NOFAIL)
/*
* mod_objcg_mlstate() may be called with irq enabled, so
@@ -3165,6 +3166,7 @@ __always_inline struct obj_cgroup *current_obj_cgroup(void)
return NULL;
from_memcg:
+ objcg = NULL;
for (; !mem_cgroup_is_root(memcg); memcg = parent_mem_cgroup(memcg)) {
/*
* Memcg pointer is protected by scope (see set_active_memcg())
@@ -3175,7 +3177,6 @@ from_memcg:
objcg = rcu_dereference_check(memcg->objcg, 1);
if (likely(objcg))
break;
- objcg = NULL;
}
return objcg;
diff --git a/mm/memory.c b/mm/memory.c
index 1f18ed4a5497..5c757fba8858 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1517,6 +1517,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
continue;
} else {
/* We should have covered all the swap entry types */
+ pr_alert("unrecognized swap entry 0x%lx\n", entry.val);
WARN_ON_ONCE(1);
}
pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index ab41a511e20a..7a5fc89a8652 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1129,6 +1129,9 @@ void mhp_deinit_memmap_on_memory(unsigned long pfn, unsigned long nr_pages)
kasan_remove_zero_shadow(__va(PFN_PHYS(pfn)), PFN_PHYS(nr_pages));
}
+/*
+ * Must be called with mem_hotplug_lock in write mode.
+ */
int __ref online_pages(unsigned long pfn, unsigned long nr_pages,
struct zone *zone, struct memory_group *group)
{
@@ -1149,7 +1152,6 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages,
!IS_ALIGNED(pfn + nr_pages, PAGES_PER_SECTION)))
return -EINVAL;
- mem_hotplug_begin();
/* associate pfn range with the zone */
move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, MIGRATE_ISOLATE);
@@ -1208,7 +1210,6 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages,
writeback_set_ratelimit();
memory_notify(MEM_ONLINE, &arg);
- mem_hotplug_done();
return 0;
failed_addition:
@@ -1217,7 +1218,6 @@ failed_addition:
(((unsigned long long) pfn + nr_pages) << PAGE_SHIFT) - 1);
memory_notify(MEM_CANCEL_ONLINE, &arg);
remove_pfn_range_from_zone(zone, pfn, nr_pages);
- mem_hotplug_done();
return ret;
}
@@ -1458,7 +1458,7 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
/* create memory block devices after memory was added */
ret = create_memory_block_devices(start, size, params.altmap, group);
if (ret) {
- arch_remove_memory(start, size, NULL);
+ arch_remove_memory(start, size, params.altmap);
goto error_free;
}
@@ -1863,6 +1863,9 @@ static int count_system_ram_pages_cb(unsigned long start_pfn,
return 0;
}
+/*
+ * Must be called with mem_hotplug_lock in write mode.
+ */
int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
struct zone *zone, struct memory_group *group)
{
@@ -1885,8 +1888,6 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
!IS_ALIGNED(start_pfn + nr_pages, PAGES_PER_SECTION)))
return -EINVAL;
- mem_hotplug_begin();
-
/*
* Don't allow to offline memory blocks that contain holes.
* Consequently, memory blocks with holes can never get onlined
@@ -2031,7 +2032,6 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
memory_notify(MEM_OFFLINE, &arg);
remove_pfn_range_from_zone(zone, start_pfn, nr_pages);
- mem_hotplug_done();
return 0;
failed_removal_isolated:
@@ -2046,7 +2046,6 @@ failed_removal:
(unsigned long long) start_pfn << PAGE_SHIFT,
((unsigned long long) end_pfn << PAGE_SHIFT) - 1,
reason);
- mem_hotplug_done();
return ret;
}
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 46f2f5d3d183..ee2fd6a6af40 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -3107,7 +3107,7 @@ EXPORT_SYMBOL_GPL(folio_wait_writeback_killable);
*/
void folio_wait_stable(struct folio *folio)
{
- if (folio_inode(folio)->i_sb->s_iflags & SB_I_STABLE_WRITES)
+ if (mapping_stable_writes(folio_mapping(folio)))
folio_wait_writeback(folio);
}
EXPORT_SYMBOL_GPL(folio_wait_stable);
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 96d9eae5c7cc..0b6ca553bebe 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -312,7 +312,7 @@ static int mfill_atomic_pte_poison(pmd_t *dst_pmd,
ret = -EEXIST;
/* Refuse to overwrite any PTE, even a PTE marker (e.g. UFFD WP). */
- if (!pte_none(*dst_pte))
+ if (!pte_none(ptep_get(dst_pte)))
goto out_unlock;
set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
diff --git a/mm/util.c b/mm/util.c
index aa01f6ea5a75..744b4d7e3fae 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -414,6 +414,15 @@ static int mmap_is_legacy(struct rlimit *rlim_stack)
static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
{
+#ifdef CONFIG_STACK_GROWSUP
+ /*
+ * For an upwards growing stack the calculation is much simpler.
+ * Memory for the maximum stack size is reserved at the top of the
+ * task. mmap_base starts directly below the stack and grows
+ * downwards.
+ */
+ return PAGE_ALIGN_DOWN(mmap_upper_limit(rlim_stack) - rnd);
+#else
unsigned long gap = rlim_stack->rlim_cur;
unsigned long pad = stack_guard_gap;
@@ -431,6 +440,7 @@ static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
gap = MAX_GAP;
return PAGE_ALIGN(STACK_TOP - gap - rnd);
+#endif
}
void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)