summaryrefslogtreecommitdiff
path: root/mm/vmscan.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--mm/vmscan.c165
1 files changed, 83 insertions, 82 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 4f9c854ce6cc..198d623054c5 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1744,17 +1744,17 @@ bool folio_isolate_lru(struct folio *folio)
* the LRU list will go small and be scanned faster than necessary, leading to
* unnecessary swapping, thrashing and OOM.
*/
-static int too_many_isolated(struct pglist_data *pgdat, int file,
+static bool too_many_isolated(struct pglist_data *pgdat, int file,
struct scan_control *sc)
{
unsigned long inactive, isolated;
bool too_many;
if (current_is_kswapd())
- return 0;
+ return false;
if (!writeback_throttling_sane(sc))
- return 0;
+ return false;
if (file) {
inactive = node_page_state(pgdat, NR_INACTIVE_FILE);
@@ -1998,7 +1998,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
LIST_HEAD(l_inactive);
unsigned nr_deactivate, nr_activate;
unsigned nr_rotated = 0;
- int file = is_file_lru(lru);
+ bool file = is_file_lru(lru);
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
lru_add_drain();
@@ -2412,7 +2412,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
denominator = ap + fp;
out:
for_each_evictable_lru(lru) {
- int file = is_file_lru(lru);
+ bool file = is_file_lru(lru);
unsigned long lruvec_size;
unsigned long low, min;
unsigned long scan;
@@ -2879,38 +2879,37 @@ static struct mm_struct *get_next_mm(struct lru_gen_mm_walk *walk)
#endif
-static void reset_mm_stats(struct lruvec *lruvec, struct lru_gen_mm_walk *walk, bool last)
+static void reset_mm_stats(struct lru_gen_mm_walk *walk, bool last)
{
int i;
int hist;
+ struct lruvec *lruvec = walk->lruvec;
struct lru_gen_mm_state *mm_state = get_mm_state(lruvec);
lockdep_assert_held(&get_mm_list(lruvec_memcg(lruvec))->lock);
- if (walk) {
- hist = lru_hist_from_seq(walk->max_seq);
+ hist = lru_hist_from_seq(walk->seq);
- for (i = 0; i < NR_MM_STATS; i++) {
- WRITE_ONCE(mm_state->stats[hist][i],
- mm_state->stats[hist][i] + walk->mm_stats[i]);
- walk->mm_stats[i] = 0;
- }
+ for (i = 0; i < NR_MM_STATS; i++) {
+ WRITE_ONCE(mm_state->stats[hist][i],
+ mm_state->stats[hist][i] + walk->mm_stats[i]);
+ walk->mm_stats[i] = 0;
}
if (NR_HIST_GENS > 1 && last) {
- hist = lru_hist_from_seq(mm_state->seq + 1);
+ hist = lru_hist_from_seq(walk->seq + 1);
for (i = 0; i < NR_MM_STATS; i++)
WRITE_ONCE(mm_state->stats[hist][i], 0);
}
}
-static bool iterate_mm_list(struct lruvec *lruvec, struct lru_gen_mm_walk *walk,
- struct mm_struct **iter)
+static bool iterate_mm_list(struct lru_gen_mm_walk *walk, struct mm_struct **iter)
{
bool first = false;
bool last = false;
struct mm_struct *mm = NULL;
+ struct lruvec *lruvec = walk->lruvec;
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
struct lru_gen_mm_list *mm_list = get_mm_list(memcg);
struct lru_gen_mm_state *mm_state = get_mm_state(lruvec);
@@ -2927,9 +2926,9 @@ static bool iterate_mm_list(struct lruvec *lruvec, struct lru_gen_mm_walk *walk,
*/
spin_lock(&mm_list->lock);
- VM_WARN_ON_ONCE(mm_state->seq + 1 < walk->max_seq);
+ VM_WARN_ON_ONCE(mm_state->seq + 1 < walk->seq);
- if (walk->max_seq <= mm_state->seq)
+ if (walk->seq <= mm_state->seq)
goto done;
if (!mm_state->head)
@@ -2954,12 +2953,12 @@ static bool iterate_mm_list(struct lruvec *lruvec, struct lru_gen_mm_walk *walk,
} while (!(mm = get_next_mm(walk)));
done:
if (*iter || last)
- reset_mm_stats(lruvec, walk, last);
+ reset_mm_stats(walk, last);
spin_unlock(&mm_list->lock);
if (mm && first)
- reset_bloom_filter(mm_state, walk->max_seq + 1);
+ reset_bloom_filter(mm_state, walk->seq + 1);
if (*iter)
mmput_async(*iter);
@@ -2969,7 +2968,7 @@ done:
return last;
}
-static bool iterate_mm_list_nowalk(struct lruvec *lruvec, unsigned long max_seq)
+static bool iterate_mm_list_nowalk(struct lruvec *lruvec, unsigned long seq)
{
bool success = false;
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
@@ -2978,13 +2977,12 @@ static bool iterate_mm_list_nowalk(struct lruvec *lruvec, unsigned long max_seq)
spin_lock(&mm_list->lock);
- VM_WARN_ON_ONCE(mm_state->seq + 1 < max_seq);
+ VM_WARN_ON_ONCE(mm_state->seq + 1 < seq);
- if (max_seq > mm_state->seq) {
+ if (seq > mm_state->seq) {
mm_state->head = NULL;
mm_state->tail = NULL;
WRITE_ONCE(mm_state->seq, mm_state->seq + 1);
- reset_mm_stats(lruvec, NULL, true);
success = true;
}
@@ -3159,9 +3157,10 @@ static void update_batch_size(struct lru_gen_mm_walk *walk, struct folio *folio,
walk->nr_pages[new_gen][type][zone] += delta;
}
-static void reset_batch_size(struct lruvec *lruvec, struct lru_gen_mm_walk *walk)
+static void reset_batch_size(struct lru_gen_mm_walk *walk)
{
int gen, type, zone;
+ struct lruvec *lruvec = walk->lruvec;
struct lru_gen_folio *lrugen = &lruvec->lrugen;
walk->batched = 0;
@@ -3331,7 +3330,8 @@ static bool walk_pte_range(pmd_t *pmd, unsigned long start, unsigned long end,
struct lru_gen_mm_walk *walk = args->private;
struct mem_cgroup *memcg = lruvec_memcg(walk->lruvec);
struct pglist_data *pgdat = lruvec_pgdat(walk->lruvec);
- int old_gen, new_gen = lru_gen_from_seq(walk->max_seq);
+ DEFINE_MAX_SEQ(walk->lruvec);
+ int old_gen, new_gen = lru_gen_from_seq(max_seq);
pte = pte_offset_map_nolock(args->mm, pmd, start & PMD_MASK, &ptl);
if (!pte)
@@ -3398,7 +3398,8 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long addr, struct vm_area
struct lru_gen_mm_walk *walk = args->private;
struct mem_cgroup *memcg = lruvec_memcg(walk->lruvec);
struct pglist_data *pgdat = lruvec_pgdat(walk->lruvec);
- int old_gen, new_gen = lru_gen_from_seq(walk->max_seq);
+ DEFINE_MAX_SEQ(walk->lruvec);
+ int old_gen, new_gen = lru_gen_from_seq(max_seq);
VM_WARN_ON_ONCE(pud_leaf(*pud));
@@ -3529,7 +3530,7 @@ restart:
walk_pmd_range_locked(pud, addr, vma, args, bitmap, &first);
}
- if (!walk->force_scan && !test_bloom_filter(mm_state, walk->max_seq, pmd + i))
+ if (!walk->force_scan && !test_bloom_filter(mm_state, walk->seq, pmd + i))
continue;
walk->mm_stats[MM_NONLEAF_FOUND]++;
@@ -3540,7 +3541,7 @@ restart:
walk->mm_stats[MM_NONLEAF_ADDED]++;
/* carry over to the next generation */
- update_bloom_filter(mm_state, walk->max_seq + 1, pmd + i);
+ update_bloom_filter(mm_state, walk->seq + 1, pmd + i);
}
walk_pmd_range_locked(pud, -1, vma, args, bitmap, &first);
@@ -3591,7 +3592,7 @@ done:
return -EAGAIN;
}
-static void walk_mm(struct lruvec *lruvec, struct mm_struct *mm, struct lru_gen_mm_walk *walk)
+static void walk_mm(struct mm_struct *mm, struct lru_gen_mm_walk *walk)
{
static const struct mm_walk_ops mm_walk_ops = {
.test_walk = should_skip_vma,
@@ -3600,6 +3601,7 @@ static void walk_mm(struct lruvec *lruvec, struct mm_struct *mm, struct lru_gen_
};
int err;
+ struct lruvec *lruvec = walk->lruvec;
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
walk->next_addr = FIRST_USER_ADDRESS;
@@ -3610,7 +3612,7 @@ static void walk_mm(struct lruvec *lruvec, struct mm_struct *mm, struct lru_gen_
err = -EBUSY;
/* another thread might have called inc_max_seq() */
- if (walk->max_seq != max_seq)
+ if (walk->seq != max_seq)
break;
/* folio_update_gen() requires stable folio_memcg() */
@@ -3628,7 +3630,7 @@ static void walk_mm(struct lruvec *lruvec, struct mm_struct *mm, struct lru_gen_
if (walk->batched) {
spin_lock_irq(&lruvec->lru_lock);
- reset_batch_size(lruvec, walk);
+ reset_batch_size(walk);
spin_unlock_irq(&lruvec->lru_lock);
}
@@ -3747,7 +3749,7 @@ next:
return success;
}
-static bool inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
+static bool inc_max_seq(struct lruvec *lruvec, unsigned long seq,
bool can_swap, bool force_scan)
{
bool success;
@@ -3755,14 +3757,14 @@ static bool inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
int type, zone;
struct lru_gen_folio *lrugen = &lruvec->lrugen;
restart:
- if (max_seq < READ_ONCE(lrugen->max_seq))
+ if (seq < READ_ONCE(lrugen->max_seq))
return false;
spin_lock_irq(&lruvec->lru_lock);
VM_WARN_ON_ONCE(!seq_is_valid(lruvec));
- success = max_seq == lrugen->max_seq;
+ success = seq == lrugen->max_seq;
if (!success)
goto unlock;
@@ -3815,8 +3817,8 @@ unlock:
return success;
}
-static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
- struct scan_control *sc, bool can_swap, bool force_scan)
+static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long seq,
+ bool can_swap, bool force_scan)
{
bool success;
struct lru_gen_mm_walk *walk;
@@ -3824,13 +3826,13 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
struct lru_gen_folio *lrugen = &lruvec->lrugen;
struct lru_gen_mm_state *mm_state = get_mm_state(lruvec);
- VM_WARN_ON_ONCE(max_seq > READ_ONCE(lrugen->max_seq));
+ VM_WARN_ON_ONCE(seq > READ_ONCE(lrugen->max_seq));
if (!mm_state)
- return inc_max_seq(lruvec, max_seq, can_swap, force_scan);
+ return inc_max_seq(lruvec, seq, can_swap, force_scan);
/* see the comment in iterate_mm_list() */
- if (max_seq <= READ_ONCE(mm_state->seq))
+ if (seq <= READ_ONCE(mm_state->seq))
return false;
/*
@@ -3840,29 +3842,29 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
* is less efficient, but it avoids bursty page faults.
*/
if (!should_walk_mmu()) {
- success = iterate_mm_list_nowalk(lruvec, max_seq);
+ success = iterate_mm_list_nowalk(lruvec, seq);
goto done;
}
walk = set_mm_walk(NULL, true);
if (!walk) {
- success = iterate_mm_list_nowalk(lruvec, max_seq);
+ success = iterate_mm_list_nowalk(lruvec, seq);
goto done;
}
walk->lruvec = lruvec;
- walk->max_seq = max_seq;
+ walk->seq = seq;
walk->can_swap = can_swap;
walk->force_scan = force_scan;
do {
- success = iterate_mm_list(lruvec, walk, &mm);
+ success = iterate_mm_list(walk, &mm);
if (mm)
- walk_mm(lruvec, mm, walk);
+ walk_mm(mm, walk);
} while (mm);
done:
if (success) {
- success = inc_max_seq(lruvec, max_seq, can_swap, force_scan);
+ success = inc_max_seq(lruvec, seq, can_swap, force_scan);
WARN_ON_ONCE(!success);
}
@@ -4287,7 +4289,7 @@ static bool isolate_folio(struct lruvec *lruvec, struct folio *folio, struct sca
{
bool success;
- /* swapping inhibited */
+ /* swap constrained */
if (!(sc->gfp_mask & __GFP_IO) &&
(folio_test_dirty(folio) ||
(folio_test_anon(folio) && !folio_test_swapcache(folio))))
@@ -4456,9 +4458,12 @@ static int isolate_folios(struct lruvec *lruvec, struct scan_control *sc, int sw
DEFINE_MIN_SEQ(lruvec);
/*
- * Try to make the obvious choice first. When anon and file are both
- * available from the same generation, interpret swappiness 1 as file
- * first and 200 as anon first.
+ * Try to make the obvious choice first, and if anon and file are both
+ * available from the same generation,
+ * 1. Interpret swappiness 1 as file first and MAX_SWAPPINESS as anon
+ * first.
+ * 2. If !__GFP_IO, file first since clean pagecache is more likely to
+ * exist than clean swapcache.
*/
if (!swappiness)
type = LRU_GEN_FILE;
@@ -4468,6 +4473,8 @@ static int isolate_folios(struct lruvec *lruvec, struct scan_control *sc, int sw
type = LRU_GEN_FILE;
else if (swappiness == 200)
type = LRU_GEN_ANON;
+ else if (!(sc->gfp_mask & __GFP_IO))
+ type = LRU_GEN_FILE;
else
type = get_type_to_scan(lruvec, swappiness, &tier);
@@ -4558,8 +4565,10 @@ retry:
move_folios_to_lru(lruvec, &list);
walk = current->reclaim_state->mm_walk;
- if (walk && walk->batched)
- reset_batch_size(lruvec, walk);
+ if (walk && walk->batched) {
+ walk->lruvec = lruvec;
+ reset_batch_size(walk);
+ }
item = PGSTEAL_KSWAPD + reclaimer_offset();
if (!cgroup_reclaim(sc))
@@ -4584,14 +4593,13 @@ retry:
}
static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq,
- struct scan_control *sc, bool can_swap, unsigned long *nr_to_scan)
+ bool can_swap, unsigned long *nr_to_scan)
{
int gen, type, zone;
unsigned long old = 0;
unsigned long young = 0;
unsigned long total = 0;
struct lru_gen_folio *lrugen = &lruvec->lrugen;
- struct mem_cgroup *memcg = lruvec_memcg(lruvec);
DEFINE_MIN_SEQ(lruvec);
/* whether this lruvec is completely out of cold folios */
@@ -4619,13 +4627,7 @@ static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq,
}
}
- /* try to scrape all its memory if this memcg was deleted */
- if (!mem_cgroup_online(memcg)) {
- *nr_to_scan = total;
- return false;
- }
-
- *nr_to_scan = total >> sc->priority;
+ *nr_to_scan = total;
/*
* The aging tries to be lazy to reduce the overhead, while the eviction
@@ -4657,6 +4659,7 @@ static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq,
*/
static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, bool can_swap)
{
+ bool success;
unsigned long nr_to_scan;
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
DEFINE_MAX_SEQ(lruvec);
@@ -4664,15 +4667,18 @@ static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, bool
if (mem_cgroup_below_min(sc->target_mem_cgroup, memcg))
return -1;
- if (!should_run_aging(lruvec, max_seq, sc, can_swap, &nr_to_scan))
- return nr_to_scan;
+ success = should_run_aging(lruvec, max_seq, can_swap, &nr_to_scan);
- /* skip the aging path at the default priority */
- if (sc->priority == DEF_PRIORITY)
+ /* try to scrape all its memory if this memcg was deleted */
+ if (nr_to_scan && !mem_cgroup_online(memcg))
return nr_to_scan;
- /* skip this lruvec as it's low on cold folios */
- return try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false) ? -1 : 0;
+ /* try to get away with not aging at the default priority */
+ if (!success || sc->priority == DEF_PRIORITY)
+ return nr_to_scan >> sc->priority;
+
+ /* stop scanning this lruvec as it's low on cold folios */
+ return try_to_inc_max_seq(lruvec, max_seq, can_swap, false) ? -1 : 0;
}
static bool should_abort_scan(struct lruvec *lruvec, struct scan_control *sc)
@@ -4712,10 +4718,6 @@ static bool try_to_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
unsigned long scanned = 0;
int swappiness = get_swappiness(lruvec, sc);
- /* clean file folios are more likely to exist */
- if (swappiness && !(sc->gfp_mask & __GFP_IO))
- swappiness = 1;
-
while (true) {
int delta;
@@ -4878,7 +4880,6 @@ static void set_initial_priority(struct pglist_data *pgdat, struct scan_control
{
int priority;
unsigned long reclaimable;
- struct lruvec *lruvec = mem_cgroup_lruvec(NULL, pgdat);
if (sc->priority != DEF_PRIORITY || sc->nr_to_reclaim < MIN_LRU_BATCH)
return;
@@ -4888,7 +4889,7 @@ static void set_initial_priority(struct pglist_data *pgdat, struct scan_control
* where reclaimed_to_scanned_ratio = inactive / total.
*/
reclaimable = node_page_state(pgdat, NR_INACTIVE_FILE);
- if (get_swappiness(lruvec, sc))
+ if (can_reclaim_anon_pages(NULL, pgdat->node_id, sc))
reclaimable += node_page_state(pgdat, NR_INACTIVE_ANON);
/* round down reclaimable and round up sc->nr_to_reclaim */
@@ -5332,7 +5333,7 @@ static const struct seq_operations lru_gen_seq_ops = {
.show = lru_gen_seq_show,
};
-static int run_aging(struct lruvec *lruvec, unsigned long seq, struct scan_control *sc,
+static int run_aging(struct lruvec *lruvec, unsigned long seq,
bool can_swap, bool force_scan)
{
DEFINE_MAX_SEQ(lruvec);
@@ -5347,7 +5348,7 @@ static int run_aging(struct lruvec *lruvec, unsigned long seq, struct scan_contr
if (!force_scan && min_seq[!can_swap] + MAX_NR_GENS - 1 <= max_seq)
return -ERANGE;
- try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, force_scan);
+ try_to_inc_max_seq(lruvec, max_seq, can_swap, force_scan);
return 0;
}
@@ -5415,7 +5416,7 @@ static int run_cmd(char cmd, int memcg_id, int nid, unsigned long seq,
switch (cmd) {
case '+':
- err = run_aging(lruvec, seq, sc, swappiness, opt);
+ err = run_aging(lruvec, seq, swappiness, opt);
break;
case '-':
err = run_eviction(lruvec, seq, sc, swappiness, opt);
@@ -6796,6 +6797,7 @@ restart:
bool raise_priority = true;
bool balanced;
bool ret;
+ bool was_frozen;
sc.reclaim_idx = highest_zoneidx;
@@ -6894,9 +6896,9 @@ restart:
/* Check if kswapd should be suspending */
__fs_reclaim_release(_THIS_IP_);
- ret = try_to_freeze();
+ ret = kthread_freezable_should_stop(&was_frozen);
__fs_reclaim_acquire(_THIS_IP_);
- if (ret || kthread_should_stop())
+ if (was_frozen || ret)
break;
/*
@@ -7102,7 +7104,7 @@ static int kswapd(void *p)
WRITE_ONCE(pgdat->kswapd_highest_zoneidx, MAX_NR_ZONES);
atomic_set(&pgdat->nr_writeback_throttled, 0);
for ( ; ; ) {
- bool ret;
+ bool was_frozen;
alloc_order = reclaim_order = READ_ONCE(pgdat->kswapd_order);
highest_zoneidx = kswapd_highest_zoneidx(pgdat,
@@ -7119,15 +7121,14 @@ kswapd_try_sleep:
WRITE_ONCE(pgdat->kswapd_order, 0);
WRITE_ONCE(pgdat->kswapd_highest_zoneidx, MAX_NR_ZONES);
- ret = try_to_freeze();
- if (kthread_should_stop())
+ if (kthread_freezable_should_stop(&was_frozen))
break;
/*
* We can speed up thawing tasks if we don't call balance_pgdat
* after returning from the refrigerator
*/
- if (ret)
+ if (was_frozen)
continue;
/*