diff options
Diffstat (limited to 'libbcachefs/journal_reclaim.c')
-rw-r--r-- | libbcachefs/journal_reclaim.c | 158 |
1 files changed, 129 insertions, 29 deletions
diff --git a/libbcachefs/journal_reclaim.c b/libbcachefs/journal_reclaim.c index 7a04d06b..66f5dcce 100644 --- a/libbcachefs/journal_reclaim.c +++ b/libbcachefs/journal_reclaim.c @@ -1,12 +1,17 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" +#include "btree_key_cache.h" #include "journal.h" #include "journal_io.h" #include "journal_reclaim.h" #include "replicas.h" #include "super.h" +#include <linux/kthread.h> +#include <linux/sched/mm.h> +#include <trace/events/bcachefs.h> + /* Free space calculations: */ static unsigned journal_space_from(struct journal_device *ja, @@ -164,12 +169,12 @@ void bch2_journal_space_available(struct journal *j) j->can_discard = can_discard; if (nr_online < c->opts.metadata_replicas_required) { - ret = -EROFS; + ret = cur_entry_insufficient_devices; goto out; } if (!fifo_free(&j->pin)) { - ret = -ENOSPC; + ret = cur_entry_journal_pin_full; goto out; } @@ -180,7 +185,7 @@ void bch2_journal_space_available(struct journal *j) clean = __journal_space_available(j, nr_devs_want, journal_space_clean); if (!discarded.next_entry) - ret = -ENOSPC; + ret = cur_entry_journal_full; overhead = DIV_ROUND_UP(clean.remaining, max_entry_size) * journal_entry_overhead(j); @@ -432,7 +437,6 @@ journal_get_next_pin(struct journal *j, u64 max_seq, u64 *seq) list_move(&ret->list, &pin_list->flushed); BUG_ON(j->flush_in_progress); j->flush_in_progress = ret; - j->last_flushed = jiffies; } spin_unlock(&j->lock); @@ -441,17 +445,24 @@ journal_get_next_pin(struct journal *j, u64 max_seq, u64 *seq) } /* returns true if we did work */ -static bool journal_flush_pins(struct journal *j, u64 seq_to_flush, - unsigned min_nr) +static u64 journal_flush_pins(struct journal *j, u64 seq_to_flush, + unsigned min_nr) { struct journal_entry_pin *pin; - bool ret = false; - u64 seq; + u64 seq, ret = 0; lockdep_assert_held(&j->reclaim_lock); - while ((pin = journal_get_next_pin(j, min_nr - ? U64_MAX : seq_to_flush, &seq))) { + while (1) { + cond_resched(); + + j->last_flushed = jiffies; + + pin = journal_get_next_pin(j, min_nr + ? U64_MAX : seq_to_flush, &seq); + if (!pin) + break; + if (min_nr) min_nr--; @@ -460,7 +471,7 @@ static bool journal_flush_pins(struct journal *j, u64 seq_to_flush, BUG_ON(j->flush_in_progress != pin); j->flush_in_progress = NULL; wake_up(&j->pin_flush_wait); - ret = true; + ret++; } return ret; @@ -524,15 +535,27 @@ static u64 journal_seq_to_flush(struct journal *j) * 512 journal entries or 25% of all journal buckets, then * journal_next_bucket() should not stall. */ -void bch2_journal_reclaim(struct journal *j) +static void __bch2_journal_reclaim(struct journal *j, bool direct) { struct bch_fs *c = container_of(j, struct bch_fs, journal); - unsigned min_nr = 0; - u64 seq_to_flush = 0; + bool kthread = (current->flags & PF_KTHREAD) != 0; + u64 seq_to_flush, nr_flushed = 0; + size_t min_nr; + unsigned flags; + /* + * We can't invoke memory reclaim while holding the reclaim_lock - + * journal reclaim is required to make progress for memory reclaim + * (cleaning the caches), so we can't get stuck in memory reclaim while + * we're holding the reclaim lock: + */ lockdep_assert_held(&j->reclaim_lock); + flags = memalloc_noreclaim_save(); do { + if (kthread && kthread_should_stop()) + break; + bch2_journal_do_discards(j); seq_to_flush = journal_seq_to_flush(j); @@ -549,26 +572,103 @@ void bch2_journal_reclaim(struct journal *j) if (j->prereserved.reserved * 2 > j->prereserved.remaining) min_nr = 1; - if ((atomic_read(&c->btree_cache.dirty) * 4 > - c->btree_cache.used * 3) || - (c->btree_key_cache.nr_dirty * 4 > - c->btree_key_cache.nr_keys)) + if (atomic_read(&c->btree_cache.dirty) * 4 > + c->btree_cache.used * 3) min_nr = 1; - } while (journal_flush_pins(j, seq_to_flush, min_nr)); - if (!bch2_journal_error(j)) - queue_delayed_work(c->journal_reclaim_wq, &j->reclaim_work, - msecs_to_jiffies(j->reclaim_delay_ms)); + min_nr = max(min_nr, bch2_nr_btree_keys_need_flush(c)); + + trace_journal_reclaim_start(c, + min_nr, + j->prereserved.reserved, + j->prereserved.remaining, + atomic_read(&c->btree_cache.dirty), + c->btree_cache.used, + c->btree_key_cache.nr_dirty, + c->btree_key_cache.nr_keys); + + nr_flushed = journal_flush_pins(j, seq_to_flush, min_nr); + + if (direct) + j->nr_direct_reclaim += nr_flushed; + else + j->nr_background_reclaim += nr_flushed; + trace_journal_reclaim_finish(c, nr_flushed); + } while (min_nr); + + memalloc_noreclaim_restore(flags); } -void bch2_journal_reclaim_work(struct work_struct *work) +void bch2_journal_reclaim(struct journal *j) { - struct journal *j = container_of(to_delayed_work(work), - struct journal, reclaim_work); + __bch2_journal_reclaim(j, true); +} - mutex_lock(&j->reclaim_lock); - bch2_journal_reclaim(j); - mutex_unlock(&j->reclaim_lock); +static int bch2_journal_reclaim_thread(void *arg) +{ + struct journal *j = arg; + unsigned long next; + + set_freezable(); + + kthread_wait_freezable(test_bit(JOURNAL_RECLAIM_STARTED, &j->flags)); + + while (!kthread_should_stop()) { + j->reclaim_kicked = false; + + mutex_lock(&j->reclaim_lock); + __bch2_journal_reclaim(j, false); + mutex_unlock(&j->reclaim_lock); + + next = j->last_flushed + msecs_to_jiffies(j->reclaim_delay_ms); + + while (1) { + set_current_state(TASK_INTERRUPTIBLE); + if (kthread_should_stop()) + break; + if (j->reclaim_kicked) + break; + if (time_after_eq(jiffies, next)) + break; + schedule_timeout(next - jiffies); + try_to_freeze(); + + } + __set_current_state(TASK_RUNNING); + } + + return 0; +} + +void bch2_journal_reclaim_stop(struct journal *j) +{ + struct task_struct *p = j->reclaim_thread; + + j->reclaim_thread = NULL; + + if (p) { + kthread_stop(p); + put_task_struct(p); + } +} + +int bch2_journal_reclaim_start(struct journal *j) +{ + struct bch_fs *c = container_of(j, struct bch_fs, journal); + struct task_struct *p; + + if (j->reclaim_thread) + return 0; + + p = kthread_create(bch2_journal_reclaim_thread, j, + "bch-reclaim/%s", c->name); + if (IS_ERR(p)) + return PTR_ERR(p); + + get_task_struct(p); + j->reclaim_thread = p; + wake_up_process(p); + return 0; } static int journal_flush_done(struct journal *j, u64 seq_to_flush, @@ -582,7 +682,7 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush, mutex_lock(&j->reclaim_lock); - *did_work = journal_flush_pins(j, seq_to_flush, 0); + *did_work = journal_flush_pins(j, seq_to_flush, 0) != 0; spin_lock(&j->lock); /* |