Diffstat (limited to 'libbcachefs/alloc_background.c')
 libbcachefs/alloc_background.c | 138
 1 file changed, 79 insertions(+), 59 deletions(-)
diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c
index 955caa21..6de6e263 100644
--- a/libbcachefs/alloc_background.c
+++ b/libbcachefs/alloc_background.c
@@ -272,12 +272,19 @@ static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca,
size_t b, struct btree_iter *iter,
u64 *journal_seq, unsigned flags)
{
+#if 0
__BKEY_PADDED(k, BKEY_ALLOC_VAL_U64s_MAX) alloc_key;
+#else
+ /* hack: */
+ __BKEY_PADDED(k, 8) alloc_key;
+#endif
struct bkey_i_alloc *a = bkey_alloc_init(&alloc_key.k);
struct bucket *g;
struct bucket_mark m;
int ret;
+ BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8);
+
a->k.p = POS(ca->dev_idx, b);
percpu_down_read_preempt_disable(&c->mark_lock);
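Note on the hunk above: the properly sized BKEY_ALLOC_VAL_U64s_MAX padding is swapped, as a stated hack, for a hard-coded 8 u64s guarded by a runtime BUG_ON(). A standalone sketch of the same pattern, with all names assumed rather than taken from bcachefs:

    #include <assert.h>
    #include <stdint.h>

    #define VAL_U64S_MAX 6                  /* stand-in for BKEY_ALLOC_VAL_U64s_MAX */

    /* Reserve a fixed, hard-coded number of u64s on the stack... */
    struct padded_key {
            uint64_t pad[8];
    };

    /* ...and assert at runtime that the real maximum still fits,
     * mirroring the BUG_ON() guard in the hunk. */
    static void init_padded_key(struct padded_key *k)
    {
            assert(VAL_U64S_MAX <= 8);
            k->pad[0] = 0;
    }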
@@ -339,12 +346,14 @@ err:
return ret;
}
-int bch2_alloc_write(struct bch_fs *c)
+int bch2_alloc_write(struct bch_fs *c, bool nowait, bool *wrote)
{
struct bch_dev *ca;
unsigned i;
int ret = 0;
+ *wrote = false;
+
for_each_rw_member(ca, c, i) {
struct btree_iter iter;
struct bucket_array *buckets;
@@ -362,9 +371,14 @@ int bch2_alloc_write(struct bch_fs *c)
if (!buckets->b[b].mark.dirty)
continue;
- ret = __bch2_alloc_write_key(c, ca, b, &iter, NULL, 0);
+ ret = __bch2_alloc_write_key(c, ca, b, &iter, NULL,
+ nowait
+ ? BTREE_INSERT_NOWAIT
+ : 0);
if (ret)
break;
+
+ *wrote = true;
}
up_read(&ca->bucket_lock);
bch2_btree_iter_unlock(&iter);
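The new bch2_alloc_write() signature maps nowait onto BTREE_INSERT_NOWAIT for each key write and reports through *wrote whether any dirty bucket key actually went out, so callers can tell progress apart from failure. A minimal sketch of that contract under assumed names (write_one_key() and write_dirty_keys() are invented for illustration, not bcachefs functions):

    #include <stdbool.h>

    /* Hypothetical per-key write; stands in for __bch2_alloc_write_key(). */
    static int write_one_key(unsigned idx, unsigned flags)
    {
            (void)idx; (void)flags;
            return 0;
    }

    /*
     * The callee starts from "no progress", picks a non-blocking insert
     * flag when asked, and flips *wrote after every key it manages to write.
     */
    static int write_dirty_keys(unsigned nr, bool nowait, bool *wrote)
    {
            unsigned flags = nowait ? 1u : 0;       /* stand-in for BTREE_INSERT_NOWAIT */
            int ret = 0;

            *wrote = false;

            for (unsigned i = 0; i < nr; i++) {
                    ret = write_one_key(i, flags);
                    if (ret)
                            break;
                    *wrote = true;
            }
            return ret;
    }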
@@ -1262,20 +1276,23 @@ static void flush_held_btree_writes(struct bch_fs *c)
struct bucket_table *tbl;
struct rhash_head *pos;
struct btree *b;
- bool flush_updates;
- size_t i, nr_pending_updates;
+ bool nodes_blocked;
+ size_t i;
+ struct closure cl;
+
+ closure_init_stack(&cl);
clear_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags);
again:
pr_debug("flushing dirty btree nodes");
cond_resched();
+ closure_wait(&c->btree_interior_update_wait, &cl);
- flush_updates = false;
- nr_pending_updates = bch2_btree_interior_updates_nr_pending(c);
+ nodes_blocked = false;
rcu_read_lock();
for_each_cached_btree(b, c, tbl, i, pos)
- if (btree_node_dirty(b) && (!b->written || b->level)) {
+ if (btree_node_need_write(b)) {
if (btree_node_may_write(b)) {
rcu_read_unlock();
btree_node_lock_type(c, b, SIX_LOCK_read);
@@ -1283,7 +1300,7 @@ again:
six_unlock_read(&b->lock);
goto again;
} else {
- flush_updates = true;
+ nodes_blocked = true;
}
}
rcu_read_unlock();
@@ -1291,17 +1308,16 @@ again:
if (c->btree_roots_dirty)
bch2_journal_meta(&c->journal);
- /*
- * This is ugly, but it's needed to flush btree node writes
- * without spinning...
- */
- if (flush_updates) {
- closure_wait_event(&c->btree_interior_update_wait,
- bch2_btree_interior_updates_nr_pending(c) <
- nr_pending_updates);
+ if (nodes_blocked) {
+ closure_sync(&cl);
goto again;
}
+ closure_wake_up(&c->btree_interior_update_wait);
+ closure_sync(&cl);
+
+ closure_wait_event(&c->btree_interior_update_wait,
+ !bch2_btree_interior_updates_nr_pending(c));
}
static void allocator_start_issue_discards(struct bch_fs *c)
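The reworked flush_held_btree_writes() registers its on-stack closure on btree_interior_update_wait before scanning for dirty nodes, so a completion that fires during the scan is not lost; closure_sync() then only has to sleep until that wakeup arrives. The same ordering, shown as a standalone userspace analogue (pthreads, all names invented for illustration):

    #include <pthread.h>
    #include <stdbool.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  wakeup = PTHREAD_COND_INITIALIZER;
    static bool writes_blocked = true;

    /* Flusher side: become a waiter (take the lock) before re-checking,
     * so a completion between the check and the sleep cannot be lost. */
    static void wait_for_unblocked_writes(void)
    {
            pthread_mutex_lock(&lock);
            while (writes_blocked)
                    pthread_cond_wait(&wakeup, &lock);
            pthread_mutex_unlock(&lock);
    }

    /* Completion side: flip the condition and wake any waiting flusher. */
    static void write_completed(void)
    {
            pthread_mutex_lock(&lock);
            writes_blocked = false;
            pthread_cond_broadcast(&wakeup);
            pthread_mutex_unlock(&lock);
    }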
@@ -1323,13 +1339,10 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
unsigned dev_iter;
u64 journal_seq = 0;
long bu;
- bool invalidating_data = false;
int ret = 0;
- if (test_alloc_startup(c)) {
- invalidating_data = true;
+ if (test_alloc_startup(c))
goto not_enough;
- }
/* Scan for buckets that are already invalidated: */
for_each_rw_member(ca, c, dev_iter) {
@@ -1376,21 +1389,6 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
not_enough:
pr_debug("not enough empty buckets; scanning for reclaimable buckets");
- for_each_rw_member(ca, c, dev_iter) {
- find_reclaimable_buckets(c, ca);
-
- while (!fifo_full(&ca->free[RESERVE_BTREE]) &&
- (bu = next_alloc_bucket(ca)) >= 0) {
- invalidating_data |=
- bch2_invalidate_one_bucket(c, ca, bu, &journal_seq);
-
- fifo_push(&ca->free[RESERVE_BTREE], bu);
- bucket_set_dirty(ca, bu);
- }
- }
-
- pr_debug("done scanning for reclaimable buckets");
-
/*
* We're moving buckets to freelists _before_ they've been marked as
* invalidated on disk - we have to so that we can allocate new btree
@@ -1400,38 +1398,59 @@ not_enough:
* have cached data in them, which is live until they're marked as
* invalidated on disk:
*/
- if (invalidating_data) {
- pr_debug("invalidating existing data");
- set_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags);
- } else {
- pr_debug("issuing discards");
- allocator_start_issue_discards(c);
- }
+ set_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags);
- /*
- * XXX: it's possible for this to deadlock waiting on journal reclaim,
- * since we're holding btree writes. What then?
- */
- ret = bch2_alloc_write(c);
- if (ret)
- return ret;
+ while (1) {
+ bool wrote = false;
- if (invalidating_data) {
- pr_debug("flushing journal");
+ for_each_rw_member(ca, c, dev_iter) {
+ find_reclaimable_buckets(c, ca);
- ret = bch2_journal_flush_seq(&c->journal, journal_seq);
- if (ret)
- return ret;
+ while (!fifo_full(&ca->free[RESERVE_BTREE]) &&
+ (bu = next_alloc_bucket(ca)) >= 0) {
+ bch2_invalidate_one_bucket(c, ca, bu,
+ &journal_seq);
+
+ fifo_push(&ca->free[RESERVE_BTREE], bu);
+ bucket_set_dirty(ca, bu);
+ }
+ }
+
+ pr_debug("done scanning for reclaimable buckets");
+
+ /*
+ * XXX: it's possible for this to deadlock waiting on journal reclaim,
+ * since we're holding btree writes. What then?
+ */
+ ret = bch2_alloc_write(c, true, &wrote);
- pr_debug("issuing discards");
- allocator_start_issue_discards(c);
+ /*
+ * If bch2_alloc_write() did anything, it may have used some
+ * buckets, and we need the RESERVE_BTREE freelist full - so we
+ * need to loop and scan again.
+ * And if it errored, it may have been because there weren't
+ * enough buckets, so just scan and loop again as long as it
+ * made some progress:
+ */
+ if (!wrote && ret)
+ return ret;
+ if (!wrote && !ret)
+ break;
}
+ pr_debug("flushing journal");
+
+ ret = bch2_journal_flush(&c->journal);
+ if (ret)
+ return ret;
+
+ pr_debug("issuing discards");
+ allocator_start_issue_discards(c);
+
set_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags);
/* now flush dirty btree nodes: */
- if (invalidating_data)
- flush_held_btree_writes(c);
+ flush_held_btree_writes(c);
return 0;
}
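The rewritten not_enough path folds bucket invalidation into a retry loop around bch2_alloc_write(c, true, &wrote): an error only aborts startup if the pass also made no progress, since a failed non-blocking write may simply mean the reserve was not full yet. A sketch of that retry policy, with scan_for_reclaimable_buckets() and fill_btree_reserve() as assumed names:

    #include <stdbool.h>

    struct bch_fs;
    int bch2_alloc_write(struct bch_fs *c, bool nowait, bool *wrote);
    void scan_for_reclaimable_buckets(struct bch_fs *c);    /* hypothetical stand-in */

    /*
     * Keep rescanning and retrying while a pass writes anything; stop once a
     * pass writes nothing, returning its result (0 on a clean pass, or the
     * error that made no progress).
     */
    static int fill_btree_reserve(struct bch_fs *c)
    {
            while (1) {
                    bool wrote = false;
                    int ret;

                    scan_for_reclaimable_buckets(c);
                    ret = bch2_alloc_write(c, true, &wrote);

                    if (!wrote)
                            return ret;
            }
    }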
@@ -1440,6 +1459,7 @@ int bch2_fs_allocator_start(struct bch_fs *c)
{
struct bch_dev *ca;
unsigned i;
+ bool wrote;
int ret;
down_read(&c->gc_lock);
@@ -1457,7 +1477,7 @@ int bch2_fs_allocator_start(struct bch_fs *c)
}
}
- return bch2_alloc_write(c);
+ return bch2_alloc_write(c, false, &wrote);
}
void bch2_fs_allocator_background_init(struct bch_fs *c)