summaryrefslogtreecommitdiff
path: root/libbcachefs/btree_update_interior.c
diff options
context:
space:
mode:
Diffstat (limited to 'libbcachefs/btree_update_interior.c')
-rw-r--r--libbcachefs/btree_update_interior.c415
1 files changed, 161 insertions, 254 deletions
diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c
index 19dfc32e..00144707 100644
--- a/libbcachefs/btree_update_interior.c
+++ b/libbcachefs/btree_update_interior.c
@@ -437,10 +437,6 @@ static int bch2_btree_reserve_get(struct btree_update *as, unsigned nr_nodes,
goto err_free;
}
- ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(&b->key));
- if (ret)
- goto err_free;
-
as->prealloc_nodes[as->nr_prealloc_nodes++] = b;
}
@@ -458,6 +454,10 @@ static void bch2_btree_update_free(struct btree_update *as)
{
struct bch_fs *c = as->c;
+ if (as->took_gc_lock)
+ up_read(&c->gc_lock);
+ as->took_gc_lock = false;
+
bch2_journal_preres_put(&c->journal, &as->journal_preres);
bch2_journal_pin_drop(&c->journal, &as->journal);
@@ -893,24 +893,33 @@ void bch2_btree_update_done(struct btree_update *as)
{
BUG_ON(as->mode == BTREE_INTERIOR_NO_UPDATE);
+ if (as->took_gc_lock)
+ up_read(&as->c->gc_lock);
+ as->took_gc_lock = false;
+
bch2_btree_reserve_put(as);
continue_at(&as->cl, btree_update_set_nodes_written, system_freezable_wq);
}
struct btree_update *
-bch2_btree_update_start(struct btree_trans *trans, enum btree_id id,
- unsigned nr_nodes, unsigned flags,
- struct closure *cl)
+bch2_btree_update_start(struct btree_iter *iter, unsigned level,
+ unsigned nr_nodes, unsigned flags)
{
+ struct btree_trans *trans = iter->trans;
struct bch_fs *c = trans->c;
struct btree_update *as;
+ struct closure cl;
int disk_res_flags = (flags & BTREE_INSERT_NOFAIL)
? BCH_DISK_RESERVATION_NOFAIL : 0;
- int journal_flags = (flags & BTREE_INSERT_JOURNAL_RESERVED)
- ? JOURNAL_RES_GET_RECLAIM : 0;
+ int journal_flags = 0;
int ret = 0;
+ if (flags & BTREE_INSERT_JOURNAL_RESERVED)
+ journal_flags |= JOURNAL_RES_GET_RESERVED;
+
+ closure_init_stack(&cl);
+retry:
/*
* This check isn't necessary for correctness - it's just to potentially
* prevent us from doing a lot of work that'll end up being wasted:
@@ -919,12 +928,36 @@ bch2_btree_update_start(struct btree_trans *trans, enum btree_id id,
if (ret)
return ERR_PTR(ret);
+ /*
+ * XXX: figure out how far we might need to split,
+ * instead of locking/reserving all the way to the root:
+ */
+ if (!bch2_btree_iter_upgrade(iter, U8_MAX)) {
+ trace_trans_restart_iter_upgrade(trans->ip);
+ return ERR_PTR(-EINTR);
+ }
+
+ if (flags & BTREE_INSERT_GC_LOCK_HELD)
+ lockdep_assert_held(&c->gc_lock);
+ else if (!down_read_trylock(&c->gc_lock)) {
+ if (flags & BTREE_INSERT_NOUNLOCK)
+ return ERR_PTR(-EINTR);
+
+ bch2_trans_unlock(trans);
+ down_read(&c->gc_lock);
+ if (!bch2_trans_relock(trans)) {
+ up_read(&c->gc_lock);
+ return ERR_PTR(-EINTR);
+ }
+ }
+
as = mempool_alloc(&c->btree_interior_update_pool, GFP_NOIO);
memset(as, 0, sizeof(*as));
closure_init(&as->cl, NULL);
as->c = c;
as->mode = BTREE_INTERIOR_NO_UPDATE;
- as->btree_id = id;
+ as->took_gc_lock = !(flags & BTREE_INSERT_GC_LOCK_HELD);
+ as->btree_id = iter->btree_id;
INIT_LIST_HEAD(&as->list);
INIT_LIST_HEAD(&as->unwritten_list);
INIT_LIST_HEAD(&as->write_blocked_list);
@@ -936,16 +969,25 @@ bch2_btree_update_start(struct btree_trans *trans, enum btree_id id,
BTREE_UPDATE_JOURNAL_RES,
journal_flags|JOURNAL_RES_GET_NONBLOCK);
if (ret == -EAGAIN) {
- if (flags & BTREE_INSERT_NOUNLOCK)
- return ERR_PTR(-EINTR);
+ /*
+ * this would be cleaner if bch2_journal_preres_get() took a
+ * closure argument
+ */
+ if (flags & BTREE_INSERT_NOUNLOCK) {
+ ret = -EINTR;
+ goto err;
+ }
bch2_trans_unlock(trans);
+ if (flags & BTREE_INSERT_JOURNAL_RECLAIM)
+ goto err;
+
ret = bch2_journal_preres_get(&c->journal, &as->journal_preres,
BTREE_UPDATE_JOURNAL_RES,
journal_flags);
if (ret)
- return ERR_PTR(ret);
+ goto err;
if (!bch2_trans_relock(trans)) {
ret = -EINTR;
@@ -960,7 +1002,8 @@ bch2_btree_update_start(struct btree_trans *trans, enum btree_id id,
if (ret)
goto err;
- ret = bch2_btree_reserve_get(as, nr_nodes, flags, cl);
+ ret = bch2_btree_reserve_get(as, nr_nodes, flags,
+ !(flags & BTREE_INSERT_NOUNLOCK) ? &cl : NULL);
if (ret)
goto err;
@@ -975,6 +1018,18 @@ bch2_btree_update_start(struct btree_trans *trans, enum btree_id id,
return as;
err:
bch2_btree_update_free(as);
+
+ if (ret == -EAGAIN) {
+ BUG_ON(flags & BTREE_INSERT_NOUNLOCK);
+
+ bch2_trans_unlock(trans);
+ closure_sync(&cl);
+ ret = -EINTR;
+ }
+
+ if (ret == -EINTR && bch2_trans_relock(trans))
+ goto retry;
+
return ERR_PTR(ret);
}
@@ -1419,6 +1474,7 @@ void bch2_btree_insert_node(struct btree_update *as, struct btree *b,
int old_live_u64s = b->nr.live_u64s;
int live_u64s_added, u64s_added;
+ lockdep_assert_held(&c->gc_lock);
BUG_ON(!btree_node_intent_locked(iter, btree_node_root(c, b)->c.level));
BUG_ON(!b->c.level);
BUG_ON(!as || as->b);
@@ -1450,14 +1506,6 @@ void bch2_btree_insert_node(struct btree_update *as, struct btree *b,
bch2_btree_node_unlock_write(b, iter);
btree_node_interior_verify(c, b);
-
- /*
- * when called from the btree_split path the new nodes aren't added to
- * the btree iterator yet, so the merge path's unlock/wait/relock dance
- * won't work:
- */
- bch2_foreground_maybe_merge(c, iter, b->c.level,
- flags|BTREE_INSERT_NOUNLOCK);
return;
split:
btree_split(as, b, iter, keys, flags);
@@ -1466,109 +1514,73 @@ split:
int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter,
unsigned flags)
{
- struct btree_trans *trans = iter->trans;
struct btree *b = iter_l(iter)->b;
struct btree_update *as;
- struct closure cl;
+ unsigned l;
int ret = 0;
- closure_init_stack(&cl);
-
- /* Hack, because gc and splitting nodes doesn't mix yet: */
- if (!(flags & BTREE_INSERT_GC_LOCK_HELD) &&
- !down_read_trylock(&c->gc_lock)) {
- if (flags & BTREE_INSERT_NOUNLOCK) {
- trace_transaction_restart_ip(trans->ip, _THIS_IP_);
- return -EINTR;
- }
-
- bch2_trans_unlock(trans);
- down_read(&c->gc_lock);
-
- if (!bch2_trans_relock(trans))
- ret = -EINTR;
- }
-
- /*
- * XXX: figure out how far we might need to split,
- * instead of locking/reserving all the way to the root:
- */
- if (!bch2_btree_iter_upgrade(iter, U8_MAX)) {
- trace_trans_restart_iter_upgrade(trans->ip);
- ret = -EINTR;
- goto out;
- }
-
- as = bch2_btree_update_start(trans, iter->btree_id,
- btree_update_reserve_required(c, b), flags,
- !(flags & BTREE_INSERT_NOUNLOCK) ? &cl : NULL);
- if (IS_ERR(as)) {
- ret = PTR_ERR(as);
- if (ret == -EAGAIN) {
- BUG_ON(flags & BTREE_INSERT_NOUNLOCK);
- bch2_trans_unlock(trans);
- ret = -EINTR;
-
- trace_transaction_restart_ip(trans->ip, _THIS_IP_);
- }
- goto out;
- }
+ as = bch2_btree_update_start(iter, iter->level,
+ btree_update_reserve_required(c, b), flags);
+ if (IS_ERR(as))
+ return PTR_ERR(as);
btree_split(as, b, iter, NULL, flags);
bch2_btree_update_done(as);
- /*
- * We haven't successfully inserted yet, so don't downgrade all the way
- * back to read locks;
- */
- __bch2_btree_iter_downgrade(iter, 1);
-out:
- if (!(flags & BTREE_INSERT_GC_LOCK_HELD))
- up_read(&c->gc_lock);
- closure_sync(&cl);
+ for (l = iter->level + 1; btree_iter_node(iter, l) && !ret; l++)
+ ret = bch2_foreground_maybe_merge(c, iter, l, flags);
+
return ret;
}
-void __bch2_foreground_maybe_merge(struct bch_fs *c,
- struct btree_iter *iter,
- unsigned level,
- unsigned flags,
- enum btree_node_sibling sib)
+int __bch2_foreground_maybe_merge(struct bch_fs *c,
+ struct btree_iter *iter,
+ unsigned level,
+ unsigned flags,
+ enum btree_node_sibling sib)
{
struct btree_trans *trans = iter->trans;
+ struct btree_iter *sib_iter = NULL;
struct btree_update *as;
struct bkey_format_state new_s;
struct bkey_format new_f;
struct bkey_i delete;
struct btree *b, *m, *n, *prev, *next, *parent;
- struct closure cl;
+ struct bpos sib_pos;
size_t sib_u64s;
- int ret = 0;
+ int ret = 0, ret2 = 0;
BUG_ON(!btree_node_locked(iter, level));
-
- closure_init_stack(&cl);
retry:
+ ret = bch2_btree_iter_traverse(iter);
+ if (ret)
+ goto err;
+
BUG_ON(!btree_node_locked(iter, level));
b = iter->l[level].b;
- parent = btree_node_parent(iter, b);
- if (!parent)
+ if ((sib == btree_prev_sib && !bpos_cmp(b->data->min_key, POS_MIN)) ||
+ (sib == btree_next_sib && !bpos_cmp(b->data->max_key, POS_MAX))) {
+ b->sib_u64s[sib] = U16_MAX;
goto out;
+ }
- if (b->sib_u64s[sib] > BTREE_FOREGROUND_MERGE_THRESHOLD(c))
- goto out;
+ sib_pos = sib == btree_prev_sib
+ ? bpos_predecessor(b->data->min_key)
+ : bpos_successor(b->data->max_key);
- /* XXX: can't be holding read locks */
- m = bch2_btree_node_get_sibling(c, iter, b, sib);
- if (IS_ERR(m)) {
- ret = PTR_ERR(m);
+ sib_iter = bch2_trans_get_node_iter(trans, iter->btree_id,
+ sib_pos, U8_MAX, level,
+ BTREE_ITER_INTENT);
+ ret = bch2_btree_iter_traverse(sib_iter);
+ if (ret)
goto err;
- }
- /* NULL means no sibling: */
- if (!m) {
+ m = sib_iter->l[level].b;
+
+ if (btree_node_parent(iter, b) !=
+ btree_node_parent(sib_iter, m)) {
b->sib_u64s[sib] = U16_MAX;
goto out;
}
@@ -1581,6 +1593,8 @@ retry:
next = m;
}
+ BUG_ON(bkey_cmp(bpos_successor(prev->data->max_key), next->data->min_key));
+
bch2_bkey_format_init(&new_s);
bch2_bkey_format_add_pos(&new_s, prev->data->min_key);
__bch2_btree_calc_format(&new_s, prev);
@@ -1598,33 +1612,21 @@ retry:
}
sib_u64s = min(sib_u64s, btree_max_u64s(c));
+ sib_u64s = min(sib_u64s, (size_t) U16_MAX - 1);
b->sib_u64s[sib] = sib_u64s;
- if (b->sib_u64s[sib] > BTREE_FOREGROUND_MERGE_THRESHOLD(c)) {
- six_unlock_intent(&m->c.lock);
+ if (b->sib_u64s[sib] > c->btree_foreground_merge_threshold)
goto out;
- }
-
- /* We're changing btree topology, doesn't mix with gc: */
- if (!(flags & BTREE_INSERT_GC_LOCK_HELD) &&
- !down_read_trylock(&c->gc_lock))
- goto err_cycle_gc_lock;
-
- if (!bch2_btree_iter_upgrade(iter, U8_MAX)) {
- ret = -EINTR;
- goto err_unlock;
- }
- as = bch2_btree_update_start(trans, iter->btree_id,
+ parent = btree_node_parent(iter, b);
+ as = bch2_btree_update_start(iter, level,
btree_update_reserve_required(c, parent) + 1,
flags|
BTREE_INSERT_NOFAIL|
- BTREE_INSERT_USE_RESERVE,
- !(flags & BTREE_INSERT_NOUNLOCK) ? &cl : NULL);
- if (IS_ERR(as)) {
- ret = PTR_ERR(as);
- goto err_unlock;
- }
+ BTREE_INSERT_USE_RESERVE);
+ ret = PTR_ERR_OR_ZERO(as);
+ if (ret)
+ goto err;
trace_btree_merge(c, b);
@@ -1658,6 +1660,7 @@ retry:
bch2_btree_update_get_open_buckets(as, n);
six_lock_increment(&b->c.lock, SIX_LOCK_intent);
+ six_lock_increment(&m->c.lock, SIX_LOCK_intent);
bch2_btree_iter_node_drop(iter, b);
bch2_btree_iter_node_drop(iter, m);
@@ -1671,11 +1674,9 @@ retry:
six_unlock_intent(&n->c.lock);
bch2_btree_update_done(as);
-
- if (!(flags & BTREE_INSERT_GC_LOCK_HELD))
- up_read(&c->gc_lock);
out:
bch2_btree_trans_verify_locks(trans);
+ bch2_trans_iter_free(trans, sib_iter);
/*
* Don't downgrade locks here: we're called after successful insert,
@@ -1686,58 +1687,56 @@ out:
* split path, and downgrading to read locks in there is potentially
* confusing:
*/
- closure_sync(&cl);
- return;
-
-err_cycle_gc_lock:
- six_unlock_intent(&m->c.lock);
-
- if (flags & BTREE_INSERT_NOUNLOCK)
- goto out;
-
- bch2_trans_unlock(trans);
-
- down_read(&c->gc_lock);
- up_read(&c->gc_lock);
- ret = -EINTR;
- goto err;
-
-err_unlock:
- six_unlock_intent(&m->c.lock);
- if (!(flags & BTREE_INSERT_GC_LOCK_HELD))
- up_read(&c->gc_lock);
+ return ret ?: ret2;
err:
- BUG_ON(ret == -EAGAIN && (flags & BTREE_INSERT_NOUNLOCK));
-
- if ((ret == -EAGAIN || ret == -EINTR) &&
- !(flags & BTREE_INSERT_NOUNLOCK)) {
- bch2_trans_unlock(trans);
- closure_sync(&cl);
- ret = bch2_btree_iter_traverse(iter);
- if (ret)
- goto out;
+ bch2_trans_iter_put(trans, sib_iter);
+ sib_iter = NULL;
+ if (ret == -EINTR && bch2_trans_relock(trans))
goto retry;
+
+ if (ret == -EINTR && !(flags & BTREE_INSERT_NOUNLOCK)) {
+ ret2 = ret;
+ ret = bch2_btree_iter_traverse_all(trans);
+ if (!ret)
+ goto retry;
}
goto out;
}
-static int __btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
- struct btree *b, unsigned flags,
- struct closure *cl)
+/**
+ * bch_btree_node_rewrite - Rewrite/move a btree node
+ */
+int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
+ __le64 seq, unsigned flags)
{
- struct btree *n, *parent = btree_node_parent(iter, b);
+ struct btree *b, *n, *parent;
struct btree_update *as;
+ int ret;
+
+ flags |= BTREE_INSERT_NOFAIL;
+retry:
+ ret = bch2_btree_iter_traverse(iter);
+ if (ret)
+ goto out;
+
+ b = bch2_btree_iter_peek_node(iter);
+ if (!b || b->data->keys.seq != seq)
+ goto out;
- as = bch2_btree_update_start(iter->trans, iter->btree_id,
+ parent = btree_node_parent(iter, b);
+ as = bch2_btree_update_start(iter, b->c.level,
(parent
? btree_update_reserve_required(c, parent)
: 0) + 1,
- flags, cl);
- if (IS_ERR(as)) {
+ flags);
+ ret = PTR_ERR_OR_ZERO(as);
+ if (ret == -EINTR)
+ goto retry;
+ if (ret) {
trace_btree_gc_rewrite_node_fail(c, b);
- return PTR_ERR(as);
+ goto out;
}
bch2_btree_interior_update_will_free_node(as, b);
@@ -1768,60 +1767,8 @@ static int __btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
six_unlock_intent(&n->c.lock);
bch2_btree_update_done(as);
- return 0;
-}
-
-/**
- * bch_btree_node_rewrite - Rewrite/move a btree node
- *
- * Returns 0 on success, -EINTR or -EAGAIN on failure (i.e.
- * btree_check_reserve() has to wait)
- */
-int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
- __le64 seq, unsigned flags)
-{
- struct btree_trans *trans = iter->trans;
- struct closure cl;
- struct btree *b;
- int ret;
-
- flags |= BTREE_INSERT_NOFAIL;
-
- closure_init_stack(&cl);
-
- bch2_btree_iter_upgrade(iter, U8_MAX);
-
- if (!(flags & BTREE_INSERT_GC_LOCK_HELD)) {
- if (!down_read_trylock(&c->gc_lock)) {
- bch2_trans_unlock(trans);
- down_read(&c->gc_lock);
- }
- }
-
- while (1) {
- ret = bch2_btree_iter_traverse(iter);
- if (ret)
- break;
-
- b = bch2_btree_iter_peek_node(iter);
- if (!b || b->data->keys.seq != seq)
- break;
-
- ret = __btree_node_rewrite(c, iter, b, flags, &cl);
- if (ret != -EAGAIN &&
- ret != -EINTR)
- break;
-
- bch2_trans_unlock(trans);
- closure_sync(&cl);
- }
-
+out:
bch2_btree_iter_downgrade(iter);
-
- if (!(flags & BTREE_INSERT_GC_LOCK_HELD))
- up_read(&c->gc_lock);
-
- closure_sync(&cl);
return ret;
}
@@ -1892,71 +1839,34 @@ int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter,
struct btree_update *as = NULL;
struct btree *new_hash = NULL;
struct closure cl;
- int ret;
+ int ret = 0;
closure_init_stack(&cl);
- if (!bch2_btree_iter_upgrade(iter, U8_MAX))
- return -EINTR;
-
- if (!down_read_trylock(&c->gc_lock)) {
- bch2_trans_unlock(iter->trans);
- down_read(&c->gc_lock);
-
- if (!bch2_trans_relock(iter->trans)) {
- ret = -EINTR;
- goto err;
- }
- }
-
/*
* check btree_ptr_hash_val() after @b is locked by
* btree_iter_traverse():
*/
if (btree_ptr_hash_val(new_key) != b->hash_val) {
- /* bch2_btree_reserve_get will unlock */
ret = bch2_btree_cache_cannibalize_lock(c, &cl);
if (ret) {
bch2_trans_unlock(iter->trans);
- up_read(&c->gc_lock);
closure_sync(&cl);
- down_read(&c->gc_lock);
-
- if (!bch2_trans_relock(iter->trans)) {
- ret = -EINTR;
- goto err;
- }
+ if (!bch2_trans_relock(iter->trans))
+ return -EINTR;
}
new_hash = bch2_btree_node_mem_alloc(c);
}
-retry:
- as = bch2_btree_update_start(iter->trans, iter->btree_id,
- parent ? btree_update_reserve_required(c, parent) : 0,
- BTREE_INSERT_NOFAIL, &cl);
+ as = bch2_btree_update_start(iter, b->c.level,
+ parent ? btree_update_reserve_required(c, parent) : 0,
+ BTREE_INSERT_NOFAIL);
if (IS_ERR(as)) {
ret = PTR_ERR(as);
- if (ret == -EAGAIN)
- ret = -EINTR;
-
- if (ret == -EINTR) {
- bch2_trans_unlock(iter->trans);
- up_read(&c->gc_lock);
- closure_sync(&cl);
- down_read(&c->gc_lock);
-
- if (bch2_trans_relock(iter->trans))
- goto retry;
- }
-
goto err;
}
- ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(new_key));
- if (ret)
- goto err_free_update;
-
__bch2_btree_node_update_key(c, as, iter, b, new_hash, new_key);
bch2_btree_iter_downgrade(iter);
@@ -1969,12 +1879,9 @@ err:
six_unlock_write(&new_hash->c.lock);
six_unlock_intent(&new_hash->c.lock);
}
- up_read(&c->gc_lock);
closure_sync(&cl);
+ bch2_btree_cache_cannibalize_unlock(c);
return ret;
-err_free_update:
- bch2_btree_update_free(as);
- goto err;
}
/* Init code: */