Diffstat (limited to 'libbcachefs/btree_update_interior.c'):
 libbcachefs/btree_update_interior.c | 185 ++++++++++++++-----------
 1 file changed, 104 insertions(+), 81 deletions(-)
diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c
index 92e19c4e..3e13f784 100644
--- a/libbcachefs/btree_update_interior.c
+++ b/libbcachefs/btree_update_interior.c
@@ -223,8 +223,7 @@ found:
 	mutex_unlock(&c->btree_interior_update_lock);
 }
 
-static void __btree_node_free(struct bch_fs *c, struct btree *b,
-			      struct btree_iter *iter)
+static void __btree_node_free(struct bch_fs *c, struct btree *b)
 {
 	trace_btree_node_free(c, b);
 
@@ -237,21 +236,11 @@ static void __btree_node_free(struct bch_fs *c, struct btree *b,
 
 	clear_btree_node_noevict(b);
 
-	btree_node_lock_type(c, b, SIX_LOCK_write);
-
 	bch2_btree_node_hash_remove(&c->btree_cache, b);
 
 	mutex_lock(&c->btree_cache.lock);
 	list_move(&b->list, &c->btree_cache.freeable);
 	mutex_unlock(&c->btree_cache.lock);
-
-	/*
-	 * By using six_unlock_write() directly instead of
-	 * bch2_btree_node_unlock_write(), we don't update the iterator's
-	 * sequence numbers and cause future bch2_btree_node_relock() calls to
-	 * fail:
-	 */
-	six_unlock_write(&b->lock);
 }
 
 void bch2_btree_node_free_never_inserted(struct bch_fs *c, struct btree *b)
@@ -264,7 +253,9 @@ void bch2_btree_node_free_never_inserted(struct bch_fs *c, struct btree *b)
 
 	clear_btree_node_dirty(b);
 
-	__btree_node_free(c, b, NULL);
+	btree_node_lock_type(c, b, SIX_LOCK_write);
+	__btree_node_free(c, b);
+	six_unlock_write(&b->lock);
 
 	bch2_open_bucket_put_refs(c, &ob.nr, ob.refs);
 }
@@ -283,9 +274,9 @@ void bch2_btree_node_free_inmem(struct bch_fs *c, struct btree *b,
 	 */
 	btree_update_drop_new_node(c, b);
 
-	bch2_btree_iter_node_drop_linked(iter, b);
-
-	__btree_node_free(c, b, iter);
+	__bch2_btree_node_lock_write(b, iter);
+	__btree_node_free(c, b);
+	six_unlock_write(&b->lock);
 
 	bch2_btree_iter_node_drop(iter, b);
 }
@@ -499,7 +490,9 @@ static void bch2_btree_reserve_put(struct bch_fs *c, struct btree_reserve *reser
 			bch2_btree_open_bucket_put(c, b);
 		}
 
-		__btree_node_free(c, b, NULL);
+		btree_node_lock_type(c, b, SIX_LOCK_write);
+		__btree_node_free(c, b);
+		six_unlock_write(&b->lock);
 
 		six_unlock_intent(&b->lock);
 	}
@@ -1362,7 +1355,8 @@ static void btree_split_insert_keys(struct btree_update *as, struct btree *b,
 }
 
 static void btree_split(struct btree_update *as, struct btree *b,
-			struct btree_iter *iter, struct keylist *keys)
+			struct btree_iter *iter, struct keylist *keys,
+			unsigned flags)
 {
 	struct bch_fs *c = as->c;
 	struct btree *parent = btree_node_parent(iter, b);
@@ -1425,7 +1419,7 @@ static void btree_split(struct btree_update *as, struct btree *b,
 
 	if (parent) {
 		/* Split a non root node */
-		bch2_btree_insert_node(as, parent, iter, &as->parent_keys);
+		bch2_btree_insert_node(as, parent, iter, &as->parent_keys, flags);
 	} else if (n3) {
 		bch2_btree_set_root(as, n3, iter);
 	} else {
@@ -1491,9 +1485,8 @@ bch2_btree_insert_keys_interior(struct btree_update *as, struct btree *b,
 
 	btree_update_updated_node(as, b);
 
-	for_each_linked_btree_node(iter, b, linked)
+	for_each_btree_iter_with_node(iter, b, linked)
 		bch2_btree_node_iter_peek(&linked->l[b->level].iter, b);
-	bch2_btree_node_iter_peek(&iter->l[b->level].iter, b);
 
 	bch2_btree_iter_verify(iter, b);
 }
@@ -1511,7 +1504,8 @@ bch2_btree_insert_keys_interior(struct btree_update *as, struct btree *b,
  * for leaf nodes -- inserts into interior nodes have to be atomic.
  */
 void bch2_btree_insert_node(struct btree_update *as, struct btree *b,
-			    struct btree_iter *iter, struct keylist *keys)
+			    struct btree_iter *iter, struct keylist *keys,
+			    unsigned flags)
 {
 	struct bch_fs *c = as->c;
 	int old_u64s = le16_to_cpu(btree_bset_last(b)->u64s);
@@ -1551,14 +1545,14 @@ void bch2_btree_insert_node(struct btree_update *as, struct btree *b,
 
 	btree_node_interior_verify(b);
 
-	bch2_foreground_maybe_merge(c, iter, b->level);
+	bch2_foreground_maybe_merge(c, iter, b->level, flags);
 	return;
 split:
-	btree_split(as, b, iter, keys);
+	btree_split(as, b, iter, keys, flags);
 }
 
 int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter,
-			  unsigned btree_reserve_flags)
+			  unsigned flags)
 {
 	struct btree *b = iter->l[0].b;
 	struct btree_update *as;
@@ -1570,16 +1564,17 @@ int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter,
 	 * We already have a disk reservation and open buckets pinned; this
 	 * allocation must not block:
 	 */
-	for_each_linked_btree_iter(iter, linked)
+	for_each_btree_iter(iter, linked)
 		if (linked->btree_id == BTREE_ID_EXTENTS)
-			btree_reserve_flags |= BTREE_INSERT_USE_RESERVE;
-	if (iter->btree_id == BTREE_ID_EXTENTS)
-		btree_reserve_flags |= BTREE_INSERT_USE_RESERVE;
+			flags |= BTREE_INSERT_USE_RESERVE;
 
 	closure_init_stack(&cl);
 
 	/* Hack, because gc and splitting nodes doesn't mix yet: */
 	if (!down_read_trylock(&c->gc_lock)) {
+		if (flags & BTREE_INSERT_NOUNLOCK)
+			return -EINTR;
+
 		bch2_btree_iter_unlock(iter);
 		down_read(&c->gc_lock);
 
@@ -1591,39 +1586,43 @@
 	 * XXX: figure out how far we might need to split,
 	 * instead of locking/reserving all the way to the root:
 	 */
-	if (!bch2_btree_iter_set_locks_want(iter, U8_MAX)) {
+	if (!bch2_btree_iter_upgrade(iter, U8_MAX)) {
 		ret = -EINTR;
 		goto out;
 	}
 
 	as = bch2_btree_update_start(c, iter->btree_id,
-		btree_update_reserve_required(c, b),
-		btree_reserve_flags, &cl);
+		btree_update_reserve_required(c, b), flags,
+		!(flags & BTREE_INSERT_NOUNLOCK) ? &cl : NULL);
 	if (IS_ERR(as)) {
 		ret = PTR_ERR(as);
 		if (ret == -EAGAIN) {
+			BUG_ON(flags & BTREE_INSERT_NOUNLOCK);
 			bch2_btree_iter_unlock(iter);
-			up_read(&c->gc_lock);
-			closure_sync(&cl);
-			return -EINTR;
+			ret = -EINTR;
 		}
 		goto out;
 	}
 
-	btree_split(as, b, iter, NULL);
+	btree_split(as, b, iter, NULL, flags);
 	bch2_btree_update_done(as);
 
-	bch2_btree_iter_set_locks_want(iter, 1);
+	/*
+	 * We haven't successfully inserted yet, so don't downgrade all the way
+	 * back to read locks;
+	 */
+	__bch2_btree_iter_downgrade(iter, 1);
 out:
 	up_read(&c->gc_lock);
 	closure_sync(&cl);
 	return ret;
 }
 
-int __bch2_foreground_maybe_merge(struct bch_fs *c,
-				  struct btree_iter *iter,
-				  unsigned level,
-				  enum btree_node_sibling sib)
+void __bch2_foreground_maybe_merge(struct bch_fs *c,
+				   struct btree_iter *iter,
+				   unsigned level,
+				   unsigned flags,
+				   enum btree_node_sibling sib)
 {
 	struct btree_update *as;
 	struct bkey_format_state new_s;
@@ -1636,29 +1635,29 @@ int __bch2_foreground_maybe_merge(struct bch_fs *c,
 
 	closure_init_stack(&cl);
 retry:
-	if (!bch2_btree_node_relock(iter, level))
-		return 0;
+	BUG_ON(!btree_node_locked(iter, level));
 
 	b = iter->l[level].b;
 
 	parent = btree_node_parent(iter, b);
 	if (!parent)
-		return 0;
+		goto out;
 
 	if (b->sib_u64s[sib] > BTREE_FOREGROUND_MERGE_THRESHOLD(c))
-		return 0;
+		goto out;
 
 	/* XXX: can't be holding read locks */
-	m = bch2_btree_node_get_sibling(c, iter, b, sib);
+	m = bch2_btree_node_get_sibling(c, iter, b,
+			!(flags & BTREE_INSERT_NOUNLOCK), sib);
 	if (IS_ERR(m)) {
 		ret = PTR_ERR(m);
-		goto out;
+		goto err;
 	}
 
 	/* NULL means no sibling: */
 	if (!m) {
 		b->sib_u64s[sib] = U16_MAX;
-		return 0;
+		goto out;
 	}
 
 	if (sib == btree_prev_sib) {
@@ -1688,33 +1687,26 @@ retry:
 
 	if (b->sib_u64s[sib] > BTREE_FOREGROUND_MERGE_THRESHOLD(c)) {
 		six_unlock_intent(&m->lock);
-		return 0;
+		goto out;
 	}
 
 	/* We're changing btree topology, doesn't mix with gc: */
-	if (!down_read_trylock(&c->gc_lock)) {
-		six_unlock_intent(&m->lock);
-		bch2_btree_iter_unlock(iter);
+	if (!down_read_trylock(&c->gc_lock))
+		goto err_cycle_gc_lock;
 
-		down_read(&c->gc_lock);
-		up_read(&c->gc_lock);
+	if (!bch2_btree_iter_upgrade(iter, U8_MAX)) {
 		ret = -EINTR;
-		goto out;
-	}
-
-	if (!bch2_btree_iter_set_locks_want(iter, U8_MAX)) {
-		ret = -EINTR;
-		goto out_unlock;
+		goto err_unlock;
 	}
 
 	as = bch2_btree_update_start(c, iter->btree_id,
 			btree_update_reserve_required(c, parent) + 1,
 			BTREE_INSERT_NOFAIL|
 			BTREE_INSERT_USE_RESERVE,
-			&cl);
+			!(flags & BTREE_INSERT_NOUNLOCK) ? &cl : NULL);
 	if (IS_ERR(as)) {
 		ret = PTR_ERR(as);
-		goto out_unlock;
+		goto err_unlock;
 	}
 
 	trace_btree_merge(c, b);
@@ -1744,7 +1736,7 @@ retry:
 
 	bch2_btree_node_write(c, n, SIX_LOCK_intent);
 
-	bch2_btree_insert_node(as, parent, iter, &as->parent_keys);
+	bch2_btree_insert_node(as, parent, iter, &as->parent_keys, flags);
 
 	bch2_btree_open_bucket_put(c, n);
 	bch2_btree_node_free_inmem(c, b, iter);
@@ -1754,26 +1746,53 @@ retry:
 	bch2_btree_iter_verify(iter, n);
 
 	bch2_btree_update_done(as);
-out_unlock:
-	if (ret != -EINTR && ret != -EAGAIN)
-		bch2_btree_iter_set_locks_want(iter, 1);
+
 	six_unlock_intent(&m->lock);
 	up_read(&c->gc_lock);
out:
-	if (ret == -EAGAIN || ret == -EINTR) {
-		bch2_btree_iter_unlock(iter);
-		ret = -EINTR;
-	}
-
+	/*
+	 * Don't downgrade locks here: we're called after successful insert,
+	 * and the caller will downgrade locks after a successful insert
+	 * anyways (in case e.g. a split was required first)
+	 *
+	 * And we're also called when inserting into interior nodes in the
+	 * split path, and downgrading to read locks in there is potentially
+	 * confusing:
+	 */
 	closure_sync(&cl);
+	return;
+
+err_cycle_gc_lock:
+	six_unlock_intent(&m->lock);
+
+	if (flags & BTREE_INSERT_NOUNLOCK)
+		goto out;
+
+	bch2_btree_iter_unlock(iter);
+
+	down_read(&c->gc_lock);
+	up_read(&c->gc_lock);
+	ret = -EINTR;
+	goto err;
+
+err_unlock:
+	six_unlock_intent(&m->lock);
+	up_read(&c->gc_lock);
+err:
+	BUG_ON(ret == -EAGAIN && (flags & BTREE_INSERT_NOUNLOCK));
 
-	if (ret == -EINTR) {
+	if ((ret == -EAGAIN || ret == -EINTR) &&
+	    !(flags & BTREE_INSERT_NOUNLOCK)) {
+		bch2_btree_iter_unlock(iter);
+		closure_sync(&cl);
 		ret = bch2_btree_iter_traverse(iter);
-		if (!ret)
-			goto retry;
+		if (ret)
+			goto out;
+
+		goto retry;
 	}
 
-	return ret;
+	goto out;
 }
 
 static int __btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
@@ -1806,7 +1825,7 @@ static int __btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
 
 	if (parent) {
 		bch2_keylist_add(&as->parent_keys, &n->key);
-		bch2_btree_insert_node(as, parent, iter, &as->parent_keys);
+		bch2_btree_insert_node(as, parent, iter, &as->parent_keys, flags);
 	} else {
 		bch2_btree_set_root(as, n, iter);
 	}
@@ -1815,7 +1834,7 @@ static int __btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
 
 	bch2_btree_node_free_inmem(c, b, iter);
 
-	BUG_ON(!bch2_btree_iter_node_replace(iter, n));
+	bch2_btree_iter_node_replace(iter, n);
 
 	bch2_btree_update_done(as);
 	return 0;
@@ -1830,7 +1849,6 @@ static int __btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
 int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
 			    __le64 seq, unsigned flags)
 {
-	unsigned locks_want = iter->locks_want;
 	struct closure cl;
 	struct btree *b;
 	int ret;
@@ -1839,7 +1857,7 @@ int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
 
 	closure_init_stack(&cl);
 
-	bch2_btree_iter_set_locks_want(iter, U8_MAX);
+	bch2_btree_iter_upgrade(iter, U8_MAX);
 
 	if (!(flags & BTREE_INSERT_GC_LOCK_HELD)) {
 		if (!down_read_trylock(&c->gc_lock)) {
@@ -1866,7 +1884,7 @@ int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
 		closure_sync(&cl);
 	}
 
-	bch2_btree_iter_set_locks_want(iter, locks_want);
+	bch2_btree_iter_downgrade(iter);
 
 	if (!(flags & BTREE_INSERT_GC_LOCK_HELD))
 		up_read(&c->gc_lock);
@@ -1920,7 +1938,7 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
 	}
 
 	bch2_keylist_add(&as->parent_keys, &new_key->k_i);
-	bch2_btree_insert_node(as, parent, iter, &as->parent_keys);
+	bch2_btree_insert_node(as, parent, iter, &as->parent_keys, 0);
 
 	if (new_hash) {
 		mutex_lock(&c->btree_cache.lock);
@@ -1982,6 +2000,9 @@ int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter,
 
 	closure_init_stack(&cl);
 
+	if (!bch2_btree_iter_upgrade(iter, U8_MAX))
+		return -EINTR;
+
 	if (!down_read_trylock(&c->gc_lock)) {
 		bch2_btree_iter_unlock(iter);
 		down_read(&c->gc_lock);
@@ -2041,6 +2062,8 @@ int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter,
 		goto err_free_update;
 
 	__bch2_btree_node_update_key(c, as, iter, b, new_hash, new_key);
+
+	bch2_btree_iter_downgrade(iter);
 err:
 	if (new_hash) {
 		mutex_lock(&c->btree_cache.lock);
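
Note on the free-path hunks above: __btree_node_free() no longer takes and drops the node's six write lock itself, so every caller now brackets the call explicitly. A minimal sketch of the convention, assembled only from identifiers appearing in this diff (an illustration, not an additional hunk):

	/* paths with no iterator take the write lock directly: */
	btree_node_lock_type(c, b, SIX_LOCK_write);
	__btree_node_free(c, b);
	six_unlock_write(&b->lock);

	/*
	 * bch2_btree_node_free_inmem() instead takes the write lock via
	 * __bch2_btree_node_lock_write(b, iter), which accounts for the
	 * intent locks the iterator already holds on the node.
	 */

Moving the unlock decision out to the callers is also why the old comment about calling six_unlock_write() directly instead of bch2_btree_node_unlock_write() could be deleted.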
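The unsigned flags argument threaded through btree_split(), bch2_btree_insert_node() and the merge path exists chiefly to carry BTREE_INSERT_NOUNLOCK: any step that would have to drop the iterator's locks must now bail out with -EINTR instead when the flag is set. A sketch of the guard pattern as it appears in bch2_btree_split_leaf() (all names below are from the diff):

	/* Hack, because gc and splitting nodes doesn't mix yet: */
	if (!down_read_trylock(&c->gc_lock)) {
		if (flags & BTREE_INSERT_NOUNLOCK)
			return -EINTR;	/* caller can't tolerate unlocking */

		bch2_btree_iter_unlock(iter);
		down_read(&c->gc_lock);
	}

The same flag decides whether bch2_btree_update_start() is given a closure to wait on: !(flags & BTREE_INSERT_NOUNLOCK) ? &cl : NULL. Presumably a NULL closure means the update cannot block and so cannot return -EAGAIN, which is what BUG_ON(flags & BTREE_INSERT_NOUNLOCK) in the -EAGAIN branch asserts.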
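Finally, bch2_btree_iter_set_locks_want() is replaced throughout by an upgrade/downgrade pair. Judging only from the call sites in this diff (the helpers themselves are defined elsewhere), the usage shape is:

	/* take intent locks all the way to the root before a topology change: */
	if (!bch2_btree_iter_upgrade(iter, U8_MAX)) {
		ret = -EINTR;	/* couldn't upgrade without dropping locks */
		goto out;
	}

	/* ... split / merge / rewrite ... */

	/* after a successful insert, drop back to what the iterator wants: */
	bch2_btree_iter_downgrade(iter);

	/* mid-operation (nothing inserted yet), only drop to intent locks: */
	__bch2_btree_iter_downgrade(iter, 1);

This removes the need for callers such as bch2_btree_node_rewrite() to save and restore iter->locks_want by hand.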