diff options
Diffstat (limited to 'libbcachefs/btree_update_interior.c')
-rw-r--r-- | libbcachefs/btree_update_interior.c | 415 |
1 files changed, 161 insertions, 254 deletions
diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c index 19dfc32e..00144707 100644 --- a/libbcachefs/btree_update_interior.c +++ b/libbcachefs/btree_update_interior.c @@ -437,10 +437,6 @@ static int bch2_btree_reserve_get(struct btree_update *as, unsigned nr_nodes, goto err_free; } - ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(&b->key)); - if (ret) - goto err_free; - as->prealloc_nodes[as->nr_prealloc_nodes++] = b; } @@ -458,6 +454,10 @@ static void bch2_btree_update_free(struct btree_update *as) { struct bch_fs *c = as->c; + if (as->took_gc_lock) + up_read(&c->gc_lock); + as->took_gc_lock = false; + bch2_journal_preres_put(&c->journal, &as->journal_preres); bch2_journal_pin_drop(&c->journal, &as->journal); @@ -893,24 +893,33 @@ void bch2_btree_update_done(struct btree_update *as) { BUG_ON(as->mode == BTREE_INTERIOR_NO_UPDATE); + if (as->took_gc_lock) + up_read(&as->c->gc_lock); + as->took_gc_lock = false; + bch2_btree_reserve_put(as); continue_at(&as->cl, btree_update_set_nodes_written, system_freezable_wq); } struct btree_update * -bch2_btree_update_start(struct btree_trans *trans, enum btree_id id, - unsigned nr_nodes, unsigned flags, - struct closure *cl) +bch2_btree_update_start(struct btree_iter *iter, unsigned level, + unsigned nr_nodes, unsigned flags) { + struct btree_trans *trans = iter->trans; struct bch_fs *c = trans->c; struct btree_update *as; + struct closure cl; int disk_res_flags = (flags & BTREE_INSERT_NOFAIL) ? BCH_DISK_RESERVATION_NOFAIL : 0; - int journal_flags = (flags & BTREE_INSERT_JOURNAL_RESERVED) - ? JOURNAL_RES_GET_RECLAIM : 0; + int journal_flags = 0; int ret = 0; + if (flags & BTREE_INSERT_JOURNAL_RESERVED) + journal_flags |= JOURNAL_RES_GET_RESERVED; + + closure_init_stack(&cl); +retry: /* * This check isn't necessary for correctness - it's just to potentially * prevent us from doing a lot of work that'll end up being wasted: @@ -919,12 +928,36 @@ bch2_btree_update_start(struct btree_trans *trans, enum btree_id id, if (ret) return ERR_PTR(ret); + /* + * XXX: figure out how far we might need to split, + * instead of locking/reserving all the way to the root: + */ + if (!bch2_btree_iter_upgrade(iter, U8_MAX)) { + trace_trans_restart_iter_upgrade(trans->ip); + return ERR_PTR(-EINTR); + } + + if (flags & BTREE_INSERT_GC_LOCK_HELD) + lockdep_assert_held(&c->gc_lock); + else if (!down_read_trylock(&c->gc_lock)) { + if (flags & BTREE_INSERT_NOUNLOCK) + return ERR_PTR(-EINTR); + + bch2_trans_unlock(trans); + down_read(&c->gc_lock); + if (!bch2_trans_relock(trans)) { + up_read(&c->gc_lock); + return ERR_PTR(-EINTR); + } + } + as = mempool_alloc(&c->btree_interior_update_pool, GFP_NOIO); memset(as, 0, sizeof(*as)); closure_init(&as->cl, NULL); as->c = c; as->mode = BTREE_INTERIOR_NO_UPDATE; - as->btree_id = id; + as->took_gc_lock = !(flags & BTREE_INSERT_GC_LOCK_HELD); + as->btree_id = iter->btree_id; INIT_LIST_HEAD(&as->list); INIT_LIST_HEAD(&as->unwritten_list); INIT_LIST_HEAD(&as->write_blocked_list); @@ -936,16 +969,25 @@ bch2_btree_update_start(struct btree_trans *trans, enum btree_id id, BTREE_UPDATE_JOURNAL_RES, journal_flags|JOURNAL_RES_GET_NONBLOCK); if (ret == -EAGAIN) { - if (flags & BTREE_INSERT_NOUNLOCK) - return ERR_PTR(-EINTR); + /* + * this would be cleaner if bch2_journal_preres_get() took a + * closure argument + */ + if (flags & BTREE_INSERT_NOUNLOCK) { + ret = -EINTR; + goto err; + } bch2_trans_unlock(trans); + if (flags & BTREE_INSERT_JOURNAL_RECLAIM) + goto err; + ret = bch2_journal_preres_get(&c->journal, &as->journal_preres, BTREE_UPDATE_JOURNAL_RES, journal_flags); if (ret) - return ERR_PTR(ret); + goto err; if (!bch2_trans_relock(trans)) { ret = -EINTR; @@ -960,7 +1002,8 @@ bch2_btree_update_start(struct btree_trans *trans, enum btree_id id, if (ret) goto err; - ret = bch2_btree_reserve_get(as, nr_nodes, flags, cl); + ret = bch2_btree_reserve_get(as, nr_nodes, flags, + !(flags & BTREE_INSERT_NOUNLOCK) ? &cl : NULL); if (ret) goto err; @@ -975,6 +1018,18 @@ bch2_btree_update_start(struct btree_trans *trans, enum btree_id id, return as; err: bch2_btree_update_free(as); + + if (ret == -EAGAIN) { + BUG_ON(flags & BTREE_INSERT_NOUNLOCK); + + bch2_trans_unlock(trans); + closure_sync(&cl); + ret = -EINTR; + } + + if (ret == -EINTR && bch2_trans_relock(trans)) + goto retry; + return ERR_PTR(ret); } @@ -1419,6 +1474,7 @@ void bch2_btree_insert_node(struct btree_update *as, struct btree *b, int old_live_u64s = b->nr.live_u64s; int live_u64s_added, u64s_added; + lockdep_assert_held(&c->gc_lock); BUG_ON(!btree_node_intent_locked(iter, btree_node_root(c, b)->c.level)); BUG_ON(!b->c.level); BUG_ON(!as || as->b); @@ -1450,14 +1506,6 @@ void bch2_btree_insert_node(struct btree_update *as, struct btree *b, bch2_btree_node_unlock_write(b, iter); btree_node_interior_verify(c, b); - - /* - * when called from the btree_split path the new nodes aren't added to - * the btree iterator yet, so the merge path's unlock/wait/relock dance - * won't work: - */ - bch2_foreground_maybe_merge(c, iter, b->c.level, - flags|BTREE_INSERT_NOUNLOCK); return; split: btree_split(as, b, iter, keys, flags); @@ -1466,109 +1514,73 @@ split: int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter, unsigned flags) { - struct btree_trans *trans = iter->trans; struct btree *b = iter_l(iter)->b; struct btree_update *as; - struct closure cl; + unsigned l; int ret = 0; - closure_init_stack(&cl); - - /* Hack, because gc and splitting nodes doesn't mix yet: */ - if (!(flags & BTREE_INSERT_GC_LOCK_HELD) && - !down_read_trylock(&c->gc_lock)) { - if (flags & BTREE_INSERT_NOUNLOCK) { - trace_transaction_restart_ip(trans->ip, _THIS_IP_); - return -EINTR; - } - - bch2_trans_unlock(trans); - down_read(&c->gc_lock); - - if (!bch2_trans_relock(trans)) - ret = -EINTR; - } - - /* - * XXX: figure out how far we might need to split, - * instead of locking/reserving all the way to the root: - */ - if (!bch2_btree_iter_upgrade(iter, U8_MAX)) { - trace_trans_restart_iter_upgrade(trans->ip); - ret = -EINTR; - goto out; - } - - as = bch2_btree_update_start(trans, iter->btree_id, - btree_update_reserve_required(c, b), flags, - !(flags & BTREE_INSERT_NOUNLOCK) ? &cl : NULL); - if (IS_ERR(as)) { - ret = PTR_ERR(as); - if (ret == -EAGAIN) { - BUG_ON(flags & BTREE_INSERT_NOUNLOCK); - bch2_trans_unlock(trans); - ret = -EINTR; - - trace_transaction_restart_ip(trans->ip, _THIS_IP_); - } - goto out; - } + as = bch2_btree_update_start(iter, iter->level, + btree_update_reserve_required(c, b), flags); + if (IS_ERR(as)) + return PTR_ERR(as); btree_split(as, b, iter, NULL, flags); bch2_btree_update_done(as); - /* - * We haven't successfully inserted yet, so don't downgrade all the way - * back to read locks; - */ - __bch2_btree_iter_downgrade(iter, 1); -out: - if (!(flags & BTREE_INSERT_GC_LOCK_HELD)) - up_read(&c->gc_lock); - closure_sync(&cl); + for (l = iter->level + 1; btree_iter_node(iter, l) && !ret; l++) + ret = bch2_foreground_maybe_merge(c, iter, l, flags); + return ret; } -void __bch2_foreground_maybe_merge(struct bch_fs *c, - struct btree_iter *iter, - unsigned level, - unsigned flags, - enum btree_node_sibling sib) +int __bch2_foreground_maybe_merge(struct bch_fs *c, + struct btree_iter *iter, + unsigned level, + unsigned flags, + enum btree_node_sibling sib) { struct btree_trans *trans = iter->trans; + struct btree_iter *sib_iter = NULL; struct btree_update *as; struct bkey_format_state new_s; struct bkey_format new_f; struct bkey_i delete; struct btree *b, *m, *n, *prev, *next, *parent; - struct closure cl; + struct bpos sib_pos; size_t sib_u64s; - int ret = 0; + int ret = 0, ret2 = 0; BUG_ON(!btree_node_locked(iter, level)); - - closure_init_stack(&cl); retry: + ret = bch2_btree_iter_traverse(iter); + if (ret) + goto err; + BUG_ON(!btree_node_locked(iter, level)); b = iter->l[level].b; - parent = btree_node_parent(iter, b); - if (!parent) + if ((sib == btree_prev_sib && !bpos_cmp(b->data->min_key, POS_MIN)) || + (sib == btree_next_sib && !bpos_cmp(b->data->max_key, POS_MAX))) { + b->sib_u64s[sib] = U16_MAX; goto out; + } - if (b->sib_u64s[sib] > BTREE_FOREGROUND_MERGE_THRESHOLD(c)) - goto out; + sib_pos = sib == btree_prev_sib + ? bpos_predecessor(b->data->min_key) + : bpos_successor(b->data->max_key); - /* XXX: can't be holding read locks */ - m = bch2_btree_node_get_sibling(c, iter, b, sib); - if (IS_ERR(m)) { - ret = PTR_ERR(m); + sib_iter = bch2_trans_get_node_iter(trans, iter->btree_id, + sib_pos, U8_MAX, level, + BTREE_ITER_INTENT); + ret = bch2_btree_iter_traverse(sib_iter); + if (ret) goto err; - } - /* NULL means no sibling: */ - if (!m) { + m = sib_iter->l[level].b; + + if (btree_node_parent(iter, b) != + btree_node_parent(sib_iter, m)) { b->sib_u64s[sib] = U16_MAX; goto out; } @@ -1581,6 +1593,8 @@ retry: next = m; } + BUG_ON(bkey_cmp(bpos_successor(prev->data->max_key), next->data->min_key)); + bch2_bkey_format_init(&new_s); bch2_bkey_format_add_pos(&new_s, prev->data->min_key); __bch2_btree_calc_format(&new_s, prev); @@ -1598,33 +1612,21 @@ retry: } sib_u64s = min(sib_u64s, btree_max_u64s(c)); + sib_u64s = min(sib_u64s, (size_t) U16_MAX - 1); b->sib_u64s[sib] = sib_u64s; - if (b->sib_u64s[sib] > BTREE_FOREGROUND_MERGE_THRESHOLD(c)) { - six_unlock_intent(&m->c.lock); + if (b->sib_u64s[sib] > c->btree_foreground_merge_threshold) goto out; - } - - /* We're changing btree topology, doesn't mix with gc: */ - if (!(flags & BTREE_INSERT_GC_LOCK_HELD) && - !down_read_trylock(&c->gc_lock)) - goto err_cycle_gc_lock; - - if (!bch2_btree_iter_upgrade(iter, U8_MAX)) { - ret = -EINTR; - goto err_unlock; - } - as = bch2_btree_update_start(trans, iter->btree_id, + parent = btree_node_parent(iter, b); + as = bch2_btree_update_start(iter, level, btree_update_reserve_required(c, parent) + 1, flags| BTREE_INSERT_NOFAIL| - BTREE_INSERT_USE_RESERVE, - !(flags & BTREE_INSERT_NOUNLOCK) ? &cl : NULL); - if (IS_ERR(as)) { - ret = PTR_ERR(as); - goto err_unlock; - } + BTREE_INSERT_USE_RESERVE); + ret = PTR_ERR_OR_ZERO(as); + if (ret) + goto err; trace_btree_merge(c, b); @@ -1658,6 +1660,7 @@ retry: bch2_btree_update_get_open_buckets(as, n); six_lock_increment(&b->c.lock, SIX_LOCK_intent); + six_lock_increment(&m->c.lock, SIX_LOCK_intent); bch2_btree_iter_node_drop(iter, b); bch2_btree_iter_node_drop(iter, m); @@ -1671,11 +1674,9 @@ retry: six_unlock_intent(&n->c.lock); bch2_btree_update_done(as); - - if (!(flags & BTREE_INSERT_GC_LOCK_HELD)) - up_read(&c->gc_lock); out: bch2_btree_trans_verify_locks(trans); + bch2_trans_iter_free(trans, sib_iter); /* * Don't downgrade locks here: we're called after successful insert, @@ -1686,58 +1687,56 @@ out: * split path, and downgrading to read locks in there is potentially * confusing: */ - closure_sync(&cl); - return; - -err_cycle_gc_lock: - six_unlock_intent(&m->c.lock); - - if (flags & BTREE_INSERT_NOUNLOCK) - goto out; - - bch2_trans_unlock(trans); - - down_read(&c->gc_lock); - up_read(&c->gc_lock); - ret = -EINTR; - goto err; - -err_unlock: - six_unlock_intent(&m->c.lock); - if (!(flags & BTREE_INSERT_GC_LOCK_HELD)) - up_read(&c->gc_lock); + return ret ?: ret2; err: - BUG_ON(ret == -EAGAIN && (flags & BTREE_INSERT_NOUNLOCK)); - - if ((ret == -EAGAIN || ret == -EINTR) && - !(flags & BTREE_INSERT_NOUNLOCK)) { - bch2_trans_unlock(trans); - closure_sync(&cl); - ret = bch2_btree_iter_traverse(iter); - if (ret) - goto out; + bch2_trans_iter_put(trans, sib_iter); + sib_iter = NULL; + if (ret == -EINTR && bch2_trans_relock(trans)) goto retry; + + if (ret == -EINTR && !(flags & BTREE_INSERT_NOUNLOCK)) { + ret2 = ret; + ret = bch2_btree_iter_traverse_all(trans); + if (!ret) + goto retry; } goto out; } -static int __btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter, - struct btree *b, unsigned flags, - struct closure *cl) +/** + * bch_btree_node_rewrite - Rewrite/move a btree node + */ +int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter, + __le64 seq, unsigned flags) { - struct btree *n, *parent = btree_node_parent(iter, b); + struct btree *b, *n, *parent; struct btree_update *as; + int ret; + + flags |= BTREE_INSERT_NOFAIL; +retry: + ret = bch2_btree_iter_traverse(iter); + if (ret) + goto out; + + b = bch2_btree_iter_peek_node(iter); + if (!b || b->data->keys.seq != seq) + goto out; - as = bch2_btree_update_start(iter->trans, iter->btree_id, + parent = btree_node_parent(iter, b); + as = bch2_btree_update_start(iter, b->c.level, (parent ? btree_update_reserve_required(c, parent) : 0) + 1, - flags, cl); - if (IS_ERR(as)) { + flags); + ret = PTR_ERR_OR_ZERO(as); + if (ret == -EINTR) + goto retry; + if (ret) { trace_btree_gc_rewrite_node_fail(c, b); - return PTR_ERR(as); + goto out; } bch2_btree_interior_update_will_free_node(as, b); @@ -1768,60 +1767,8 @@ static int __btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter, six_unlock_intent(&n->c.lock); bch2_btree_update_done(as); - return 0; -} - -/** - * bch_btree_node_rewrite - Rewrite/move a btree node - * - * Returns 0 on success, -EINTR or -EAGAIN on failure (i.e. - * btree_check_reserve() has to wait) - */ -int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter, - __le64 seq, unsigned flags) -{ - struct btree_trans *trans = iter->trans; - struct closure cl; - struct btree *b; - int ret; - - flags |= BTREE_INSERT_NOFAIL; - - closure_init_stack(&cl); - - bch2_btree_iter_upgrade(iter, U8_MAX); - - if (!(flags & BTREE_INSERT_GC_LOCK_HELD)) { - if (!down_read_trylock(&c->gc_lock)) { - bch2_trans_unlock(trans); - down_read(&c->gc_lock); - } - } - - while (1) { - ret = bch2_btree_iter_traverse(iter); - if (ret) - break; - - b = bch2_btree_iter_peek_node(iter); - if (!b || b->data->keys.seq != seq) - break; - - ret = __btree_node_rewrite(c, iter, b, flags, &cl); - if (ret != -EAGAIN && - ret != -EINTR) - break; - - bch2_trans_unlock(trans); - closure_sync(&cl); - } - +out: bch2_btree_iter_downgrade(iter); - - if (!(flags & BTREE_INSERT_GC_LOCK_HELD)) - up_read(&c->gc_lock); - - closure_sync(&cl); return ret; } @@ -1892,71 +1839,34 @@ int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter, struct btree_update *as = NULL; struct btree *new_hash = NULL; struct closure cl; - int ret; + int ret = 0; closure_init_stack(&cl); - if (!bch2_btree_iter_upgrade(iter, U8_MAX)) - return -EINTR; - - if (!down_read_trylock(&c->gc_lock)) { - bch2_trans_unlock(iter->trans); - down_read(&c->gc_lock); - - if (!bch2_trans_relock(iter->trans)) { - ret = -EINTR; - goto err; - } - } - /* * check btree_ptr_hash_val() after @b is locked by * btree_iter_traverse(): */ if (btree_ptr_hash_val(new_key) != b->hash_val) { - /* bch2_btree_reserve_get will unlock */ ret = bch2_btree_cache_cannibalize_lock(c, &cl); if (ret) { bch2_trans_unlock(iter->trans); - up_read(&c->gc_lock); closure_sync(&cl); - down_read(&c->gc_lock); - - if (!bch2_trans_relock(iter->trans)) { - ret = -EINTR; - goto err; - } + if (!bch2_trans_relock(iter->trans)) + return -EINTR; } new_hash = bch2_btree_node_mem_alloc(c); } -retry: - as = bch2_btree_update_start(iter->trans, iter->btree_id, - parent ? btree_update_reserve_required(c, parent) : 0, - BTREE_INSERT_NOFAIL, &cl); + as = bch2_btree_update_start(iter, b->c.level, + parent ? btree_update_reserve_required(c, parent) : 0, + BTREE_INSERT_NOFAIL); if (IS_ERR(as)) { ret = PTR_ERR(as); - if (ret == -EAGAIN) - ret = -EINTR; - - if (ret == -EINTR) { - bch2_trans_unlock(iter->trans); - up_read(&c->gc_lock); - closure_sync(&cl); - down_read(&c->gc_lock); - - if (bch2_trans_relock(iter->trans)) - goto retry; - } - goto err; } - ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(new_key)); - if (ret) - goto err_free_update; - __bch2_btree_node_update_key(c, as, iter, b, new_hash, new_key); bch2_btree_iter_downgrade(iter); @@ -1969,12 +1879,9 @@ err: six_unlock_write(&new_hash->c.lock); six_unlock_intent(&new_hash->c.lock); } - up_read(&c->gc_lock); closure_sync(&cl); + bch2_btree_cache_cannibalize_unlock(c); return ret; -err_free_update: - bch2_btree_update_free(as); - goto err; } /* Init code: */ |