diff options
Diffstat (limited to 'libbcachefs/btree_update_interior.c')
-rw-r--r-- | libbcachefs/btree_update_interior.c | 202 |
1 files changed, 136 insertions, 66 deletions
diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c index f6f2517d..75b70187 100644 --- a/libbcachefs/btree_update_interior.c +++ b/libbcachefs/btree_update_interior.c @@ -586,12 +586,12 @@ static void __bch2_btree_update_free(struct btree_update *as) bch2_journal_pin_drop(&c->journal, &as->journal); bch2_journal_pin_flush(&c->journal, &as->journal); - BUG_ON((as->nr_new_nodes || as->nr_pending) && - !bch2_journal_error(&c->journal));; + BUG_ON(as->nr_new_nodes || as->nr_pending); if (as->reserve) bch2_btree_reserve_put(c, as->reserve); + list_del(&as->unwritten_list); list_del(&as->list); closure_debug_destroy(&as->cl); @@ -609,37 +609,28 @@ static void bch2_btree_update_free(struct btree_update *as) mutex_unlock(&c->btree_interior_update_lock); } -static void btree_update_nodes_reachable(struct btree_update *as, u64 seq) +static inline bool six_trylock_intentwrite(struct six_lock *lock) { - struct bch_fs *c = as->c; - - while (as->nr_new_nodes) { - struct btree *b = as->new_nodes[--as->nr_new_nodes]; + if (!six_trylock_intent(lock)) + return false; - BUG_ON(b->will_make_reachable != (unsigned long) as); - b->will_make_reachable = 0; - - /* - * b->will_make_reachable prevented it from being written, so - * write it now if it needs to be written: - */ - btree_node_lock_type(c, b, SIX_LOCK_read); - bch2_btree_node_write_cond(c, b, btree_node_need_write(b)); - six_unlock_read(&b->lock); + if (!six_trylock_write(lock)) { + six_unlock_intent(lock); + return false; } - while (as->nr_pending) - bch2_btree_node_free_ondisk(c, &as->pending[--as->nr_pending], - seq); + return true; } static void btree_update_nodes_written(struct closure *cl) { struct btree_update *as = container_of(cl, struct btree_update, cl); + struct btree *nodes_need_write[BTREE_MAX_DEPTH * 2 + GC_MERGE_NODES + 1]; + unsigned nr_nodes_need_write; struct journal_res res = { 0 }; struct bch_fs *c = as->c; + struct btree_root *r; struct btree *b; - struct bset *i; int ret; /* @@ -650,6 +641,7 @@ static void btree_update_nodes_written(struct closure *cl) mutex_lock(&c->btree_interior_update_lock); as->nodes_written = true; again: + nr_nodes_need_write = 0; as = list_first_entry_or_null(&c->btree_interior_updates_unwritten, struct btree_update, unwritten_list); if (!as || !as->nodes_written) { @@ -658,31 +650,57 @@ again: } b = as->b; - if (b && !six_trylock_intent(&b->lock)) { + if (b && !six_trylock_intentwrite(&b->lock)) { mutex_unlock(&c->btree_interior_update_lock); + btree_node_lock_type(c, b, SIX_LOCK_intent); + six_lock_write(&b->lock); + + six_unlock_write(&b->lock); six_unlock_intent(&b->lock); + mutex_lock(&c->btree_interior_update_lock); goto again; } - list_del(&as->unwritten_list); - ret = bch2_journal_res_get(&c->journal, &res, as->journal_u64s, + JOURNAL_RES_GET_NONBLOCK| JOURNAL_RES_GET_RESERVED); - if (ret) { - BUG_ON(!bch2_journal_error(&c->journal)); - /* can't unblock btree writes */ - goto free_update; + if (ret == -EAGAIN) { + unsigned u64s = as->journal_u64s; + + if (b) { + six_unlock_write(&b->lock); + six_unlock_intent(&b->lock); + } + + mutex_unlock(&c->btree_interior_update_lock); + + ret = bch2_journal_res_get(&c->journal, &res, u64s, + JOURNAL_RES_GET_CHECK| + JOURNAL_RES_GET_RESERVED); + if (!ret) { + mutex_lock(&c->btree_interior_update_lock); + goto again; + } } - { + if (!ret) { struct journal_buf *buf = &c->journal.buf[res.idx]; struct jset_entry *entry = vstruct_idx(buf->data, res.offset); res.offset += as->journal_u64s; res.u64s -= as->journal_u64s; memcpy_u64s(entry, as->journal_entries, as->journal_u64s); + } else { + /* + * On journal error we have to run most of the normal path so + * that shutdown works - unblocking btree node writes in + * particular and writing them if needed - except for + * journalling the update: + */ + + BUG_ON(!bch2_journal_error(&c->journal)); } switch (as->mode) { @@ -690,26 +708,41 @@ again: BUG(); case BTREE_INTERIOR_UPDATING_NODE: /* @b is the node we did the final insert into: */ - BUG_ON(!res.ref); - six_lock_write(&b->lock); + /* + * On failure to get a journal reservation, we still have to + * unblock the write and allow most of the write path to happen + * so that shutdown works, but the i->journal_seq mechanism + * won't work to prevent the btree write from being visible (we + * didn't get a journal sequence number) - instead + * __bch2_btree_node_write() doesn't do the actual write if + * we're in journal error state: + */ + list_del(&as->write_blocked_list); - i = btree_bset_last(b); - i->journal_seq = cpu_to_le64( - max(res.seq, - le64_to_cpu(i->journal_seq))); + if (!ret) { + struct bset *i = btree_bset_last(b); + + i->journal_seq = cpu_to_le64( + max(res.seq, + le64_to_cpu(i->journal_seq))); + + bch2_btree_add_journal_pin(c, b, res.seq); + } + + nodes_need_write[nr_nodes_need_write++] = b; - bch2_btree_add_journal_pin(c, b, res.seq); six_unlock_write(&b->lock); + six_unlock_intent(&b->lock); break; case BTREE_INTERIOR_UPDATING_AS: BUG_ON(b); break; - case BTREE_INTERIOR_UPDATING_ROOT: { - struct btree_root *r = &c->btree_roots[as->btree_id]; + case BTREE_INTERIOR_UPDATING_ROOT: + r = &c->btree_roots[as->btree_id]; BUG_ON(b); @@ -721,25 +754,24 @@ again: mutex_unlock(&c->btree_root_lock); break; } - } bch2_journal_pin_drop(&c->journal, &as->journal); bch2_journal_res_put(&c->journal, &res); bch2_journal_preres_put(&c->journal, &as->journal_preres); -free_update: - /* Do btree write after dropping journal res: */ - if (b) { - /* - * b->write_blocked prevented it from being written, so - * write it now if it needs to be written: - */ - btree_node_write_if_need(c, b, SIX_LOCK_intent); - six_unlock_intent(&b->lock); + + while (as->nr_new_nodes) { + b = as->new_nodes[--as->nr_new_nodes]; + + BUG_ON(b->will_make_reachable != (unsigned long) as); + b->will_make_reachable = 0; + + nodes_need_write[nr_nodes_need_write++] = b; } - if (!ret) - btree_update_nodes_reachable(as, res.seq); + while (as->nr_pending) + bch2_btree_node_free_ondisk(c, + &as->pending[--as->nr_pending], res.seq); __bch2_btree_update_free(as); /* @@ -747,6 +779,22 @@ free_update: * nodes to be writeable: */ closure_wake_up(&c->btree_interior_update_wait); + + /* + * Can't take btree node locks while holding btree_interior_update_lock: + * */ + mutex_unlock(&c->btree_interior_update_lock); + + /* Do btree writes after dropping journal res/locks: */ + while (nr_nodes_need_write) { + b = nodes_need_write[--nr_nodes_need_write]; + + btree_node_lock_type(c, b, SIX_LOCK_read); + bch2_btree_node_write_cond(c, b, btree_node_need_write(b)); + six_unlock_read(&b->lock); + } + + mutex_lock(&c->btree_interior_update_lock); goto again; } @@ -949,17 +997,41 @@ void bch2_btree_update_done(struct btree_update *as) } struct btree_update * -bch2_btree_update_start(struct bch_fs *c, enum btree_id id, +bch2_btree_update_start(struct btree_trans *trans, enum btree_id id, unsigned nr_nodes, unsigned flags, struct closure *cl) { + struct bch_fs *c = trans->c; + struct journal_preres journal_preres = { 0 }; struct btree_reserve *reserve; struct btree_update *as; int ret; + ret = bch2_journal_preres_get(&c->journal, &journal_preres, + BTREE_UPDATE_JOURNAL_RES, + JOURNAL_RES_GET_NONBLOCK); + if (ret == -EAGAIN) { + if (flags & BTREE_INSERT_NOUNLOCK) + return ERR_PTR(-EINTR); + + bch2_trans_unlock(trans); + + ret = bch2_journal_preres_get(&c->journal, &journal_preres, + BTREE_UPDATE_JOURNAL_RES, 0); + if (ret) + return ERR_PTR(ret); + + if (!bch2_trans_relock(trans)) { + bch2_journal_preres_put(&c->journal, &journal_preres); + return ERR_PTR(-EINTR); + } + } + reserve = bch2_btree_reserve_get(c, nr_nodes, flags, cl); - if (IS_ERR(reserve)) + if (IS_ERR(reserve)) { + bch2_journal_preres_put(&c->journal, &journal_preres); return ERR_CAST(reserve); + } as = mempool_alloc(&c->btree_interior_update_pool, GFP_NOIO); memset(as, 0, sizeof(*as)); @@ -969,18 +1041,11 @@ bch2_btree_update_start(struct bch_fs *c, enum btree_id id, as->btree_id = id; as->reserve = reserve; INIT_LIST_HEAD(&as->write_blocked_list); + INIT_LIST_HEAD(&as->unwritten_list); + as->journal_preres = journal_preres; bch2_keylist_init(&as->parent_keys, as->inline_keys); - ret = bch2_journal_preres_get(&c->journal, &as->journal_preres, - ARRAY_SIZE(as->journal_entries), 0); - if (ret) { - bch2_btree_reserve_put(c, reserve); - closure_debug_destroy(&as->cl); - mempool_free(as, &c->btree_interior_update_pool); - return ERR_PTR(ret); - } - mutex_lock(&c->btree_interior_update_lock); list_add_tail(&as->list, &c->btree_interior_update_list); mutex_unlock(&c->btree_interior_update_lock); @@ -1531,8 +1596,10 @@ int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter, /* Hack, because gc and splitting nodes doesn't mix yet: */ if (!(flags & BTREE_INSERT_GC_LOCK_HELD) && !down_read_trylock(&c->gc_lock)) { - if (flags & BTREE_INSERT_NOUNLOCK) + if (flags & BTREE_INSERT_NOUNLOCK) { + trace_transaction_restart_ip(trans->ip, _THIS_IP_); return -EINTR; + } bch2_trans_unlock(trans); down_read(&c->gc_lock); @@ -1551,7 +1618,7 @@ int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter, goto out; } - as = bch2_btree_update_start(c, iter->btree_id, + as = bch2_btree_update_start(trans, iter->btree_id, btree_update_reserve_required(c, b), flags, !(flags & BTREE_INSERT_NOUNLOCK) ? &cl : NULL); if (IS_ERR(as)) { @@ -1560,6 +1627,8 @@ int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter, BUG_ON(flags & BTREE_INSERT_NOUNLOCK); bch2_trans_unlock(trans); ret = -EINTR; + + trace_transaction_restart_ip(trans->ip, _THIS_IP_); } goto out; } @@ -1663,8 +1732,9 @@ retry: goto err_unlock; } - as = bch2_btree_update_start(c, iter->btree_id, + as = bch2_btree_update_start(trans, iter->btree_id, btree_update_reserve_required(c, parent) + 1, + flags| BTREE_INSERT_NOFAIL| BTREE_INSERT_USE_RESERVE, !(flags & BTREE_INSERT_NOUNLOCK) ? &cl : NULL); @@ -1776,7 +1846,7 @@ static int __btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter, struct btree *n, *parent = btree_node_parent(iter, b); struct btree_update *as; - as = bch2_btree_update_start(c, iter->btree_id, + as = bch2_btree_update_start(iter->trans, iter->btree_id, (parent ? btree_update_reserve_required(c, parent) : 0) + 1, @@ -2043,7 +2113,7 @@ int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter, new_hash = bch2_btree_node_mem_alloc(c); } - as = bch2_btree_update_start(c, iter->btree_id, + as = bch2_btree_update_start(iter->trans, iter->btree_id, parent ? btree_update_reserve_required(c, parent) : 0, BTREE_INSERT_NOFAIL| BTREE_INSERT_USE_RESERVE| |