Diffstat (limited to 'libbcachefs/btree_update_interior.c')
-rw-r--r--  libbcachefs/btree_update_interior.c  202
1 file changed, 136 insertions(+), 66 deletions(-)
diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c
index f6f2517d..75b70187 100644
--- a/libbcachefs/btree_update_interior.c
+++ b/libbcachefs/btree_update_interior.c
@@ -586,12 +586,12 @@ static void __bch2_btree_update_free(struct btree_update *as)
bch2_journal_pin_drop(&c->journal, &as->journal);
bch2_journal_pin_flush(&c->journal, &as->journal);
- BUG_ON((as->nr_new_nodes || as->nr_pending) &&
- !bch2_journal_error(&c->journal));;
+ BUG_ON(as->nr_new_nodes || as->nr_pending);
if (as->reserve)
bch2_btree_reserve_put(c, as->reserve);
+ list_del(&as->unwritten_list);
list_del(&as->list);
closure_debug_destroy(&as->cl);
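
The unconditional list_del(&as->unwritten_list) added above pairs with the INIT_LIST_HEAD(&as->unwritten_list) introduced later in bch2_btree_update_start(): an update that never made it onto btree_interior_updates_unwritten now has a self-pointing list head, so deleting it only rewrites the entry's own pointers and the free path no longer has to care whether the update reached the unwritten list. A minimal illustration of that list_head property (sketch, not part of the patch):

/*
 * A list_head that was only ever initialized points at itself, so
 * list_del() touches nothing but the entry's own prev/next pointers:
 */
LIST_HEAD(dummy);	/* dummy.next == dummy.prev == &dummy */
list_del(&dummy);	/* harmless self-unlink; the entry is then poisoned */
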
@@ -609,37 +609,28 @@ static void bch2_btree_update_free(struct btree_update *as)
mutex_unlock(&c->btree_interior_update_lock);
}
-static void btree_update_nodes_reachable(struct btree_update *as, u64 seq)
+static inline bool six_trylock_intentwrite(struct six_lock *lock)
{
- struct bch_fs *c = as->c;
-
- while (as->nr_new_nodes) {
- struct btree *b = as->new_nodes[--as->nr_new_nodes];
+ if (!six_trylock_intent(lock))
+ return false;
- BUG_ON(b->will_make_reachable != (unsigned long) as);
- b->will_make_reachable = 0;
-
- /*
- * b->will_make_reachable prevented it from being written, so
- * write it now if it needs to be written:
- */
- btree_node_lock_type(c, b, SIX_LOCK_read);
- bch2_btree_node_write_cond(c, b, btree_node_need_write(b));
- six_unlock_read(&b->lock);
+ if (!six_trylock_write(lock)) {
+ six_unlock_intent(lock);
+ return false;
}
- while (as->nr_pending)
- bch2_btree_node_free_ondisk(c, &as->pending[--as->nr_pending],
- seq);
+ return true;
}
static void btree_update_nodes_written(struct closure *cl)
{
struct btree_update *as = container_of(cl, struct btree_update, cl);
+ struct btree *nodes_need_write[BTREE_MAX_DEPTH * 2 + GC_MERGE_NODES + 1];
+ unsigned nr_nodes_need_write;
struct journal_res res = { 0 };
struct bch_fs *c = as->c;
+ struct btree_root *r;
struct btree *b;
- struct bset *i;
int ret;
/*
@@ -650,6 +641,7 @@ static void btree_update_nodes_written(struct closure *cl)
mutex_lock(&c->btree_interior_update_lock);
as->nodes_written = true;
again:
+ nr_nodes_need_write = 0;
as = list_first_entry_or_null(&c->btree_interior_updates_unwritten,
struct btree_update, unwritten_list);
if (!as || !as->nodes_written) {
@@ -658,31 +650,57 @@ again:
}
b = as->b;
- if (b && !six_trylock_intent(&b->lock)) {
+ if (b && !six_trylock_intentwrite(&b->lock)) {
mutex_unlock(&c->btree_interior_update_lock);
+
btree_node_lock_type(c, b, SIX_LOCK_intent);
+ six_lock_write(&b->lock);
+
+ six_unlock_write(&b->lock);
six_unlock_intent(&b->lock);
+
mutex_lock(&c->btree_interior_update_lock);
goto again;
}
- list_del(&as->unwritten_list);
-
ret = bch2_journal_res_get(&c->journal, &res, as->journal_u64s,
+ JOURNAL_RES_GET_NONBLOCK|
JOURNAL_RES_GET_RESERVED);
- if (ret) {
- BUG_ON(!bch2_journal_error(&c->journal));
- /* can't unblock btree writes */
- goto free_update;
+ if (ret == -EAGAIN) {
+ unsigned u64s = as->journal_u64s;
+
+ if (b) {
+ six_unlock_write(&b->lock);
+ six_unlock_intent(&b->lock);
+ }
+
+ mutex_unlock(&c->btree_interior_update_lock);
+
+ ret = bch2_journal_res_get(&c->journal, &res, u64s,
+ JOURNAL_RES_GET_CHECK|
+ JOURNAL_RES_GET_RESERVED);
+ if (!ret) {
+ mutex_lock(&c->btree_interior_update_lock);
+ goto again;
+ }
}
- {
+ if (!ret) {
struct journal_buf *buf = &c->journal.buf[res.idx];
struct jset_entry *entry = vstruct_idx(buf->data, res.offset);
res.offset += as->journal_u64s;
res.u64s -= as->journal_u64s;
memcpy_u64s(entry, as->journal_entries, as->journal_u64s);
+ } else {
+ /*
+ * On journal error we have to run most of the normal path so
+ * that shutdown works - unblocking btree node writes in
+ * particular and writing them if needed - except for
+ * journalling the update:
+ */
+
+ BUG_ON(!bch2_journal_error(&c->journal));
}
switch (as->mode) {
@@ -690,26 +708,41 @@ again:
BUG();
case BTREE_INTERIOR_UPDATING_NODE:
/* @b is the node we did the final insert into: */
- BUG_ON(!res.ref);
- six_lock_write(&b->lock);
+ /*
+ * On failure to get a journal reservation, we still have to
+ * unblock the write and allow most of the write path to happen
+ * so that shutdown works, but the i->journal_seq mechanism
+ * won't work to prevent the btree write from being visible (we
+ * didn't get a journal sequence number) - instead
+ * __bch2_btree_node_write() doesn't do the actual write if
+ * we're in journal error state:
+ */
+
list_del(&as->write_blocked_list);
- i = btree_bset_last(b);
- i->journal_seq = cpu_to_le64(
- max(res.seq,
- le64_to_cpu(i->journal_seq)));
+ if (!ret) {
+ struct bset *i = btree_bset_last(b);
+
+ i->journal_seq = cpu_to_le64(
+ max(res.seq,
+ le64_to_cpu(i->journal_seq)));
+
+ bch2_btree_add_journal_pin(c, b, res.seq);
+ }
+
+ nodes_need_write[nr_nodes_need_write++] = b;
- bch2_btree_add_journal_pin(c, b, res.seq);
six_unlock_write(&b->lock);
+ six_unlock_intent(&b->lock);
break;
case BTREE_INTERIOR_UPDATING_AS:
BUG_ON(b);
break;
- case BTREE_INTERIOR_UPDATING_ROOT: {
- struct btree_root *r = &c->btree_roots[as->btree_id];
+ case BTREE_INTERIOR_UPDATING_ROOT:
+ r = &c->btree_roots[as->btree_id];
BUG_ON(b);
@@ -721,25 +754,24 @@ again:
mutex_unlock(&c->btree_root_lock);
break;
}
- }
bch2_journal_pin_drop(&c->journal, &as->journal);
bch2_journal_res_put(&c->journal, &res);
bch2_journal_preres_put(&c->journal, &as->journal_preres);
-free_update:
- /* Do btree write after dropping journal res: */
- if (b) {
- /*
- * b->write_blocked prevented it from being written, so
- * write it now if it needs to be written:
- */
- btree_node_write_if_need(c, b, SIX_LOCK_intent);
- six_unlock_intent(&b->lock);
+
+ while (as->nr_new_nodes) {
+ b = as->new_nodes[--as->nr_new_nodes];
+
+ BUG_ON(b->will_make_reachable != (unsigned long) as);
+ b->will_make_reachable = 0;
+
+ nodes_need_write[nr_nodes_need_write++] = b;
}
- if (!ret)
- btree_update_nodes_reachable(as, res.seq);
+ while (as->nr_pending)
+ bch2_btree_node_free_ondisk(c,
+ &as->pending[--as->nr_pending], res.seq);
__bch2_btree_update_free(as);
/*
@@ -747,6 +779,22 @@ free_update:
* nodes to be writeable:
*/
closure_wake_up(&c->btree_interior_update_wait);
+
+ /*
+ * Can't take btree node locks while holding btree_interior_update_lock:
+	 */
+ mutex_unlock(&c->btree_interior_update_lock);
+
+ /* Do btree writes after dropping journal res/locks: */
+ while (nr_nodes_need_write) {
+ b = nodes_need_write[--nr_nodes_need_write];
+
+ btree_node_lock_type(c, b, SIX_LOCK_read);
+ bch2_btree_node_write_cond(c, b, btree_node_need_write(b));
+ six_unlock_read(&b->lock);
+ }
+
+ mutex_lock(&c->btree_interior_update_lock);
goto again;
}
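
The nodes_need_write[] array is the other half of the rule spelled out in the comment above ("Can't take btree node locks while holding btree_interior_update_lock"): while the mutex is held, nodes that still need a write - the write-blocked node plus the update's new nodes, which is presumably what the BTREE_MAX_DEPTH * 2 + GC_MERGE_NODES + 1 sizing accounts for - are only recorded, and the writes, which need each node's read lock, run only after the mutex has been dropped. The same defer-until-unlock shape in self-contained form (illustration only, pthreads instead of the btree locks, names invented):

#include <pthread.h>

#define MAX_DEFERRED 16

struct item { int id; };

pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

/* Needs the item's own lock, so it must not be called under list_lock: */
void write_out(struct item *it)
{
	(void) it;
}

/* Assumes nr <= MAX_DEFERRED, mirroring the fixed-size array above: */
void flush_items(struct item **items, unsigned nr)
{
	struct item *deferred[MAX_DEFERRED];
	unsigned nr_deferred = 0;

	pthread_mutex_lock(&list_lock);
	while (nr)
		deferred[nr_deferred++] = items[--nr];	/* record only, no other locks taken */
	pthread_mutex_unlock(&list_lock);

	while (nr_deferred)				/* now safe to take per-item locks */
		write_out(deferred[--nr_deferred]);
}
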
@@ -949,17 +997,41 @@ void bch2_btree_update_done(struct btree_update *as)
}
struct btree_update *
-bch2_btree_update_start(struct bch_fs *c, enum btree_id id,
+bch2_btree_update_start(struct btree_trans *trans, enum btree_id id,
unsigned nr_nodes, unsigned flags,
struct closure *cl)
{
+ struct bch_fs *c = trans->c;
+ struct journal_preres journal_preres = { 0 };
struct btree_reserve *reserve;
struct btree_update *as;
int ret;
+ ret = bch2_journal_preres_get(&c->journal, &journal_preres,
+ BTREE_UPDATE_JOURNAL_RES,
+ JOURNAL_RES_GET_NONBLOCK);
+ if (ret == -EAGAIN) {
+ if (flags & BTREE_INSERT_NOUNLOCK)
+ return ERR_PTR(-EINTR);
+
+ bch2_trans_unlock(trans);
+
+ ret = bch2_journal_preres_get(&c->journal, &journal_preres,
+ BTREE_UPDATE_JOURNAL_RES, 0);
+ if (ret)
+ return ERR_PTR(ret);
+
+ if (!bch2_trans_relock(trans)) {
+ bch2_journal_preres_put(&c->journal, &journal_preres);
+ return ERR_PTR(-EINTR);
+ }
+ }
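
bch2_btree_update_start() now takes its journal pre-reservation up front, with the same never-block-while-locked policy: try JOURNAL_RES_GET_NONBLOCK first; if the caller passed BTREE_INSERT_NOUNLOCK the transaction's locks may not be dropped, so give up with -EINTR (a transaction restart); otherwise unlock the transaction, do the blocking get, and relock, releasing the reservation if the relock fails. A condensed sketch of that policy as a hypothetical helper (not in the patch), using only the calls from this hunk and with the error handling slightly simplified:

static int btree_update_preres_get(struct btree_trans *trans,
				   struct journal_preres *preres,
				   unsigned flags)
{
	struct bch_fs *c = trans->c;
	int ret;

	ret = bch2_journal_preres_get(&c->journal, preres,
				      BTREE_UPDATE_JOURNAL_RES,
				      JOURNAL_RES_GET_NONBLOCK);
	if (ret != -EAGAIN)
		return ret;

	if (flags & BTREE_INSERT_NOUNLOCK)
		return -EINTR;			/* caller may not drop transaction locks */

	bch2_trans_unlock(trans);

	ret = bch2_journal_preres_get(&c->journal, preres,
				      BTREE_UPDATE_JOURNAL_RES, 0);
	if (ret)
		return ret;

	if (!bch2_trans_relock(trans)) {
		bch2_journal_preres_put(&c->journal, preres);
		return -EINTR;			/* locks lost: restart the transaction */
	}

	return 0;
}

Acquiring the reservation before allocating the update also means the later failure paths only need to put the preres - as the reserve-failure branch just below does - rather than unwinding a half-constructed btree_update the way the removed code at the old call site did.
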
+
reserve = bch2_btree_reserve_get(c, nr_nodes, flags, cl);
- if (IS_ERR(reserve))
+ if (IS_ERR(reserve)) {
+ bch2_journal_preres_put(&c->journal, &journal_preres);
return ERR_CAST(reserve);
+ }
as = mempool_alloc(&c->btree_interior_update_pool, GFP_NOIO);
memset(as, 0, sizeof(*as));
@@ -969,18 +1041,11 @@ bch2_btree_update_start(struct bch_fs *c, enum btree_id id,
as->btree_id = id;
as->reserve = reserve;
INIT_LIST_HEAD(&as->write_blocked_list);
+ INIT_LIST_HEAD(&as->unwritten_list);
+ as->journal_preres = journal_preres;
bch2_keylist_init(&as->parent_keys, as->inline_keys);
- ret = bch2_journal_preres_get(&c->journal, &as->journal_preres,
- ARRAY_SIZE(as->journal_entries), 0);
- if (ret) {
- bch2_btree_reserve_put(c, reserve);
- closure_debug_destroy(&as->cl);
- mempool_free(as, &c->btree_interior_update_pool);
- return ERR_PTR(ret);
- }
-
mutex_lock(&c->btree_interior_update_lock);
list_add_tail(&as->list, &c->btree_interior_update_list);
mutex_unlock(&c->btree_interior_update_lock);
@@ -1531,8 +1596,10 @@ int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter,
/* Hack, because gc and splitting nodes doesn't mix yet: */
if (!(flags & BTREE_INSERT_GC_LOCK_HELD) &&
!down_read_trylock(&c->gc_lock)) {
- if (flags & BTREE_INSERT_NOUNLOCK)
+ if (flags & BTREE_INSERT_NOUNLOCK) {
+ trace_transaction_restart_ip(trans->ip, _THIS_IP_);
return -EINTR;
+ }
bch2_trans_unlock(trans);
down_read(&c->gc_lock);
@@ -1551,7 +1618,7 @@ int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter,
goto out;
}
- as = bch2_btree_update_start(c, iter->btree_id,
+ as = bch2_btree_update_start(trans, iter->btree_id,
btree_update_reserve_required(c, b), flags,
!(flags & BTREE_INSERT_NOUNLOCK) ? &cl : NULL);
if (IS_ERR(as)) {
@@ -1560,6 +1627,8 @@ int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter,
BUG_ON(flags & BTREE_INSERT_NOUNLOCK);
bch2_trans_unlock(trans);
ret = -EINTR;
+
+ trace_transaction_restart_ip(trans->ip, _THIS_IP_);
}
goto out;
}
@@ -1663,8 +1732,9 @@ retry:
goto err_unlock;
}
- as = bch2_btree_update_start(c, iter->btree_id,
+ as = bch2_btree_update_start(trans, iter->btree_id,
btree_update_reserve_required(c, parent) + 1,
+ flags|
BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE,
!(flags & BTREE_INSERT_NOUNLOCK) ? &cl : NULL);
@@ -1776,7 +1846,7 @@ static int __btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
struct btree *n, *parent = btree_node_parent(iter, b);
struct btree_update *as;
- as = bch2_btree_update_start(c, iter->btree_id,
+ as = bch2_btree_update_start(iter->trans, iter->btree_id,
(parent
? btree_update_reserve_required(c, parent)
: 0) + 1,
@@ -2043,7 +2113,7 @@ int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter,
new_hash = bch2_btree_node_mem_alloc(c);
}
- as = bch2_btree_update_start(c, iter->btree_id,
+ as = bch2_btree_update_start(iter->trans, iter->btree_id,
parent ? btree_update_reserve_required(c, parent) : 0,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE|