Diffstat (limited to 'libbcachefs/btree_update_leaf.c')
-rw-r--r-- | libbcachefs/btree_update_leaf.c | 312
1 file changed, 190 insertions, 122 deletions
diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c
index cc41140f..a62d8307 100644
--- a/libbcachefs/btree_update_leaf.c
+++ b/libbcachefs/btree_update_leaf.c
@@ -227,19 +227,36 @@ btree_insert_key_leaf(struct btree_insert *trans,
 	return ret;
 }
 
+#define trans_for_each_entry(trans, i)					\
+	for ((i) = (trans)->entries; (i) < (trans)->entries + (trans)->nr; (i)++)
+
+/*
+ * We sort transaction entries so that if multiple iterators point to the same
+ * leaf node they'll be adjacent:
+ */
 static bool same_leaf_as_prev(struct btree_insert *trans,
 			      struct btree_insert_entry *i)
 {
-	/*
-	 * Because we sorted the transaction entries, if multiple iterators
-	 * point to the same leaf node they'll always be adjacent now:
-	 */
 	return i != trans->entries &&
 		i[0].iter->l[0].b == i[-1].iter->l[0].b;
 }
 
-#define trans_for_each_entry(trans, i)					\
-	for ((i) = (trans)->entries; (i) < (trans)->entries + (trans)->nr; (i)++)
+static inline struct btree_insert_entry *trans_next_leaf(struct btree_insert *trans,
+							 struct btree_insert_entry *i)
+{
+	struct btree *b = i->iter->l[0].b;
+
+	do {
+		i++;
+	} while (i < trans->entries + trans->nr && b == i->iter->l[0].b);
+
+	return i;
+}
+
+#define trans_for_each_leaf(trans, i)					\
+	for ((i) = (trans)->entries;					\
+	     (i) < (trans)->entries + (trans)->nr;			\
+	     (i) = trans_next_leaf(trans, i))
 
 inline void bch2_btree_node_lock_for_insert(struct bch_fs *c, struct btree *b,
 					    struct btree_iter *iter)
@@ -262,19 +279,16 @@ static void multi_lock_write(struct bch_fs *c, struct btree_insert *trans)
 {
 	struct btree_insert_entry *i;
 
-	trans_for_each_entry(trans, i)
-		if (!same_leaf_as_prev(trans, i))
-			bch2_btree_node_lock_for_insert(c, i->iter->l[0].b,
-							i->iter);
+	trans_for_each_leaf(trans, i)
+		bch2_btree_node_lock_for_insert(c, i->iter->l[0].b, i->iter);
 }
 
 static void multi_unlock_write(struct btree_insert *trans)
 {
 	struct btree_insert_entry *i;
 
-	trans_for_each_entry(trans, i)
-		if (!same_leaf_as_prev(trans, i))
-			bch2_btree_node_unlock_write(i->iter->l[0].b, i->iter);
+	trans_for_each_leaf(trans, i)
+		bch2_btree_node_unlock_write(i->iter->l[0].b, i->iter);
 }
 
 static inline int btree_trans_cmp(struct btree_insert_entry l,
@@ -285,56 +299,24 @@ static inline int btree_trans_cmp(struct btree_insert_entry l,
 
 /* Normal update interface: */
 
-/**
- * __bch_btree_insert_at - insert keys at given iterator positions
- *
- * This is main entry point for btree updates.
- *
- * Return values:
- * -EINTR: locking changed, this function should be called again. Only returned
- * if passed BTREE_INSERT_ATOMIC.
- * -EROFS: filesystem read only
- * -EIO: journal or btree node IO error
+/*
+ * Get journal reservation, take write locks, and attempt to do btree update(s):
  */
-int __bch2_btree_insert_at(struct btree_insert *trans)
+static inline int do_btree_insert_at(struct btree_insert *trans,
+				     struct btree_iter **split,
+				     bool *cycle_gc_lock)
 {
 	struct bch_fs *c = trans->c;
 	struct btree_insert_entry *i;
-	struct btree_iter *split = NULL;
-	bool cycle_gc_lock = false;
 	unsigned u64s;
 	int ret;
 
-	trans_for_each_entry(trans, i) {
-		BUG_ON(i->iter->level);
-		BUG_ON(bkey_cmp(bkey_start_pos(&i->k->k), i->iter->pos));
-		BUG_ON(debug_check_bkeys(c) &&
-		       bch2_bkey_invalid(c, i->iter->btree_id,
-					 bkey_i_to_s_c(i->k)));
-	}
-
-	bubble_sort(trans->entries, trans->nr, btree_trans_cmp);
-
-	if (unlikely(!percpu_ref_tryget(&c->writes)))
-		return -EROFS;
-retry_locks:
-	ret = -EINTR;
-	trans_for_each_entry(trans, i) {
-		if (!bch2_btree_iter_set_locks_want(i->iter, 1))
-			goto err;
+	trans_for_each_entry(trans, i)
+		BUG_ON(i->done);
 
-		if (i->iter->uptodate == BTREE_ITER_NEED_TRAVERSE) {
-			ret = bch2_btree_iter_traverse(i->iter);
-			if (ret)
-				goto err;
-		}
-	}
-retry:
-	trans->did_work = false;
 	u64s = 0;
 	trans_for_each_entry(trans, i)
-		if (!i->done)
-			u64s += jset_u64s(i->k->k.u64s + i->extra_res);
+		u64s += jset_u64s(i->k->k.u64s + i->extra_res);
 
 	memset(&trans->journal_res, 0, sizeof(trans->journal_res));
 
@@ -344,13 +326,13 @@ retry:
 				       u64s, u64s)
 		: 0;
 	if (ret)
-		goto err;
+		return ret;
 
 	multi_lock_write(c, trans);
 
 	if (race_fault()) {
 		ret = -EINTR;
-		goto unlock;
+		goto out;
 	}
 
 	u64s = 0;
@@ -365,129 +347,210 @@ retry:
 		 * bch2_btree_node_write(), converting an unwritten bset to a
 		 * written one
 		 */
-		if (!i->done) {
-			u64s += i->k->k.u64s + i->extra_res;
-			if (!bch2_btree_node_insert_fits(c,
-					i->iter->l[0].b, u64s)) {
-				split = i->iter;
-				goto unlock;
-			}
+		u64s += i->k->k.u64s + i->extra_res;
+		if (!bch2_btree_node_insert_fits(c,
+				i->iter->l[0].b, u64s)) {
+			ret = -EINTR;
+			*split = i->iter;
+			goto out;
 		}
 	}
 
-	ret = 0;
-	split = NULL;
-	cycle_gc_lock = false;
+	if (journal_seq_verify(c) &&
+	    !(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))
+		trans_for_each_entry(trans, i)
+			i->k->k.version.lo = trans->journal_res.seq;
 
 	trans_for_each_entry(trans, i) {
-		if (i->done)
-			continue;
-
 		switch (btree_insert_key_leaf(trans, i)) {
 		case BTREE_INSERT_OK:
 			i->done = true;
 			break;
 		case BTREE_INSERT_JOURNAL_RES_FULL:
 		case BTREE_INSERT_NEED_TRAVERSE:
-			ret = -EINTR;
-			break;
 		case BTREE_INSERT_NEED_RESCHED:
-			ret = -EAGAIN;
+			ret = -EINTR;
 			break;
 		case BTREE_INSERT_BTREE_NODE_FULL:
-			split = i->iter;
+			ret = -EINTR;
+			*split = i->iter;
 			break;
 		case BTREE_INSERT_ENOSPC:
 			ret = -ENOSPC;
 			break;
 		case BTREE_INSERT_NEED_GC_LOCK:
-			cycle_gc_lock = true;
 			ret = -EINTR;
+			*cycle_gc_lock = true;
 			break;
 		default:
 			BUG();
 		}
 
-		if (!trans->did_work && (ret || split))
+		/*
+		 * If we did some work (i.e. inserted part of an extent),
+		 * we have to do all the other updates as well:
+		 */
+		if (!trans->did_work && (ret || *split))
 			break;
 	}
-unlock:
+out:
 	multi_unlock_write(trans);
 	bch2_journal_res_put(&c->journal, &trans->journal_res);
 
-	if (split)
-		goto split;
-	if (ret)
-		goto err;
+	return ret;
+}
 
-	trans_for_each_entry(trans, i)
-		if (i->iter->flags & BTREE_ITER_AT_END_OF_LEAF)
-			goto out;
+/**
+ * __bch_btree_insert_at - insert keys at given iterator positions
+ *
+ * This is main entry point for btree updates.
+ *
+ * Return values:
+ * -EINTR: locking changed, this function should be called again. Only returned
+ * if passed BTREE_INSERT_ATOMIC.
+ * -EROFS: filesystem read only
+ * -EIO: journal or btree node IO error
+ */
+int __bch2_btree_insert_at(struct btree_insert *trans)
+{
+	struct bch_fs *c = trans->c;
+	struct btree_insert_entry *i;
+	struct btree_iter *linked, *split = NULL;
+	bool cycle_gc_lock = false;
+	unsigned flags;
+	int ret;
+
+	for_each_btree_iter(trans->entries[0].iter, linked)
+		bch2_btree_iter_verify_locks(linked);
+
+	/* for the sake of sanity: */
+	BUG_ON(trans->nr > 1 && !(trans->flags & BTREE_INSERT_ATOMIC));
 
 	trans_for_each_entry(trans, i) {
-		/*
-		 * iterators are inconsistent when they hit end of leaf, until
-		 * traversed again
-		 */
-		if (i->iter->uptodate < BTREE_ITER_NEED_TRAVERSE &&
-		    !same_leaf_as_prev(trans, i))
-			bch2_foreground_maybe_merge(c, i->iter, 0);
+		BUG_ON(i->iter->level);
+		BUG_ON(bkey_cmp(bkey_start_pos(&i->k->k), i->iter->pos));
+		BUG_ON(debug_check_bkeys(c) &&
+		       bch2_bkey_invalid(c, i->iter->btree_id,
+					 bkey_i_to_s_c(i->k)));
+		BUG_ON(i->iter->uptodate == BTREE_ITER_END);
 	}
-out:
-	/* make sure we didn't lose an error: */
-	if (!ret && IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
-		trans_for_each_entry(trans, i)
-			BUG_ON(!i->done);
 
+	bubble_sort(trans->entries, trans->nr, btree_trans_cmp);
+
+	if (unlikely(!percpu_ref_tryget(&c->writes)))
+		return -EROFS;
+retry:
+	split = NULL;
+	cycle_gc_lock = false;
+
+	trans_for_each_entry(trans, i) {
+		if (!bch2_btree_iter_upgrade(i->iter, 1)) {
+			ret = -EINTR;
+			goto err;
+		}
+
+		if (i->iter->flags & BTREE_ITER_ERROR) {
+			ret = -EIO;
+			goto err;
+		}
+	}
+
+	ret = do_btree_insert_at(trans, &split, &cycle_gc_lock);
+	if (unlikely(ret))
+		goto err;
+
+	trans_for_each_leaf(trans, i)
+		bch2_foreground_maybe_merge(c, i->iter, 0, trans->flags);
+
+	trans_for_each_entry(trans, i)
+		bch2_btree_iter_downgrade(i->iter);
+out:
 	percpu_ref_put(&c->writes);
+
+	if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
+		/* make sure we didn't drop or screw up locks: */
+		for_each_btree_iter(trans->entries[0].iter, linked) {
+			bch2_btree_iter_verify_locks(linked);
+			BUG_ON((trans->flags & BTREE_INSERT_NOUNLOCK) &&
+			       trans->did_work &&
+			       linked->uptodate >= BTREE_ITER_NEED_RELOCK);
+		}
+
+		/* make sure we didn't lose an error: */
+		if (!ret)
+			trans_for_each_entry(trans, i)
+				BUG_ON(!i->done);
+	}
+
+	BUG_ON(!(trans->flags & BTREE_INSERT_ATOMIC) && ret == -EINTR);
+
 	return ret;
-split:
-	/*
-	 * have to drop journal res before splitting, because splitting means
-	 * allocating new btree nodes, and holding a journal reservation
-	 * potentially blocks the allocator:
-	 */
-	ret = bch2_btree_split_leaf(c, split, trans->flags);
+err:
+	flags = trans->flags;
 
 	/*
-	 * This can happen when we insert part of an extent - with an update
-	 * with multiple keys, we don't want to redo the entire update - that's
-	 * just too confusing:
+	 * BTREE_INSERT_NOUNLOCK means don't unlock _after_ successful btree
+	 * update; if we haven't done anything yet it doesn't apply
 	 */
-	if (!ret &&
-	    (trans->flags & BTREE_INSERT_ATOMIC) &&
-	    trans->did_work)
-		ret = -EINTR;
+	if (!trans->did_work)
+		flags &= ~BTREE_INSERT_NOUNLOCK;
 
-	if (ret)
-		goto err;
+	if (split) {
+		ret = bch2_btree_split_leaf(c, split, flags);
+
+		/*
+		 * if the split succeeded without dropping locks the insert will
+		 * still be atomic (in the BTREE_INSERT_ATOMIC sense, what the
+		 * caller peeked() and is overwriting won't have changed)
+		 */
+#if 0
+		/*
+		 * XXX:
+		 * split -> btree node merging (of parent node) might still drop
+		 * locks when we're not passing it BTREE_INSERT_NOUNLOCK
+		 */
+		if (!ret && !trans->did_work)
+			goto retry;
+#endif
+
+		/*
+		 * don't care if we got ENOSPC because we told split it
+		 * couldn't block:
+		 */
+		if (!ret || (flags & BTREE_INSERT_NOUNLOCK))
+			ret = -EINTR;
+	}
 
-	/*
-	 * if the split didn't have to drop locks the insert will still be
-	 * atomic (in the BTREE_INSERT_ATOMIC sense, what the caller peeked()
-	 * and is overwriting won't have changed)
-	 */
-	goto retry_locks;
-err:
 	if (cycle_gc_lock) {
-		down_read(&c->gc_lock);
+		if (!down_read_trylock(&c->gc_lock)) {
+			if (flags & BTREE_INSERT_NOUNLOCK)
+				goto out;
+
+			bch2_btree_iter_unlock(trans->entries[0].iter);
+			down_read(&c->gc_lock);
+		}
 		up_read(&c->gc_lock);
 	}
 
 	if (ret == -EINTR) {
+		if (flags & BTREE_INSERT_NOUNLOCK)
+			goto out;
+
 		trans_for_each_entry(trans, i) {
 			int ret2 = bch2_btree_iter_traverse(i->iter);
 			if (ret2) {
 				ret = ret2;
 				goto out;
 			}
+
+			BUG_ON(i->iter->uptodate > BTREE_ITER_NEED_PEEK);
 		}
 
 		/*
 		 * BTREE_ITER_ATOMIC means we have to return -EINTR if we
 		 * dropped locks:
 		 */
-		if (!(trans->flags & BTREE_INSERT_ATOMIC))
+		if (!(flags & BTREE_INSERT_ATOMIC))
 			goto retry;
 	}
 
@@ -549,7 +612,7 @@ int bch2_btree_insert(struct bch_fs *c, enum btree_id id,
 	bch2_btree_iter_init(&iter, c, id, bkey_start_pos(&k->k),
 			     BTREE_ITER_INTENT);
 	ret = bch2_btree_insert_at(c, disk_res, hook, journal_seq, flags,
-				  BTREE_INSERT_ENTRY(&iter, k));
+				   BTREE_INSERT_ENTRY(&iter, k));
 	bch2_btree_iter_unlock(&iter);
 
 	return ret;
@@ -584,6 +647,11 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
 		if (bkey_cmp(iter.pos, end) >= 0)
 			break;
 
+		if (k.k->type == KEY_TYPE_DISCARD) {
+			bch2_btree_iter_next(&iter);
+			continue;
+		}
+
 		bkey_init(&delete.k);
 
 		/*
@@ -615,8 +683,8 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
 		}
 
 		ret = bch2_btree_insert_at(c, disk_res, hook, journal_seq,
-					  BTREE_INSERT_NOFAIL,
-					  BTREE_INSERT_ENTRY(&iter, &delete));
+					   BTREE_INSERT_NOFAIL,
+					   BTREE_INSERT_ENTRY(&iter, &delete));
 		if (ret)
 			break;