author    Kent Overstreet <kent.overstreet@gmail.com>  2018-06-05 23:27:54 -0400
committer Kent Overstreet <kent.overstreet@gmail.com>  2018-06-27 12:25:13 -0400
commit    b2731a37294088e58d71bb1f71d2afca1335e9fe (patch)
tree      6a62b03be68740d399fa0c2de5ef4b9aec4b5520
parent    572fa15cf07ebb440cad9dab57f801ddd0bf035c (diff)
bcachefs: bch2_btree_iter_upgrade()/downgrade()
Replaces bch2_btree_iter_set_locks_want() - also add more assertions

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
-rw-r--r--  fs/bcachefs/btree_cache.c            |  37
-rw-r--r--  fs/bcachefs/btree_gc.c               |   2
-rw-r--r--  fs/bcachefs/btree_iter.c             | 272
-rw-r--r--  fs/bcachefs/btree_iter.h             |  28
-rw-r--r--  fs/bcachefs/btree_locking.h          |  15
-rw-r--r--  fs/bcachefs/btree_update_interior.c  |  61
-rw-r--r--  fs/bcachefs/btree_update_leaf.c      |  35
-rw-r--r--  fs/bcachefs/migrate.c                |  13

8 files changed, 272 insertions, 191 deletions
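
For orientation: the patch replaces the single bch2_btree_iter_set_locks_want() entry point with an upgrade/downgrade pair. Upgrade raises locks_want and can fail with -EINTR (after raising locks_want on linked iterators so a retry can succeed); downgrade drops back to the iterator's baseline. A minimal sketch of the calling convention callers are converted to, modeled on bch2_btree_node_update_key() below; do_update() is a hypothetical placeholder, not a function in this patch:

/*
 * Sketch of the new upgrade/downgrade calling pattern; do_update() is a
 * hypothetical stand-in for the caller's real work.
 */
static int example_update(struct bch_fs *c, struct btree_iter *iter)
{
	int ret;

	/*
	 * Take intent locks all the way to the root; on failure, linked
	 * iterators have had their locks_want raised so that retrying
	 * after -EINTR can succeed:
	 */
	if (!bch2_btree_iter_upgrade(iter, U8_MAX))
		return -EINTR;

	ret = do_update(c, iter);

	/*
	 * Drop back to the baseline: intent on level 0 only if the
	 * iterator was created with BTREE_ITER_INTENT, else read locks:
	 */
	bch2_btree_iter_downgrade(iter);
	return ret;
}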
diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index 09bab9ac5d6a..b0dc4c8a85cb 100644
--- a/fs/bcachefs/btree_cache.c
+++ b/fs/bcachefs/btree_cache.c
@@ -763,17 +763,15 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
struct btree_node_iter node_iter;
struct bkey_packed *k;
BKEY_PADDED(k) tmp;
- struct btree *ret;
+ struct btree *ret = NULL;
unsigned level = b->level;
parent = btree_iter_node(iter, level + 1);
if (!parent)
return NULL;
- if (!bch2_btree_node_relock(iter, level + 1)) {
- bch2_btree_iter_set_locks_want(iter, level + 2);
- return ERR_PTR(-EINTR);
- }
+ if (!bch2_btree_node_relock(iter, level + 1))
+ goto out_upgrade;
node_iter = iter->l[parent->level].iter;
@@ -786,7 +784,7 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
: (bch2_btree_node_iter_advance(&node_iter, parent),
bch2_btree_node_iter_peek_all(&node_iter, parent));
if (!k)
- return NULL;
+ goto out;
} while (bkey_deleted(k));
bch2_bkey_unpack(parent, &tmp.k, k);
@@ -796,10 +794,8 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
if (PTR_ERR_OR_ZERO(ret) == -EINTR && may_drop_locks) {
struct btree_iter *linked;
- if (!bch2_btree_node_relock(iter, level + 1)) {
- bch2_btree_iter_set_locks_want(iter, level + 2);
- return ERR_PTR(-EINTR);
- }
+ if (!bch2_btree_node_relock(iter, level + 1))
+ goto out_upgrade;
/*
* We might have got -EINTR because trylock failed, and we're
@@ -815,7 +811,11 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
ret = bch2_btree_node_get(c, iter, &tmp.k, level,
SIX_LOCK_intent);
- bch2_btree_iter_relock(iter);
+ /*
+ * drop the parent lock if it's no longer wanted, before
+ * btree_iter_relock() calls btree_iter_verify_locks():
+ */
+ if (btree_lock_want(iter, level + 1) == BTREE_NODE_UNLOCKED)
+ btree_node_unlock(iter, level + 1);
if (!bch2_btree_node_relock(iter, level)) {
btree_iter_set_dirty(iter, BTREE_ITER_NEED_RELOCK);
@@ -825,12 +825,25 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
ret = ERR_PTR(-EINTR);
}
}
+
+ bch2_btree_iter_relock(iter);
}
+out:
+ if (btree_lock_want(iter, level + 1) == BTREE_NODE_UNLOCKED)
+ btree_node_unlock(iter, level + 1);
+
+ bch2_btree_iter_verify_locks(iter);
BUG_ON((!may_drop_locks || !IS_ERR(ret)) &&
- !btree_node_locked(iter, level));
+ (iter->uptodate >= BTREE_ITER_NEED_RELOCK ||
+ !btree_node_locked(iter, level)));
return ret;
+out_upgrade:
+ if (may_drop_locks)
+ bch2_btree_iter_upgrade(iter, level + 2);
+ ret = ERR_PTR(-EINTR);
+ goto out;
}
void bch2_btree_node_prefetch(struct bch_fs *c, const struct bkey_i *k,
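
In the hunk above, out_upgrade asks for locks_want = level + 2. Under this code's convention (see __btree_lock_want() in btree_locking.h below), locks_want == N means intent locks on levels 0 through N - 1, so level + 2 is just enough to intent-lock the parent at level + 1 on the retry. A self-contained sketch checking that arithmetic, with wants_intent() as a stand-in for __btree_lock_want():

#include <assert.h>
#include <stdbool.h>

/* stand-in mirroring __btree_lock_want(): intent iff level < locks_want */
static bool wants_intent(unsigned locks_want, unsigned level)
{
	return level < locks_want;
}

int main(void)
{
	unsigned level = 0;			/* sibling lookup at the leaf */
	unsigned new_locks_want = level + 2;	/* as in out_upgrade */

	assert(wants_intent(new_locks_want, level));		/* the node */
	assert(wants_intent(new_locks_want, level + 1));	/* its parent */
	assert(!wants_intent(new_locks_want, level + 2));	/* not beyond */
	return 0;
}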
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 8f5594cf0cde..969c1f19414e 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -790,7 +790,7 @@ next:
BUG_ON(iter->l[old_nodes[0]->level].b != old_nodes[0]);
- BUG_ON(!bch2_btree_iter_node_replace(iter, new_nodes[0]));
+ bch2_btree_iter_node_replace(iter, new_nodes[0]);
for (i = 0; i < nr_new_nodes; i++)
bch2_btree_open_bucket_put(c, new_nodes[i]);
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index 330abc577c9a..3ad7e082f9b3 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -46,7 +46,7 @@ void __bch2_btree_node_lock_write(struct btree *b, struct btree_iter *iter)
struct btree_iter *linked;
unsigned readers = 0;
- for_each_linked_btree_iter(iter, linked)
+ for_each_btree_iter(iter, linked)
if (linked->l[b->level].b == b &&
btree_node_read_locked(linked, b->level))
readers++;
@@ -68,7 +68,7 @@ bool __bch2_btree_node_relock(struct btree_iter *iter, unsigned level)
{
struct btree_iter *linked;
struct btree *b = iter->l[level].b;
- int want = btree_lock_want(iter, level);
+ int want = __btree_lock_want(iter, level);
int have = btree_node_locked_type(iter, level);
if (want == have)
@@ -101,38 +101,6 @@ success:
return true;
}
-static bool __bch2_btree_iter_relock(struct btree_iter *iter)
-{
- unsigned l = iter->level;
-
- do {
- if (!btree_iter_node(iter, l))
- break;
-
- if (!bch2_btree_node_relock(iter, l)) {
- btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
- return false;
- }
-
- l++;
- } while (l < iter->locks_want);
-
- if (iter->uptodate == BTREE_ITER_NEED_RELOCK)
- iter->uptodate = BTREE_ITER_NEED_PEEK;
- return true;
-}
-
-bool bch2_btree_iter_relock(struct btree_iter *iter)
-{
- struct btree_iter *linked;
- bool ret = true;
-
- for_each_btree_iter(iter, linked)
- ret &= __bch2_btree_iter_relock(linked);
-
- return ret;
-}
-
/* Slowpath: */
bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
unsigned level,
@@ -217,57 +185,135 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
/* Btree iterator locking: */
-static void btree_iter_drop_extra_locks(struct btree_iter *iter)
+#ifdef CONFIG_BCACHEFS_DEBUG
+void bch2_btree_iter_verify_locks(struct btree_iter *iter)
{
unsigned l;
- while (iter->nodes_locked &&
- (l = __fls(iter->nodes_locked)) > iter->locks_want) {
- if (l > iter->level) {
- btree_node_unlock(iter, l);
- } else {
- if (btree_node_intent_locked(iter, l)) {
- six_lock_downgrade(&iter->l[l].b->lock);
- iter->nodes_intent_locked ^= 1 << l;
+ if (iter->uptodate == BTREE_ITER_END) {
+ BUG_ON(iter->nodes_locked);
+ return;
+ }
+
+ for (l = 0; btree_iter_node(iter, l); l++) {
+ if (iter->uptodate >= BTREE_ITER_NEED_RELOCK &&
+ !btree_node_locked(iter, l))
+ continue;
+
+ BUG_ON(btree_lock_want(iter, l) !=
+ btree_node_locked_type(iter, l));
+ }
+}
+#endif
+
+static bool __bch2_btree_iter_relock(struct btree_iter *iter)
+{
+ if (iter->uptodate < BTREE_ITER_NEED_RELOCK) {
+ bch2_btree_iter_verify_locks(iter);
+ return true;
+ }
+
+ if (iter->uptodate == BTREE_ITER_NEED_RELOCK) {
+ unsigned l = iter->level;
+
+ do {
+ if (!btree_iter_node(iter, l))
+ break;
+
+ if (!bch2_btree_node_relock(iter, l)) {
+ btree_iter_set_dirty(iter,
+ BTREE_ITER_NEED_TRAVERSE);
+ return false;
}
- break;
- }
+
+ l++;
+ } while (l < iter->locks_want);
+
+ iter->uptodate = BTREE_ITER_NEED_PEEK;
+ bch2_btree_iter_verify_locks(iter);
+ return true;
}
+
+ return false;
}
-bool __bch2_btree_iter_set_locks_want(struct btree_iter *iter,
- unsigned new_locks_want)
+bool bch2_btree_iter_relock(struct btree_iter *iter)
{
struct btree_iter *linked;
+ bool ret = true;
- /* Drop locks we don't want anymore: */
- if (new_locks_want < iter->locks_want)
- for_each_linked_btree_iter(iter, linked)
- if (linked->locks_want > new_locks_want) {
- linked->locks_want = max_t(unsigned, 1,
- new_locks_want);
- btree_iter_drop_extra_locks(linked);
- }
+ for_each_btree_iter(iter, linked)
+ ret &= __bch2_btree_iter_relock(linked);
- iter->locks_want = new_locks_want;
- btree_iter_drop_extra_locks(iter);
+ return ret;
+}
+
+bool __bch2_btree_iter_upgrade(struct btree_iter *iter,
+ unsigned new_locks_want)
+{
+ struct btree_iter *linked;
+
+ if (iter->locks_want < new_locks_want) {
+ iter->locks_want = new_locks_want;
+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_RELOCK);
+ }
if (__bch2_btree_iter_relock(iter))
return true;
/*
- * Just an optimization: ancestor nodes must be locked before child
- * nodes, so set locks_want on iterators that might lock ancestors
- * before us to avoid getting -EINTR later:
+ * Ancestor nodes must be locked before child nodes, so set locks_want
+ * on iterators that might lock ancestors before us to avoid getting
+ * -EINTR later:
*/
for_each_linked_btree_iter(iter, linked)
if (linked->btree_id == iter->btree_id &&
- btree_iter_cmp(linked, iter) <= 0)
- linked->locks_want = max_t(unsigned, linked->locks_want,
- new_locks_want);
+ btree_iter_cmp(linked, iter) <= 0 &&
+ linked->locks_want < new_locks_want) {
+ linked->locks_want = new_locks_want;
+ btree_iter_set_dirty(linked, BTREE_ITER_NEED_RELOCK);
+ }
+
return false;
}
+void __bch2_btree_iter_downgrade(struct btree_iter *iter,
+ unsigned downgrade_to)
+{
+ struct btree_iter *linked;
+ unsigned l;
+
+ /*
+ * We downgrade linked iterators as well because btree_iter_upgrade
+ * might have had to modify locks_want on linked iterators due to lock
+ * ordering:
+ */
+ for_each_btree_iter(iter, linked) {
+ unsigned new_locks_want = downgrade_to ?:
+ (linked->flags & BTREE_ITER_INTENT ? 1 : 0);
+
+ if (linked->locks_want <= new_locks_want)
+ continue;
+
+ linked->locks_want = new_locks_want;
+
+ while (linked->nodes_locked &&
+ (l = __fls(linked->nodes_locked)) >= linked->locks_want) {
+ if (l > linked->level) {
+ btree_node_unlock(linked, l);
+ } else {
+ if (btree_node_intent_locked(linked, l)) {
+ six_lock_downgrade(&linked->l[l].b->lock);
+ linked->nodes_intent_locked ^= 1 << l;
+ }
+ break;
+ }
+ }
+
+ bch2_btree_iter_verify_locks(linked);
+ }
+}
+
int bch2_btree_iter_unlock(struct btree_iter *iter)
{
struct btree_iter *linked;
@@ -609,11 +655,12 @@ static inline void btree_iter_node_set(struct btree_iter *iter,
* A btree node is being replaced - update the iterator to point to the new
* node:
*/
-bool bch2_btree_iter_node_replace(struct btree_iter *iter, struct btree *b)
+void bch2_btree_iter_node_replace(struct btree_iter *iter, struct btree *b)
{
+ enum btree_node_locked_type t;
struct btree_iter *linked;
- for_each_linked_btree_iter(iter, linked)
+ for_each_btree_iter(iter, linked)
if (btree_iter_pos_in_node(linked, b)) {
/*
* bch2_btree_iter_node_drop() has already been called -
@@ -622,51 +669,28 @@ bool bch2_btree_iter_node_replace(struct btree_iter *iter, struct btree *b)
*/
BUG_ON(btree_node_locked(linked, b->level));
- /*
- * If @linked wants this node read locked, we don't want
- * to actually take the read lock now because it's not
- * legal to hold read locks on other nodes while we take
- * write locks, so the journal can make forward
- * progress...
- *
- * Instead, btree_iter_node_set() sets things up so
- * bch2_btree_node_relock() will succeed:
- */
-
- if (btree_want_intent(linked, b->level)) {
- six_lock_increment(&b->lock, SIX_LOCK_intent);
- mark_btree_node_intent_locked(linked, b->level);
+ t = btree_lock_want(linked, b->level);
+ if (t != BTREE_NODE_UNLOCKED) {
+ six_lock_increment(&b->lock, t);
+ mark_btree_node_locked(linked, b->level, t);
}
btree_iter_node_set(linked, b);
}
- if (!btree_iter_pos_in_node(iter, b)) {
- six_unlock_intent(&b->lock);
- return false;
- }
-
- mark_btree_node_intent_locked(iter, b->level);
- btree_iter_node_set(iter, b);
- return true;
-}
-
-void bch2_btree_iter_node_drop_linked(struct btree_iter *iter, struct btree *b)
-{
- struct btree_iter *linked;
-
- for_each_linked_btree_iter(iter, linked)
- bch2_btree_iter_node_drop(linked, b);
+ six_unlock_intent(&b->lock);
}
void bch2_btree_iter_node_drop(struct btree_iter *iter, struct btree *b)
{
+ struct btree_iter *linked;
unsigned level = b->level;
- if (iter->l[level].b == b) {
- btree_node_unlock(iter, level);
- iter->l[level].b = BTREE_ITER_NOT_END;
- }
+ for_each_btree_iter(iter, linked)
+ if (linked->l[level].b == b) {
+ btree_node_unlock(linked, level);
+ linked->l[level].b = BTREE_ITER_NOT_END;
+ }
}
/*
@@ -707,7 +731,7 @@ static inline int btree_iter_lock_root(struct btree_iter *iter,
return 0;
}
- lock_type = btree_lock_want(iter, iter->level);
+ lock_type = __btree_lock_want(iter, iter->level);
if (unlikely(!btree_node_lock(b, POS_MAX, iter->level,
iter, lock_type)))
return -EINTR;
@@ -765,7 +789,7 @@ static inline int btree_iter_down(struct btree_iter *iter)
struct btree_iter_level *l = &iter->l[iter->level];
struct btree *b;
unsigned level = iter->level - 1;
- enum six_lock_type lock_type = btree_lock_want(iter, level);
+ enum six_lock_type lock_type = __btree_lock_want(iter, level);
BKEY_PADDED(k) tmp;
BUG_ON(!btree_node_locked(iter, iter->level));
@@ -793,6 +817,12 @@ static void btree_iter_up(struct btree_iter *iter)
btree_node_unlock(iter, iter->level++);
}
+static void btree_iter_set_end(struct btree_iter *iter)
+{
+ iter->uptodate = BTREE_ITER_END;
+ __bch2_btree_iter_unlock(iter);
+}
+
int __must_check __bch2_btree_iter_traverse(struct btree_iter *);
static int btree_iter_traverse_error(struct btree_iter *iter, int ret)
@@ -956,6 +986,7 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
}
iter->uptodate = BTREE_ITER_NEED_PEEK;
+ bch2_btree_iter_verify_locks(iter);
return 0;
}
@@ -963,11 +994,7 @@ int __must_check bch2_btree_iter_traverse(struct btree_iter *iter)
{
int ret;
- if (iter->uptodate < BTREE_ITER_NEED_RELOCK)
- return 0;
-
- if (iter->uptodate == BTREE_ITER_NEED_RELOCK &&
- __bch2_btree_iter_relock(iter))
+ if (__bch2_btree_iter_relock(iter))
return 0;
ret = __bch2_btree_iter_traverse(iter);
@@ -987,6 +1014,7 @@ struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter)
int ret;
EBUG_ON(iter->flags & BTREE_ITER_IS_EXTENTS);
+ bch2_btree_iter_verify_locks(iter);
if (iter->uptodate == BTREE_ITER_UPTODATE)
return iter->l[iter->level].b;
@@ -1000,7 +1028,7 @@ struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter)
b = iter->l[iter->level].b;
if (!b) {
- iter->uptodate = BTREE_ITER_END;
+ btree_iter_set_end(iter);
return NULL;
}
@@ -1018,11 +1046,12 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter, unsigned depth)
int ret;
EBUG_ON(iter->flags & BTREE_ITER_IS_EXTENTS);
+ bch2_btree_iter_verify_locks(iter);
btree_iter_up(iter);
if (!btree_iter_node(iter, iter->level)) {
- iter->uptodate = BTREE_ITER_END;
+ btree_iter_set_end(iter);
return NULL;
}
@@ -1042,6 +1071,15 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter, unsigned depth)
* the next child node
*/
+ /*
+ * We don't really want to be unlocking here except we can't
+ * directly tell btree_iter_traverse() "traverse to this level"
+ * except by setting iter->level, so we have to unlock so we
+ * don't screw up our lock invariants:
+ */
+ if (btree_node_read_locked(iter, iter->level))
+ btree_node_unlock(iter, iter->level);
+
/* ick: */
iter->pos = iter->btree_id == BTREE_ID_INODES
? btree_type_successor(iter->btree_id, iter->pos)
@@ -1104,8 +1142,7 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
EBUG_ON(!!(iter->flags & BTREE_ITER_IS_EXTENTS) !=
(iter->btree_id == BTREE_ID_EXTENTS));
EBUG_ON(iter->flags & BTREE_ITER_SLOTS);
- EBUG_ON(iter->uptodate == BTREE_ITER_UPTODATE &&
- !btree_node_locked(iter, 0));
+ bch2_btree_iter_verify_locks(iter);
if (iter->uptodate == BTREE_ITER_UPTODATE) {
struct bkey_packed *k =
@@ -1135,7 +1172,7 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
/* got to the end of the leaf, iterator needs to be traversed: */
iter->pos = l->b->key.k.p;
if (!bkey_cmp(iter->pos, POS_MAX)) {
- iter->uptodate = BTREE_ITER_END;
+ btree_iter_set_end(iter);
return bkey_s_c_null;
}
@@ -1162,7 +1199,7 @@ struct bkey_s_c bch2_btree_iter_peek_next_leaf(struct btree_iter *iter)
iter->pos = l->b->key.k.p;
if (!bkey_cmp(iter->pos, POS_MAX)) {
- iter->uptodate = BTREE_ITER_END;
+ btree_iter_set_end(iter);
return bkey_s_c_null;
}
@@ -1181,6 +1218,7 @@ struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
EBUG_ON(!!(iter->flags & BTREE_ITER_IS_EXTENTS) !=
(iter->btree_id == BTREE_ID_EXTENTS));
EBUG_ON(iter->flags & BTREE_ITER_SLOTS);
+ bch2_btree_iter_verify_locks(iter);
if (unlikely(iter->uptodate != BTREE_ITER_UPTODATE)) {
k = bch2_btree_iter_peek(iter);
@@ -1243,7 +1281,7 @@ recheck:
if (iter->flags & BTREE_ITER_IS_EXTENTS) {
if (n.p.offset == KEY_OFFSET_MAX) {
if (n.p.inode == KEY_INODE_MAX) {
- iter->uptodate = BTREE_ITER_END;
+ btree_iter_set_end(iter);
return bkey_s_c_null;
}
@@ -1277,8 +1315,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
EBUG_ON(!!(iter->flags & BTREE_ITER_IS_EXTENTS) !=
(iter->btree_id == BTREE_ID_EXTENTS));
EBUG_ON(!(iter->flags & BTREE_ITER_SLOTS));
- EBUG_ON(iter->uptodate == BTREE_ITER_UPTODATE &&
- !btree_node_locked(iter, 0));
+ bch2_btree_iter_verify_locks(iter);
if (iter->uptodate == BTREE_ITER_UPTODATE) {
struct bkey_s_c ret = { .k = &iter->k };
@@ -1304,6 +1341,11 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *iter)
{
+ EBUG_ON(!!(iter->flags & BTREE_ITER_IS_EXTENTS) !=
+ (iter->btree_id == BTREE_ID_EXTENTS));
+ EBUG_ON(!(iter->flags & BTREE_ITER_SLOTS));
+ bch2_btree_iter_verify_locks(iter);
+
iter->pos = btree_type_successor(iter->btree_id, iter->k.p);
if (unlikely(iter->uptodate != BTREE_ITER_UPTODATE)) {
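
The downgrade loop added above operates on per-level lock state kept as bitmasks: nodes_locked has one bit per locked level, nodes_intent_locked marks which of those are intent locks, and __fls() finds the highest locked level. A simplified, self-contained model of the top-down downgrade, using stand-in types rather than the kernel structures:

#include <stdio.h>

struct model_iter {
	unsigned nodes_locked;		/* bit l set: level l is locked */
	unsigned nodes_intent_locked;	/* bit l set: level l intent locked */
	unsigned locks_want;		/* intent wanted on levels < this */
	unsigned level;			/* level the iterator points at */
};

static void model_downgrade(struct model_iter *it)
{
	while (it->nodes_locked) {
		/* highest locked level, like __fls(nodes_locked): */
		unsigned l = 31 - __builtin_clz(it->nodes_locked);

		if (l < it->locks_want)
			break;

		if (l > it->level) {
			/* above iter->level: unlock entirely */
			it->nodes_locked &= ~(1U << l);
			it->nodes_intent_locked &= ~(1U << l);
		} else {
			/* at iter->level: demote intent -> read, then stop */
			it->nodes_intent_locked &= ~(1U << l);
			break;
		}
	}
}

int main(void)
{
	/*
	 * Levels 0-2 intent locked (e.g. after an upgrade); downgrade a
	 * non-INTENT iterator, i.e. new locks_want of 0:
	 */
	struct model_iter it = {
		.nodes_locked		= 0x7,
		.nodes_intent_locked	= 0x7,
		.locks_want		= 0,
		.level			= 0,
	};

	model_downgrade(&it);
	/* prints "locked 1 intent 0": only a read lock on the leaf remains */
	printf("locked %x intent %x\n",
	       it.nodes_locked, it.nodes_intent_locked);
	return 0;
}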
diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h
index e7e8ae209d0b..99e51b27675d 100644
--- a/fs/bcachefs/btree_iter.h
+++ b/fs/bcachefs/btree_iter.h
@@ -92,9 +92,11 @@ __next_iter_with_node(struct btree_iter *iter, struct btree *b,
#ifdef CONFIG_BCACHEFS_DEBUG
void bch2_btree_iter_verify(struct btree_iter *, struct btree *);
+void bch2_btree_iter_verify_locks(struct btree_iter *);
#else
static inline void bch2_btree_iter_verify(struct btree_iter *iter,
- struct btree *b) {}
+ struct btree *b) {}
+static inline void bch2_btree_iter_verify_locks(struct btree_iter *iter) {}
#endif
void bch2_btree_node_iter_fix(struct btree_iter *, struct btree *,
@@ -102,22 +104,28 @@ void bch2_btree_node_iter_fix(struct btree_iter *, struct btree *,
struct bkey_packed *, unsigned, unsigned);
int bch2_btree_iter_unlock(struct btree_iter *);
-bool __bch2_btree_iter_set_locks_want(struct btree_iter *, unsigned);
-static inline bool bch2_btree_iter_set_locks_want(struct btree_iter *iter,
- unsigned new_locks_want)
+bool __bch2_btree_iter_upgrade(struct btree_iter *, unsigned);
+
+static inline bool bch2_btree_iter_upgrade(struct btree_iter *iter,
+ unsigned new_locks_want)
{
new_locks_want = min(new_locks_want, BTREE_MAX_DEPTH);
- if (iter->locks_want == new_locks_want &&
- iter->nodes_intent_locked == (1 << new_locks_want) - 1)
- return true;
+ return iter->locks_want < new_locks_want
+ ? __bch2_btree_iter_upgrade(iter, new_locks_want)
+ : iter->uptodate <= BTREE_ITER_NEED_PEEK;
+}
+
+void __bch2_btree_iter_downgrade(struct btree_iter *, unsigned);
- return __bch2_btree_iter_set_locks_want(iter, new_locks_want);
+static inline void bch2_btree_iter_downgrade(struct btree_iter *iter)
+{
+ if (iter->locks_want > ((iter->flags & BTREE_ITER_INTENT) ? 1 : 0))
+ __bch2_btree_iter_downgrade(iter, 0);
}
-bool bch2_btree_iter_node_replace(struct btree_iter *, struct btree *);
-void bch2_btree_iter_node_drop_linked(struct btree_iter *, struct btree *);
+void bch2_btree_iter_node_replace(struct btree_iter *, struct btree *);
void bch2_btree_iter_node_drop(struct btree_iter *, struct btree *);
void bch2_btree_iter_reinit_node(struct btree_iter *, struct btree *);
diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h
index a6c55dff1822..13d93306b4be 100644
--- a/fs/bcachefs/btree_locking.h
+++ b/fs/bcachefs/btree_locking.h
@@ -75,16 +75,23 @@ static inline void mark_btree_node_intent_locked(struct btree_iter *iter,
mark_btree_node_locked(iter, level, SIX_LOCK_intent);
}
-static inline enum six_lock_type btree_lock_want(struct btree_iter *iter, int level)
+static inline enum six_lock_type __btree_lock_want(struct btree_iter *iter, int level)
{
return level < iter->locks_want
? SIX_LOCK_intent
: SIX_LOCK_read;
}
-static inline bool btree_want_intent(struct btree_iter *iter, int level)
+static inline enum btree_node_locked_type
+btree_lock_want(struct btree_iter *iter, int level)
{
- return btree_lock_want(iter, level) == SIX_LOCK_intent;
+ if (level < iter->level)
+ return BTREE_NODE_UNLOCKED;
+ if (level < iter->locks_want)
+ return BTREE_NODE_INTENT_LOCKED;
+ if (level == iter->level)
+ return BTREE_NODE_READ_LOCKED;
+ return BTREE_NODE_UNLOCKED;
}
static inline void btree_node_unlock(struct btree_iter *iter, unsigned level)
@@ -158,7 +165,7 @@ bool __bch2_btree_node_relock(struct btree_iter *, unsigned);
static inline bool bch2_btree_node_relock(struct btree_iter *iter,
unsigned level)
{
- return likely(btree_lock_want(iter, level) ==
+ return likely(__btree_lock_want(iter, level) ==
btree_node_locked_type(iter, level)) ||
__bch2_btree_node_relock(iter, level);
}
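
The hunk above splits the old helper in two: __btree_lock_want() still picks the six lock type to take when locking a level, while the new btree_lock_want() describes the state each level should currently be in, including BTREE_NODE_UNLOCKED; that is what bch2_btree_iter_verify_locks() asserts against. A stand-alone model of the new helper, showing the expected per-level state for a leaf iterator upgraded to locks_want == 2:

#include <stdio.h>

enum lock_state { UNLOCKED, READ_LOCKED, INTENT_LOCKED };

struct model_iter { unsigned level, locks_want; };

/* mirrors the new btree_lock_want() added in this hunk: */
static enum lock_state lock_want(const struct model_iter *it, unsigned l)
{
	if (l < it->level)
		return UNLOCKED;	/* levels below us: never held */
	if (l < it->locks_want)
		return INTENT_LOCKED;
	if (l == it->level)
		return READ_LOCKED;
	return UNLOCKED;		/* ancestors past locks_want */
}

int main(void)
{
	struct model_iter it = { .level = 0, .locks_want = 2 };
	static const char *name[] = { "unlocked", "read", "intent" };
	unsigned l;

	/* prints: intent, intent, unlocked, unlocked */
	for (l = 0; l < 4; l++)
		printf("level %u: %s\n", l, name[lock_want(&it, l)]);
	return 0;
}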
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 9b644dc794e7..3e13f78476a2 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -223,8 +223,7 @@ found:
mutex_unlock(&c->btree_interior_update_lock);
}
-static void __btree_node_free(struct bch_fs *c, struct btree *b,
- struct btree_iter *iter)
+static void __btree_node_free(struct bch_fs *c, struct btree *b)
{
trace_btree_node_free(c, b);
@@ -237,21 +236,11 @@ static void __btree_node_free(struct bch_fs *c, struct btree *b,
clear_btree_node_noevict(b);
- btree_node_lock_type(c, b, SIX_LOCK_write);
-
bch2_btree_node_hash_remove(&c->btree_cache, b);
mutex_lock(&c->btree_cache.lock);
list_move(&b->list, &c->btree_cache.freeable);
mutex_unlock(&c->btree_cache.lock);
-
- /*
- * By using six_unlock_write() directly instead of
- * bch2_btree_node_unlock_write(), we don't update the iterator's
- * sequence numbers and cause future bch2_btree_node_relock() calls to
- * fail:
- */
- six_unlock_write(&b->lock);
}
void bch2_btree_node_free_never_inserted(struct bch_fs *c, struct btree *b)
@@ -264,7 +253,9 @@ void bch2_btree_node_free_never_inserted(struct bch_fs *c, struct btree *b)
clear_btree_node_dirty(b);
- __btree_node_free(c, b, NULL);
+ btree_node_lock_type(c, b, SIX_LOCK_write);
+ __btree_node_free(c, b);
+ six_unlock_write(&b->lock);
bch2_open_bucket_put_refs(c, &ob.nr, ob.refs);
}
@@ -283,9 +274,9 @@ void bch2_btree_node_free_inmem(struct bch_fs *c, struct btree *b,
*/
btree_update_drop_new_node(c, b);
- bch2_btree_iter_node_drop_linked(iter, b);
-
- __btree_node_free(c, b, iter);
+ __bch2_btree_node_lock_write(b, iter);
+ __btree_node_free(c, b);
+ six_unlock_write(&b->lock);
bch2_btree_iter_node_drop(iter, b);
}
@@ -499,7 +490,9 @@ static void bch2_btree_reserve_put(struct bch_fs *c, struct btree_reserve *reser
bch2_btree_open_bucket_put(c, b);
}
- __btree_node_free(c, b, NULL);
+ btree_node_lock_type(c, b, SIX_LOCK_write);
+ __btree_node_free(c, b);
+ six_unlock_write(&b->lock);
six_unlock_intent(&b->lock);
}
@@ -1593,7 +1586,7 @@ int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter,
* XXX: figure out how far we might need to split,
* instead of locking/reserving all the way to the root:
*/
- if (!bch2_btree_iter_set_locks_want(iter, U8_MAX)) {
+ if (!bch2_btree_iter_upgrade(iter, U8_MAX)) {
ret = -EINTR;
goto out;
}
@@ -1614,7 +1607,11 @@ int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter,
btree_split(as, b, iter, NULL, flags);
bch2_btree_update_done(as);
- bch2_btree_iter_set_locks_want(iter, 1);
+ /*
+ * We haven't successfully inserted yet, so don't downgrade all the way
+ * back to read locks:
+ */
+ __bch2_btree_iter_downgrade(iter, 1);
out:
up_read(&c->gc_lock);
closure_sync(&cl);
@@ -1697,7 +1694,7 @@ retry:
if (!down_read_trylock(&c->gc_lock))
goto err_cycle_gc_lock;
- if (!bch2_btree_iter_set_locks_want(iter, U8_MAX)) {
+ if (!bch2_btree_iter_upgrade(iter, U8_MAX)) {
ret = -EINTR;
goto err_unlock;
}
@@ -1753,7 +1750,15 @@ retry:
six_unlock_intent(&m->lock);
up_read(&c->gc_lock);
out:
- bch2_btree_iter_set_locks_want(iter, 1);
+ /*
+ * Don't downgrade locks here: we're called after successful insert,
+ * and the caller will downgrade locks after a successful insert
+ * anyways (in case e.g. a split was required first)
+ *
+ * And we're also called when inserting into interior nodes in the
+ * split path, and downgrading to read locks in there is potentially
+ * confusing:
+ */
closure_sync(&cl);
return;
@@ -1771,8 +1776,6 @@ err_cycle_gc_lock:
goto err;
err_unlock:
- if (ret != -EINTR && ret != -EAGAIN)
- bch2_btree_iter_set_locks_want(iter, 1);
six_unlock_intent(&m->lock);
up_read(&c->gc_lock);
err:
@@ -1831,7 +1834,7 @@ static int __btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
bch2_btree_node_free_inmem(c, b, iter);
- BUG_ON(!bch2_btree_iter_node_replace(iter, n));
+ bch2_btree_iter_node_replace(iter, n);
bch2_btree_update_done(as);
return 0;
@@ -1846,7 +1849,6 @@ static int __btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
__le64 seq, unsigned flags)
{
- unsigned locks_want = iter->locks_want;
struct closure cl;
struct btree *b;
int ret;
@@ -1855,7 +1857,7 @@ int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
closure_init_stack(&cl);
- bch2_btree_iter_set_locks_want(iter, U8_MAX);
+ bch2_btree_iter_upgrade(iter, U8_MAX);
if (!(flags & BTREE_INSERT_GC_LOCK_HELD)) {
if (!down_read_trylock(&c->gc_lock)) {
@@ -1882,7 +1884,7 @@ int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
closure_sync(&cl);
}
- bch2_btree_iter_set_locks_want(iter, locks_want);
+ bch2_btree_iter_downgrade(iter);
if (!(flags & BTREE_INSERT_GC_LOCK_HELD))
up_read(&c->gc_lock);
@@ -1998,6 +2000,9 @@ int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter,
closure_init_stack(&cl);
+ if (!bch2_btree_iter_upgrade(iter, U8_MAX))
+ return -EINTR;
+
if (!down_read_trylock(&c->gc_lock)) {
bch2_btree_iter_unlock(iter);
down_read(&c->gc_lock);
@@ -2057,6 +2062,8 @@ int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter,
goto err_free_update;
__bch2_btree_node_update_key(c, as, iter, b, new_hash, new_key);
+
+ bch2_btree_iter_downgrade(iter);
err:
if (new_hash) {
mutex_lock(&c->btree_cache.lock);
diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c
index fee65c0c8a8e..a62d83070367 100644
--- a/fs/bcachefs/btree_update_leaf.c
+++ b/fs/bcachefs/btree_update_leaf.c
@@ -415,11 +415,14 @@ int __bch2_btree_insert_at(struct btree_insert *trans)
{
struct bch_fs *c = trans->c;
struct btree_insert_entry *i;
- struct btree_iter *split = NULL;
+ struct btree_iter *linked, *split = NULL;
bool cycle_gc_lock = false;
unsigned flags;
int ret;
+ for_each_btree_iter(trans->entries[0].iter, linked)
+ bch2_btree_iter_verify_locks(linked);
+
/* for the sake of sanity: */
BUG_ON(trans->nr > 1 && !(trans->flags & BTREE_INSERT_ATOMIC));
@@ -441,13 +444,7 @@ retry:
cycle_gc_lock = false;
trans_for_each_entry(trans, i) {
- if (i->iter->locks_want < 1 &&
- !bch2_btree_iter_set_locks_want(i->iter, 1)) {
- ret = -EINTR;
- goto err;
- }
-
- if (i->iter->uptodate > BTREE_ITER_NEED_PEEK) {
+ if (!bch2_btree_iter_upgrade(i->iter, 1)) {
ret = -EINTR;
goto err;
}
@@ -466,18 +463,24 @@ retry:
bch2_foreground_maybe_merge(c, i->iter, 0, trans->flags);
trans_for_each_entry(trans, i)
- bch2_btree_iter_set_locks_want(i->iter, 1);
+ bch2_btree_iter_downgrade(i->iter);
out:
percpu_ref_put(&c->writes);
- if ((trans->flags & BTREE_INSERT_NOUNLOCK) && trans->did_work)
- trans_for_each_entry(trans, i)
- BUG_ON(!btree_node_locked(i->iter, 0));
+ if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
+ /* make sure we didn't drop or screw up locks: */
+ for_each_btree_iter(trans->entries[0].iter, linked) {
+ bch2_btree_iter_verify_locks(linked);
+ BUG_ON((trans->flags & BTREE_INSERT_NOUNLOCK) &&
+ trans->did_work &&
+ linked->uptodate >= BTREE_ITER_NEED_RELOCK);
+ }
- /* make sure we didn't lose an error: */
- if (!ret && IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
- trans_for_each_entry(trans, i)
- BUG_ON(!i->done);
+ /* make sure we didn't lose an error: */
+ if (!ret)
+ trans_for_each_entry(trans, i)
+ BUG_ON(!i->done);
+ }
BUG_ON(!(trans->flags & BTREE_INSERT_ATOMIC) && ret == -EINTR);
diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c
index a0a4c6891f41..215c5aa5be0e 100644
--- a/fs/bcachefs/migrate.c
+++ b/fs/bcachefs/migrate.c
@@ -126,7 +126,13 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
retry:
if (!bch2_extent_has_device(bkey_i_to_s_c_extent(&b->key),
dev_idx)) {
- bch2_btree_iter_set_locks_want(&iter, 0);
+ /*
+ * we might have found a btree node key we
+ * needed to update, and then tried to update it
+ * but got -EINTR after upgrading the iter, and
+ * then raced and the node is now gone:
+ */
+ bch2_btree_iter_downgrade(&iter);
ret = bch2_mark_bkey_replicas(c, BCH_DATA_BTREE,
bkey_i_to_s_c(&b->key));
@@ -141,11 +147,6 @@ retry:
if (ret)
goto err;
- if (!bch2_btree_iter_set_locks_want(&iter, U8_MAX)) {
- b = bch2_btree_iter_peek_node(&iter);
- goto retry;
- }
-
ret = bch2_btree_node_update_key(c, &iter, b, new_key);
if (ret == -EINTR) {
b = bch2_btree_iter_peek_node(&iter);