summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@gmail.com>2019-05-14 14:08:23 -0400
committerKent Overstreet <kent.overstreet@gmail.com>2019-06-10 14:08:05 -0400
commit2d67f0c49b571c735587e70ca54b6be95bae9721 (patch)
treedbe229fdd98bf949ef6c8be263b7afd9d850151a
parente8cf623efed27836f9681049a38360a2772ec67b (diff)
bcachefs: improved btree locking tracepoints
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
-rw-r--r--fs/bcachefs/btree_iter.c75
-rw-r--r--fs/bcachefs/btree_iter.h33
-rw-r--r--fs/bcachefs/btree_update_interior.c1
-rw-r--r--fs/bcachefs/btree_update_leaf.c7
-rw-r--r--include/trace/events/bcachefs.h48
5 files changed, 123 insertions, 41 deletions
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index 4931ca740b9f..fb036c26e79c 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -15,13 +15,18 @@ static inline struct bkey_s_c __btree_iter_peek_all(struct btree_iter *,
struct btree_iter_level *,
struct bkey *);
-#define BTREE_ITER_NOT_END ((struct btree *) 1)
+#define BTREE_ITER_NO_NODE_GET_LOCKS ((struct btree *) 1)
+#define BTREE_ITER_NO_NODE_DROP ((struct btree *) 2)
+#define BTREE_ITER_NO_NODE_LOCK_ROOT ((struct btree *) 3)
+#define BTREE_ITER_NO_NODE_UP ((struct btree *) 4)
+#define BTREE_ITER_NO_NODE_DOWN ((struct btree *) 5)
+#define BTREE_ITER_NO_NODE_INIT ((struct btree *) 6)
+#define BTREE_ITER_NO_NODE_ERROR ((struct btree *) 7)
static inline bool is_btree_node(struct btree_iter *iter, unsigned l)
{
return l < BTREE_MAX_DEPTH &&
- iter->l[l].b &&
- iter->l[l].b != BTREE_ITER_NOT_END;
+ (unsigned long) iter->l[l].b >= 128;
}
/* Returns < 0 if @k is before iter pos, > 0 if @k is after */
@@ -106,19 +111,20 @@ bool __bch2_btree_node_relock(struct btree_iter *iter, unsigned level)
struct btree *b = btree_iter_node(iter, level);
int want = __btree_lock_want(iter, level);
- if (!b || b == BTREE_ITER_NOT_END)
+ if (!is_btree_node(iter, level))
return false;
if (race_fault())
return false;
- if (!six_relock_type(&b->lock, want, iter->l[level].lock_seq) &&
- !(iter->l[level].lock_seq >> 1 == b->lock.state.seq >> 1 &&
- btree_node_lock_increment(iter, b, level, want)))
+ if (six_relock_type(&b->lock, want, iter->l[level].lock_seq) ||
+ (btree_node_lock_seq_matches(iter, b, level) &&
+ btree_node_lock_increment(iter, b, level, want))) {
+ mark_btree_node_locked(iter, level, want);
+ return true;
+ } else {
return false;
-
- mark_btree_node_locked(iter, level, want);
- return true;
+ }
}
static bool bch2_btree_node_upgrade(struct btree_iter *iter, unsigned level)
@@ -141,7 +147,7 @@ static bool bch2_btree_node_upgrade(struct btree_iter *iter, unsigned level)
: six_relock_type(&b->lock, SIX_LOCK_intent, iter->l[level].lock_seq))
goto success;
- if (iter->l[level].lock_seq >> 1 == b->lock.state.seq >> 1 &&
+ if (btree_node_lock_seq_matches(iter, b, level) &&
btree_node_lock_increment(iter, b, level, BTREE_NODE_INTENT_LOCKED)) {
btree_node_unlock(iter, level);
goto success;
@@ -166,6 +172,23 @@ static inline bool btree_iter_get_locks(struct btree_iter *iter,
if (!(upgrade
? bch2_btree_node_upgrade(iter, l)
: bch2_btree_node_relock(iter, l))) {
+ if (upgrade)
+ trace_node_upgrade_fail(l, iter->l[l].lock_seq,
+ is_btree_node(iter, l)
+ ? 0
+ : (unsigned long) iter->l[l].b,
+ is_btree_node(iter, l)
+ ? iter->l[l].b->lock.state.seq
+ : 0);
+ else
+ trace_node_relock_fail(l, iter->l[l].lock_seq,
+ is_btree_node(iter, l)
+ ? 0
+ : (unsigned long) iter->l[l].b,
+ is_btree_node(iter, l)
+ ? iter->l[l].b->lock.state.seq
+ : 0);
+
fail_idx = l;
btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
}
@@ -180,7 +203,7 @@ static inline bool btree_iter_get_locks(struct btree_iter *iter,
*/
while (fail_idx >= 0) {
btree_node_unlock(iter, fail_idx);
- iter->l[fail_idx].b = BTREE_ITER_NOT_END;
+ iter->l[fail_idx].b = BTREE_ITER_NO_NODE_GET_LOCKS;
--fail_idx;
}
@@ -810,7 +833,7 @@ void bch2_btree_iter_node_drop(struct btree_iter *iter, struct btree *b)
trans_for_each_iter(iter->trans, linked)
if (linked->l[level].b == b) {
__btree_node_unlock(linked, level);
- linked->l[level].b = BTREE_ITER_NOT_END;
+ linked->l[level].b = BTREE_ITER_NO_NODE_DROP;
}
}
@@ -848,7 +871,8 @@ static inline int btree_iter_lock_root(struct btree_iter *iter,
* that depth
*/
iter->level = depth_want;
- iter->l[iter->level].b = NULL;
+ for (i = iter->level; i < BTREE_MAX_DEPTH; i++)
+ iter->l[i].b = NULL;
return 1;
}
@@ -861,13 +885,14 @@ static inline int btree_iter_lock_root(struct btree_iter *iter,
b->level == iter->level &&
!race_fault())) {
for (i = 0; i < iter->level; i++)
- iter->l[i].b = BTREE_ITER_NOT_END;
+ iter->l[i].b = BTREE_ITER_NO_NODE_LOCK_ROOT;
iter->l[iter->level].b = b;
+ for (i = iter->level + 1; i < BTREE_MAX_DEPTH; i++)
+ iter->l[i].b = NULL;
mark_btree_node_locked(iter, iter->level, lock_type);
btree_iter_node_set(iter, b);
return 0;
-
}
six_unlock_type(&b->lock, lock_type);
@@ -973,7 +998,7 @@ retry_all:
if (unlikely(ret == -EIO)) {
trans->error = true;
iter->flags |= BTREE_ITER_ERROR;
- iter->l[iter->level].b = BTREE_ITER_NOT_END;
+ iter->l[iter->level].b = BTREE_ITER_NO_NODE_ERROR;
goto out;
}
@@ -1008,12 +1033,12 @@ static unsigned btree_iter_up_until_locked(struct btree_iter *iter,
unsigned l = iter->level;
while (btree_iter_node(iter, l) &&
- !(is_btree_node(iter, l) &&
- bch2_btree_node_relock(iter, l) &&
- (!check_pos ||
- btree_iter_pos_in_node(iter, iter->l[l].b)))) {
+ (!is_btree_node(iter, l) ||
+ !bch2_btree_node_relock(iter, l) ||
+ (check_pos &&
+ !btree_iter_pos_in_node(iter, iter->l[l].b)))) {
btree_node_unlock(iter, l);
- iter->l[l].b = BTREE_ITER_NOT_END;
+ iter->l[l].b = BTREE_ITER_NO_NODE_UP;
l++;
}
@@ -1069,7 +1094,7 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
return 0;
iter->level = depth_want;
- iter->l[iter->level].b = BTREE_ITER_NOT_END;
+ iter->l[iter->level].b = BTREE_ITER_NO_NODE_DOWN;
return ret;
}
}
@@ -1590,7 +1615,7 @@ static inline void bch2_btree_iter_init(struct btree_trans *trans,
iter->nodes_intent_locked = 0;
for (i = 0; i < ARRAY_SIZE(iter->l); i++)
iter->l[i].b = NULL;
- iter->l[iter->level].b = BTREE_ITER_NOT_END;
+ iter->l[iter->level].b = BTREE_ITER_NO_NODE_INIT;
prefetch(c->btree_roots[btree_id].b);
}
@@ -1798,7 +1823,7 @@ struct btree_iter *bch2_trans_get_node_iter(struct btree_trans *trans,
for (i = 0; i < ARRAY_SIZE(iter->l); i++)
iter->l[i].b = NULL;
- iter->l[iter->level].b = BTREE_ITER_NOT_END;
+ iter->l[iter->level].b = BTREE_ITER_NO_NODE_INIT;
return iter;
}
diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h
index 33d95c43bdba..4dd8b01c597e 100644
--- a/fs/bcachefs/btree_iter.h
+++ b/fs/bcachefs/btree_iter.h
@@ -17,6 +17,19 @@ static inline struct btree *btree_iter_node(struct btree_iter *iter,
return level < BTREE_MAX_DEPTH ? iter->l[level].b : NULL;
}
+static inline bool btree_node_lock_seq_matches(const struct btree_iter *iter,
+ const struct btree *b, unsigned level)
+{
+ /*
+ * We don't compare the low bits of the lock sequence numbers because
+ * @iter might have taken a write lock on @b, and we don't want to skip
+ * the linked iterator if the sequence numbers were equal before taking
+ * that write lock. The lock sequence number is incremented by taking
+ * and releasing write locks and is even when unlocked:
+ */
+ return iter->l[level].lock_seq >> 1 == b->lock.state.seq >> 1;
+}
+
static inline struct btree *btree_node_parent(struct btree_iter *iter,
struct btree *b)
{
@@ -55,30 +68,20 @@ __trans_next_iter(struct btree_trans *trans, unsigned idx)
static inline bool __iter_has_node(const struct btree_iter *iter,
const struct btree *b)
{
- /*
- * We don't compare the low bits of the lock sequence numbers because
- * @iter might have taken a write lock on @b, and we don't want to skip
- * the linked iterator if the sequence numbers were equal before taking
- * that write lock. The lock sequence number is incremented by taking
- * and releasing write locks and is even when unlocked:
- */
-
return iter->l[b->level].b == b &&
- iter->l[b->level].lock_seq >> 1 == b->lock.state.seq >> 1;
+ btree_node_lock_seq_matches(iter, b, b->level);
}
static inline struct btree_iter *
__trans_next_iter_with_node(struct btree_trans *trans, struct btree *b,
unsigned idx)
{
- EBUG_ON(idx < trans->nr_iters && trans->iters[idx].idx != idx);
+ struct btree_iter *iter = __trans_next_iter(trans, idx);
- for (; idx < trans->nr_iters; idx++)
- if ((trans->iters_linked & (1ULL << idx)) &&
- __iter_has_node(&trans->iters[idx], b))
- return &trans->iters[idx];
+ while (iter && !__iter_has_node(iter, b))
+ iter = __trans_next_iter(trans, iter->idx + 1);
- return NULL;
+ return iter;
}
#define trans_for_each_iter_with_node(_trans, _b, _iter) \
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 0873c7b301bd..1a0adaeaae46 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -1586,6 +1586,7 @@ int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter,
* instead of locking/reserving all the way to the root:
*/
if (!bch2_btree_iter_upgrade(iter, U8_MAX)) {
+ trace_trans_restart_iter_upgrade(c, iter->trans->ip);
ret = -EINTR;
goto out;
}
diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c
index 1edcf0cffe57..07580f57985d 100644
--- a/fs/bcachefs/btree_update_leaf.c
+++ b/fs/bcachefs/btree_update_leaf.c
@@ -567,6 +567,8 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
update_triggers_transactional(trans, i)) {
ret = bch2_trans_mark_update(trans, i,
&trans->fs_usage_deltas);
+ if (ret == -EINTR)
+ trace_trans_restart_mark(c, trans->ip);
if (ret)
return ret;
}
@@ -714,7 +716,9 @@ int bch2_trans_commit_error(struct btree_trans *trans,
* don't care if we got ENOSPC because we told split it
* couldn't block:
*/
- if (!ret || (flags & BTREE_INSERT_NOUNLOCK)) {
+ if (!ret ||
+ ret == -EINTR ||
+ (flags & BTREE_INSERT_NOUNLOCK)) {
trans_restart(" (split)");
trace_trans_restart_btree_node_split(c, trans->ip);
ret = -EINTR;
@@ -806,6 +810,7 @@ static int __bch2_trans_commit(struct btree_trans *trans,
if (!bch2_btree_iter_upgrade(i->iter, 1)) {
trans_restart(" (failed upgrade, locks_want %u uptodate %u)",
old_locks_want, old_uptodate);
+ trace_trans_restart_upgrade(c, trans->ip);
ret = -EINTR;
goto err;
}
diff --git a/include/trace/events/bcachefs.h b/include/trace/events/bcachefs.h
index 83a5f874ba80..4b96f2a5fa08 100644
--- a/include/trace/events/bcachefs.h
+++ b/include/trace/events/bcachefs.h
@@ -561,6 +561,21 @@ DEFINE_EVENT(transaction_restart, trans_restart_btree_node_split,
TP_ARGS(c, ip)
);
+DEFINE_EVENT(transaction_restart, trans_restart_mark,
+ TP_PROTO(struct bch_fs *c, unsigned long ip),
+ TP_ARGS(c, ip)
+);
+
+DEFINE_EVENT(transaction_restart, trans_restart_upgrade,
+ TP_PROTO(struct bch_fs *c, unsigned long ip),
+ TP_ARGS(c, ip)
+);
+
+DEFINE_EVENT(transaction_restart, trans_restart_iter_upgrade,
+ TP_PROTO(struct bch_fs *c, unsigned long ip),
+ TP_ARGS(c, ip)
+);
+
DEFINE_EVENT(transaction_restart, trans_restart_traverse,
TP_PROTO(struct bch_fs *c, unsigned long ip),
TP_ARGS(c, ip)
@@ -571,6 +586,39 @@ DEFINE_EVENT(transaction_restart, trans_restart_atomic,
TP_ARGS(c, ip)
);
+DECLARE_EVENT_CLASS(node_lock_fail,
+ TP_PROTO(unsigned level, u32 iter_seq, unsigned node, u32 node_seq),
+ TP_ARGS(level, iter_seq, node, node_seq),
+
+ TP_STRUCT__entry(
+ __field(u32, level)
+ __field(u32, iter_seq)
+ __field(u32, node)
+ __field(u32, node_seq)
+ ),
+
+ TP_fast_assign(
+ __entry->level = level;
+ __entry->iter_seq = iter_seq;
+ __entry->node = node;
+ __entry->node_seq = node_seq;
+ ),
+
+ TP_printk("level %u iter seq %u node %u node seq %u",
+ __entry->level, __entry->iter_seq,
+ __entry->node, __entry->node_seq)
+);
+
+DEFINE_EVENT(node_lock_fail, node_upgrade_fail,
+ TP_PROTO(unsigned level, u32 iter_seq, unsigned node, u32 node_seq),
+ TP_ARGS(level, iter_seq, node, node_seq)
+);
+
+DEFINE_EVENT(node_lock_fail, node_relock_fail,
+ TP_PROTO(unsigned level, u32 iter_seq, unsigned node, u32 node_seq),
+ TP_ARGS(level, iter_seq, node, node_seq)
+);
+
#endif /* _TRACE_BCACHE_H */
/* This part must be outside protection */