diff options
Diffstat (limited to 'libbcachefs/btree_iter.c')
-rw-r--r-- | libbcachefs/btree_iter.c | 166 |
1 files changed, 105 insertions, 61 deletions
diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index c7473981..3af00e24 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree_iter.c @@ -189,6 +189,7 @@ static inline bool btree_iter_get_locks(struct btree_iter *iter, bool upgrade, (upgrade ? trace_node_upgrade_fail : trace_node_relock_fail)(iter->trans->ip, trace_ip, + btree_iter_type(iter) == BTREE_ITER_CACHED, iter->btree_id, &iter->real_pos, l, iter->l[l].lock_seq, is_btree_node(iter, l) @@ -197,7 +198,6 @@ static inline bool btree_iter_get_locks(struct btree_iter *iter, bool upgrade, is_btree_node(iter, l) ? iter->l[l].b->c.lock.state.seq : 0); - fail_idx = l; btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE); } @@ -309,7 +309,7 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos, } if (unlikely(deadlock_iter)) { - trace_trans_restart_would_deadlock(iter->trans->ip, ip, + trace_trans_restart_would_deadlock(trans->ip, ip, trans->in_traverse_all, reason, deadlock_iter->btree_id, btree_iter_type(deadlock_iter), @@ -317,6 +317,7 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos, iter->btree_id, btree_iter_type(iter), &pos); + btree_trans_restart(trans); return false; } @@ -375,10 +376,44 @@ void bch2_btree_trans_verify_locks(struct btree_trans *trans) static inline void bch2_btree_iter_verify_locks(struct btree_iter *iter) {} #endif +/* + * Only for btree_cache.c - only relocks intent locks + */ +bool bch2_btree_iter_relock_intent(struct btree_iter *iter) +{ + unsigned l; + + for (l = iter->level; + l < iter->locks_want && btree_iter_node(iter, l); + l++) { + if (!bch2_btree_node_relock(iter, l)) { + trace_node_relock_fail(iter->trans->ip, _RET_IP_, + btree_iter_type(iter) == BTREE_ITER_CACHED, + iter->btree_id, &iter->real_pos, + l, iter->l[l].lock_seq, + is_btree_node(iter, l) + ? 0 + : (unsigned long) iter->l[l].b, + is_btree_node(iter, l) + ? iter->l[l].b->c.lock.state.seq + : 0); + btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE); + btree_trans_restart(iter->trans); + return false; + } + } + + return true; +} + __flatten bool bch2_btree_iter_relock(struct btree_iter *iter, unsigned long trace_ip) { - return btree_iter_get_locks(iter, false, trace_ip); + bool ret = btree_iter_get_locks(iter, false, trace_ip); + + if (!ret) + btree_trans_restart(iter->trans); + return ret; } bool __bch2_btree_iter_upgrade(struct btree_iter *iter, @@ -421,6 +456,8 @@ bool __bch2_btree_iter_upgrade(struct btree_iter *iter, btree_iter_get_locks(linked, true, _THIS_IP_); } + if (iter->should_be_locked) + btree_trans_restart(iter->trans); return false; } @@ -459,8 +496,7 @@ void bch2_trans_downgrade(struct btree_trans *trans) /* Btree transaction locking: */ -static inline bool btree_iter_should_be_locked(struct btree_trans *trans, - struct btree_iter *iter) +static inline bool btree_iter_should_be_locked(struct btree_iter *iter) { return (iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT) || iter->should_be_locked; @@ -470,11 +506,15 @@ bool bch2_trans_relock(struct btree_trans *trans) { struct btree_iter *iter; + if (unlikely(trans->restarted)) + return false; + trans_for_each_iter(trans, iter) - if (!bch2_btree_iter_relock(iter, _RET_IP_) && - btree_iter_should_be_locked(trans, iter)) { + if (btree_iter_should_be_locked(iter) && + !bch2_btree_iter_relock(iter, _RET_IP_)) { trace_trans_restart_relock(trans->ip, _RET_IP_, iter->btree_id, &iter->real_pos); + BUG_ON(!trans->restarted); return false; } return true; @@ -1055,11 +1095,12 @@ static int lock_root_check_fn(struct six_lock *lock, void *p) return b == *rootp ? 0 : -1; } -static inline int btree_iter_lock_root(struct btree_iter *iter, +static inline int btree_iter_lock_root(struct btree_trans *trans, + struct btree_iter *iter, unsigned depth_want, unsigned long trace_ip) { - struct bch_fs *c = iter->trans->c; + struct bch_fs *c = trans->c; struct btree *b, **rootp = &c->btree_roots[iter->btree_id].b; enum six_lock_type lock_type; unsigned i; @@ -1087,8 +1128,11 @@ static inline int btree_iter_lock_root(struct btree_iter *iter, if (unlikely(!btree_node_lock(b, SPOS_MAX, iter->level, iter, lock_type, lock_root_check_fn, rootp, - trace_ip))) - return -EINTR; + trace_ip))) { + if (trans->restarted) + return -EINTR; + continue; + } if (likely(b == READ_ONCE(*rootp) && b->c.level == iter->level && @@ -1109,7 +1153,7 @@ static inline int btree_iter_lock_root(struct btree_iter *iter, } noinline -static void btree_iter_prefetch(struct btree_iter *iter) +static int btree_iter_prefetch(struct btree_iter *iter) { struct bch_fs *c = iter->trans->c; struct btree_iter_level *l = &iter->l[iter->level]; @@ -1120,10 +1164,11 @@ static void btree_iter_prefetch(struct btree_iter *iter) ? (iter->level > 1 ? 0 : 2) : (iter->level > 1 ? 1 : 16); bool was_locked = btree_node_locked(iter, iter->level); + int ret = 0; bch2_bkey_buf_init(&tmp); - while (nr) { + while (nr && !ret) { if (!bch2_btree_node_relock(iter, iter->level)) break; @@ -1133,14 +1178,15 @@ static void btree_iter_prefetch(struct btree_iter *iter) break; bch2_bkey_buf_unpack(&tmp, c, l->b, k); - bch2_btree_node_prefetch(c, iter, tmp.k, iter->btree_id, - iter->level - 1); + ret = bch2_btree_node_prefetch(c, iter, tmp.k, iter->btree_id, + iter->level - 1); } if (!was_locked) btree_node_unlock(iter, iter->level); bch2_bkey_buf_exit(&tmp, c); + return ret; } static noinline void btree_node_mem_ptr_set(struct btree_iter *iter, @@ -1164,10 +1210,11 @@ static noinline void btree_node_mem_ptr_set(struct btree_iter *iter, btree_node_unlock(iter, plevel); } -static __always_inline int btree_iter_down(struct btree_iter *iter, +static __always_inline int btree_iter_down(struct btree_trans *trans, + struct btree_iter *iter, unsigned long trace_ip) { - struct bch_fs *c = iter->trans->c; + struct bch_fs *c = trans->c; struct btree_iter_level *l = &iter->l[iter->level]; struct btree *b; unsigned level = iter->level - 1; @@ -1181,7 +1228,7 @@ static __always_inline int btree_iter_down(struct btree_iter *iter, bch2_bkey_buf_unpack(&tmp, c, l->b, bch2_btree_node_iter_peek(&l->iter, l->b)); - b = bch2_btree_node_get(c, iter, tmp.k, level, lock_type, trace_ip); + b = bch2_btree_node_get(trans, iter, tmp.k, level, lock_type, trace_ip); ret = PTR_ERR_OR_ZERO(b); if (unlikely(ret)) goto err; @@ -1194,7 +1241,7 @@ static __always_inline int btree_iter_down(struct btree_iter *iter, btree_node_mem_ptr_set(iter, level + 1, b); if (iter->flags & BTREE_ITER_PREFETCH) - btree_iter_prefetch(iter); + ret = btree_iter_prefetch(iter); if (btree_node_read_locked(iter, level + 1)) btree_node_unlock(iter, level + 1); @@ -1215,25 +1262,19 @@ static int __btree_iter_traverse_all(struct btree_trans *trans, int ret, struct btree_iter *iter; u8 sorted[BTREE_ITER_MAX]; int i, nr_sorted = 0; - bool relock_fail; if (trans->in_traverse_all) return -EINTR; trans->in_traverse_all = true; retry_all: + trans->restarted = false; + nr_sorted = 0; - relock_fail = false; trans_for_each_iter(trans, iter) { - if (!bch2_btree_iter_relock(iter, _THIS_IP_)) - relock_fail = true; sorted[nr_sorted++] = iter->idx; - } - - if (!relock_fail) { - trans->in_traverse_all = false; - return 0; + iter->should_be_locked = false; } #define btree_iter_cmp_by_idx(_l, _r) \ @@ -1289,15 +1330,6 @@ retry_all: if (ret) goto retry_all; } - - if (hweight64(trans->iters_live) > 1) - ret = -EINTR; - else - trans_for_each_iter(trans, iter) - if (iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT) { - ret = -EINTR; - break; - } out: bch2_btree_cache_cannibalize_unlock(c); @@ -1307,7 +1339,7 @@ out: return ret; } -int bch2_btree_iter_traverse_all(struct btree_trans *trans) +static int bch2_btree_iter_traverse_all(struct btree_trans *trans) { return __btree_iter_traverse_all(trans, 0, _RET_IP_); } @@ -1353,9 +1385,19 @@ static inline unsigned btree_iter_up_until_good_node(struct btree_iter *iter, static int btree_iter_traverse_one(struct btree_iter *iter, unsigned long trace_ip) { + struct btree_trans *trans = iter->trans; unsigned l, depth_want = iter->level; int ret = 0; + /* + * Ensure we obey iter->should_be_locked: if it's set, we can't unlock + * and re-traverse the iterator without a transaction restart: + */ + if (iter->should_be_locked) { + ret = bch2_btree_iter_relock(iter, trace_ip) ? 0 : -EINTR; + goto out; + } + if (btree_iter_type(iter) == BTREE_ITER_CACHED) { ret = bch2_btree_iter_traverse_cached(iter); goto out; @@ -1385,8 +1427,8 @@ static int btree_iter_traverse_one(struct btree_iter *iter, */ while (iter->level > depth_want) { ret = btree_iter_node(iter, iter->level) - ? btree_iter_down(iter, trace_ip) - : btree_iter_lock_root(iter, depth_want, trace_ip); + ? btree_iter_down(trans, iter, trace_ip) + : btree_iter_lock_root(trans, iter, depth_want, trace_ip); if (unlikely(ret)) { if (ret == 1) { /* @@ -1414,7 +1456,9 @@ static int btree_iter_traverse_one(struct btree_iter *iter, iter->uptodate = BTREE_ITER_NEED_PEEK; out: - trace_iter_traverse(iter->trans->ip, trace_ip, + BUG_ON((ret == -EINTR) != !!trans->restarted); + trace_iter_traverse(trans->ip, trace_ip, + btree_iter_type(iter) == BTREE_ITER_CACHED, iter->btree_id, &iter->real_pos, ret); bch2_btree_iter_verify(iter); return ret; @@ -1427,8 +1471,10 @@ static int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter) ret = bch2_trans_cond_resched(trans) ?: btree_iter_traverse_one(iter, _RET_IP_); - if (unlikely(ret)) + if (unlikely(ret) && hweight64(trans->iters_linked) == 1) { ret = __btree_iter_traverse_all(trans, ret, _RET_IP_); + BUG_ON(ret == -EINTR); + } return ret; } @@ -1559,6 +1605,8 @@ static void btree_iter_set_search_pos(struct btree_iter *iter, struct bpos new_p int cmp = bpos_cmp(new_pos, iter->real_pos); unsigned l = iter->level; + EBUG_ON(iter->trans->restarted); + if (!cmp) goto out; @@ -2118,6 +2166,8 @@ struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans, struct btree_iter *iter, *best = NULL; struct bpos real_pos, pos_min = POS_MIN; + EBUG_ON(trans->restarted); + if ((flags & BTREE_ITER_TYPE) != BTREE_ITER_NODES && btree_node_type_is_extents(btree_id) && !(flags & BTREE_ITER_NOT_EXTENTS) && @@ -2282,6 +2332,7 @@ void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size) if (old_bytes) { trace_trans_restart_mem_realloced(trans->ip, _RET_IP_, new_bytes); + btree_trans_restart(trans); return ERR_PTR(-EINTR); } } @@ -2307,33 +2358,26 @@ inline void bch2_trans_unlink_iters(struct btree_trans *trans) } /** - * bch2_trans_reset() - reset a transaction after a interrupted attempt + * bch2_trans_begin() - reset a transaction after a interrupted attempt * @trans: transaction to reset - * @flags: transaction reset flags. * * While iterating over nodes or updating nodes a attempt to lock a btree * node may return EINTR when the trylock fails. When this occurs - * bch2_trans_reset() or bch2_trans_begin() should be called and the - * transaction retried. - * - * Transaction reset flags include: - * - * - TRANS_RESET_NOUNLOCK - Do not attempt to unlock and reschedule the - * transaction. - * - TRANS_RESET_NOTRAVERSE - Do not traverse all linked iters. + * bch2_trans_begin() should be called and the transaction retried. */ -void bch2_trans_reset(struct btree_trans *trans, unsigned flags) +void bch2_trans_begin(struct btree_trans *trans) { struct btree_iter *iter; - trans_for_each_iter(trans, iter) { + trans_for_each_iter(trans, iter) iter->flags &= ~(BTREE_ITER_KEEP_UNTIL_COMMIT| BTREE_ITER_SET_POS_AFTER_COMMIT); - iter->should_be_locked = false; - } + /* + * XXX: we shouldn't be doing this if the transaction was restarted, but + * currently we still overflow transaction iterators if we do that + * */ bch2_trans_unlink_iters(trans); - trans->iters_touched &= trans->iters_live; trans->extra_journal_res = 0; @@ -2351,12 +2395,12 @@ void bch2_trans_reset(struct btree_trans *trans, unsigned flags) (void *) &trans->fs_usage_deltas->memset_start); } - if (!(flags & TRANS_RESET_NOUNLOCK)) - bch2_trans_cond_resched(trans); + bch2_trans_cond_resched(trans); - if (!(flags & TRANS_RESET_NOTRAVERSE) && - trans->iters_linked) + if (trans->restarted) bch2_btree_iter_traverse_all(trans); + + trans->restarted = false; } static void bch2_trans_alloc_iters(struct btree_trans *trans, struct bch_fs *c) |