Diffstat (limited to 'libbcachefs/btree_iter.c')
-rw-r--r--  libbcachefs/btree_iter.c  122
1 file changed, 65 insertions(+), 57 deletions(-)
diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c
index 8955555d..a28d2dd7 100644
--- a/libbcachefs/btree_iter.c
+++ b/libbcachefs/btree_iter.c
@@ -86,7 +86,7 @@ void __bch2_btree_node_lock_write(struct btree *b, struct btree_iter *iter)
 	struct btree_iter *linked;
 	unsigned readers = 0;
 
-	EBUG_ON(btree_node_read_locked(iter, b->level));
+	EBUG_ON(!btree_node_intent_locked(iter, b->level));
 
 	trans_for_each_iter(iter->trans, linked)
 		if (linked->l[b->level].b == b &&
@@ -496,6 +496,23 @@ static inline void __bch2_btree_iter_verify(struct btree_iter *iter,
 
 #endif
 
+static void btree_node_iter_set_set_pos(struct btree_node_iter *iter,
+					struct btree *b,
+					struct bset_tree *t,
+					struct bkey_packed *k)
+{
+	struct btree_node_iter_set *set;
+
+	btree_node_iter_for_each(iter, set)
+		if (set->end == t->end_offset) {
+			set->k = __btree_node_key_to_offset(b, k);
+			bch2_btree_node_iter_sort(iter, b);
+			return;
+		}
+
+	bch2_btree_node_iter_push(iter, b, k, btree_bkey_last(b, t));
+}
+
 static void __bch2_btree_node_iter_fix(struct btree_iter *iter,
 				       struct btree *b,
 				       struct btree_node_iter *node_iter,
@@ -527,7 +544,8 @@ static void __bch2_btree_node_iter_fix(struct btree_iter *iter,
 					bch2_btree_node_iter_peek_all(node_iter, b),
 					&iter->k);
 		}
-		return;
+
+		goto iter_current_key_not_modified;
 found:
 	set->end = t->end_offset;
 
@@ -569,60 +587,42 @@ found:
 		bkey_disassemble(l->b, k, &iter->k);
 	}
 iter_current_key_not_modified:
 	/*
-	 * Interior nodes are special because iterators for interior nodes don't
-	 * obey the usual invariants regarding the iterator position:
-	 *
-	 * We may have whiteouts that compare greater than the iterator
-	 * position, and logically should be in the iterator, but that we
-	 * skipped past to find the first live key greater than the iterator
-	 * position. This becomes an issue when we insert a new key that is
-	 * greater than the current iterator position, but smaller than the
-	 * whiteouts we've already skipped past - this happens in the course of
-	 * a btree split.
-	 *
-	 * We have to rewind the iterator past to before those whiteouts here,
-	 * else bkey_node_iter_prev() is not going to work and who knows what
-	 * else would happen. And we have to do it manually, because here we've
-	 * already done the insert and the iterator is currently inconsistent:
-	 *
-	 * We've got multiple competing invariants, here - we have to be careful
-	 * about rewinding iterators for interior nodes, because they should
-	 * always point to the key for the child node the btree iterator points
-	 * to.
+	 * When a new key is added, and the node iterator now points to that
+	 * key, the iterator might have skipped past deleted keys that should
+	 * come after the key the iterator now points to. We have to rewind to
+	 * before those deleted keys - otherwise bch2_btree_node_iter_prev_all()
+	 * breaks:
 	 */
-	if (b->level && new_u64s &&
-	    btree_iter_pos_cmp(iter, b, where) > 0) {
+	if (!bch2_btree_node_iter_end(node_iter) &&
+	    (b->level ||
+	     (iter->flags & BTREE_ITER_IS_EXTENTS))) {
 		struct bset_tree *t;
-		struct bkey_packed *k;
+		struct bkey_packed *k, *k2, *p;
+
+		k = bch2_btree_node_iter_peek_all(node_iter, b);
 
 		for_each_bset(b, t) {
-			if (bch2_bkey_to_bset(b, where) == t)
+			bool set_pos = false;
+
+			if (node_iter->data[0].end == t->end_offset)
 				continue;
 
-			k = bch2_bkey_prev_all(b, t,
-				bch2_btree_node_iter_bset_pos(node_iter, b, t));
-			if (k &&
-			    bkey_iter_cmp(b, k, where) > 0) {
-				struct btree_node_iter_set *set;
-				unsigned offset =
-					__btree_node_key_to_offset(b, bkey_next(k));
-
-				btree_node_iter_for_each(node_iter, set)
-					if (set->k == offset) {
-						set->k = __btree_node_key_to_offset(b, k);
-						bch2_btree_node_iter_sort(node_iter, b);
-						goto next_bset;
-					}
-
-				bch2_btree_node_iter_push(node_iter, b, k,
-						btree_bkey_last(b, t));
+			k2 = bch2_btree_node_iter_bset_pos(node_iter, b, t);
+
+			while ((p = bch2_bkey_prev_all(b, t, k2)) &&
+			       bkey_iter_cmp(b, k, p) < 0) {
+				k2 = p;
+				set_pos = true;
 			}
-next_bset:
-			t = t;
+
+			if (set_pos)
+				btree_node_iter_set_set_pos(node_iter,
+							    b, t, k2);
 		}
 	}
+
+	bch2_btree_node_iter_verify(node_iter, b);
 }
 
 void bch2_btree_node_iter_fix(struct btree_iter *iter,
@@ -1436,8 +1436,7 @@ __bch2_btree_iter_peek_slot_extents(struct btree_iter *iter)
 
 recheck:
 	while ((k = __btree_iter_peek_all(iter, l, &iter->k)).k &&
-	       bkey_deleted(k.k) &&
-	       bkey_cmp(bkey_start_pos(k.k), iter->pos) == 0)
+	       bkey_cmp(k.k->p, iter->pos) <= 0)
 		bch2_btree_node_iter_advance(&l->iter, l->b);
 
 	/*
@@ -1477,6 +1476,8 @@ recheck:
 		EBUG_ON(bkey_cmp(k.k->p, iter->pos) < 0);
 		EBUG_ON(bkey_deleted(k.k));
 		iter->uptodate = BTREE_ITER_UPTODATE;
+
+		__bch2_btree_iter_verify(iter, l->b);
 		return k;
 	}
 
@@ -1507,6 +1508,8 @@ recheck:
 
 		iter->k = n;
 		iter->uptodate = BTREE_ITER_UPTODATE;
+
+		__bch2_btree_iter_verify(iter, l->b);
 		return (struct bkey_s_c) { &iter->k, NULL };
 	}
 
@@ -1539,19 +1542,18 @@ recheck:
 		goto recheck;
 	}
 
-	if (k.k &&
-	    !bkey_deleted(k.k) &&
-	    !bkey_cmp(iter->pos, k.k->p)) {
-		iter->uptodate = BTREE_ITER_UPTODATE;
-		return k;
-	} else {
+	if (!k.k ||
+	    bkey_deleted(k.k) ||
+	    bkey_cmp(iter->pos, k.k->p)) {
 		/* hole */
 		bkey_init(&iter->k);
 		iter->k.p = iter->pos;
-
-		iter->uptodate = BTREE_ITER_UPTODATE;
-		return (struct bkey_s_c) { &iter->k, NULL };
+		k = (struct bkey_s_c) { &iter->k, NULL };
 	}
+
+	iter->uptodate = BTREE_ITER_UPTODATE;
+	__bch2_btree_iter_verify(iter, l->b);
+	return k;
 }
 
 struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
@@ -1779,6 +1781,12 @@ found:
 
 		iter->flags &= ~(BTREE_ITER_INTENT|BTREE_ITER_PREFETCH);
 		iter->flags |= flags & (BTREE_ITER_INTENT|BTREE_ITER_PREFETCH);
+
+		if ((iter->flags & BTREE_ITER_INTENT) &&
+		    !bch2_btree_iter_upgrade(iter, 1)) {
+			trace_trans_restart_upgrade(trans->ip);
+			return ERR_PTR(-EINTR);
+		}
 	}
 
 	BUG_ON(iter->btree_id != btree_id);
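The heart of the patch is the rewind loop in __bch2_btree_node_iter_fix(): after an insert, the merged node iterator may point at the new key while its positions in the other bsets have already been advanced past deleted keys that now compare greater than that key, so stepping backwards would silently skip them. The following is a minimal sketch of that idea only, using hypothetical simplified types (plain int keys and one cursor pointer per sorted set, with rewind_other_sets() as an invented name) in place of the real bkey_packed/bset_tree machinery:

#include <stddef.h>

/* Hypothetical stand-in for one bset: a sorted array plus a cursor. */
struct set_pos {
	const int *start;	/* first key in the set */
	const int *cur;		/* current iterator position within it */
};

/*
 * After an insert, 'current_key' is what the merged iterator points at.
 * Rewind every other set's cursor back over keys that compare greater
 * than current_key, so that iterating backwards from the current key
 * still visits them - the invariant the patch's comment says
 * bch2_btree_node_iter_prev_all() depends on.
 */
static void rewind_other_sets(struct set_pos *sets, size_t nr,
			      size_t cur_set, int current_key)
{
	for (size_t i = 0; i < nr; i++) {
		if (i == cur_set)
			continue;

		while (sets[i].cur > sets[i].start &&
		       sets[i].cur[-1] > current_key)
			sets[i].cur--;	/* step back over a skipped key */
	}
}

The real code cannot simply decrement a pointer: positions live in struct btree_node_iter as offsets sorted by key, so it walks back with bch2_bkey_prev_all() and then reinstalls the new position via btree_node_iter_set_set_pos(), which either updates the matching iterator set and re-sorts, or pushes a fresh one.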