Diffstat (limited to 'fs/bcachefs/btree_iter.c')
-rw-r--r--  fs/bcachefs/btree_iter.c  532
1 file changed, 332 insertions(+), 200 deletions(-)
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index 8955555d6603..40cd87d73a4f 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -86,7 +86,7 @@ void __bch2_btree_node_lock_write(struct btree *b, struct btree_iter *iter)
struct btree_iter *linked;
unsigned readers = 0;
- EBUG_ON(btree_node_read_locked(iter, b->level));
+ EBUG_ON(!btree_node_intent_locked(iter, b->level));
trans_for_each_iter(iter->trans, linked)
if (linked->l[b->level].b == b &&
@@ -496,6 +496,23 @@ static inline void __bch2_btree_iter_verify(struct btree_iter *iter,
#endif
+static void btree_node_iter_set_set_pos(struct btree_node_iter *iter,
+ struct btree *b,
+ struct bset_tree *t,
+ struct bkey_packed *k)
+{
+ struct btree_node_iter_set *set;
+
+ btree_node_iter_for_each(iter, set)
+ if (set->end == t->end_offset) {
+ set->k = __btree_node_key_to_offset(b, k);
+ bch2_btree_node_iter_sort(iter, b);
+ return;
+ }
+
+ bch2_btree_node_iter_push(iter, b, k, btree_bkey_last(b, t));
+}
+
static void __bch2_btree_node_iter_fix(struct btree_iter *iter,
struct btree *b,
struct btree_node_iter *node_iter,
@@ -509,6 +526,10 @@ static void __bch2_btree_node_iter_fix(struct btree_iter *iter,
unsigned offset = __btree_node_key_to_offset(b, where);
int shift = new_u64s - clobber_u64s;
unsigned old_end = t->end_offset - shift;
+ unsigned orig_iter_pos = node_iter->data[0].k;
+ bool iter_current_key_modified =
+ orig_iter_pos >= offset &&
+ orig_iter_pos <= offset + clobber_u64s;
btree_node_iter_for_each(node_iter, set)
if (set->end == old_end)
@@ -517,17 +538,12 @@ static void __bch2_btree_node_iter_fix(struct btree_iter *iter,
/* didn't find the bset in the iterator - might have to readd it: */
if (new_u64s &&
btree_iter_pos_cmp(iter, b, where) > 0) {
- btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);
-
bch2_btree_node_iter_push(node_iter, b, where, end);
-
- if (!b->level &&
- node_iter == &iter->l[0].iter)
- bkey_disassemble(b,
- bch2_btree_node_iter_peek_all(node_iter, b),
- &iter->k);
+ goto fixup_done;
+ } else {
+ /* Iterator is after key that changed */
+ return;
}
- return;
found:
set->end = t->end_offset;
@@ -543,85 +559,66 @@ found:
if (set->k == set->end)
bch2_btree_node_iter_set_drop(node_iter, set);
} else {
+ /* Iterator is after key that changed */
set->k = (int) set->k + shift;
- goto iter_current_key_not_modified;
+ return;
}
- btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);
-
bch2_btree_node_iter_sort(node_iter, b);
- if (!b->level && node_iter == &iter->l[0].iter) {
- /*
- * not legal to call bkey_debugcheck() here, because we're
- * called midway through the update path after update has been
- * marked but before deletes have actually happened:
- */
-#if 0
- __btree_iter_peek_all(iter, &iter->l[0], &iter->k);
-#endif
- struct btree_iter_level *l = &iter->l[0];
- struct bkey_packed *k =
- bch2_btree_node_iter_peek_all(&l->iter, l->b);
-
- if (unlikely(!k))
- iter->k.type = KEY_TYPE_deleted;
- else
- bkey_disassemble(l->b, k, &iter->k);
- }
-iter_current_key_not_modified:
+fixup_done:
+ if (node_iter->data[0].k != orig_iter_pos)
+ iter_current_key_modified = true;
/*
- * Interior nodes are special because iterators for interior nodes don't
- * obey the usual invariants regarding the iterator position:
- *
- * We may have whiteouts that compare greater than the iterator
- * position, and logically should be in the iterator, but that we
- * skipped past to find the first live key greater than the iterator
- * position. This becomes an issue when we insert a new key that is
- * greater than the current iterator position, but smaller than the
- * whiteouts we've already skipped past - this happens in the course of
- * a btree split.
- *
- * We have to rewind the iterator past to before those whiteouts here,
- * else bkey_node_iter_prev() is not going to work and who knows what
- * else would happen. And we have to do it manually, because here we've
- * already done the insert and the iterator is currently inconsistent:
- *
- * We've got multiple competing invariants, here - we have to be careful
- * about rewinding iterators for interior nodes, because they should
- * always point to the key for the child node the btree iterator points
- * to.
+ * When a new key is added, and the node iterator now points to that
+ * key, the iterator might have skipped past deleted keys that should
+ * come after the key the iterator now points to. We have to rewind to
+ * before those deleted keys - otherwise
+ * bch2_btree_node_iter_prev_all() breaks:
*/
- if (b->level && new_u64s &&
- btree_iter_pos_cmp(iter, b, where) > 0) {
+ if (!bch2_btree_node_iter_end(node_iter) &&
+ iter_current_key_modified &&
+ (b->level ||
+ (iter->flags & BTREE_ITER_IS_EXTENTS))) {
struct bset_tree *t;
- struct bkey_packed *k;
+ struct bkey_packed *k, *k2, *p;
+
+ k = bch2_btree_node_iter_peek_all(node_iter, b);
for_each_bset(b, t) {
- if (bch2_bkey_to_bset(b, where) == t)
+ bool set_pos = false;
+
+ if (node_iter->data[0].end == t->end_offset)
continue;
- k = bch2_bkey_prev_all(b, t,
- bch2_btree_node_iter_bset_pos(node_iter, b, t));
- if (k &&
- bkey_iter_cmp(b, k, where) > 0) {
- struct btree_node_iter_set *set;
- unsigned offset =
- __btree_node_key_to_offset(b, bkey_next(k));
-
- btree_node_iter_for_each(node_iter, set)
- if (set->k == offset) {
- set->k = __btree_node_key_to_offset(b, k);
- bch2_btree_node_iter_sort(node_iter, b);
- goto next_bset;
- }
-
- bch2_btree_node_iter_push(node_iter, b, k,
- btree_bkey_last(b, t));
+ k2 = bch2_btree_node_iter_bset_pos(node_iter, b, t);
+
+ while ((p = bch2_bkey_prev_all(b, t, k2)) &&
+ bkey_iter_cmp(b, k, p) < 0) {
+ k2 = p;
+ set_pos = true;
}
-next_bset:
- t = t;
+
+ if (set_pos)
+ btree_node_iter_set_set_pos(node_iter,
+ b, t, k2);
+ }
+ }
+
+ if (!b->level &&
+ node_iter == &iter->l[0].iter &&
+ iter_current_key_modified) {
+ struct bkey_packed *k =
+ bch2_btree_node_iter_peek_all(node_iter, b);
+
+ if (likely(k)) {
+ bkey_disassemble(b, k, &iter->k);
+ } else {
+ /* XXX: for extents, calculate size of hole? */
+ iter->k.type = KEY_TYPE_deleted;
}
+
+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);
}
}
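
(An aside on the rewind loop above: a minimal stand-alone sketch - toy ints in
place of bkeys, a single bset cursor, not bcachefs code - of the invariant
being restored. Every bset cursor has to point at the first key not less than
the iterator's current key, or bch2_btree_node_iter_prev_all() skips keys:)

#include <assert.h>
#include <stdio.h>

int main(void)
{
        /* one sorted set whose cursor already skipped deleted keys 20, 30: */
        int set[] = { 20, 30, 50 };
        int cur = 2;                    /* points at 50 */

        /* an insert in another bset makes 25 the iterator's current key: */
        int k = 25;

        /* rewind: while the key before the cursor compares > k, back up */
        while (cur > 0 && set[cur - 1] > k)
                cur--;

        assert(set[cur] == 30);         /* first key >= 25 again */
        printf("cursor rewound to %d\n", set[cur]);
        return 0;
}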
@@ -635,14 +632,18 @@ void bch2_btree_node_iter_fix(struct btree_iter *iter,
struct bset_tree *t = bch2_bkey_to_bset(b, where);
struct btree_iter *linked;
- if (node_iter != &iter->l[b->level].iter)
+ if (node_iter != &iter->l[b->level].iter) {
__bch2_btree_node_iter_fix(iter, b, node_iter, t,
- where, clobber_u64s, new_u64s);
+ where, clobber_u64s, new_u64s);
+ bch2_btree_node_iter_verify(node_iter, b);
+ }
- trans_for_each_iter_with_node(iter->trans, b, linked)
+ trans_for_each_iter_with_node(iter->trans, b, linked) {
__bch2_btree_node_iter_fix(linked, b,
- &linked->l[b->level].iter, t,
- where, clobber_u64s, new_u64s);
+ &linked->l[b->level].iter, t,
+ where, clobber_u64s, new_u64s);
+ __bch2_btree_iter_verify(linked, b);
+ }
}
static inline struct bkey_s_c __btree_iter_unpack(struct btree_iter *iter,
@@ -685,6 +686,13 @@ static inline struct bkey_s_c __btree_iter_peek(struct btree_iter *iter,
bch2_btree_node_iter_peek(&l->iter, l->b));
}
+static inline struct bkey_s_c __btree_iter_prev(struct btree_iter *iter,
+ struct btree_iter_level *l)
+{
+ return __btree_iter_unpack(iter, l, &iter->k,
+ bch2_btree_node_iter_prev(&l->iter, l->b));
+}
+
static inline bool btree_iter_advance_to_pos(struct btree_iter *iter,
struct btree_iter_level *l,
int max_advance)
@@ -743,18 +751,29 @@ static void btree_iter_verify_new_node(struct btree_iter *iter, struct btree *b)
btree_node_unlock(iter, b->level + 1);
}
+static inline bool btree_iter_pos_before_node(struct btree_iter *iter,
+ struct btree *b)
+{
+ return bkey_cmp(iter->pos, b->data->min_key) < 0;
+}
+
static inline bool btree_iter_pos_after_node(struct btree_iter *iter,
struct btree *b)
{
- return __btree_iter_pos_cmp(iter, NULL,
- bkey_to_packed(&b->key), true) < 0;
+ int cmp = bkey_cmp(b->key.k.p, iter->pos);
+
+ if (!cmp &&
+ (iter->flags & BTREE_ITER_IS_EXTENTS) &&
+ bkey_cmp(b->key.k.p, POS_MAX))
+ cmp = -1;
+ return cmp < 0;
}
static inline bool btree_iter_pos_in_node(struct btree_iter *iter,
struct btree *b)
{
return iter->btree_id == b->btree_id &&
- bkey_cmp(iter->pos, b->data->min_key) >= 0 &&
+ !btree_iter_pos_before_node(iter, b) &&
!btree_iter_pos_after_node(iter, b);
}
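
(For illustration, a stand-alone sketch of the comparison in
btree_iter_pos_after_node(), with a toy two-field position standing in for
struct bpos; reading extent end positions as exclusive is an assumption, noted
in the comment:)

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

struct pos { uint64_t inode, offset; };

static const struct pos MAX = { UINT64_MAX, UINT64_MAX };

static int pos_cmp(struct pos a, struct pos b)
{
        if (a.inode != b.inode)
                return a.inode < b.inode ? -1 : 1;
        if (a.offset != b.offset)
                return a.offset < b.offset ? -1 : 1;
        return 0;
}

static bool pos_after_node(struct pos node_max, struct pos pos, bool extents)
{
        int cmp = pos_cmp(node_max, pos);

        /*
         * For extents, a node ending exactly at pos doesn't cover pos
         * (extent end positions being exclusive) - except at POS_MAX,
         * where there is no next node:
         */
        if (!cmp && extents && pos_cmp(node_max, MAX))
                cmp = -1;
        return cmp < 0;
}

int main(void)
{
        struct pos n = { 1, 100 }, p = { 1, 100 };

        assert(!pos_after_node(n, p, false));   /* keys: node covers pos */
        assert(pos_after_node(n, p, true));     /* extents: want next node */
        return 0;
}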
@@ -956,10 +975,10 @@ static void btree_iter_up(struct btree_iter *iter)
btree_node_unlock(iter, iter->level++);
}
-int __must_check __bch2_btree_iter_traverse(struct btree_iter *);
+static int btree_iter_traverse_one(struct btree_iter *);
static int __btree_iter_traverse_all(struct btree_trans *trans,
- struct btree_iter *orig_iter, int ret)
+ struct btree_iter *orig_iter, int ret)
{
struct bch_fs *c = trans->c;
struct btree_iter *iter;
@@ -1003,7 +1022,7 @@ retry_all:
iter = &trans->iters[sorted[i]];
do {
- ret = __bch2_btree_iter_traverse(iter);
+ ret = btree_iter_traverse_one(iter);
} while (ret == -EINTR);
if (ret)
@@ -1021,16 +1040,27 @@ int bch2_btree_iter_traverse_all(struct btree_trans *trans)
return __btree_iter_traverse_all(trans, NULL, 0);
}
-static unsigned btree_iter_up_until_locked(struct btree_iter *iter,
- bool check_pos)
+static inline bool btree_iter_good_node(struct btree_iter *iter,
+ unsigned l, int check_pos)
+{
+ if (!is_btree_node(iter, l) ||
+ !bch2_btree_node_relock(iter, l))
+ return false;
+
+ if (check_pos <= 0 && btree_iter_pos_before_node(iter, iter->l[l].b))
+ return false;
+ if (check_pos >= 0 && btree_iter_pos_after_node(iter, iter->l[l].b))
+ return false;
+ return true;
+}
+
+static inline unsigned btree_iter_up_until_good_node(struct btree_iter *iter,
+ int check_pos)
{
unsigned l = iter->level;
while (btree_iter_node(iter, l) &&
- (!is_btree_node(iter, l) ||
- !bch2_btree_node_relock(iter, l) ||
- (check_pos &&
- !btree_iter_pos_in_node(iter, iter->l[l].b)))) {
+ !btree_iter_good_node(iter, l, check_pos)) {
btree_node_unlock(iter, l);
iter->l[l].b = BTREE_ITER_NO_NODE_UP;
l++;
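
(The check_pos convention here - negative means the position only moved
backwards, positive only forwards, zero unknown - lets btree_iter_good_node()
skip the boundary check that cannot have been violated. A toy restatement
with plain ints, not bcachefs types:)

#include <assert.h>
#include <stdbool.h>

/* toy: node covers [min, max] */
static bool good_node(int min, int max, int pos, int check_pos)
{
        if (check_pos <= 0 && pos < min)
                return false;
        if (check_pos >= 0 && pos > max)
                return false;
        return true;
}

int main(void)
{
        /*
         * Moving forwards (check_pos > 0), pos can't have dropped below
         * min, so that end isn't checked and the node stays good:
         */
        assert(good_node(10, 20, 5, 1));
        assert(!good_node(10, 20, 5, 0));
        assert(!good_node(10, 20, 25, 1));
        return 0;
}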
@@ -1048,7 +1078,7 @@ static unsigned btree_iter_up_until_locked(struct btree_iter *iter,
* On error, caller (peek_node()/peek_key()) must return NULL; the error is
* stashed in the iterator and returned from bch2_trans_exit().
*/
-int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
+static int btree_iter_traverse_one(struct btree_iter *iter)
{
unsigned depth_want = iter->level;
@@ -1062,7 +1092,7 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
* XXX: correctly using BTREE_ITER_UPTODATE should make using check_pos
* here unnecessary
*/
- iter->level = btree_iter_up_until_locked(iter, true);
+ iter->level = btree_iter_up_until_good_node(iter, 0);
/*
* If we've got a btree node locked (i.e. we aren't about to relock the
@@ -1070,8 +1100,11 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
*
* XXX correctly using BTREE_ITER_UPTODATE should make this unnecessary
*/
- if (btree_iter_node(iter, iter->level))
+ if (btree_iter_node(iter, iter->level)) {
+ BUG_ON(!btree_iter_pos_in_node(iter, iter->l[iter->level].b));
+
btree_iter_advance_to_pos(iter, &iter->l[iter->level], -1);
+ }
/*
* Note: iter->nodes[iter->level] may be temporarily NULL here - that
@@ -1100,12 +1133,12 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
return 0;
}
-int __must_check bch2_btree_iter_traverse(struct btree_iter *iter)
+int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
{
int ret;
ret = bch2_trans_cond_resched(iter->trans) ?:
- __bch2_btree_iter_traverse(iter);
+ btree_iter_traverse_one(iter);
if (unlikely(ret))
ret = __btree_iter_traverse_all(iter->trans, iter, ret);
@@ -1234,19 +1267,11 @@ void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *iter, struct bpos new_
btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
}
-void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
+static unsigned btree_iter_pos_changed(struct btree_iter *iter, int cmp)
{
- int cmp = bkey_cmp(new_pos, iter->pos);
- unsigned level;
-
- if (!cmp)
- return;
+ unsigned l = btree_iter_up_until_good_node(iter, cmp);
- iter->pos = new_pos;
-
- level = btree_iter_up_until_locked(iter, true);
-
- if (btree_iter_node(iter, level)) {
+ if (btree_iter_node(iter, l)) {
/*
* We might have to skip over many keys, or just a few: try
* advancing the node iterator, and if we have to skip over too
@@ -1254,37 +1279,98 @@ void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
* is expensive).
*/
if (cmp < 0 ||
- !btree_iter_advance_to_pos(iter, &iter->l[level], 8))
- __btree_iter_init(iter, level);
+ !btree_iter_advance_to_pos(iter, &iter->l[l], 8))
+ __btree_iter_init(iter, l);
/* Don't leave it locked if we're not supposed to: */
- if (btree_lock_want(iter, level) == BTREE_NODE_UNLOCKED)
- btree_node_unlock(iter, level);
+ if (btree_lock_want(iter, l) == BTREE_NODE_UNLOCKED)
+ btree_node_unlock(iter, l);
}
- if (level != iter->level)
+ return l;
+}
+
+void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
+{
+ int cmp = bkey_cmp(new_pos, iter->pos);
+ unsigned l;
+
+ if (!cmp)
+ return;
+
+ iter->pos = new_pos;
+
+ l = btree_iter_pos_changed(iter, cmp);
+
+ if (l != iter->level)
btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
else
btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);
}
+static inline bool btree_iter_set_pos_to_next_leaf(struct btree_iter *iter)
+{
+ struct btree_iter_level *l = &iter->l[0];
+
+ iter->pos = l->b->key.k.p;
+ iter->uptodate = BTREE_ITER_NEED_TRAVERSE;
+
+ if (!bkey_cmp(iter->pos, POS_MAX)) {
+ bkey_init(&iter->k);
+ iter->k.p = POS_MAX;
+ return false;
+ }
+
+ iter->pos = btree_type_successor(iter->btree_id, iter->pos);
+ btree_iter_pos_changed(iter, 1);
+ return true;
+}
+
+static inline bool btree_iter_set_pos_to_prev_leaf(struct btree_iter *iter)
+{
+ struct btree_iter_level *l = &iter->l[0];
+
+ iter->pos = l->b->data->min_key;
+ iter->uptodate = BTREE_ITER_NEED_TRAVERSE;
+
+ if (!bkey_cmp(iter->pos, POS_MIN)) {
+ bkey_init(&iter->k);
+ iter->k.p = POS_MIN;
+ return false;
+ }
+
+ iter->pos = btree_type_predecessor(iter->btree_id, iter->pos);
+ btree_iter_pos_changed(iter, -1);
+ return true;
+}
+
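
(A toy of the successor step these helpers rely on - plain (inode, offset)
pairs; the real btree_type_successor()/predecessor() also have per-btree-type
variants, which this ignores. The POS_MAX/POS_MIN checks above guarantee the
callers never step past the ends of the keyspace:)

#include <assert.h>
#include <stdint.h>

struct pos { uint64_t inode, offset; };

/* next position in (inode, offset) order, carrying into inode on wrap: */
static struct pos pos_successor(struct pos p)
{
        if (++p.offset == 0)
                p.inode++;
        return p;
}

int main(void)
{
        struct pos p = { 1, UINT64_MAX };

        p = pos_successor(p);
        assert(p.inode == 2 && p.offset == 0);
        return 0;
}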
static inline struct bkey_s_c btree_iter_peek_uptodate(struct btree_iter *iter)
{
struct btree_iter_level *l = &iter->l[0];
struct bkey_s_c ret = { .k = &iter->k };
if (!bkey_deleted(&iter->k)) {
- EBUG_ON(bch2_btree_node_iter_end(&l->iter));
- ret.v = bkeyp_val(&l->b->format,
- __bch2_btree_node_iter_peek_all(&l->iter, l->b));
+ struct bkey_packed *_k =
+ __bch2_btree_node_iter_peek_all(&l->iter, l->b);
+
+ ret.v = bkeyp_val(&l->b->format, _k);
+
+ if (debug_check_iterators(iter->trans->c)) {
+ struct bkey k = bkey_unpack_key(l->b, _k);
+ BUG_ON(memcmp(&k, &iter->k, sizeof(k)));
+ }
+
+ if (debug_check_bkeys(iter->trans->c))
+ bch2_bkey_debugcheck(iter->trans->c, l->b, ret);
}
- if (debug_check_bkeys(iter->trans->c) &&
- !bkey_deleted(ret.k))
- bch2_bkey_debugcheck(iter->trans->c, l->b, ret);
return ret;
}
+/**
+ * bch2_btree_iter_peek: returns first key greater than or equal to iterator's
+ * current position
+ */
struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
{
struct btree_iter_level *l = &iter->l[0];
@@ -1297,24 +1383,16 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
return btree_iter_peek_uptodate(iter);
while (1) {
- if (iter->uptodate >= BTREE_ITER_NEED_RELOCK) {
- ret = bch2_btree_iter_traverse(iter);
- if (unlikely(ret))
- return bkey_s_c_err(ret);
- }
+ ret = bch2_btree_iter_traverse(iter);
+ if (unlikely(ret))
+ return bkey_s_c_err(ret);
k = __btree_iter_peek(iter, l);
if (likely(k.k))
break;
- /* got to the end of the leaf, iterator needs to be traversed: */
- iter->pos = l->b->key.k.p;
- iter->uptodate = BTREE_ITER_NEED_TRAVERSE;
-
- if (!bkey_cmp(iter->pos, POS_MAX))
+ if (!btree_iter_set_pos_to_next_leaf(iter))
return bkey_s_c_null;
-
- iter->pos = btree_type_successor(iter->btree_id, iter->pos);
}
/*
@@ -1329,22 +1407,10 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
return k;
}
-static noinline
-struct bkey_s_c bch2_btree_iter_peek_next_leaf(struct btree_iter *iter)
-{
- struct btree_iter_level *l = &iter->l[0];
-
- iter->pos = l->b->key.k.p;
- iter->uptodate = BTREE_ITER_NEED_TRAVERSE;
-
- if (!bkey_cmp(iter->pos, POS_MAX))
- return bkey_s_c_null;
-
- iter->pos = btree_type_successor(iter->btree_id, iter->pos);
-
- return bch2_btree_iter_peek(iter);
-}
-
+/**
+ * bch2_btree_iter_next: returns first key greater than iterator's current
+ * position
+ */
struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
{
struct btree_iter_level *l = &iter->l[0];
@@ -1353,15 +1419,19 @@ struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
bch2_btree_iter_checks(iter, BTREE_ITER_KEYS);
- iter->pos = btree_type_successor(iter->btree_id, iter->k.p);
-
if (unlikely(iter->uptodate != BTREE_ITER_UPTODATE)) {
+ if (unlikely(!bkey_cmp(iter->k.p, POS_MAX)))
+ return bkey_s_c_null;
+
/*
* XXX: when we just need to relock we should be able to avoid
* calling traverse, but we need to kill BTREE_ITER_NEED_PEEK
* for that to work
*/
- btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
+ iter->uptodate = BTREE_ITER_NEED_TRAVERSE;
+
+ bch2_btree_iter_set_pos(iter,
+ btree_type_successor(iter->btree_id, iter->k.p));
return bch2_btree_iter_peek(iter);
}
@@ -1369,9 +1439,12 @@ struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
do {
bch2_btree_node_iter_advance(&l->iter, l->b);
p = bch2_btree_node_iter_peek_all(&l->iter, l->b);
- if (unlikely(!p))
- return bch2_btree_iter_peek_next_leaf(iter);
- } while (bkey_whiteout(p));
+ } while (likely(p) && bkey_whiteout(p));
+
+ if (unlikely(!p))
+ return btree_iter_set_pos_to_next_leaf(iter)
+ ? bch2_btree_iter_peek(iter)
+ : bkey_s_c_null;
k = __btree_iter_unpack(iter, l, &iter->k, p);
@@ -1380,51 +1453,79 @@ struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
return k;
}
-struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter)
+/**
+ * bch2_btree_iter_peek_prev: returns first key less than or equal to
+ * iterator's current position
+ */
+struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
{
struct btree_iter_level *l = &iter->l[0];
- struct bkey_packed *p;
struct bkey_s_c k;
int ret;
bch2_btree_iter_checks(iter, BTREE_ITER_KEYS);
- if (unlikely(iter->uptodate != BTREE_ITER_UPTODATE)) {
- k = bch2_btree_iter_peek(iter);
- if (IS_ERR(k.k))
- return k;
- }
+ if (iter->uptodate == BTREE_ITER_UPTODATE)
+ return btree_iter_peek_uptodate(iter);
while (1) {
- p = bch2_btree_node_iter_prev(&l->iter, l->b);
- if (likely(p))
- break;
-
- iter->pos = l->b->data->min_key;
- if (!bkey_cmp(iter->pos, POS_MIN))
- return bkey_s_c_null;
-
- bch2_btree_iter_set_pos(iter,
- btree_type_predecessor(iter->btree_id, iter->pos));
-
ret = bch2_btree_iter_traverse(iter);
if (unlikely(ret))
return bkey_s_c_err(ret);
- p = bch2_btree_node_iter_peek(&l->iter, l->b);
- if (p)
+ k = __btree_iter_peek(iter, l);
+ if (!k.k ||
+ bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0)
+ k = __btree_iter_prev(iter, l);
+
+ if (likely(k.k))
break;
- }
- k = __btree_iter_unpack(iter, l, &iter->k, p);
+ if (!btree_iter_set_pos_to_prev_leaf(iter))
+ return bkey_s_c_null;
+ }
EBUG_ON(bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0);
-
iter->pos = bkey_start_pos(k.k);
iter->uptodate = BTREE_ITER_UPTODATE;
return k;
}
+/**
+ * bch2_btree_iter_prev: returns first key less than iterator's current
+ * position
+ */
+struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter)
+{
+ struct btree_iter_level *l = &iter->l[0];
+ struct bkey_s_c k;
+
+ bch2_btree_iter_checks(iter, BTREE_ITER_KEYS);
+
+ if (unlikely(iter->uptodate != BTREE_ITER_UPTODATE)) {
+ /*
+ * XXX: when we just need to relock we should be able to avoid
+ * calling traverse, but we need to kill BTREE_ITER_NEED_PEEK
+ * for that to work
+ */
+ iter->pos = btree_type_predecessor(iter->btree_id,
+ iter->pos);
+ iter->uptodate = BTREE_ITER_NEED_TRAVERSE;
+
+ return bch2_btree_iter_peek_prev(iter);
+ }
+
+ k = __btree_iter_prev(iter, l);
+ if (unlikely(!k.k))
+ return btree_iter_set_pos_to_prev_leaf(iter)
+ ? bch2_btree_iter_peek(iter)
+ : bkey_s_c_null;
+
+ EBUG_ON(bkey_cmp(bkey_start_pos(k.k), iter->pos) >= 0);
+ iter->pos = bkey_start_pos(k.k);
+ return k;
+}
+
static inline struct bkey_s_c
__bch2_btree_iter_peek_slot_extents(struct btree_iter *iter)
{
@@ -1436,8 +1537,7 @@ __bch2_btree_iter_peek_slot_extents(struct btree_iter *iter)
recheck:
while ((k = __btree_iter_peek_all(iter, l, &iter->k)).k &&
- bkey_deleted(k.k) &&
- bkey_cmp(bkey_start_pos(k.k), iter->pos) == 0)
+ bkey_cmp(k.k->p, iter->pos) <= 0)
bch2_btree_node_iter_advance(&l->iter, l->b);
/*
@@ -1477,6 +1577,8 @@ recheck:
EBUG_ON(bkey_cmp(k.k->p, iter->pos) < 0);
EBUG_ON(bkey_deleted(k.k));
iter->uptodate = BTREE_ITER_UPTODATE;
+
+ __bch2_btree_iter_verify(iter, l->b);
return k;
}
@@ -1507,6 +1609,8 @@ recheck:
iter->k = n;
iter->uptodate = BTREE_ITER_UPTODATE;
+
+ __bch2_btree_iter_verify(iter, l->b);
return (struct bkey_s_c) { &iter->k, NULL };
}
@@ -1539,19 +1643,18 @@ recheck:
goto recheck;
}
- if (k.k &&
- !bkey_deleted(k.k) &&
- !bkey_cmp(iter->pos, k.k->p)) {
- iter->uptodate = BTREE_ITER_UPTODATE;
- return k;
- } else {
+ if (!k.k ||
+ bkey_deleted(k.k) ||
+ bkey_cmp(iter->pos, k.k->p)) {
/* hole */
bkey_init(&iter->k);
iter->k.p = iter->pos;
-
- iter->uptodate = BTREE_ITER_UPTODATE;
- return (struct bkey_s_c) { &iter->k, NULL };
+ k = (struct bkey_s_c) { &iter->k, NULL };
}
+
+ iter->uptodate = BTREE_ITER_UPTODATE;
+ __bch2_btree_iter_verify(iter, l->b);
+ return k;
}
struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
@@ -1563,11 +1666,9 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
if (iter->uptodate == BTREE_ITER_UPTODATE)
return btree_iter_peek_uptodate(iter);
- if (iter->uptodate >= BTREE_ITER_NEED_RELOCK) {
- ret = bch2_btree_iter_traverse(iter);
- if (unlikely(ret))
- return bkey_s_c_err(ret);
- }
+ ret = bch2_btree_iter_traverse(iter);
+ if (unlikely(ret))
+ return bkey_s_c_err(ret);
return __bch2_btree_iter_peek_slot(iter);
}
@@ -1669,7 +1770,10 @@ int bch2_trans_iter_free_on_commit(struct btree_trans *trans,
static int bch2_trans_realloc_iters(struct btree_trans *trans,
unsigned new_size)
{
- void *new_iters, *new_updates;
+ void *new_iters, *new_updates, *new_sorted;
+ size_t iters_bytes;
+ size_t updates_bytes;
+ size_t sorted_bytes;
new_size = roundup_pow_of_two(new_size);
@@ -1682,9 +1786,13 @@ static int bch2_trans_realloc_iters(struct btree_trans *trans,
bch2_trans_unlock(trans);
- new_iters = kmalloc(sizeof(struct btree_iter) * new_size +
- sizeof(struct btree_insert_entry) * (new_size + 4),
- GFP_NOFS);
+ iters_bytes = sizeof(struct btree_iter) * new_size;
+ updates_bytes = sizeof(struct btree_insert_entry) * (new_size + 4);
+ sorted_bytes = sizeof(u8) * (new_size + 4);
+
+ new_iters = kmalloc(iters_bytes +
+ updates_bytes +
+ sorted_bytes, GFP_NOFS);
if (new_iters)
goto success;
@@ -1693,7 +1801,8 @@ static int bch2_trans_realloc_iters(struct btree_trans *trans,
trans->used_mempool = true;
success:
- new_updates = new_iters + sizeof(struct btree_iter) * new_size;
+ new_updates = new_iters + iters_bytes;
+ new_sorted = new_updates + updates_bytes;
memcpy(new_iters, trans->iters,
sizeof(struct btree_iter) * trans->nr_iters);
@@ -1708,9 +1817,10 @@ success:
if (trans->iters != trans->iters_onstack)
kfree(trans->iters);
- trans->iters = new_iters;
- trans->updates = new_updates;
- trans->size = new_size;
+ trans->iters = new_iters;
+ trans->updates = new_updates;
+ trans->updates_sorted = new_sorted;
+ trans->size = new_size;
if (trans->iters_live) {
trace_trans_restart_iters_realloced(trans->ip, trans->size);
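
(The three per-transaction arrays share a single allocation; a stand-alone
sketch of the carving with toy struct sizes - the same arithmetic the patch
does on the kmalloc'd buffer:)

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

struct iter   { char pad[64]; };        /* stand-in for btree_iter */
struct update { char pad[32]; };        /* stand-in for btree_insert_entry */

int main(void)
{
        unsigned size = 16;
        size_t iters_bytes   = sizeof(struct iter) * size;
        size_t updates_bytes = sizeof(struct update) * (size + 4);
        size_t sorted_bytes  = sizeof(uint8_t) * (size + 4);

        /* one allocation, carved into three arrays: */
        char *buf = malloc(iters_bytes + updates_bytes + sorted_bytes);
        if (!buf)
                return 1;

        struct iter   *iters   = (void *) buf;
        struct update *updates = (void *) (buf + iters_bytes);
        uint8_t       *sorted  = (void *) (buf + iters_bytes + updates_bytes);

        memset(iters, 0, iters_bytes);
        memset(updates, 0, updates_bytes);
        sorted[0] = 0;

        free(buf);
        return 0;
}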
@@ -1779,6 +1889,12 @@ found:
iter->flags &= ~(BTREE_ITER_INTENT|BTREE_ITER_PREFETCH);
iter->flags |= flags & (BTREE_ITER_INTENT|BTREE_ITER_PREFETCH);
+
+ if ((iter->flags & BTREE_ITER_INTENT) &&
+ !bch2_btree_iter_upgrade(iter, 1)) {
+ trace_trans_restart_upgrade(trans->ip);
+ return ERR_PTR(-EINTR);
+ }
}
BUG_ON(iter->btree_id != btree_id);
@@ -1949,6 +2065,7 @@ void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
trans->size = ARRAY_SIZE(trans->iters_onstack);
trans->iters = trans->iters_onstack;
trans->updates = trans->updates_onstack;
+ trans->updates_sorted = trans->updates_sorted_onstack;
trans->fs_usage_deltas = NULL;
if (expected_nr_iters > trans->size)
@@ -1973,3 +2090,18 @@ int bch2_trans_exit(struct btree_trans *trans)
return trans->error ? -EIO : 0;
}
+
+void bch2_fs_btree_iter_exit(struct bch_fs *c)
+{
+ mempool_exit(&c->btree_iters_pool);
+}
+
+int bch2_fs_btree_iter_init(struct bch_fs *c)
+{
+ unsigned nr = BTREE_ITER_MAX;
+
+ return mempool_init_kmalloc_pool(&c->btree_iters_pool, 1,
+ sizeof(struct btree_iter) * nr +
+ sizeof(struct btree_insert_entry) * (nr + 4) +
+ sizeof(u8) * (nr + 4));
+}
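
(Taken together, the new docstrings pin down the four lookup operations. A toy
restatement over a sorted int array, assuming only the semantics those
comments advertise - first key >=, >, <=, < the current position:)

#include <assert.h>
#include <stddef.h>

static const int keys[] = { 10, 20, 30 };
#define NKEYS (sizeof(keys) / sizeof(keys[0]))

/* peek: first key >= pos */
static const int *peek(int pos)
{
        size_t i;

        for (i = 0; i < NKEYS; i++)
                if (keys[i] >= pos)
                        return &keys[i];
        return NULL;
}

/* peek_prev: first key <= pos */
static const int *peek_prev(int pos)
{
        size_t i = NKEYS;

        while (i--)
                if (keys[i] <= pos)
                        return &keys[i];
        return NULL;
}

int main(void)
{
        assert(*peek(20) == 20);
        assert(*peek(21) == 30);
        assert(*peek_prev(20) == 20);
        assert(*peek_prev(19) == 10);

        /* next/prev step the position first, then peek: */
        assert(*peek(20 + 1) == 30);            /* next: first key > 20 */
        assert(*peek_prev(20 - 1) == 10);       /* prev: first key < 20 */

        assert(!peek(31));                      /* ran off the end */
        return 0;
}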