Diffstat (limited to 'drivers/md/bcache/btree_iter.c')
-rw-r--r--	drivers/md/bcache/btree_iter.c	418
1 file changed, 276 insertions(+), 142 deletions(-)
diff --git a/drivers/md/bcache/btree_iter.c b/drivers/md/bcache/btree_iter.c
index 2c469ad4936a..343b44439f59 100644
--- a/drivers/md/bcache/btree_iter.c
+++ b/drivers/md/bcache/btree_iter.c
@@ -12,47 +12,54 @@
#define BTREE_ITER_NOT_END ((struct btree *) 1)
-static inline bool is_btree_node(struct btree_iter *iter, unsigned l)
+static inline bool is_btree_node(struct btree_iter_state *_iter, unsigned l)
{
- return iter->l[l].node && iter->l[l].node != BTREE_ITER_NOT_END;
+ struct btree *b = _iter->l[l].node;
+
+ return b && b != BTREE_ITER_NOT_END;
}
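
For context: BTREE_ITER_NOT_END is a sentinel pointer, not a real node, so each iterator level is in one of three states. A minimal sketch of the distinction the helper above tests (toy types; only the node field and the sentinel are from the patch):

#include <stdbool.h>

struct btree;

#define BTREE_ITER_NOT_END ((struct btree *) 1)

struct toy_iter_level {
	struct btree *node;
};

/*
 * node == NULL:               no node at this level (above the root)
 * node == BTREE_ITER_NOT_END: level in use, but no node held/locked yet
 * anything else:              a real btree node pointer
 */
static inline bool toy_is_btree_node(const struct toy_iter_level *l)
{
	return l->node && l->node != BTREE_ITER_NOT_END;
}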
/* Btree node locking: */
/*
- * Updates the saved lock sequence number, so that btree_node_relock() will
+ * Updates the saved lock sequence number, so that bch_btree_node_relock() will
* succeed:
*/
void btree_node_unlock_write(struct btree *b, struct btree_iter *iter)
{
+ struct btree_iter_state *_iter = iter_s(iter);
struct btree_iter *linked;
- EBUG_ON(iter->l[b->level].node != b);
- EBUG_ON(iter->l[b->level].lock_seq + 1 != b->lock.state.seq);
+ EBUG_ON(!btree_iter_has_node(_iter, b));
+ EBUG_ON(_iter->l[b->level].lock_seq + 1 != b->lock.state.seq);
for_each_linked_btree_node(iter, b, linked)
- linked->l[b->level].lock_seq += 2;
+ iter_s(linked)->l[b->level].lock_seq += 2;
- iter->l[b->level].lock_seq += 2;
+ _iter->l[b->level].lock_seq += 2;
six_unlock_write(&b->lock);
}
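
The += 2 works because of how the six-lock sequence number appears to move: incremented once when the write lock is taken and once when it is released, so it is odd while write-locked and even otherwise (the EBUG_ONs above and the BUG_ON(b->lock.state.seq & 1) later in this patch both rely on that reading). A toy model of the bookkeeping, under that assumption:

#include <assert.h>

int main(void)
{
	unsigned seq = 6;		/* even: node not write-locked */
	unsigned lock_seq = seq;	/* saved when the iter locked the node */

	seq++;				/* six_lock_write(): seq goes odd */
	assert(lock_seq + 1 == seq);	/* the EBUG_ON in unlock_write above */

	lock_seq += 2;			/* what btree_node_unlock_write() does */
	seq++;				/* six_unlock_write(): even again */

	assert(lock_seq == seq);	/* relock-by-seq will now succeed */
	return 0;
}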
void btree_node_lock_write(struct btree *b, struct btree_iter *iter)
{
+ struct btree_iter_state *_iter = iter_s(iter);
struct btree_iter *linked;
unsigned readers = 0;
- EBUG_ON(iter->l[b->level].node != b);
- EBUG_ON(iter->l[b->level].lock_seq != b->lock.state.seq);
+ EBUG_ON(!btree_iter_has_node(_iter, b));
+ EBUG_ON(_iter->l[b->level].lock_seq != b->lock.state.seq);
if (six_trylock_write(&b->lock))
return;
- for_each_linked_btree_iter(iter, linked)
- if (linked->l[b->level].node == b &&
- btree_node_read_locked(linked, b->level))
+ for_each_linked_btree_iter(iter, linked) {
+ struct btree_iter_state *_linked = iter_s(linked);
+
+ if (btree_iter_has_node(_linked, b) &&
+ btree_node_read_locked(_linked, b->level))
readers++;
+ }
if (likely(!readers)) {
six_lock_write(&b->lock);
@@ -88,27 +95,29 @@ void __btree_node_lock_write(struct btree *b, struct btree_iter *iter)
six_lock_write(&b->lock);
}
-static bool btree_lock_upgrade(struct btree_iter *iter, unsigned level)
+static bool btree_lock_upgrade(struct btree_iter *iter,
+ struct btree_iter_state *_iter,
+ unsigned level)
{
struct btree_iter *linked;
- struct btree *b = iter->l[level].node;
+ struct btree *b = _iter->l[level].node;
- if (btree_node_intent_locked(iter, level))
+ if (btree_node_intent_locked(_iter, level))
return true;
- if (!is_btree_node(iter, level))
+ if (!is_btree_node(_iter, level))
return false;
- if (btree_node_locked(iter, level)
+ if (btree_node_locked(_iter, level)
? six_trylock_convert(&b->lock, SIX_LOCK_read, SIX_LOCK_intent)
- : six_relock_intent(&b->lock, iter->l[level].lock_seq))
+ : six_relock_intent(&b->lock, _iter->l[level].lock_seq))
goto success;
for_each_linked_btree_iter(iter, linked)
- if (linked->l[level].node == b &&
- btree_node_intent_locked(linked, level) &&
- iter->l[level].lock_seq == b->lock.state.seq) {
- btree_node_unlock(iter, level);
+ if (iter_s(linked)->l[level].node == b &&
+ btree_node_intent_locked(iter_s(linked), level) &&
+ _iter->l[level].lock_seq == b->lock.state.seq) {
+ btree_node_unlock(_iter, level);
six_lock_increment(&b->lock, SIX_LOCK_intent);
goto success;
}
@@ -117,30 +126,31 @@ static bool btree_lock_upgrade(struct btree_iter *iter, unsigned level)
trace_bcache_btree_upgrade_lock_fail(b, iter);
return false;
success:
- mark_btree_node_intent_locked(iter, level);
+ mark_btree_node_intent_locked(_iter, level);
trace_bcache_btree_upgrade_lock(b, iter);
return true;
}
/* Btree iterator locking: */
-bool bch_btree_iter_upgrade(struct btree_iter *iter)
+static bool __btree_iter_upgrade(struct btree_iter *iter,
+ struct btree_iter_state *_iter)
{
int i;
for (i = iter->level;
i < min_t(int, iter->locks_want, BTREE_MAX_DEPTH);
i++)
- if (iter->l[i].node && !btree_lock_upgrade(iter, i)) {
+ if (_iter->l[i].node && !btree_lock_upgrade(iter, _iter, i)) {
do {
- btree_node_unlock(iter, i);
+ btree_node_unlock(_iter, i);
/*
- * Make sure btree_node_relock() in
+ * Make sure bch_btree_node_relock() in
* btree_iter_traverse() fails, so that we keep
* going up and get all the intent locks we need
*/
- iter->l[i].lock_seq--;
+ _iter->l[i].lock_seq--;
} while (--i >= 0);
return false;
@@ -149,40 +159,50 @@ bool bch_btree_iter_upgrade(struct btree_iter *iter)
return true;
}
-int bch_btree_iter_unlock(struct btree_iter *iter)
+bool bch_btree_iter_upgrade(struct btree_iter *iter)
+{
+ return __btree_iter_upgrade(iter, iter_s(iter));
+}
+
+void __btree_iter_unlock(struct btree_iter_state *_iter)
{
- unsigned l = 0;
+ while (_iter->nodes_locked)
+ btree_node_unlock(_iter, __ffs(_iter->nodes_locked));
+}
- while (iter->nodes_locked)
- btree_node_unlock(iter, l++);
+int bch_btree_iter_unlock(struct btree_iter *iter)
+{
+ __btree_iter_unlock(iter_s(iter));
return iter->error;
}
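
The rewritten unlock drains nodes_locked as a bitmask of locked levels, lowest set bit first, instead of the old loop's probe upward from level 0. A userspace sketch of the pattern (__builtin_ctz stands in for the kernel's __ffs; both return the 0-based index of the lowest set bit):

#include <stdio.h>

static void toy_unlock(unsigned level, unsigned *nodes_locked)
{
	printf("unlocking level %u\n", level);
	*nodes_locked &= ~(1U << level);  /* btree_node_unlock() clears the bit */
}

int main(void)
{
	unsigned nodes_locked = (1U << 0) | (1U << 2) | (1U << 3);

	while (nodes_locked)
		toy_unlock(__builtin_ctz(nodes_locked), &nodes_locked);
	return 0;
}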
-bool btree_node_relock(struct btree_iter *iter, unsigned level)
+bool bch_btree_node_relock(struct btree_iter *iter,
+ struct btree_iter_state *_iter,
+ unsigned level)
{
struct btree_iter *linked;
- struct btree *b = iter->l[level].node;
+ struct btree *b = _iter->l[level].node;
enum six_lock_type type = btree_lock_want(iter, level);
- if (btree_node_locked(iter, level))
+ if (btree_node_locked(_iter, level))
return true;
if (race_fault())
return false;
- if (is_btree_node(iter, level) &&
- six_relock_type(&b->lock, iter->l[level].lock_seq, type)) {
- mark_btree_node_locked(iter, level, type);
+ if (is_btree_node(_iter, level) &&
+ six_relock_type(&b->lock, _iter->l[level].lock_seq, type)) {
+ mark_btree_node_locked(_iter, level, type);
return true;
}
for_each_linked_btree_iter(iter, linked)
- if (linked->l[level].node == b &&
- btree_node_locked_type(linked, level) == type &&
- iter->l[level].lock_seq == b->lock.state.seq) {
+ if (iter_s(linked)->l[level].node == b &&
+ btree_node_locked_type(iter_s(linked), level) == type &&
+ _iter->l[level].lock_seq == b->lock.state.seq) {
six_lock_increment(&b->lock, type);
- mark_btree_node_locked(iter, level, type);
+ mark_btree_node_locked(_iter, level, type);
return true;
}
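
None of the relock paths above block: either the node is already locked, or six_relock_type() succeeds because the saved sequence number is still current (nobody write-locked the node in between), or a matching lock already held by a linked iterator is borrowed by bumping its count with six_lock_increment(). Schematically (an illustrative enum, not from the patch):

/* The order of the fallbacks in bch_btree_node_relock(), schematically: */
enum relock_path {
	RELOCK_NOOP,		/* btree_node_locked(): already held */
	RELOCK_BY_SEQ,		/* six_relock_type(): lock_seq still current */
	RELOCK_VIA_LINKED,	/* six_lock_increment() on a linked iter's lock */
	RELOCK_FAIL,		/* caller retraverses from a higher level */
};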
@@ -327,9 +347,9 @@ void bch_btree_node_iter_fix(struct btree_iter *iter,
}
/* peek_all() doesn't skip deleted keys */
-static inline struct bkey_s_c __btree_iter_peek_all(struct btree_iter *iter)
+static inline struct bkey_s_c __btree_iter_peek_all(struct btree_iter *iter,
+ struct btree_iter_level *l)
{
- struct btree_iter_level *l = &iter->l[iter->level];
const struct bkey_format *f = &l->node->keys.format;
struct bkey_packed *k =
bch_btree_node_iter_peek_all(&l->node_iter, &l->node->keys);
@@ -346,9 +366,9 @@ static inline struct bkey_s_c __btree_iter_peek_all(struct btree_iter *iter)
return ret;
}
-static inline struct bkey_s_c __btree_iter_peek(struct btree_iter *iter)
+static inline struct bkey_s_c __btree_iter_peek(struct btree_iter *iter,
+ struct btree_iter_level *l)
{
- struct btree_iter_level *l = &iter->l[iter->level];
const struct bkey_format *f = &l->node->keys.format;
struct bkey_packed *k =
bch_btree_node_iter_peek(&l->node_iter, &l->node->keys);
@@ -365,10 +385,8 @@ static inline struct bkey_s_c __btree_iter_peek(struct btree_iter *iter)
return ret;
}
-static inline void __btree_iter_advance(struct btree_iter *iter)
+static inline void __btree_iter_advance(struct btree_iter_level *l)
{
- struct btree_iter_level *l = &iter->l[iter->level];
-
bch_btree_node_iter_advance(&l->node_iter, &l->node->keys);
}
@@ -381,10 +399,11 @@ static inline void btree_iter_node_iter_init(struct btree_iter *iter,
}
static inline void btree_iter_node_set(struct btree_iter *iter,
+ struct btree_iter_state *_iter,
struct btree *b,
struct bpos pos)
{
- struct btree_iter_level *l = &iter->l[b->level];
+ struct btree_iter_level *l = &_iter->l[b->level];
BUG_ON(b->lock.state.seq & 1);
@@ -416,7 +435,7 @@ bool bch_btree_iter_node_replace(struct btree_iter *iter, struct btree *b)
* the old node we're replacing has already been
* unlocked and the pointer invalidated
*/
- BUG_ON(btree_node_locked(linked, b->level));
+ EBUG_ON(btree_node_locked(iter_s(linked), b->level));
/*
* If @linked wants this node read locked, we don't want
@@ -426,15 +445,16 @@ bool bch_btree_iter_node_replace(struct btree_iter *iter, struct btree *b)
* progress...
*
* Instead, btree_iter_node_set() sets things up so
- * btree_node_relock() will succeed:
+ * bch_btree_node_relock() will succeed:
*/
if (btree_want_intent(linked, b->level)) {
six_lock_increment(&b->lock, SIX_LOCK_intent);
- mark_btree_node_intent_locked(linked, b->level);
+ mark_btree_node_intent_locked(iter_s(linked), b->level);
}
- btree_iter_node_set(linked, b, linked->pos);
+ btree_iter_node_set(linked, iter_s(linked),
+ b, linked->pos);
}
if (!btree_iter_pos_in_node(iter, b)) {
@@ -442,8 +462,8 @@ bool bch_btree_iter_node_replace(struct btree_iter *iter, struct btree *b)
return false;
}
- mark_btree_node_intent_locked(iter, b->level);
- btree_iter_node_set(iter, b, iter->pos);
+ mark_btree_node_intent_locked(iter_s(iter), b->level);
+ btree_iter_node_set(iter, iter_s(iter), b, iter->pos);
return true;
}
@@ -452,21 +472,25 @@ void bch_btree_iter_node_drop_linked(struct btree_iter *iter, struct btree *b)
struct btree_iter *linked;
unsigned level = b->level;
- for_each_linked_btree_iter(iter, linked)
- if (linked->l[level].node == b) {
- btree_node_unlock(linked, level);
- linked->l[level].node = BTREE_ITER_NOT_END;
+ for_each_linked_btree_iter(iter, linked) {
+ struct btree_iter_state *_linked = iter_s(linked);
+
+ if (_linked->l[level].node == b) {
+ btree_node_unlock(_linked, level);
+ _linked->l[level].node = BTREE_ITER_NOT_END;
}
+ }
}
void bch_btree_iter_node_drop(struct btree_iter *iter, struct btree *b)
{
+ struct btree_iter_state *_iter = iter_s(iter);
unsigned level = b->level;
- if (iter->l[level].node == b) {
+ if (_iter->l[level].node == b) {
BUG_ON(b->lock.state.intent_lock != 1);
- btree_node_unlock(iter, level);
- iter->l[level].node = BTREE_ITER_NOT_END;
+ btree_node_unlock(_iter, level);
+ _iter->l[level].node = BTREE_ITER_NOT_END;
}
}
@@ -480,15 +504,17 @@ void bch_btree_iter_reinit_node(struct btree_iter *iter, struct btree *b)
for_each_linked_btree_node(iter, b, linked)
btree_iter_node_iter_init(linked,
- &linked->l[b->level],
+ &iter_s(linked)->l[b->level],
linked->pos);
btree_iter_node_iter_init(iter,
- &iter->l[b->level],
+ &iter_s(iter)->l[b->level],
iter->pos);
}
-static void btree_iter_verify_locking(struct btree_iter *iter, unsigned level)
+static void btree_iter_verify_locking(struct btree_iter *iter,
+ struct btree_iter_state *_iter,
+ unsigned level)
{
#ifdef CONFIG_BCACHE_DEBUG
struct btree_iter *linked;
@@ -505,24 +531,27 @@ static void btree_iter_verify_locking(struct btree_iter *iter, unsigned level)
* retaking fails then return -EINTR... but, let's keep things simple
* for now:
*/
- BUG_ON(iter->nodes_locked != iter->nodes_intent_locked);
+ BUG_ON(_iter->nodes_locked != _iter->nodes_intent_locked);
for_each_linked_btree_iter(iter, linked)
- BUG_ON(linked->nodes_locked != linked->nodes_intent_locked);
+ BUG_ON(iter_s(linked)->nodes_locked !=
+ iter_s(linked)->nodes_intent_locked);
/* Lock ordering: */
for_each_linked_btree_iter(iter, linked) {
- BUG_ON(linked->nodes_locked &&
+ BUG_ON(iter_s(linked)->nodes_locked &&
btree_iter_cmp(linked, iter) > 0);
- BUG_ON(linked->nodes_locked &&
+ BUG_ON(iter_s(linked)->nodes_locked &&
linked->btree_id == iter->btree_id &&
- level > __fls(linked->nodes_locked));
+ level > __fls(iter_s(linked)->nodes_locked));
}
#endif
}
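
These assertions encode the deadlock-avoidance discipline between linked iterators: locks may only be taken while every lock-holding linked iterator compares <= in btree_iter_cmp() order, and on the same btree never at a level above one a linked iterator already holds. A toy restatement (ordering by btree id then position is an assumption about btree_iter_cmp(); 31 - __builtin_clz() stands in for __fls()):

#include <stdbool.h>

struct toy_iter {
	int		btree_id;
	long long	pos;		/* stand-in for struct bpos */
	unsigned	nodes_locked;	/* bitmask of locked levels */
};

/* Assumed: btree_iter_cmp() orders by btree id, then position. */
static int toy_iter_cmp(const struct toy_iter *l, const struct toy_iter *r)
{
	if (l->btree_id != r->btree_id)
		return l->btree_id < r->btree_id ? -1 : 1;
	return l->pos < r->pos ? -1 : l->pos > r->pos ? 1 : 0;
}

/* May @iter lock at @level without deadlocking against @linked? */
static bool toy_lock_ok(const struct toy_iter *iter,
			const struct toy_iter *linked, unsigned level)
{
	if (!linked->nodes_locked)
		return true;
	if (toy_iter_cmp(linked, iter) > 0)
		return false;	/* out of iterator order */
	if (linked->btree_id == iter->btree_id &&
	    level > 31 - (unsigned)__builtin_clz(linked->nodes_locked))
		return false;	/* above linked's highest locked level */
	return true;
}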
-static inline void btree_iter_lock_root(struct btree_iter *iter, struct bpos pos)
+static inline void btree_iter_lock_root(struct btree_iter *iter,
+ struct btree_iter_state *_iter,
+ struct bpos pos)
{
struct cache_set *c = iter->c;
@@ -530,42 +559,45 @@ static inline void btree_iter_lock_root(struct btree_iter *iter, struct bpos pos
struct btree *b = c->btree_roots[iter->btree_id].b;
unsigned level = READ_ONCE(b->level);
- btree_iter_verify_locking(iter, level);
+ btree_iter_verify_locking(iter, _iter, level);
if (btree_node_lock(b, iter, level,
(b != c->btree_roots[iter->btree_id].b))) {
iter->level = level;
if (level + 1 < BTREE_MAX_DEPTH)
- iter->l[level + 1].node = NULL;
- btree_iter_node_set(iter, b, pos);
+ _iter->l[level + 1].node = NULL;
+ btree_iter_node_set(iter, _iter, b, pos);
break;
}
}
}
-static inline int btree_iter_down(struct btree_iter *iter, struct bpos pos,
- struct closure *cl)
+static inline int btree_iter_down(struct btree_iter *iter,
+ struct btree_iter_state *_iter,
+ struct bpos pos, struct closure *cl)
{
struct btree *b;
- struct bkey_s_c k = __btree_iter_peek(iter);
+ struct bkey_s_c k = __btree_iter_peek(iter,
+ &_iter->l[iter->level]);
BKEY_PADDED(k) tmp;
bkey_reassemble(&tmp.k, k);
- b = bch_btree_node_get(iter, &tmp.k, iter->level - 1, cl);
+ b = bch_btree_node_get(iter, _iter, &tmp.k, iter->level - 1, cl);
if (unlikely(IS_ERR(b)))
return PTR_ERR(b);
- btree_iter_verify_locking(iter, iter->level - 1);
+ btree_iter_verify_locking(iter, _iter, iter->level - 1);
--iter->level;
- btree_iter_node_set(iter, b, pos);
+ btree_iter_node_set(iter, _iter, b, pos);
return 0;
}
-static void btree_iter_up(struct btree_iter *iter)
+static void btree_iter_up(struct btree_iter *iter,
+ struct btree_iter_state *_iter)
{
- btree_node_unlock(iter, iter->level++);
+ btree_node_unlock(_iter, iter->level++);
}
/*
@@ -578,9 +610,13 @@ static void btree_iter_up(struct btree_iter *iter)
* stashed in the iterator and returned from bch_btree_iter_unlock().
*/
static int __must_check __bch_btree_iter_traverse(struct btree_iter *iter,
- unsigned l, struct bpos pos)
+ struct btree_iter_state *_iter,
+ unsigned traverse_to,
+ struct bpos pos)
{
- if (!iter->l[iter->level].node)
+ struct btree_iter_level *l;
+
+ if (!_iter->l[iter->level].node)
return 0;
iter->at_end_of_leaf = false;
@@ -589,25 +625,25 @@ retry:
* If the current node isn't locked, go up until we have a locked node
* or run out of nodes:
*/
- while (iter->l[iter->level].node &&
- !(is_btree_node(iter, iter->level) &&
- btree_node_relock(iter, iter->level) &&
+ while (_iter->l[iter->level].node &&
+ !(is_btree_node(_iter, iter->level) &&
+ bch_btree_node_relock(iter, _iter, iter->level) &&
btree_iter_pos_cmp(pos,
- &iter->l[iter->level].node->key.k,
- iter->is_extents)))
- btree_iter_up(iter);
+ &_iter->l[iter->level].node->key.k,
+ iter->is_extents)))
+ btree_iter_up(iter, _iter);
/*
* If we've got a btree node locked (i.e. we aren't about to relock the
* root) - advance its node iterator if necessary:
*/
if (iter->level < BTREE_MAX_DEPTH &&
- iter->l[iter->level].node) {
+ (l = &_iter->l[iter->level])->node) {
struct bkey_s_c k;
- while ((k = __btree_iter_peek_all(iter)).k &&
+ while ((k = __btree_iter_peek_all(iter, l)).k &&
!btree_iter_pos_cmp(pos, k.k, iter->is_extents))
- __btree_iter_advance(iter);
+ __btree_iter_advance(l);
}
/*
@@ -616,17 +652,17 @@ retry:
* here it indicates that relocking the root failed - it's critical that
* btree_iter_lock_root() comes next and that it can't fail
*/
- while (iter->level > l)
+ while (iter->level > traverse_to)
if (iter->level < BTREE_MAX_DEPTH &&
- iter->l[iter->level].node) {
+ _iter->l[iter->level].node) {
struct closure cl;
int ret;
closure_init_stack(&cl);
- ret = btree_iter_down(iter, pos, &cl);
+ ret = btree_iter_down(iter, _iter, pos, &cl);
if (unlikely(ret)) {
- bch_btree_iter_unlock(iter);
+ __btree_iter_unlock(_iter);
closure_sync(&cl);
/*
@@ -634,7 +670,7 @@ retry:
* intent locks, make sure to get them again:
*/
if (ret == -EAGAIN || ret == -EINTR) {
- bch_btree_iter_upgrade(iter);
+ __btree_iter_upgrade(iter, _iter);
goto retry;
}
@@ -643,7 +679,7 @@ retry:
return ret;
}
} else {
- btree_iter_lock_root(iter, pos);
+ btree_iter_lock_root(iter, _iter, pos);
}
return 0;
@@ -651,13 +687,15 @@ retry:
int __must_check bch_btree_iter_traverse(struct btree_iter *iter)
{
- return __bch_btree_iter_traverse(iter, iter->level, iter->pos);
+ return __bch_btree_iter_traverse(iter, iter_s(iter),
+ iter->level, iter->pos);
}
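
The overall shape of __bch_btree_iter_traverse() is: walk up until a node relocks, advance that node's iterator past the target position, then walk back down to traverse_to; on -EAGAIN/-EINTR every lock is dropped before sleeping, the intent locks are retaken, and the whole walk restarts. A compilable schematic of just that retry skeleton (toy helpers; the real steps take the iterator, its state and a closure):

#include <errno.h>

static void go_up_until_relocked(void)	{ /* the btree_iter_up() loop */ }
static int  go_down_one_level(void)	{ return 0; /* or -EAGAIN/-EINTR */ }
static void unlock_everything(void)	{ /* __btree_iter_unlock() */ }
static void wait_for_io(void)		{ /* closure_sync(&cl) */ }
static void retake_intent_locks(void)	{ /* __btree_iter_upgrade() */ }

static int toy_traverse(int *level, int traverse_to)
{
retry:
	go_up_until_relocked();

	while (*level > traverse_to) {
		int ret = go_down_one_level();

		if (ret == -EAGAIN || ret == -EINTR) {
			/* never sleep on I/O while holding node locks: */
			unlock_everything();
			wait_for_io();
			retake_intent_locks();
			goto retry;
		}
		if (ret)
			return ret;	/* real error: stashed in iter->error */
		--*level;
	}
	return 0;
}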
/* Iterate across nodes (leaf and interior nodes) */
struct btree *bch_btree_iter_peek_node(struct btree_iter *iter)
{
+ struct btree_iter_state *_iter = iter_s(iter);
struct btree *b;
int ret;
@@ -667,7 +705,7 @@ struct btree *bch_btree_iter_peek_node(struct btree_iter *iter)
if (ret)
return NULL;
- b = iter->l[iter->level].node;
+ b = _iter->l[iter->level].node;
EBUG_ON(bkey_cmp(b->key.k.p, iter->pos) < 0);
iter->pos = b->key.k.p;
@@ -677,15 +715,16 @@ struct btree *bch_btree_iter_peek_node(struct btree_iter *iter)
struct btree *bch_btree_iter_next_node(struct btree_iter *iter)
{
+ struct btree_iter_state *_iter = iter_s(iter);
struct btree *b;
int ret;
EBUG_ON(iter->is_extents);
- btree_iter_up(iter);
+ btree_iter_up(iter, _iter);
if (iter->level >= BTREE_MAX_DEPTH ||
- !iter->l[iter->level].node)
+ !_iter->l[iter->level].node)
return NULL;
/* parent node usually won't be locked: redo traversal if necessary */
@@ -693,16 +732,16 @@ struct btree *bch_btree_iter_next_node(struct btree_iter *iter)
if (ret)
return NULL;
- b = iter->l[iter->level].node;
+ b = _iter->l[iter->level].node;
if (bkey_cmp(iter->pos, b->key.k.p) < 0) {
struct bpos pos = bkey_successor(iter->pos);
- ret = __bch_btree_iter_traverse(iter, 0, pos);
+ ret = __bch_btree_iter_traverse(iter, iter_s(iter), 0, pos);
if (ret)
return NULL;
- b = iter->l[iter->level].node;
+ b = _iter->l[iter->level].node;
}
iter->pos = b->key.k.p;
@@ -750,7 +789,8 @@ void bch_btree_iter_advance_pos(struct btree_iter *iter)
/* XXX: expensive */
void bch_btree_iter_rewind(struct btree_iter *iter, struct bpos pos)
{
- struct btree_iter_level *l = &iter->l[iter->level];
+ struct btree_iter_state *_iter = iter_s(iter);
+ struct btree_iter_level *l = &_iter->l[iter->level];
/* incapable of rewinding across nodes: */
BUG_ON(bkey_cmp(pos, l->node->data->min_key) < 0);
@@ -760,18 +800,19 @@ void bch_btree_iter_rewind(struct btree_iter *iter, struct bpos pos)
btree_iter_node_iter_init(iter, l, pos);
}
-struct bkey_s_c bch_btree_iter_peek(struct btree_iter *iter)
+struct bkey_s_c __bch_btree_iter_peek(struct btree_iter *iter,
+ struct btree_iter_state *_iter)
{
- struct bkey_s_c k;
struct bpos pos = iter->pos;
+ struct bkey_s_c k;
int ret;
while (1) {
- ret = __bch_btree_iter_traverse(iter, 0, pos);
+ ret = __bch_btree_iter_traverse(iter, _iter, 0, pos);
if (unlikely(ret))
return bkey_s_c_null;
- k = __btree_iter_peek(iter);
+ k = __btree_iter_peek(iter, &_iter->l[0]);
if (likely(k.k)) {
/*
* iter->pos should always be equal to the key we just
@@ -783,26 +824,106 @@ struct bkey_s_c bch_btree_iter_peek(struct btree_iter *iter)
return k;
}
- pos = iter->l[0].node->key.k.p;
+ pos = btree_iter_leaf(iter)->key.k.p;
if (!bkey_cmp(pos, POS_MAX))
return (struct bkey_s_c) { NULL, NULL };
pos = btree_type_successor(iter->btree_id, pos);
}
}
+struct bkey_s_c bch_btree_iter_peek(struct btree_iter *iter)
+{
+ return __bch_btree_iter_peek(iter, iter_s(iter));
+}
+
+/*
+ * iter_s(iter) is the cursor for iter->pos - the key we return
+ *
+ * iter->pos.snapshot may be different from snapshot->id (but must be an
+ * ancestor of it)
+ *
+ * if iter->have_alternate != 0, then iter_a(iter) is the cursor for inserting
+ * a key at iter->pos, except with iter->pos.snapshot = snapshot->id
+ */
+
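The header changes aren't in this hunk, but the accessors the comment refers to can be sketched from the uses in this file (s[2], s_idx and have_alternate are the fields this patch touches; the exact layout is an assumption):

struct toy_iter_state {
	unsigned nodes_locked;
	unsigned nodes_intent_locked;
	/* ... per-level nodes, node iterators, lock seqs ... */
};

struct toy_btree_iter {
	unsigned		s_idx:1;		/* which state is current */
	unsigned		have_alternate:1;	/* is the other one live? */
	struct toy_iter_state	s[2];
};

static inline struct toy_iter_state *toy_iter_s(struct toy_btree_iter *iter)
{
	return &iter->s[iter->s_idx];		/* cursor for iter->pos */
}

static inline struct toy_iter_state *toy_iter_a(struct toy_btree_iter *iter)
{
	return &iter->s[iter->s_idx ^ 1];	/* the insert-position cursor */
}
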
struct bkey_s_c bch_btree_iter_peek_snapshot(struct btree_iter *iter,
struct snapshot *snapshot)
{
+ struct btree_iter_state *_iter = iter_s(iter);
+ struct btree_iter_state *_alternate = iter_a(iter);
struct bkey_s_c k;
- do
+ if (iter->have_alternate) {
+ __btree_iter_unlock(_alternate);
+ iter->have_alternate = 0;
+ }
+
+ k = bch_btree_iter_peek(iter);
+ if (unlikely(!k.k))
+ return k;
+
+ if (likely(bch_is_snapshot_ancestor(iter->c,
+ snapshot, k.k->p.snapshot)))
+ return k;
+
+ /*
+ * keep current position locked, advance until we find a key that is an
+ * ancestor:
+ */
+ *_alternate = *_iter;
+ _alternate->nodes_locked = 0;
+ _alternate->nodes_intent_locked = 0;
+ iter->have_alternate = 1;
+
+ iter->s_idx ^= 1;
+ swap(_iter, _alternate);
+
+ while (1) {
+ bch_btree_iter_advance_pos(iter);
+
k = bch_btree_iter_peek(iter);
- while (k.k &&
- !bch_snapshot_is_descendant(iter->c, snapshot, k.k->p.snapshot));
+ if (unlikely(!k.k))
+ return k;
+
+ if (likely(bch_is_snapshot_ancestor(iter->c,
+ snapshot, k.k->p.snapshot)))
+ return k;
+ }
}
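
For background on the visibility test: in a snapshot tree, a key written in snapshot S appears to be visible when reading from snapshot T iff S is an ancestor of (or equal to) T, which is why the loop advances until bch_is_snapshot_ancestor() accepts the key. A toy model of that relation over a parent array (the real lookup via the cache set is different):

#include <stdbool.h>

/* Toy snapshot tree: toy_parent[id]; the root's parent is 0, and 0 is
 * not a valid snapshot id. */
static const unsigned toy_parent[] = { 0, 0, 1, 1, 3 };

/* Is @anc an ancestor of (or equal to) snapshot @id? */
static bool toy_is_snapshot_ancestor(unsigned anc, unsigned id)
{
	while (id && id != anc)
		id = toy_parent[id];
	return id && id == anc;
}

With this tree, a key written in snapshot 1 is visible when reading snapshots 1 through 4, while a key written in snapshot 3 is visible only from 3 and 4 - exactly the filtering the loop above performs, with the alternate state pinning the original position for a later insert.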
struct bkey_s_c bch_btree_iter_peek_with_holes(struct btree_iter *iter)
@@ -812,13 +933,14 @@ struct bkey_s_c bch_btree_iter_peek_with_holes(struct btree_iter *iter)
int ret;
while (1) {
- ret = __bch_btree_iter_traverse(iter, 0, iter->pos);
+ ret = __bch_btree_iter_traverse(iter, iter_s(iter),
+ 0, iter->pos);
if (unlikely(ret))
return bkey_s_c_null;
k = iter->is_extents
- ? __btree_iter_peek(iter)
- : __btree_iter_peek_all(iter);
+ ? __btree_iter_peek(iter, &iter_s(iter)->l[0])
+ : __btree_iter_peek_all(iter, &iter_s(iter)->l[0]);
recheck:
if (!k.k || bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0) {
/* hole */
@@ -832,7 +954,7 @@ recheck:
}
if (!k.k)
- k.k = &iter->l[0].node->key.k;
+ k.k = &btree_iter_leaf(iter)->key.k;
bch_key_resize(&n,
min_t(u64, KEY_SIZE_MAX,
@@ -849,7 +971,7 @@ recheck:
} else if (!bkey_deleted(k.k)) {
return k;
} else {
- __btree_iter_advance(iter);
+ __btree_iter_advance(&iter_s(iter)->l[0]);
}
}
@@ -869,26 +991,30 @@ struct bkey_s_c bch_btree_iter_peek_snapshot_with_holes(struct btree_iter *iter,
int ret;
while (1) {
- ret = __bch_btree_iter_traverse(iter, 0, iter->pos);
- if (ret)
+ ret = __bch_btree_iter_traverse(iter, iter_s(iter),
+ 0, iter->pos);
+ if (unlikely(ret))
return bkey_s_c_null;
- k = __btree_iter_peek_all(iter);
+ k = __btree_iter_peek_all(iter, &iter_s(iter)->l[0]);
recheck:
if (!k.k || bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0) {
/* hole */
bkey_init(&n);
n.p = iter->pos;
- if (!k.k)
- k.k = &iter->l[0].node->key.k;
-
- if (iter->btree_id == BTREE_ID_EXTENTS) {
+ if (iter->is_extents) {
if (n.p.offset == KEY_OFFSET_MAX) {
iter->pos = bkey_successor(iter->pos);
goto recheck;
}
+ if (!k.k)
+ k.k = &btree_iter_leaf(iter)->key.k;
+
bch_key_resize(&n,
min_t(u64, KEY_SIZE_MAX,
(k.k->p.inode == n.p.inode
@@ -904,7 +1030,7 @@ recheck:
} else if (!bkey_deleted(k.k)) {
return k;
} else {
- __btree_iter_advance(iter);
+ __btree_iter_advance(&iter_s(iter)->l[0]);
}
}
@@ -920,19 +1046,25 @@ void __bch_btree_iter_init(struct btree_iter *iter, struct cache_set *c,
enum btree_id btree_id, struct bpos pos,
int locks_want)
{
- iter->level = 0;
- iter->is_extents = btree_id == BTREE_ID_EXTENTS;
- iter->nodes_locked = 0;
- iter->nodes_intent_locked = 0;
- iter->locks_want = locks_want;
- iter->btree_id = btree_id;
- iter->at_end_of_leaf = 0;
- iter->error = 0;
+ struct btree_iter_state *_iter;
+
iter->c = c;
- iter->pos = pos;
- iter->l[iter->level].node = BTREE_ITER_NOT_END;
- iter->l[iter->level + 1].node = NULL;
iter->next = iter;
+ iter->pos = pos;
+
+ iter->error = 0;
+ iter->btree_id = btree_id;
+ iter->at_end_of_leaf = 0;
+ iter->is_extents = btree_id == BTREE_ID_EXTENTS;
+ iter->locks_want = locks_want;
+ iter->level = 0;
+ iter->s_idx = 0;
+
+ _iter = iter_s(iter);
+ _iter->nodes_locked = 0;
+ _iter->nodes_intent_locked = 0;
+ _iter->l[iter->level].node = BTREE_ITER_NOT_END;
+ _iter->l[iter->level + 1].node = NULL;
}
int bch_btree_iter_unlink(struct btree_iter *iter)
@@ -975,8 +1107,10 @@ void bch_btree_iter_init_copy(struct btree_iter *dst, struct btree_iter *src)
{
*dst = *src;
dst->next = dst;
- dst->nodes_locked = 0;
- dst->nodes_intent_locked = 0;
+ dst->s[0].nodes_locked = 0;
+ dst->s[0].nodes_intent_locked = 0;
+ dst->s[1].nodes_locked = 0;
+ dst->s[1].nodes_intent_locked = 0;
bch_btree_iter_link(src, dst);
bch_btree_iter_upgrade(dst);