summaryrefslogtreecommitdiff
path: root/fs/bcachefs/btree_journal_iter.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/bcachefs/btree_journal_iter.c')
-rw-r--r--fs/bcachefs/btree_journal_iter.c209
1 files changed, 121 insertions, 88 deletions
diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c
index 24f2fbe84ad7..a6f344faf751 100644
--- a/fs/bcachefs/btree_journal_iter.c
+++ b/fs/bcachefs/btree_journal_iter.c
@@ -46,21 +46,22 @@ static size_t __bch2_journal_key_search(struct journal_keys *keys,
enum btree_id id, unsigned level,
struct bpos pos)
{
+ struct bch_fs *c = container_of(keys, struct bch_fs, journal_keys);
size_t l = 0, r = keys->nr, m;
while (l < r) {
m = l + ((r - l) >> 1);
- if (__journal_key_cmp(id, level, pos, idx_to_key(keys, m)) > 0)
+ if (__journal_key_cmp(c, id, level, pos, idx_to_key(keys, m)) > 0)
l = m + 1;
else
r = m;
}
BUG_ON(l < keys->nr &&
- __journal_key_cmp(id, level, pos, idx_to_key(keys, l)) > 0);
+ __journal_key_cmp(c, id, level, pos, idx_to_key(keys, l)) > 0);
BUG_ON(l &&
- __journal_key_cmp(id, level, pos, idx_to_key(keys, l - 1)) <= 0);
+ __journal_key_cmp(c, id, level, pos, idx_to_key(keys, l - 1)) <= 0);
return l;
}
@@ -72,10 +73,20 @@ static size_t bch2_journal_key_search(struct journal_keys *keys,
return idx_to_pos(keys, __bch2_journal_key_search(keys, id, level, pos));
}
+static inline struct journal_key_range_overwritten *__overwrite_range(struct journal_keys *keys, u32 idx)
+{
+ return idx ? keys->overwrites.data + idx : NULL;
+}
+
+static inline struct journal_key_range_overwritten *overwrite_range(struct journal_keys *keys, u32 idx)
+{
+ return idx ? rcu_dereference(keys->overwrites.data) + idx : NULL;
+}
+
/* Returns first non-overwritten key >= search key: */
-struct bkey_i *bch2_journal_keys_peek_max(struct bch_fs *c, enum btree_id btree_id,
- unsigned level, struct bpos pos,
- struct bpos end_pos, size_t *idx)
+const struct bkey_i *bch2_journal_keys_peek_max(struct bch_fs *c, enum btree_id btree_id,
+ unsigned level, struct bpos pos,
+ struct bpos end_pos, size_t *idx)
{
struct journal_keys *keys = &c->journal_keys;
unsigned iters = 0;
@@ -87,7 +98,7 @@ search:
*idx = __bch2_journal_key_search(keys, btree_id, level, pos);
while (*idx &&
- __journal_key_cmp(btree_id, level, end_pos, idx_to_key(keys, *idx - 1)) <= 0) {
+ __journal_key_cmp(c, btree_id, level, end_pos, idx_to_key(keys, *idx - 1)) <= 0) {
--(*idx);
iters++;
if (iters == 10) {
@@ -96,23 +107,23 @@ search:
}
}
- struct bkey_i *ret = NULL;
+ const struct bkey_i *ret = NULL;
rcu_read_lock(); /* for overwritten_ranges */
while ((k = *idx < keys->nr ? idx_to_key(keys, *idx) : NULL)) {
- if (__journal_key_cmp(btree_id, level, end_pos, k) < 0)
+ if (__journal_key_cmp(c, btree_id, level, end_pos, k) < 0)
break;
if (k->overwritten) {
if (k->overwritten_range)
- *idx = rcu_dereference(k->overwritten_range)->end;
+ *idx = overwrite_range(keys, k->overwritten_range)->end;
else
*idx += 1;
continue;
}
- if (__journal_key_cmp(btree_id, level, pos, k) <= 0) {
- ret = k->k;
+ if (__journal_key_cmp(c, btree_id, level, pos, k) <= 0) {
+ ret = journal_key_k(c, k);
break;
}
@@ -129,9 +140,9 @@ search:
return ret;
}
-struct bkey_i *bch2_journal_keys_peek_prev_min(struct bch_fs *c, enum btree_id btree_id,
- unsigned level, struct bpos pos,
- struct bpos end_pos, size_t *idx)
+const struct bkey_i *bch2_journal_keys_peek_prev_min(struct bch_fs *c, enum btree_id btree_id,
+ unsigned level, struct bpos pos,
+ struct bpos end_pos, size_t *idx)
{
struct journal_keys *keys = &c->journal_keys;
unsigned iters = 0;
@@ -146,7 +157,7 @@ search:
*idx = __bch2_journal_key_search(keys, btree_id, level, pos);
while (*idx < keys->nr &&
- __journal_key_cmp(btree_id, level, end_pos, idx_to_key(keys, *idx)) >= 0) {
+ __journal_key_cmp(c, btree_id, level, end_pos, idx_to_key(keys, *idx)) >= 0) {
(*idx)++;
iters++;
if (iters == 10) {
@@ -158,25 +169,25 @@ search:
if (*idx == keys->nr)
--(*idx);
- struct bkey_i *ret = NULL;
+ const struct bkey_i *ret = NULL;
rcu_read_lock(); /* for overwritten_ranges */
while (true) {
k = idx_to_key(keys, *idx);
- if (__journal_key_cmp(btree_id, level, end_pos, k) > 0)
+ if (__journal_key_cmp(c, btree_id, level, end_pos, k) > 0)
break;
if (k->overwritten) {
if (k->overwritten_range)
- *idx = rcu_dereference(k->overwritten_range)->start;
+ *idx = overwrite_range(keys, k->overwritten_range)->start;
if (!*idx)
break;
--(*idx);
continue;
}
- if (__journal_key_cmp(btree_id, level, pos, k) >= 0) {
- ret = k->k;
+ if (__journal_key_cmp(c, btree_id, level, pos, k) >= 0) {
+ ret = journal_key_k(c, k);
break;
}
@@ -194,8 +205,8 @@ search:
return ret;
}
-struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *c, enum btree_id btree_id,
- unsigned level, struct bpos pos)
+const struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *c, enum btree_id btree_id,
+ unsigned level, struct bpos pos)
{
size_t idx = 0;
@@ -264,13 +275,8 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
struct journal_key n = {
.btree_id = id,
.level = level,
- .k = k,
.allocated = true,
- /*
- * Ensure these keys are done last by journal replay, to unblock
- * journal reclaim:
- */
- .journal_seq = U64_MAX,
+ .allocated_k = k,
};
struct journal_keys *keys = &c->journal_keys;
size_t idx = bch2_journal_key_search(keys, id, level, k->k.p);
@@ -278,8 +284,8 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
BUG_ON(test_bit(BCH_FS_rw, &c->flags));
if (idx < keys->size &&
- journal_key_cmp(&n, &keys->data[idx]) == 0) {
- struct bkey_i *o = keys->data[idx].k;
+ journal_key_cmp(c, &n, &keys->data[idx]) == 0) {
+ struct bkey_i *o = journal_key_k(c, &keys->data[idx]);
if (k->k.type == KEY_TYPE_accounting &&
o->k.type == KEY_TYPE_accounting) {
@@ -291,7 +297,7 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
}
if (keys->data[idx].allocated)
- kfree(keys->data[idx].k);
+ kfree(keys->data[idx].allocated_k);
keys->data[idx] = n;
return 0;
}
@@ -376,17 +382,20 @@ int bch2_journal_key_delete(struct bch_fs *c, enum btree_id id,
bool bch2_key_deleted_in_journal(struct btree_trans *trans, enum btree_id btree,
unsigned level, struct bpos pos)
{
- struct journal_keys *keys = &trans->c->journal_keys;
+ if (!trans->journal_replay_not_finished)
+ return false;
+
+ struct bch_fs *c = trans->c;
+ struct journal_keys *keys = &c->journal_keys;
size_t idx = bch2_journal_key_search(keys, btree, level, pos);
- if (!trans->journal_replay_not_finished)
+ if (idx >= keys->size ||
+ keys->data[idx].btree_id != btree ||
+ keys->data[idx].level != level)
return false;
- return (idx < keys->size &&
- keys->data[idx].btree_id == btree &&
- keys->data[idx].level == level &&
- bpos_eq(keys->data[idx].k->k.p, pos) &&
- bkey_deleted(&keys->data[idx].k->k));
+ struct bkey_i *k = journal_key_k(c, &keys->data[idx]);
+ return bpos_eq(k->k.p, pos) && bkey_deleted(&k->k);
}
static void __bch2_journal_key_overwritten(struct journal_keys *keys, size_t pos)
@@ -403,9 +412,9 @@ static void __bch2_journal_key_overwritten(struct journal_keys *keys, size_t pos
bool next_overwritten = next && next->overwritten;
struct journal_key_range_overwritten *prev_range =
- prev_overwritten ? prev->overwritten_range : NULL;
+ prev_overwritten ? __overwrite_range(keys, prev->overwritten_range) : NULL;
struct journal_key_range_overwritten *next_range =
- next_overwritten ? next->overwritten_range : NULL;
+ next_overwritten ? __overwrite_range(keys, next->overwritten_range) : NULL;
BUG_ON(prev_range && prev_range->end != idx);
BUG_ON(next_range && next_range->start != idx + 1);
@@ -413,37 +422,47 @@ static void __bch2_journal_key_overwritten(struct journal_keys *keys, size_t pos
if (prev_range && next_range) {
prev_range->end = next_range->end;
- keys->data[pos].overwritten_range = prev_range;
+ keys->data[pos].overwritten_range = prev->overwritten_range;
+
+ u32 old = next->overwritten_range;
+
for (size_t i = next_range->start; i < next_range->end; i++) {
struct journal_key *ip = keys->data + idx_to_pos(keys, i);
- BUG_ON(ip->overwritten_range != next_range);
- ip->overwritten_range = prev_range;
+ BUG_ON(ip->overwritten_range != old);
+ ip->overwritten_range = prev->overwritten_range;
}
-
- kfree_rcu_mightsleep(next_range);
} else if (prev_range) {
prev_range->end++;
- k->overwritten_range = prev_range;
+ k->overwritten_range = prev->overwritten_range;
if (next_overwritten) {
prev_range->end++;
- next->overwritten_range = prev_range;
+ next->overwritten_range = prev->overwritten_range;
}
} else if (next_range) {
next_range->start--;
- k->overwritten_range = next_range;
+ k->overwritten_range = next->overwritten_range;
if (prev_overwritten) {
next_range->start--;
- prev->overwritten_range = next_range;
+ prev->overwritten_range = next->overwritten_range;
}
} else if (prev_overwritten || next_overwritten) {
- struct journal_key_range_overwritten *r = kmalloc(sizeof(*r), GFP_KERNEL);
- if (!r)
+ /* 0 is a sentinel value */
+ if (darray_resize_rcu(&keys->overwrites, max(keys->overwrites.nr + 1, 2)))
return;
- r->start = idx - (size_t) prev_overwritten;
- r->end = idx + 1 + (size_t) next_overwritten;
+ if (!keys->overwrites.nr)
+ darray_push(&keys->overwrites, (struct journal_key_range_overwritten) {});
+
+ darray_push(&keys->overwrites, ((struct journal_key_range_overwritten) {
+ .start = idx - (size_t) prev_overwritten,
+ .end = idx + 1 + (size_t) next_overwritten,
+ }));
+
+ smp_wmb();
+ u32 r = keys->overwrites.nr - 1;
+
+ k->overwritten_range = r;
- rcu_assign_pointer(k->overwritten_range, r);
if (prev_overwritten)
prev->overwritten_range = r;
if (next_overwritten)
@@ -457,11 +476,15 @@ void bch2_journal_key_overwritten(struct bch_fs *c, enum btree_id btree,
struct journal_keys *keys = &c->journal_keys;
size_t idx = bch2_journal_key_search(keys, btree, level, pos);
- if (idx < keys->size &&
- keys->data[idx].btree_id == btree &&
- keys->data[idx].level == level &&
- bpos_eq(keys->data[idx].k->k.p, pos) &&
- !keys->data[idx].overwritten) {
+ if (idx >= keys->size ||
+ keys->data[idx].btree_id != btree ||
+ keys->data[idx].level != level ||
+ keys->data[idx].overwritten)
+ return;
+
+ struct bkey_i *k = journal_key_k(c, &keys->data[idx]);
+
+ if (bpos_eq(k->k.p, pos)) {
guard(mutex)(&keys->overwrite_lock);
__bch2_journal_key_overwritten(keys, idx);
}
@@ -476,7 +499,7 @@ static void bch2_journal_iter_advance(struct journal_iter *iter)
}
}
-static struct bkey_s_c bch2_journal_iter_peek(struct journal_iter *iter)
+static struct bkey_s_c bch2_journal_iter_peek(struct bch_fs *c, struct journal_iter *iter)
{
journal_iter_verify(iter);
@@ -490,10 +513,10 @@ static struct bkey_s_c bch2_journal_iter_peek(struct journal_iter *iter)
BUG_ON(cmp);
if (!k->overwritten)
- return bkey_i_to_s_c(k->k);
+ return bkey_i_to_s_c(journal_key_k(c, k));
if (k->overwritten_range)
- iter->idx = idx_to_pos(iter->keys, rcu_dereference(k->overwritten_range)->end);
+ iter->idx = idx_to_pos(iter->keys, overwrite_range(iter->keys, k->overwritten_range)->end);
else
bch2_journal_iter_advance(iter);
}
@@ -554,7 +577,7 @@ static void btree_and_journal_iter_prefetch(struct btree_and_journal_iter *_iter
while (nr--) {
bch2_btree_and_journal_iter_advance(&iter);
- struct bkey_s_c k = bch2_btree_and_journal_iter_peek(&iter);
+ struct bkey_s_c k = bch2_btree_and_journal_iter_peek(c, &iter);
if (!k.k)
break;
@@ -565,7 +588,7 @@ static void btree_and_journal_iter_prefetch(struct btree_and_journal_iter *_iter
bch2_bkey_buf_exit(&tmp, c);
}
-struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *iter)
+struct bkey_s_c bch2_btree_and_journal_iter_peek(struct bch_fs *c, struct btree_and_journal_iter *iter)
{
struct bkey_s_c btree_k, journal_k = bkey_s_c_null, ret;
size_t iters = 0;
@@ -586,7 +609,7 @@ again:
bch2_journal_iter_advance_btree(iter);
if (iter->trans->journal_replay_not_finished)
- while ((journal_k = bch2_journal_iter_peek(&iter->journal)).k &&
+ while ((journal_k = bch2_journal_iter_peek(c, &iter->journal)).k &&
bpos_lt(journal_k.k->p, iter->pos))
bch2_journal_iter_advance(&iter->journal);
@@ -658,15 +681,22 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *trans,
/*
* When keys compare equal, oldest compares first:
*/
-static int journal_sort_key_cmp(const void *_l, const void *_r)
+static int journal_sort_key_cmp(const void *_l, const void *_r, const void *priv)
{
+ struct bch_fs *c = (void *) priv;
const struct journal_key *l = _l;
const struct journal_key *r = _r;
int rewind = l->rewind && r->rewind ? -1 : 1;
- return journal_key_cmp(l, r) ?:
- ((cmp_int(l->journal_seq, r->journal_seq) ?:
- cmp_int(l->journal_offset, r->journal_offset)) * rewind);
+ int cmp = journal_key_cmp(c, l, r);
+ if (cmp)
+ return cmp;
+
+ if (l->allocated || r->allocated)
+ return cmp_int(l->allocated, r->allocated);
+
+ return ((cmp_int(l->journal_seq_offset, r->journal_seq_offset) ?:
+ cmp_int(l->journal_offset, r->journal_offset)) * rewind);
}
void bch2_journal_keys_put(struct bch_fs *c)
@@ -680,20 +710,16 @@ void bch2_journal_keys_put(struct bch_fs *c)
move_gap(keys, keys->nr);
- darray_for_each(*keys, i) {
- if (i->overwritten_range &&
- (i == &darray_last(*keys) ||
- i->overwritten_range != i[1].overwritten_range))
- kfree(i->overwritten_range);
-
+ darray_for_each(*keys, i)
if (i->allocated)
- kfree(i->k);
- }
+ kfree(i->allocated_k);
kvfree(keys->data);
keys->data = NULL;
keys->nr = keys->gap = keys->size = 0;
+ darray_exit(&keys->overwrites);
+
struct journal_replay **i;
struct genradix_iter iter;
@@ -704,8 +730,10 @@ void bch2_journal_keys_put(struct bch_fs *c)
static void __journal_keys_sort(struct journal_keys *keys)
{
- sort_nonatomic(keys->data, keys->nr, sizeof(keys->data[0]),
- journal_sort_key_cmp, NULL);
+ struct bch_fs *c = container_of(keys, struct bch_fs, journal_keys);
+
+ sort_r_nonatomic(keys->data, keys->nr, sizeof(keys->data[0]),
+ journal_sort_key_cmp, NULL, c);
cond_resched();
@@ -717,9 +745,10 @@ static void __journal_keys_sort(struct journal_keys *keys)
* compare each individual accounting key against the version in
* the btree during replay:
*/
- if (src->k->k.type != KEY_TYPE_accounting &&
+ struct bkey_i *k = journal_key_k(c, src);
+ if (k->k.type != KEY_TYPE_accounting &&
src + 1 < &darray_top(*keys) &&
- !journal_key_cmp(src, src + 1))
+ !journal_key_cmp(c, src, src + 1))
continue;
*dst++ = *src;
@@ -763,8 +792,7 @@ int bch2_journal_keys_sort(struct bch_fs *c)
.btree_id = entry->btree_id,
.level = entry->level,
.rewind = rewind,
- .k = k,
- .journal_seq = le64_to_cpu(i->j.seq),
+ .journal_seq_offset = journal_entry_radix_idx(c, le64_to_cpu(i->j.seq)),
.journal_offset = k->_data - i->j._data,
};
@@ -801,13 +829,18 @@ void bch2_shoot_down_journal_keys(struct bch_fs *c, enum btree_id btree,
move_gap(keys, keys->nr);
- darray_for_each(*keys, i)
+ darray_for_each(*keys, i) {
+ struct bkey_i *k = journal_key_k(c, i);
+
if (!(i->btree_id == btree &&
i->level >= level_min &&
i->level <= level_max &&
- bpos_ge(i->k->k.p, start) &&
- bpos_le(i->k->k.p, end)))
+ bpos_ge(k->k.p, start) &&
+ bpos_le(k->k.p, end)))
keys->data[dst++] = *i;
+ else if (i->allocated)
+ kfree(i->allocated_k);
+ }
keys->nr = keys->gap = dst;
}
@@ -825,7 +858,7 @@ void bch2_journal_keys_dump(struct bch_fs *c)
prt_printf(&buf, "btree=");
bch2_btree_id_to_text(&buf, i->btree_id);
prt_printf(&buf, " l=%u ", i->level);
- bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(i->k));
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(journal_key_k(c, i)));
pr_err("%s", buf.buf);
}
}