summary | refs | log | tree | commit | diff
path: root/fs/bcachefs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/bcachefs')
-rw-r--r-- fs/bcachefs/bkey_buf.h 38
-rw-r--r-- fs/bcachefs/btree_journal_iter.c 70
-rw-r--r-- fs/bcachefs/btree_journal_iter_types.h 5
-rw-r--r-- fs/bcachefs/disk_accounting.c 121
-rw-r--r-- fs/bcachefs/journal_io.c 17
5 files changed, 159 insertions, 92 deletions
diff --git a/fs/bcachefs/bkey_buf.h b/fs/bcachefs/bkey_buf.h
index a30c4ae8eb36..0d0c76b013be 100644
--- a/fs/bcachefs/bkey_buf.h
+++ b/fs/bcachefs/bkey_buf.h
@@ -10,41 +10,49 @@ struct bkey_buf {
u64 onstack[12];
};
-static inline void bch2_bkey_buf_realloc(struct bkey_buf *s,
- struct bch_fs *c, unsigned u64s)
+static inline int bch2_bkey_buf_realloc_noprof(struct bkey_buf *s,
+ struct bch_fs *c, unsigned u64s)
{ /* If @s still points at its on-stack array and @u64s exceeds ARRAY_SIZE(s->onstack), switch s->k to a buffer from c->large_bkey_pool; otherwise nothing is changed. */
if (s->k == (void *) s->onstack &&
u64s > ARRAY_SIZE(s->onstack)) {
- s->k = mempool_alloc(&c->large_bkey_pool, GFP_NOFS);
+ s->k = mempool_alloc_noprof(&c->large_bkey_pool, GFP_NOFS);
memcpy(s->k, s->onstack, sizeof(s->onstack)); /* carry the whole on-stack array over into the pool buffer */
}
+
+ return 0; /* for alloc_hooks() macro */
}
+#define bch2_bkey_buf_realloc(...) alloc_hooks(bch2_bkey_buf_realloc_noprof(__VA_ARGS__))
-static inline void bch2_bkey_buf_reassemble(struct bkey_buf *s,
- struct bch_fs *c,
- struct bkey_s_c k)
+/*
+ * Grow @s if needed so it can hold k.k->u64s u64s, then reassemble @k into
+ * s->k. Always returns 0; the int return is there so the function can be
+ * wrapped by alloc_hooks().
+ */
+static inline int bch2_bkey_buf_reassemble_noprof(struct bkey_buf *s,
+ struct bch_fs *c,
+ struct bkey_s_c k)
{
-bch2_bkey_buf_realloc(s, c, k.k->u64s);
+ /*
+  * Use the _noprof variant: the hooked macro would open a nested
+  * alloc_hooks() here and attribute the pool allocation to this header
+  * instead of to the caller of bch2_bkey_buf_reassemble().
+  */
+ bch2_bkey_buf_realloc_noprof(s, c, k.k->u64s);
bkey_reassemble(s->k, k);
+ return 0; /* for alloc_hooks() macro */
}
+#define bch2_bkey_buf_reassemble(...) alloc_hooks(bch2_bkey_buf_reassemble_noprof(__VA_ARGS__))
-static inline void bch2_bkey_buf_copy(struct bkey_buf *s,
- struct bch_fs *c,
- struct bkey_i *src)
+/*
+ * Grow @s if needed so it can hold src->k.u64s u64s, then bkey_copy() @src
+ * into s->k. Always returns 0; the int return is there so the function can
+ * be wrapped by alloc_hooks().
+ */
+static inline int bch2_bkey_buf_copy_noprof(struct bkey_buf *s,
+ struct bch_fs *c,
+ struct bkey_i *src)
{
-bch2_bkey_buf_realloc(s, c, src->k.u64s);
+ /*
+  * Use the _noprof variant: the hooked macro would open a nested
+  * alloc_hooks() here and attribute the pool allocation to this header
+  * instead of to the caller of bch2_bkey_buf_copy().
+  */
+ bch2_bkey_buf_realloc_noprof(s, c, src->k.u64s);
bkey_copy(s->k, src);
+ return 0; /* for alloc_hooks() macro */
}
+#define bch2_bkey_buf_copy(...) alloc_hooks(bch2_bkey_buf_copy_noprof(__VA_ARGS__))
-static inline void bch2_bkey_buf_unpack(struct bkey_buf *s,
- struct bch_fs *c,
- struct btree *b,
- struct bkey_packed *src)
+/*
+ * Grow @s if needed so it can hold BKEY_U64s plus the value size of @src
+ * (per b->format), then unpack @src into s->k. Always returns 0; the int
+ * return is there so the function can be wrapped by alloc_hooks().
+ */
+static inline int bch2_bkey_buf_unpack_noprof(struct bkey_buf *s,
+ struct bch_fs *c,
+ struct btree *b,
+ struct bkey_packed *src)
{
- bch2_bkey_buf_realloc(s, c, BKEY_U64s +
- bkeyp_val_u64s(&b->format, src));
+ /*
+  * Use the _noprof variant: the hooked macro would open a nested
+  * alloc_hooks() here and attribute the pool allocation to this header
+  * instead of to the caller of bch2_bkey_buf_unpack().
+  */
+ bch2_bkey_buf_realloc_noprof(s, c, BKEY_U64s + bkeyp_val_u64s(&b->format, src));
bch2_bkey_unpack(b, s->k, src);
+ return 0; /* for alloc_hooks() macro */
}
+#define bch2_bkey_buf_unpack(...) alloc_hooks(bch2_bkey_buf_unpack_noprof(__VA_ARGS__))
static inline void bch2_bkey_buf_init(struct bkey_buf *s)
{
diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c
index 4213af39eb02..a6f344faf751 100644
--- a/fs/bcachefs/btree_journal_iter.c
+++ b/fs/bcachefs/btree_journal_iter.c
@@ -73,6 +73,16 @@ static size_t bch2_journal_key_search(struct journal_keys *keys,
return idx_to_pos(keys, __bch2_journal_key_search(keys, id, level, pos));
}
+static inline struct journal_key_range_overwritten *__overwrite_range(struct journal_keys *keys, u32 idx)
+{ /*
+   * Map range index @idx to its entry in keys->overwrites, reading the array
+   * pointer directly (no rcu_dereference()). Index 0 means "no range" and
+   * yields NULL.
+   */
+ return idx ? keys->overwrites.data + idx : NULL;
+}
+
+static inline struct journal_key_range_overwritten *overwrite_range(struct journal_keys *keys, u32 idx)
+{ /*
+   * Map range index @idx to its entry in keys->overwrites, loading the array
+   * pointer through rcu_dereference(). Index 0 means "no range" and yields
+   * NULL. NOTE(review): presumably callers hold the RCU read lock, since the
+   * array can be resized via darray_resize_rcu() — confirm at the call sites.
+   */
+ return idx ? rcu_dereference(keys->overwrites.data) + idx : NULL;
+}
+
/* Returns first non-overwritten key >= search key: */
const struct bkey_i *bch2_journal_keys_peek_max(struct bch_fs *c, enum btree_id btree_id,
unsigned level, struct bpos pos,
@@ -106,7 +116,7 @@ search:
if (k->overwritten) {
if (k->overwritten_range)
- *idx = rcu_dereference(k->overwritten_range)->end;
+ *idx = overwrite_range(keys, k->overwritten_range)->end;
else
*idx += 1;
continue;
@@ -169,7 +179,7 @@ search:
if (k->overwritten) {
if (k->overwritten_range)
- *idx = rcu_dereference(k->overwritten_range)->start;
+ *idx = overwrite_range(keys, k->overwritten_range)->start;
if (!*idx)
break;
--(*idx);
@@ -402,9 +412,9 @@ static void __bch2_journal_key_overwritten(struct journal_keys *keys, size_t pos
bool next_overwritten = next && next->overwritten;
struct journal_key_range_overwritten *prev_range =
- prev_overwritten ? prev->overwritten_range : NULL;
+ prev_overwritten ? __overwrite_range(keys, prev->overwritten_range) : NULL;
struct journal_key_range_overwritten *next_range =
- next_overwritten ? next->overwritten_range : NULL;
+ next_overwritten ? __overwrite_range(keys, next->overwritten_range) : NULL;
BUG_ON(prev_range && prev_range->end != idx);
BUG_ON(next_range && next_range->start != idx + 1);
@@ -412,37 +422,47 @@ static void __bch2_journal_key_overwritten(struct journal_keys *keys, size_t pos
if (prev_range && next_range) {
prev_range->end = next_range->end;
- keys->data[pos].overwritten_range = prev_range;
+ keys->data[pos].overwritten_range = prev->overwritten_range;
+
+ u32 old = next->overwritten_range;
+
for (size_t i = next_range->start; i < next_range->end; i++) {
struct journal_key *ip = keys->data + idx_to_pos(keys, i);
- BUG_ON(ip->overwritten_range != next_range);
- ip->overwritten_range = prev_range;
+ BUG_ON(ip->overwritten_range != old);
+ ip->overwritten_range = prev->overwritten_range;
}
-
- kfree_rcu_mightsleep(next_range);
} else if (prev_range) {
prev_range->end++;
- k->overwritten_range = prev_range;
+ k->overwritten_range = prev->overwritten_range;
if (next_overwritten) {
prev_range->end++;
- next->overwritten_range = prev_range;
+ next->overwritten_range = prev->overwritten_range;
}
} else if (next_range) {
next_range->start--;
- k->overwritten_range = next_range;
+ k->overwritten_range = next->overwritten_range;
if (prev_overwritten) {
next_range->start--;
- prev->overwritten_range = next_range;
+ prev->overwritten_range = next->overwritten_range;
}
} else if (prev_overwritten || next_overwritten) {
- struct journal_key_range_overwritten *r = kmalloc(sizeof(*r), GFP_KERNEL);
- if (!r)
+ /* 0 is a sentinel value */
+ if (darray_resize_rcu(&keys->overwrites, max(keys->overwrites.nr + 1, 2)))
return;
- r->start = idx - (size_t) prev_overwritten;
- r->end = idx + 1 + (size_t) next_overwritten;
+ if (!keys->overwrites.nr)
+ darray_push(&keys->overwrites, (struct journal_key_range_overwritten) {});
+
+ darray_push(&keys->overwrites, ((struct journal_key_range_overwritten) {
+ .start = idx - (size_t) prev_overwritten,
+ .end = idx + 1 + (size_t) next_overwritten,
+ }));
+
+ smp_wmb();
+ u32 r = keys->overwrites.nr - 1;
+
+ k->overwritten_range = r;
- rcu_assign_pointer(k->overwritten_range, r);
if (prev_overwritten)
prev->overwritten_range = r;
if (next_overwritten)
@@ -456,7 +476,7 @@ void bch2_journal_key_overwritten(struct bch_fs *c, enum btree_id btree,
struct journal_keys *keys = &c->journal_keys;
size_t idx = bch2_journal_key_search(keys, btree, level, pos);
- if (idx >= keys->size ||
+ if (idx >= keys->size ||
keys->data[idx].btree_id != btree ||
keys->data[idx].level != level ||
keys->data[idx].overwritten)
@@ -496,7 +516,7 @@ static struct bkey_s_c bch2_journal_iter_peek(struct bch_fs *c, struct journal_i
return bkey_i_to_s_c(journal_key_k(c, k));
if (k->overwritten_range)
- iter->idx = idx_to_pos(iter->keys, rcu_dereference(k->overwritten_range)->end);
+ iter->idx = idx_to_pos(iter->keys, overwrite_range(iter->keys, k->overwritten_range)->end);
else
bch2_journal_iter_advance(iter);
}
@@ -690,20 +710,16 @@ void bch2_journal_keys_put(struct bch_fs *c)
move_gap(keys, keys->nr);
- darray_for_each(*keys, i) {
- if (i->overwritten_range &&
- (i == &darray_last(*keys) ||
- i->overwritten_range != i[1].overwritten_range))
- kfree(i->overwritten_range);
-
+ darray_for_each(*keys, i)
if (i->allocated)
kfree(i->allocated_k);
- }
kvfree(keys->data);
keys->data = NULL;
keys->nr = keys->gap = keys->size = 0;
+ darray_exit(&keys->overwrites);
+
struct journal_replay **i;
struct genradix_iter iter;
diff --git a/fs/bcachefs/btree_journal_iter_types.h b/fs/bcachefs/btree_journal_iter_types.h
index 53d866acda25..4495fc92f848 100644
--- a/fs/bcachefs/btree_journal_iter_types.h
+++ b/fs/bcachefs/btree_journal_iter_types.h
@@ -42,8 +42,7 @@ struct journal_key {
bool allocated:1;
bool overwritten:1;
bool rewind:1;
- struct journal_key_range_overwritten __rcu *
- overwritten_range;
+ u32 overwritten_range;
};
struct journal_keys {
@@ -58,7 +57,9 @@ struct journal_keys {
size_t gap;
atomic_t ref;
bool initial_ref_held;
+
struct mutex overwrite_lock;
+ DARRAY(struct journal_key_range_overwritten) overwrites;
};
#endif /* _BCACHEFS_BTREE_JOURNAL_ITER_TYPES_H */
diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c
index 5ec57b710501..809c76b68ba8 100644
--- a/fs/bcachefs/disk_accounting.c
+++ b/fs/bcachefs/disk_accounting.c
@@ -734,6 +734,37 @@ invalid_device:
goto fsck_err;
}
+static struct journal_key *accumulate_newer_accounting_keys(struct bch_fs *c, struct journal_key *i)
+{ /*
+   * Walk the journal keys that follow @i. Every following key for which
+   * journal_key_cmp() against @i returns 0 and whose type is
+   * KEY_TYPE_accounting is accumulated into @i's key and flagged overwritten.
+   * Returns the first following key for which journal_key_cmp() is nonzero,
+   * or &darray_top(*keys) when the walk reaches the end of the array.
+   */
+ struct journal_keys *keys = &c->journal_keys;
+ struct bkey_i *k = journal_key_k(c, i);
+
+ darray_for_each_from(*keys, j, i + 1) {
+ if (journal_key_cmp(c, i, j))
+ return j;
+
+ struct bkey_i *n = journal_key_k(c, j);
+ if (n->k.type == KEY_TYPE_accounting) {
+ WARN_ON(bversion_cmp(k->k.bversion, n->k.bversion) >= 0); /* warns when @i's key is not strictly older than the key being folded in */
+
+ bch2_accounting_accumulate(bkey_i_to_accounting(k),
+ bkey_i_to_s_c_accounting(n));
+ j->overwritten = true; /* already folded into @i's key above */
+ }
+ }
+
+ return &darray_top(*keys);
+}
+
+static struct journal_key *accumulate_and_read_journal_accounting(struct btree_trans *trans, struct journal_key *i)
+{ /*
+   * Fold the following equal-comparing accounting keys into @i via
+   * accumulate_newer_accounting_keys(), then pass @i's key to
+   * accounting_read_key(). Returns the key at which to continue the walk, or
+   * ERR_PTR(ret) when accounting_read_key() returns nonzero.
+   *
+   * NOTE(review): the callers visible in this diff assign the result to `jk`
+   * and only test `jk < end`; none of them checks IS_ERR(). Confirm that an
+   * ERR_PTR() result cannot end those loops with the error silently dropped.
+   */
+ struct bch_fs *c = trans->c;
+ struct journal_key *next = accumulate_newer_accounting_keys(c, i);
+
+ int ret = accounting_read_key(trans, bkey_i_to_s_c(journal_key_k(c, i)));
+ return ret ? ERR_PTR(ret) : next;
+}
+
/*
* At startup time, initialize the in memory accounting from the btree (and
* journal)
@@ -759,6 +790,18 @@ int bch2_accounting_read(struct bch_fs *c)
percpu_memset(c->usage, 0, sizeof(*c->usage));
}
+ struct journal_keys *keys = &c->journal_keys;
+ struct journal_key *jk = keys->data;
+
+ while (jk < &darray_top(*keys) &&
+ __journal_key_cmp(c, BTREE_ID_accounting, 0, POS_MIN, jk) > 0)
+ jk++;
+
+ struct journal_key *end = jk;
+ while (end < &darray_top(*keys) &&
+ __journal_key_cmp(c, BTREE_ID_accounting, 0, SPOS_MAX, end) > 0)
+ end++;
+
struct btree_iter iter;
bch2_trans_iter_init(trans, &iter, BTREE_ID_accounting, POS_MIN,
BTREE_ITER_prefetch|BTREE_ITER_all_snapshots);
@@ -771,6 +814,21 @@ int bch2_accounting_read(struct bch_fs *c)
if (k.k->type != KEY_TYPE_accounting)
continue;
+ while (jk < end &&
+ __journal_key_cmp(c, BTREE_ID_accounting, 0, k.k->p, jk) > 0)
+ jk = accumulate_and_read_journal_accounting(trans, jk);
+
+ while (jk < end &&
+ __journal_key_cmp(c, BTREE_ID_accounting, 0, k.k->p, jk) == 0 &&
+ bversion_cmp(journal_key_k(c, jk)->k.bversion, k.k->bversion) <= 0) {
+ jk->overwritten = true;
+ jk++;
+ }
+
+ if (jk < end &&
+ __journal_key_cmp(c, BTREE_ID_accounting, 0, k.k->p, jk) == 0)
+ jk = accumulate_and_read_journal_accounting(trans, jk);
+
struct disk_accounting_pos acc_k;
bpos_to_disk_accounting_pos(&acc_k, k.k->p);
@@ -778,10 +836,14 @@ int bch2_accounting_read(struct bch_fs *c)
break;
if (!bch2_accounting_is_mem(&acc_k)) {
- struct disk_accounting_pos next;
- memset(&next, 0, sizeof(next));
- next.type = acc_k.type + 1;
- bch2_btree_iter_set_pos(&iter, disk_accounting_pos_to_bpos(&next));
+ struct disk_accounting_pos next_acc;
+ memset(&next_acc, 0, sizeof(next_acc));
+ next_acc.type = acc_k.type + 1;
+ struct bpos next = disk_accounting_pos_to_bpos(&next_acc);
+ if (jk < end)
+ next = bpos_min(next, journal_key_k(c, jk)->k.p);
+
+ bch2_btree_iter_set_pos(&iter, next);
continue;
}
@@ -791,51 +853,14 @@ int bch2_accounting_read(struct bch_fs *c)
if (ret)
return ret;
- struct journal_keys *keys = &c->journal_keys;
- move_gap(keys, keys->nr);
-
- darray_for_each(*keys, i) {
- if (i->overwritten)
- continue;
-
- struct bkey_i *k = journal_key_k(c, i);
-
- if (k->k.type == KEY_TYPE_accounting) {
- struct disk_accounting_pos acc_k;
- bpos_to_disk_accounting_pos(&acc_k, k->k.p);
-
- if (!bch2_accounting_is_mem(&acc_k))
- continue;
+ while (jk < end)
+ jk = accumulate_and_read_journal_accounting(trans, jk);
- unsigned idx = eytzinger0_find(acc->k.data, acc->k.nr,
- sizeof(acc->k.data[0]),
- accounting_pos_cmp, &k->k.p);
-
- bool applied = idx < acc->k.nr &&
- bversion_cmp(acc->k.data[idx].bversion, k->k.bversion) >= 0;
-
- if (applied)
- continue;
-
- darray_for_each_from(*keys, j, i + 1) {
- if (journal_key_cmp(c, i, j))
- break;
-
- struct bkey_i *n = journal_key_k(c, j);
- if (n->k.type == KEY_TYPE_accounting) {
- WARN_ON(bversion_cmp(k->k.bversion, n->k.bversion) >= 0);
-
- bch2_accounting_accumulate(bkey_i_to_accounting(k),
- bkey_i_to_s_c_accounting(n));
- j->overwritten = true;
- }
- }
-
- ret = accounting_read_key(trans, bkey_i_to_s_c(k));
- if (ret)
- return ret;
- }
- }
+ struct journal_key *dst = keys->data;
+ darray_for_each(*keys, i)
+ if (!i->overwritten)
+ *dst++ = *i;
+ keys->gap = keys->nr = dst - keys->data;
guard(percpu_write)(&c->mark_lock);
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index 6e8a89a0f244..06d06e88569d 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -238,6 +238,23 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca,
return ret;
}
replace:
+ /*
+  * Drop overwrites, log entries if we don't need them: compact the jset
+  * in place by sliding every kept entry down over the dropped ones, then
+  * shrink j->u64s and recompute @bytes so the copy allocated below is
+  * sized for the compacted jset.
+  */
+ if (!c->opts.retain_recovery_info &&
+ !c->opts.journal_rewind) {
+ struct jset_entry *dst = j->start;
+ vstruct_for_each_safe(j, src) {
+ if (src->type == BCH_JSET_ENTRY_log ||
+ src->type == BCH_JSET_ENTRY_overwrite)
+ continue;
+
+ /*
+  * dst and src point into the same buffer with dst <= src, so
+  * the ranges overlap whenever the kept entry is larger than
+  * what has been dropped so far (and are identical until the
+  * first drop): memcpy() is undefined there, use memmove().
+  */
+ memmove(dst, src, vstruct_bytes(src));
+ dst = vstruct_next(dst);
+ }
+
+ j->u64s = cpu_to_le32((u64 *) dst - j->_data);
+ bytes = vstruct_bytes(j);
+ }
+
i = kvmalloc(offsetof(struct journal_replay, j) + bytes, GFP_KERNEL);
if (!i)
return bch_err_throw(c, ENOMEM_journal_entry_add);