diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/bcachefs/bkey_buf.h | 38 | ||||
-rw-r--r-- | fs/bcachefs/btree_journal_iter.c | 70 | ||||
-rw-r--r-- | fs/bcachefs/btree_journal_iter_types.h | 5 | ||||
-rw-r--r-- | fs/bcachefs/disk_accounting.c | 121 | ||||
-rw-r--r-- | fs/bcachefs/journal_io.c | 17 |
5 files changed, 159 insertions, 92 deletions
diff --git a/fs/bcachefs/bkey_buf.h b/fs/bcachefs/bkey_buf.h index a30c4ae8eb36..0d0c76b013be 100644 --- a/fs/bcachefs/bkey_buf.h +++ b/fs/bcachefs/bkey_buf.h @@ -10,41 +10,49 @@ struct bkey_buf { u64 onstack[12]; }; -static inline void bch2_bkey_buf_realloc(struct bkey_buf *s, - struct bch_fs *c, unsigned u64s) +static inline int bch2_bkey_buf_realloc_noprof(struct bkey_buf *s, + struct bch_fs *c, unsigned u64s) { if (s->k == (void *) s->onstack && u64s > ARRAY_SIZE(s->onstack)) { - s->k = mempool_alloc(&c->large_bkey_pool, GFP_NOFS); + s->k = mempool_alloc_noprof(&c->large_bkey_pool, GFP_NOFS); memcpy(s->k, s->onstack, sizeof(s->onstack)); } + + return 0; /* for alloc_hooks() macro */ } +#define bch2_bkey_buf_realloc(...) alloc_hooks(bch2_bkey_buf_realloc_noprof(__VA_ARGS__)) -static inline void bch2_bkey_buf_reassemble(struct bkey_buf *s, - struct bch_fs *c, - struct bkey_s_c k) +static inline int bch2_bkey_buf_reassemble_noprof(struct bkey_buf *s, + struct bch_fs *c, + struct bkey_s_c k) { bch2_bkey_buf_realloc(s, c, k.k->u64s); bkey_reassemble(s->k, k); + return 0; } +#define bch2_bkey_buf_reassemble(...) alloc_hooks(bch2_bkey_buf_reassemble_noprof(__VA_ARGS__)) -static inline void bch2_bkey_buf_copy(struct bkey_buf *s, - struct bch_fs *c, - struct bkey_i *src) +static inline int bch2_bkey_buf_copy_noprof(struct bkey_buf *s, + struct bch_fs *c, + struct bkey_i *src) { bch2_bkey_buf_realloc(s, c, src->k.u64s); bkey_copy(s->k, src); + return 0; } +#define bch2_bkey_buf_copy(...) alloc_hooks(bch2_bkey_buf_copy_noprof(__VA_ARGS__)) -static inline void bch2_bkey_buf_unpack(struct bkey_buf *s, - struct bch_fs *c, - struct btree *b, - struct bkey_packed *src) +static inline int bch2_bkey_buf_unpack_noprof(struct bkey_buf *s, + struct bch_fs *c, + struct btree *b, + struct bkey_packed *src) { - bch2_bkey_buf_realloc(s, c, BKEY_U64s + - bkeyp_val_u64s(&b->format, src)); + bch2_bkey_buf_realloc(s, c, BKEY_U64s + bkeyp_val_u64s(&b->format, src)); bch2_bkey_unpack(b, s->k, src); + return 0; } +#define bch2_bkey_buf_unpack(...) alloc_hooks(bch2_bkey_buf_unpack_noprof(__VA_ARGS__)) static inline void bch2_bkey_buf_init(struct bkey_buf *s) { diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c index 4213af39eb02..a6f344faf751 100644 --- a/fs/bcachefs/btree_journal_iter.c +++ b/fs/bcachefs/btree_journal_iter.c @@ -73,6 +73,16 @@ static size_t bch2_journal_key_search(struct journal_keys *keys, return idx_to_pos(keys, __bch2_journal_key_search(keys, id, level, pos)); } +static inline struct journal_key_range_overwritten *__overwrite_range(struct journal_keys *keys, u32 idx) +{ + return idx ? keys->overwrites.data + idx : NULL; +} + +static inline struct journal_key_range_overwritten *overwrite_range(struct journal_keys *keys, u32 idx) +{ + return idx ? rcu_dereference(keys->overwrites.data) + idx : NULL; +} + /* Returns first non-overwritten key >= search key: */ const struct bkey_i *bch2_journal_keys_peek_max(struct bch_fs *c, enum btree_id btree_id, unsigned level, struct bpos pos, @@ -106,7 +116,7 @@ search: if (k->overwritten) { if (k->overwritten_range) - *idx = rcu_dereference(k->overwritten_range)->end; + *idx = overwrite_range(keys, k->overwritten_range)->end; else *idx += 1; continue; @@ -169,7 +179,7 @@ search: if (k->overwritten) { if (k->overwritten_range) - *idx = rcu_dereference(k->overwritten_range)->start; + *idx = overwrite_range(keys, k->overwritten_range)->start; if (!*idx) break; --(*idx); @@ -402,9 +412,9 @@ static void __bch2_journal_key_overwritten(struct journal_keys *keys, size_t pos bool next_overwritten = next && next->overwritten; struct journal_key_range_overwritten *prev_range = - prev_overwritten ? prev->overwritten_range : NULL; + prev_overwritten ? __overwrite_range(keys, prev->overwritten_range) : NULL; struct journal_key_range_overwritten *next_range = - next_overwritten ? next->overwritten_range : NULL; + next_overwritten ? __overwrite_range(keys, next->overwritten_range) : NULL; BUG_ON(prev_range && prev_range->end != idx); BUG_ON(next_range && next_range->start != idx + 1); @@ -412,37 +422,47 @@ static void __bch2_journal_key_overwritten(struct journal_keys *keys, size_t pos if (prev_range && next_range) { prev_range->end = next_range->end; - keys->data[pos].overwritten_range = prev_range; + keys->data[pos].overwritten_range = prev->overwritten_range; + + u32 old = next->overwritten_range; + for (size_t i = next_range->start; i < next_range->end; i++) { struct journal_key *ip = keys->data + idx_to_pos(keys, i); - BUG_ON(ip->overwritten_range != next_range); - ip->overwritten_range = prev_range; + BUG_ON(ip->overwritten_range != old); + ip->overwritten_range = prev->overwritten_range; } - - kfree_rcu_mightsleep(next_range); } else if (prev_range) { prev_range->end++; - k->overwritten_range = prev_range; + k->overwritten_range = prev->overwritten_range; if (next_overwritten) { prev_range->end++; - next->overwritten_range = prev_range; + next->overwritten_range = prev->overwritten_range; } } else if (next_range) { next_range->start--; - k->overwritten_range = next_range; + k->overwritten_range = next->overwritten_range; if (prev_overwritten) { next_range->start--; - prev->overwritten_range = next_range; + prev->overwritten_range = next->overwritten_range; } } else if (prev_overwritten || next_overwritten) { - struct journal_key_range_overwritten *r = kmalloc(sizeof(*r), GFP_KERNEL); - if (!r) + /* 0 is a sentinel value */ + if (darray_resize_rcu(&keys->overwrites, max(keys->overwrites.nr + 1, 2))) return; - r->start = idx - (size_t) prev_overwritten; - r->end = idx + 1 + (size_t) next_overwritten; + if (!keys->overwrites.nr) + darray_push(&keys->overwrites, (struct journal_key_range_overwritten) {}); + + darray_push(&keys->overwrites, ((struct journal_key_range_overwritten) { + .start = idx - (size_t) prev_overwritten, + .end = idx + 1 + (size_t) next_overwritten, + })); + + smp_wmb(); + u32 r = keys->overwrites.nr - 1; + + k->overwritten_range = r; - rcu_assign_pointer(k->overwritten_range, r); if (prev_overwritten) prev->overwritten_range = r; if (next_overwritten) @@ -456,7 +476,7 @@ void bch2_journal_key_overwritten(struct bch_fs *c, enum btree_id btree, struct journal_keys *keys = &c->journal_keys; size_t idx = bch2_journal_key_search(keys, btree, level, pos); - if (idx >= keys->size || + if (idx >= keys->size || keys->data[idx].btree_id != btree || keys->data[idx].level != level || keys->data[idx].overwritten) @@ -496,7 +516,7 @@ static struct bkey_s_c bch2_journal_iter_peek(struct bch_fs *c, struct journal_i return bkey_i_to_s_c(journal_key_k(c, k)); if (k->overwritten_range) - iter->idx = idx_to_pos(iter->keys, rcu_dereference(k->overwritten_range)->end); + iter->idx = idx_to_pos(iter->keys, overwrite_range(iter->keys, k->overwritten_range)->end); else bch2_journal_iter_advance(iter); } @@ -690,20 +710,16 @@ void bch2_journal_keys_put(struct bch_fs *c) move_gap(keys, keys->nr); - darray_for_each(*keys, i) { - if (i->overwritten_range && - (i == &darray_last(*keys) || - i->overwritten_range != i[1].overwritten_range)) - kfree(i->overwritten_range); - + darray_for_each(*keys, i) if (i->allocated) kfree(i->allocated_k); - } kvfree(keys->data); keys->data = NULL; keys->nr = keys->gap = keys->size = 0; + darray_exit(&keys->overwrites); + struct journal_replay **i; struct genradix_iter iter; diff --git a/fs/bcachefs/btree_journal_iter_types.h b/fs/bcachefs/btree_journal_iter_types.h index 53d866acda25..4495fc92f848 100644 --- a/fs/bcachefs/btree_journal_iter_types.h +++ b/fs/bcachefs/btree_journal_iter_types.h @@ -42,8 +42,7 @@ struct journal_key { bool allocated:1; bool overwritten:1; bool rewind:1; - struct journal_key_range_overwritten __rcu * - overwritten_range; + u32 overwritten_range; }; struct journal_keys { @@ -58,7 +57,9 @@ struct journal_keys { size_t gap; atomic_t ref; bool initial_ref_held; + struct mutex overwrite_lock; + DARRAY(struct journal_key_range_overwritten) overwrites; }; #endif /* _BCACHEFS_BTREE_JOURNAL_ITER_TYPES_H */ diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index 5ec57b710501..809c76b68ba8 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -734,6 +734,37 @@ invalid_device: goto fsck_err; } +static struct journal_key *accumulate_newer_accounting_keys(struct bch_fs *c, struct journal_key *i) +{ + struct journal_keys *keys = &c->journal_keys; + struct bkey_i *k = journal_key_k(c, i); + + darray_for_each_from(*keys, j, i + 1) { + if (journal_key_cmp(c, i, j)) + return j; + + struct bkey_i *n = journal_key_k(c, j); + if (n->k.type == KEY_TYPE_accounting) { + WARN_ON(bversion_cmp(k->k.bversion, n->k.bversion) >= 0); + + bch2_accounting_accumulate(bkey_i_to_accounting(k), + bkey_i_to_s_c_accounting(n)); + j->overwritten = true; + } + } + + return &darray_top(*keys); +} + +static struct journal_key *accumulate_and_read_journal_accounting(struct btree_trans *trans, struct journal_key *i) +{ + struct bch_fs *c = trans->c; + struct journal_key *next = accumulate_newer_accounting_keys(c, i); + + int ret = accounting_read_key(trans, bkey_i_to_s_c(journal_key_k(c, i))); + return ret ? ERR_PTR(ret) : next; +} + /* * At startup time, initialize the in memory accounting from the btree (and * journal) @@ -759,6 +790,18 @@ int bch2_accounting_read(struct bch_fs *c) percpu_memset(c->usage, 0, sizeof(*c->usage)); } + struct journal_keys *keys = &c->journal_keys; + struct journal_key *jk = keys->data; + + while (jk < &darray_top(*keys) && + __journal_key_cmp(c, BTREE_ID_accounting, 0, POS_MIN, jk) > 0) + jk++; + + struct journal_key *end = jk; + while (end < &darray_top(*keys) && + __journal_key_cmp(c, BTREE_ID_accounting, 0, SPOS_MAX, end) > 0) + end++; + struct btree_iter iter; bch2_trans_iter_init(trans, &iter, BTREE_ID_accounting, POS_MIN, BTREE_ITER_prefetch|BTREE_ITER_all_snapshots); @@ -771,6 +814,21 @@ int bch2_accounting_read(struct bch_fs *c) if (k.k->type != KEY_TYPE_accounting) continue; + while (jk < end && + __journal_key_cmp(c, BTREE_ID_accounting, 0, k.k->p, jk) > 0) + jk = accumulate_and_read_journal_accounting(trans, jk); + + while (jk < end && + __journal_key_cmp(c, BTREE_ID_accounting, 0, k.k->p, jk) == 0 && + bversion_cmp(journal_key_k(c, jk)->k.bversion, k.k->bversion) <= 0) { + jk->overwritten = true; + jk++; + } + + if (jk < end && + __journal_key_cmp(c, BTREE_ID_accounting, 0, k.k->p, jk) == 0) + jk = accumulate_and_read_journal_accounting(trans, jk); + struct disk_accounting_pos acc_k; bpos_to_disk_accounting_pos(&acc_k, k.k->p); @@ -778,10 +836,14 @@ int bch2_accounting_read(struct bch_fs *c) break; if (!bch2_accounting_is_mem(&acc_k)) { - struct disk_accounting_pos next; - memset(&next, 0, sizeof(next)); - next.type = acc_k.type + 1; - bch2_btree_iter_set_pos(&iter, disk_accounting_pos_to_bpos(&next)); + struct disk_accounting_pos next_acc; + memset(&next_acc, 0, sizeof(next_acc)); + next_acc.type = acc_k.type + 1; + struct bpos next = disk_accounting_pos_to_bpos(&next_acc); + if (jk < end) + next = bpos_min(next, journal_key_k(c, jk)->k.p); + + bch2_btree_iter_set_pos(&iter, next); continue; } @@ -791,51 +853,14 @@ int bch2_accounting_read(struct bch_fs *c) if (ret) return ret; - struct journal_keys *keys = &c->journal_keys; - move_gap(keys, keys->nr); - - darray_for_each(*keys, i) { - if (i->overwritten) - continue; - - struct bkey_i *k = journal_key_k(c, i); - - if (k->k.type == KEY_TYPE_accounting) { - struct disk_accounting_pos acc_k; - bpos_to_disk_accounting_pos(&acc_k, k->k.p); - - if (!bch2_accounting_is_mem(&acc_k)) - continue; + while (jk < end) + jk = accumulate_and_read_journal_accounting(trans, jk); - unsigned idx = eytzinger0_find(acc->k.data, acc->k.nr, - sizeof(acc->k.data[0]), - accounting_pos_cmp, &k->k.p); - - bool applied = idx < acc->k.nr && - bversion_cmp(acc->k.data[idx].bversion, k->k.bversion) >= 0; - - if (applied) - continue; - - darray_for_each_from(*keys, j, i + 1) { - if (journal_key_cmp(c, i, j)) - break; - - struct bkey_i *n = journal_key_k(c, j); - if (n->k.type == KEY_TYPE_accounting) { - WARN_ON(bversion_cmp(k->k.bversion, n->k.bversion) >= 0); - - bch2_accounting_accumulate(bkey_i_to_accounting(k), - bkey_i_to_s_c_accounting(n)); - j->overwritten = true; - } - } - - ret = accounting_read_key(trans, bkey_i_to_s_c(k)); - if (ret) - return ret; - } - } + struct journal_key *dst = keys->data; + darray_for_each(*keys, i) + if (!i->overwritten) + *dst++ = *i; + keys->gap = keys->nr = dst - keys->data; guard(percpu_write)(&c->mark_lock); diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 6e8a89a0f244..06d06e88569d 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -238,6 +238,23 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca, return ret; } replace: + /* Drop overwrites, log entries if we don't need them: */ + if (!c->opts.retain_recovery_info && + !c->opts.journal_rewind) { + struct jset_entry *dst = j->start; + vstruct_for_each_safe(j, src) { + if (src->type == BCH_JSET_ENTRY_log || + src->type == BCH_JSET_ENTRY_overwrite) + continue; + + memcpy(dst, src, vstruct_bytes(src)); + dst = vstruct_next(dst); + } + + j->u64s = cpu_to_le32((u64 *) dst - j->_data); + bytes = vstruct_bytes(j); + } + i = kvmalloc(offsetof(struct journal_replay, j) + bytes, GFP_KERNEL); if (!i) return bch_err_throw(c, ENOMEM_journal_entry_add); |