diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2022-01-05 19:39:57 -0500 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@gmail.com> | 2022-01-05 19:39:57 -0500 |
commit | 47c554c31abd26a23906b43d756569e64ff60f8d (patch) | |
tree | 41e25ea13636c3218a39422b05eebf77a4d60553 /libbcachefs/btree_gc.c | |
parent | 931ed5a709c2afa239cbae2e13bc22f13e99713c (diff) |
Update bcachefs sources to 5242db9aec bcachefs: Fix bch2_check_fix_ptrs()
Diffstat (limited to 'libbcachefs/btree_gc.c')
-rw-r--r-- | libbcachefs/btree_gc.c | 408 |
1 files changed, 142 insertions, 266 deletions
diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index 268ad74d..a201052e 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -9,7 +9,6 @@ #include "alloc_foreground.h" #include "bkey_methods.h" #include "bkey_buf.h" -#include "btree_key_cache.h" #include "btree_locking.h" #include "btree_update_interior.h" #include "btree_io.h" @@ -534,6 +533,7 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id, bkey_for_each_ptr_decode(k->k, ptrs, p, entry) { struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev); struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr); + struct bucket *g2 = PTR_BUCKET(ca, &p.ptr); enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, &entry->ptr); if (fsck_err_on(!g->gen_valid, c, @@ -544,8 +544,9 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id, p.ptr.gen, (bch2_bkey_val_to_text(&PBUF(buf), c, *k), buf))) { if (!p.ptr.cached) { - g->_mark.gen = p.ptr.gen; - g->gen_valid = true; + g2->_mark.gen = g->_mark.gen = p.ptr.gen; + g2->gen_valid = g->gen_valid = true; + set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags); } else { do_update = true; } @@ -559,12 +560,13 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id, p.ptr.gen, g->mark.gen, (bch2_bkey_val_to_text(&PBUF(buf), c, *k), buf))) { if (!p.ptr.cached) { - g->_mark.gen = p.ptr.gen; - g->gen_valid = true; - g->_mark.data_type = 0; - g->_mark.dirty_sectors = 0; - g->_mark.cached_sectors = 0; + g2->_mark.gen = g->_mark.gen = p.ptr.gen; + g2->gen_valid = g->gen_valid = true; + g2->_mark.data_type = 0; + g2->_mark.dirty_sectors = 0; + g2->_mark.cached_sectors = 0; set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags); + set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags); } else { do_update = true; } @@ -601,8 +603,9 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id, bch2_data_types[data_type], (bch2_bkey_val_to_text(&PBUF(buf), c, *k), buf))) { if (data_type == BCH_DATA_btree) { - g->_mark.data_type = data_type; - set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags); + g2->_mark.data_type = g->_mark.data_type = data_type; + g2->gen_valid = g->gen_valid = true; + set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags); } else { do_update = true; } @@ -1166,14 +1169,13 @@ static int bch2_gc_done(struct bch_fs *c, unsigned i, dev; int ret = 0; - percpu_down_write(&c->mark_lock); - #define copy_field(_f, _msg, ...) \ if (dst->_f != src->_f) { \ if (verify) \ fsck_err(c, _msg ": got %llu, should be %llu" \ , ##__VA_ARGS__, dst->_f, src->_f); \ dst->_f = src->_f; \ + set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags); \ } #define copy_stripe_field(_f, _msg, ...) \ if (dst->_f != src->_f) { \ @@ -1183,6 +1185,18 @@ static int bch2_gc_done(struct bch_fs *c, iter.pos, ##__VA_ARGS__, \ dst->_f, src->_f); \ dst->_f = src->_f; \ + set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags); \ + } +#define copy_bucket_field(_f) \ + if (dst->b[b]._f != src->b[b]._f) { \ + if (verify) \ + fsck_err(c, "bucket %u:%zu gen %u data type %s has wrong " #_f \ + ": got %u, should be %u", dev, b, \ + dst->b[b].mark.gen, \ + bch2_data_types[dst->b[b].mark.data_type],\ + dst->b[b]._f, src->b[b]._f); \ + dst->b[b]._f = src->b[b]._f; \ + set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags); \ } #define copy_dev_field(_f, _msg, ...) \ copy_field(_f, "dev %u has wrong " _msg, dev, ##__VA_ARGS__) @@ -1193,18 +1207,36 @@ static int bch2_gc_done(struct bch_fs *c, bch2_fs_usage_acc_to_base(c, i); for_each_member_device(ca, c, dev) { - struct bch_dev_usage *dst = ca->usage_base; - struct bch_dev_usage *src = (void *) - bch2_acc_percpu_u64s((void *) ca->usage_gc, - dev_usage_u64s()); - - copy_dev_field(buckets_ec, "buckets_ec"); - copy_dev_field(buckets_unavailable, "buckets_unavailable"); - - for (i = 0; i < BCH_DATA_NR; i++) { - copy_dev_field(d[i].buckets, "%s buckets", bch2_data_types[i]); - copy_dev_field(d[i].sectors, "%s sectors", bch2_data_types[i]); - copy_dev_field(d[i].fragmented, "%s fragmented", bch2_data_types[i]); + struct bucket_array *dst = __bucket_array(ca, 0); + struct bucket_array *src = __bucket_array(ca, 1); + size_t b; + + for (b = 0; b < src->nbuckets; b++) { + copy_bucket_field(_mark.gen); + copy_bucket_field(_mark.data_type); + copy_bucket_field(_mark.stripe); + copy_bucket_field(_mark.dirty_sectors); + copy_bucket_field(_mark.cached_sectors); + copy_bucket_field(stripe_redundancy); + copy_bucket_field(stripe); + + dst->b[b].oldest_gen = src->b[b].oldest_gen; + } + + { + struct bch_dev_usage *dst = ca->usage_base; + struct bch_dev_usage *src = (void *) + bch2_acc_percpu_u64s((void *) ca->usage_gc, + dev_usage_u64s()); + + copy_dev_field(buckets_ec, "buckets_ec"); + copy_dev_field(buckets_unavailable, "buckets_unavailable"); + + for (i = 0; i < BCH_DATA_NR; i++) { + copy_dev_field(d[i].buckets, "%s buckets", bch2_data_types[i]); + copy_dev_field(d[i].sectors, "%s sectors", bch2_data_types[i]); + copy_dev_field(d[i].fragmented, "%s fragmented", bch2_data_types[i]); + } } }; @@ -1246,6 +1278,7 @@ static int bch2_gc_done(struct bch_fs *c, #undef copy_fs_field #undef copy_dev_field +#undef copy_bucket_field #undef copy_stripe_field #undef copy_field fsck_err: @@ -1253,8 +1286,6 @@ fsck_err: percpu_ref_put(&ca->ref); if (ret) bch_err(c, "%s: ret %i", __func__, ret); - - percpu_up_write(&c->mark_lock); return ret; } @@ -1277,6 +1308,15 @@ static int bch2_gc_start(struct bch_fs *c, BUG_ON(ca->buckets[1]); BUG_ON(ca->usage_gc); + ca->buckets[1] = kvpmalloc(sizeof(struct bucket_array) + + ca->mi.nbuckets * sizeof(struct bucket), + GFP_KERNEL|__GFP_ZERO); + if (!ca->buckets[1]) { + percpu_ref_put(&ca->ref); + bch_err(c, "error allocating ca->buckets[gc]"); + return -ENOMEM; + } + ca->usage_gc = alloc_percpu(struct bch_dev_usage); if (!ca->usage_gc) { bch_err(c, "error allocating ca->usage_gc"); @@ -1285,184 +1325,39 @@ static int bch2_gc_start(struct bch_fs *c, } } - return 0; -} - -static int bch2_alloc_write_key(struct btree_trans *trans, - struct btree_iter *iter, - bool initial, bool metadata_only) -{ - struct bch_fs *c = trans->c; - struct bch_dev *ca = bch_dev_bkey_exists(c, iter->pos.inode); - struct bucket *g; - struct bkey_s_c k; - struct bkey_alloc_unpacked old_u, new_u, gc_u; - struct bkey_alloc_buf *a; - int ret; + percpu_down_write(&c->mark_lock); /* - * For this to be correct at runtime, we'll need to figure out a way for - * it to actually lock the key in the btree key cache: + * indicate to stripe code that we need to allocate for the gc stripes + * radix tree, too */ - - if (!initial) { - ret = bch2_btree_key_cache_flush(trans, - BTREE_ID_alloc, iter->pos); - if (ret) - return ret; - } - - k = bch2_btree_iter_peek_slot(iter); - ret = bkey_err(k); - if (ret) - return ret; - - old_u = new_u = bch2_alloc_unpack(k); - - percpu_down_read(&c->mark_lock); - g = gc_bucket(ca, iter->pos.offset); - gc_u = (struct bkey_alloc_unpacked) { - .dev = iter->pos.inode, - .bucket = iter->pos.offset, - .gen = g->mark.gen, - .oldest_gen = g->oldest_gen, - .data_type = g->mark.data_type, - .dirty_sectors = g->mark.dirty_sectors, - .cached_sectors = g->mark.cached_sectors, - .read_time = g->io_time[READ], - .write_time = g->io_time[WRITE], - .stripe = g->stripe, - .stripe_redundancy = g->stripe_redundancy, - }; - percpu_up_read(&c->mark_lock); - - if (metadata_only && - gc_u.data_type != BCH_DATA_sb && - gc_u.data_type != BCH_DATA_journal && - gc_u.data_type != BCH_DATA_btree) - return 0; - - if (!bkey_alloc_unpacked_cmp(old_u, gc_u) || - gen_after(old_u.gen, gc_u.gen)) - return 0; - -#define copy_bucket_field(_f) \ - if (fsck_err_on(new_u._f != gc_u._f, c, \ - "bucket %llu:%llu gen %u data type %s has wrong " #_f \ - ": got %u, should be %u", \ - iter->pos.inode, iter->pos.offset, \ - new_u.gen, \ - bch2_data_types[new_u.data_type], \ - new_u._f, gc_u._f)) \ - new_u._f = gc_u._f; \ - - copy_bucket_field(gen); - copy_bucket_field(data_type); - copy_bucket_field(stripe); - copy_bucket_field(dirty_sectors); - copy_bucket_field(cached_sectors); - copy_bucket_field(stripe_redundancy); - copy_bucket_field(stripe); -#undef copy_bucket_field - - new_u.oldest_gen = gc_u.oldest_gen; - - if (!bkey_alloc_unpacked_cmp(old_u, new_u)) - return 0; - - a = bch2_alloc_pack(trans, new_u); - if (IS_ERR(a)) - return PTR_ERR(a); - - ret = initial - ? bch2_journal_key_insert(c, BTREE_ID_alloc, 0, &a->k) - : bch2_trans_update(trans, iter, &a->k, BTREE_TRIGGER_NORUN); -fsck_err: - return ret; -} - -static int bch2_gc_alloc_done(struct bch_fs *c, bool initial, bool metadata_only) -{ - struct btree_trans trans; - struct btree_iter iter; - struct bkey_s_c k; - struct bch_dev *ca; - unsigned i; - int ret = 0; - - bch2_trans_init(&trans, c, 0, 0); + gc_pos_set(c, gc_phase(GC_PHASE_START)); for_each_member_device(ca, c, i) { - for_each_btree_key(&trans, iter, BTREE_ID_alloc, - POS(ca->dev_idx, ca->mi.first_bucket), - BTREE_ITER_SLOTS| - BTREE_ITER_PREFETCH, k, ret) { - if (bkey_cmp(iter.pos, POS(ca->dev_idx, ca->mi.nbuckets)) >= 0) - break; + struct bucket_array *dst = __bucket_array(ca, 1); + struct bucket_array *src = __bucket_array(ca, 0); + size_t b; - ret = __bch2_trans_do(&trans, NULL, NULL, - BTREE_INSERT_LAZY_RW, - bch2_alloc_write_key(&trans, &iter, - initial, metadata_only)); - if (ret) - break; - } - bch2_trans_iter_exit(&trans, &iter); + dst->first_bucket = src->first_bucket; + dst->nbuckets = src->nbuckets; - if (ret) { - bch_err(c, "error writing alloc info: %i", ret); - percpu_ref_put(&ca->ref); - break; - } - } + for (b = 0; b < src->nbuckets; b++) { + struct bucket *d = &dst->b[b]; + struct bucket *s = &src->b[b]; - bch2_trans_exit(&trans); - return ret; -} - -static int bch2_gc_alloc_start(struct bch_fs *c, bool initial, bool metadata_only) -{ - struct bch_dev *ca; - unsigned i; + d->_mark.gen = dst->b[b].oldest_gen = s->mark.gen; + d->gen_valid = s->gen_valid; - for_each_member_device(ca, c, i) { - struct bucket_array *buckets = kvpmalloc(sizeof(struct bucket_array) + - ca->mi.nbuckets * sizeof(struct bucket), - GFP_KERNEL|__GFP_ZERO); - if (!buckets) { - percpu_ref_put(&ca->ref); - percpu_up_write(&c->mark_lock); - bch_err(c, "error allocating ca->buckets[gc]"); - return -ENOMEM; + if (metadata_only && + (s->mark.data_type == BCH_DATA_user || + s->mark.data_type == BCH_DATA_cached)) + d->_mark = s->mark; } - - buckets->first_bucket = ca->mi.first_bucket; - buckets->nbuckets = ca->mi.nbuckets; - rcu_assign_pointer(ca->buckets[1], buckets); }; - return bch2_alloc_read(c, true, metadata_only); -} - -static void bch2_gc_alloc_reset(struct bch_fs *c, bool initial, bool metadata_only) -{ - struct bch_dev *ca; - unsigned i; - - for_each_member_device(ca, c, i) { - struct bucket_array *buckets = __bucket_array(ca, true); - struct bucket *g; + percpu_up_write(&c->mark_lock); - for_each_bucket(g, buckets) { - if (metadata_only && - (g->mark.data_type == BCH_DATA_user || - g->mark.data_type == BCH_DATA_cached || - g->mark.data_type == BCH_DATA_parity)) - continue; - g->_mark.dirty_sectors = 0; - g->_mark.cached_sectors = 0; - } - }; + return 0; } static int bch2_gc_reflink_done(struct bch_fs *c, bool initial, @@ -1535,55 +1430,6 @@ fsck_err: return ret; } -static void bch2_gc_reflink_reset(struct bch_fs *c, bool initial, - bool metadata_only) -{ - struct genradix_iter iter; - struct reflink_gc *r; - - genradix_for_each(&c->reflink_gc_table, iter, r) - r->refcount = 0; -} - -static int bch2_gc_reflink_start(struct bch_fs *c, bool initial, - bool metadata_only) -{ - struct btree_trans trans; - struct btree_iter iter; - struct bkey_s_c k; - struct reflink_gc *r; - int ret = 0; - - if (metadata_only) - return 0; - - bch2_trans_init(&trans, c, 0, 0); - c->reflink_gc_nr = 0; - - for_each_btree_key(&trans, iter, BTREE_ID_reflink, POS_MIN, - BTREE_ITER_PREFETCH, k, ret) { - const __le64 *refcount = bkey_refcount_c(k); - - if (!refcount) - continue; - - r = genradix_ptr_alloc(&c->reflink_gc_table, c->reflink_gc_nr++, - GFP_KERNEL); - if (!r) { - ret = -ENOMEM; - break; - } - - r->offset = k.k->p.offset; - r->size = k.k->size; - r->refcount = 0; - } - bch2_trans_iter_exit(&trans, &iter); - - bch2_trans_exit(&trans); - return ret; -} - static int bch2_gc_stripes_done(struct bch_fs *c, bool initial, bool metadata_only) { @@ -1647,10 +1493,43 @@ fsck_err: return ret; } -static void bch2_gc_stripes_reset(struct bch_fs *c, bool initial, - bool metadata_only) +static int bch2_gc_reflink_start(struct bch_fs *c, bool initial, + bool metadata_only) { - genradix_free(&c->gc_stripes); + struct btree_trans trans; + struct btree_iter iter; + struct bkey_s_c k; + struct reflink_gc *r; + int ret = 0; + + if (metadata_only) + return 0; + + bch2_trans_init(&trans, c, 0, 0); + c->reflink_gc_nr = 0; + + for_each_btree_key(&trans, iter, BTREE_ID_reflink, POS_MIN, + BTREE_ITER_PREFETCH, k, ret) { + const __le64 *refcount = bkey_refcount_c(k); + + if (!refcount) + continue; + + r = genradix_ptr_alloc(&c->reflink_gc_table, c->reflink_gc_nr++, + GFP_KERNEL); + if (!r) { + ret = -ENOMEM; + break; + } + + r->offset = k.k->p.offset; + r->size = k.k->size; + r->refcount = 0; + } + bch2_trans_iter_exit(&trans, &iter); + + bch2_trans_exit(&trans); + return ret; } /** @@ -1686,14 +1565,11 @@ int bch2_gc(struct bch_fs *c, bool initial, bool metadata_only) /* flush interior btree updates: */ closure_wait_event(&c->btree_interior_update_wait, !bch2_btree_interior_updates_nr_pending(c)); - +again: ret = bch2_gc_start(c, metadata_only) ?: - bch2_gc_alloc_start(c, initial, metadata_only) ?: bch2_gc_reflink_start(c, initial, metadata_only); if (ret) goto out; -again: - gc_pos_set(c, gc_phase(GC_PHASE_START)); bch2_mark_superblocks(c); @@ -1731,40 +1607,40 @@ again: if (test_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags) || (!iter && bch2_test_restart_gc)) { - if (iter++ > 2) { - bch_info(c, "Unable to fix bucket gens, looping"); - ret = -EINVAL; - goto out; - } - /* * XXX: make sure gens we fixed got saved */ - bch_info(c, "Second GC pass needed, restarting:"); - clear_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags); - __gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING)); + if (iter++ <= 2) { + bch_info(c, "Second GC pass needed, restarting:"); + clear_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags); + __gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING)); + + percpu_down_write(&c->mark_lock); + bch2_gc_free(c); + percpu_up_write(&c->mark_lock); + /* flush fsck errors, reset counters */ + bch2_flush_fsck_errs(c); - bch2_gc_stripes_reset(c, initial, metadata_only); - bch2_gc_alloc_reset(c, initial, metadata_only); - bch2_gc_reflink_reset(c, initial, metadata_only); + goto again; + } - /* flush fsck errors, reset counters */ - bch2_flush_fsck_errs(c); - goto again; + bch_info(c, "Unable to fix bucket gens, looping"); + ret = -EINVAL; } out: if (!ret) { bch2_journal_block(&c->journal); - ret = bch2_gc_stripes_done(c, initial, metadata_only) ?: - bch2_gc_reflink_done(c, initial, metadata_only) ?: - bch2_gc_alloc_done(c, initial, metadata_only) ?: + percpu_down_write(&c->mark_lock); + ret = bch2_gc_reflink_done(c, initial, metadata_only) ?: + bch2_gc_stripes_done(c, initial, metadata_only) ?: bch2_gc_done(c, initial, metadata_only); bch2_journal_unblock(&c->journal); + } else { + percpu_down_write(&c->mark_lock); } - percpu_down_write(&c->mark_lock); /* Indicates that gc is no longer in progress: */ __gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING)); |