Diffstat (limited to 'libbcachefs/buckets.c')
-rw-r--r-- | libbcachefs/buckets.c | 371
1 file changed, 213 insertions, 158 deletions
diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c
index 60377630..401ff825 100644
--- a/libbcachefs/buckets.c
+++ b/libbcachefs/buckets.c
@@ -302,7 +302,7 @@ static inline int is_fragmented_bucket(struct bucket_mark m,
 static inline enum bch_data_type bucket_type(struct bucket_mark m)
 {
         return m.cached_sectors && !m.dirty_sectors
-                ? BCH_DATA_CACHED
+                ? BCH_DATA_CACHED
                 : m.data_type;
 }
 
@@ -322,6 +322,8 @@ void bch2_fs_usage_apply(struct bch_fs *c,
         s64 added = sum.data + sum.reserved;
         s64 should_not_have_added;
 
+        percpu_rwsem_assert_held(&c->usage_lock);
+
         /*
          * Not allowed to reduce sectors_available except by getting a
          * reservation:
@@ -338,7 +340,6 @@ void bch2_fs_usage_apply(struct bch_fs *c,
                 stats->online_reserved -= added;
         }
 
-        percpu_down_read_preempt_disable(&c->usage_lock);
         /* online_reserved not subject to gc: */
         this_cpu_ptr(c->usage[0])->online_reserved +=
                 stats->online_reserved;
@@ -350,7 +351,6 @@ void bch2_fs_usage_apply(struct bch_fs *c,
                 bch2_usage_add(this_cpu_ptr(c->usage[1]), stats);
 
         bch2_fs_stats_verify(c);
-        percpu_up_read_preempt_enable(&c->usage_lock);
 
         memset(stats, 0, sizeof(*stats));
 }
@@ -372,14 +372,14 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
 
         dev_usage = this_cpu_ptr(ca->usage[gc]);
 
-        if (bucket_type(old) != bucket_type(new)) {
-                if (bucket_type(old)) {
-                        fs_usage->buckets[bucket_type(old)] -= ca->mi.bucket_size;
-                        dev_usage->buckets[bucket_type(old)]--;
-                } else {
-                        fs_usage->buckets[bucket_type(new)] += ca->mi.bucket_size;
-                        dev_usage->buckets[bucket_type(new)]++;
-                }
+        if (bucket_type(old)) {
+                fs_usage->buckets[bucket_type(old)] -= ca->mi.bucket_size;
+                dev_usage->buckets[bucket_type(old)]--;
+        }
+
+        if (bucket_type(new)) {
+                fs_usage->buckets[bucket_type(new)] += ca->mi.bucket_size;
+                dev_usage->buckets[bucket_type(new)]++;
         }
 
         dev_usage->buckets_alloc +=
@@ -402,11 +402,28 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
         bch2_dev_stats_verify(ca);
 }
 
-#define bucket_data_cmpxchg(c, ca, stats, g, new, expr)                 \
+void bch2_dev_usage_from_buckets(struct bch_fs *c, struct bch_dev *ca)
+{
+        struct bucket_mark old = { .v.counter = 0 };
+        struct bch_fs_usage *fs_usage;
+        struct bucket_array *buckets;
+        struct bucket *g;
+
+        percpu_down_read_preempt_disable(&c->usage_lock);
+        fs_usage = this_cpu_ptr(c->usage[0]);
+        buckets = bucket_array(ca);
+
+        for_each_bucket(g, buckets)
+                if (g->mark.data_type)
+                        bch2_dev_usage_update(c, ca, fs_usage,
+                                              old, g->mark, false);
+        percpu_up_read_preempt_enable(&c->usage_lock);
+}
+
+#define bucket_data_cmpxchg(c, ca, fs_usage, g, new, expr)              \
 ({                                                                      \
         struct bucket_mark _old = bucket_cmpxchg(g, new, expr);        \
                                                                         \
-        bch2_dev_usage_update(c, ca, stats, _old, new, gc);            \
+        bch2_dev_usage_update(c, ca, fs_usage, _old, new, gc);         \
         _old;                                                           \
 })
 
@@ -486,12 +503,12 @@ static void __bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
 {
         struct bch_fs_usage *fs_usage = this_cpu_ptr(c->usage[gc]);
         struct bucket *g = __bucket(ca, b, gc);
-        struct bucket_mark old, new;
+        struct bucket_mark new;
 
         BUG_ON(type != BCH_DATA_SB &&
                type != BCH_DATA_JOURNAL);
 
-        old = bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({
+        bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({
                 new.data_type   = type;
                 checked_add(new.dirty_sectors, sectors);
         }));
@@ -542,7 +559,7 @@ static int __disk_sectors(struct bch_extent_crc_unpacked crc, unsigned sectors)
                                    crc.uncompressed_size));
 }
 
-static s64 ptr_disk_sectors(struct bkey_s_c_extent e,
+static s64 ptr_disk_sectors(const struct bkey *k,
                             struct extent_ptr_decoded p,
                             s64 sectors)
 {
@@ -554,8 +571,8 @@ static s64 ptr_disk_sectors(struct bkey_s_c_extent e,
                 old_sectors = 0;
                 new_sectors = sectors;
         } else {
-                old_sectors = e.k->size;
-                new_sectors = e.k->size + sectors;
+                old_sectors = k->size;
+                new_sectors = k->size + sectors;
         }
 
         sectors = -__disk_sectors(p.crc, old_sectors)
@@ -571,7 +588,6 @@ static s64 ptr_disk_sectors(struct bkey_s_c_extent e,
  * that with the gc pos seqlock held.
  */
 static void bch2_mark_pointer(struct bch_fs *c,
-                              struct bkey_s_c_extent e,
                               struct extent_ptr_decoded p,
                               s64 sectors, enum bch_data_type data_type,
                               struct bch_fs_usage *fs_usage,
@@ -630,23 +646,25 @@ static void bch2_mark_pointer(struct bch_fs *c,
         BUG_ON(!gc && bucket_became_unavailable(old, new));
 }
 
-static void bch2_mark_stripe_ptr(struct bch_fs *c,
-                                 struct bch_extent_stripe_ptr p,
-                                 s64 sectors, unsigned flags,
-                                 s64 *adjusted_disk_sectors,
-                                 unsigned *redundancy)
+static int bch2_mark_stripe_ptr(struct bch_fs *c,
+                                struct bch_extent_stripe_ptr p,
+                                s64 sectors, unsigned flags,
+                                s64 *adjusted_disk_sectors,
+                                unsigned *redundancy,
+                                bool gc)
 {
-        struct ec_stripe *m;
+        struct stripe *m;
         unsigned old, new, nr_data;
         int blocks_nonempty_delta;
         s64 parity_sectors;
 
-        m = genradix_ptr(&c->ec_stripes, p.idx);
-        if (WARN_ON(!m))
-                return;
+        m = genradix_ptr(&c->stripes[gc], p.idx);
 
-        if (WARN_ON(!m->alive))
-                return;
+        if (!m || !m->alive) {
+                bch_err_ratelimited(c, "pointer to nonexistent stripe %llu",
+                                    (u64) p.idx);
+                return -1;
+        }
 
         nr_data = m->nr_blocks - m->nr_redundant;
 
@@ -664,81 +682,74 @@ static void bch2_mark_stripe_ptr(struct bch_fs *c,
         blocks_nonempty_delta = (int) !!new - (int) !!old;
         if (!blocks_nonempty_delta)
-                return;
+                return 0;
 
         atomic_add(blocks_nonempty_delta, &m->blocks_nonempty);
 
         BUG_ON(atomic_read(&m->blocks_nonempty) < 0);
 
-        bch2_stripes_heap_update(c, m, p.idx);
+        if (!gc)
+                bch2_stripes_heap_update(c, m, p.idx);
+
+        return 0;
 }
 
-static void bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
-                             s64 sectors, enum bch_data_type data_type,
-                             struct bch_fs_usage *stats,
-                             u64 journal_seq, unsigned flags,
-                             bool gc)
+static int bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
+                            s64 sectors, enum bch_data_type data_type,
+                            struct bch_fs_usage *stats,
+                            u64 journal_seq, unsigned flags,
+                            bool gc)
 {
+        struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+        const union bch_extent_entry *entry;
+        struct extent_ptr_decoded p;
+        s64 cached_sectors      = 0;
+        s64 dirty_sectors       = 0;
+        s64 ec_sectors          = 0;
+        unsigned replicas       = 0;
+        unsigned ec_redundancy  = 0;
+        unsigned i;
+        int ret;
+
         BUG_ON(!sectors);
 
-        switch (k.k->type) {
-        case BCH_EXTENT:
-        case BCH_EXTENT_CACHED: {
-                struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
-                const union bch_extent_entry *entry;
-                struct extent_ptr_decoded p;
-                s64 cached_sectors      = 0;
-                s64 dirty_sectors       = 0;
-                s64 ec_sectors          = 0;
-                unsigned replicas       = 0;
-                unsigned ec_redundancy  = 0;
-                unsigned i;
-
-                extent_for_each_ptr_decode(e, p, entry) {
-                        s64 disk_sectors = ptr_disk_sectors(e, p, sectors);
-                        s64 adjusted_disk_sectors = disk_sectors;
-
-                        bch2_mark_pointer(c, e, p, disk_sectors, data_type,
-                                          stats, journal_seq, flags, gc);
-
-                        if (!p.ptr.cached)
-                                for (i = 0; i < p.ec_nr; i++)
-                                        bch2_mark_stripe_ptr(c, p.ec[i],
-                                                        disk_sectors, flags,
-                                                        &adjusted_disk_sectors,
-                                                        &ec_redundancy);
-                        if (!p.ptr.cached)
-                                replicas++;
-
-                        if (p.ptr.cached)
-                                cached_sectors += adjusted_disk_sectors;
-                        else if (!p.ec_nr)
-                                dirty_sectors += adjusted_disk_sectors;
-                        else
-                                ec_sectors += adjusted_disk_sectors;
-                }
+        bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+                s64 disk_sectors = ptr_disk_sectors(k.k, p, sectors);
+                s64 adjusted_disk_sectors = disk_sectors;
 
-                replicas        = clamp_t(unsigned, replicas,
-                                          1, ARRAY_SIZE(stats->replicas));
-                ec_redundancy   = clamp_t(unsigned, ec_redundancy,
-                                          1, ARRAY_SIZE(stats->replicas));
+                bch2_mark_pointer(c, p, disk_sectors, data_type,
+                                  stats, journal_seq, flags, gc);
 
-                stats->replicas[0].data[BCH_DATA_CACHED] += cached_sectors;
-                stats->replicas[replicas - 1].data[data_type] += dirty_sectors;
-                stats->replicas[ec_redundancy - 1].ec_data += ec_sectors;
-                break;
+                if (!p.ptr.cached)
+                        for (i = 0; i < p.ec_nr; i++) {
+                                ret = bch2_mark_stripe_ptr(c, p.ec[i],
+                                                disk_sectors, flags,
+                                                &adjusted_disk_sectors,
+                                                &ec_redundancy, gc);
+                                if (ret)
+                                        return ret;
+                        }
+                if (!p.ptr.cached)
+                        replicas++;
+
+                if (p.ptr.cached)
+                        cached_sectors += adjusted_disk_sectors;
+                else if (!p.ec_nr)
+                        dirty_sectors += adjusted_disk_sectors;
+                else
+                        ec_sectors += adjusted_disk_sectors;
         }
-        case BCH_RESERVATION: {
-                unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
 
-                sectors *= replicas;
-                replicas = clamp_t(unsigned, replicas,
-                                   1, ARRAY_SIZE(stats->replicas));
+        replicas        = clamp_t(unsigned, replicas,
+                                  1, ARRAY_SIZE(stats->replicas));
+        ec_redundancy   = clamp_t(unsigned, ec_redundancy,
                                   1, ARRAY_SIZE(stats->replicas));
 
-                stats->replicas[replicas - 1].persistent_reserved += sectors;
-                break;
-        }
-        }
+        stats->replicas[0].data[BCH_DATA_CACHED]        += cached_sectors;
+        stats->replicas[replicas - 1].data[data_type]   += dirty_sectors;
+        stats->replicas[ec_redundancy - 1].ec_data      += ec_sectors;
+
+        return 0;
 }
 
 static void bucket_set_stripe(struct bch_fs *c,
@@ -759,7 +770,7 @@ static void bucket_set_stripe(struct bch_fs *c,
 
                 BUG_ON(ptr_stale(ca, ptr));
 
-                old = bucket_cmpxchg(g, new, ({
+                old = bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({
                         new.stripe                      = enabled;
                         if (journal_seq) {
                                 new.journal_seq_valid   = 1;
@@ -768,103 +779,143 @@ static void bucket_set_stripe(struct bch_fs *c,
                 }));
 
                 BUG_ON(old.stripe == enabled);
-
-                bch2_dev_usage_update(c, ca, fs_usage, old, new, gc);
         }
 }
 
-static void bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k,
-                             bool inserting,
-                             struct bch_fs_usage *fs_usage,
-                             u64 journal_seq, unsigned flags,
-                             bool gc)
+static int bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k,
+                            bool inserting,
+                            struct bch_fs_usage *fs_usage,
+                            u64 journal_seq, unsigned flags,
+                            bool gc)
 {
-        switch (k.k->type) {
-        case BCH_STRIPE: {
-                struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
-                size_t idx = s.k->p.offset;
-                struct ec_stripe *m = genradix_ptr(&c->ec_stripes, idx);
-                unsigned i;
+        struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
+        size_t idx = s.k->p.offset;
+        struct stripe *m = genradix_ptr(&c->stripes[gc], idx);
+        unsigned i;
 
-                BUG_ON(!m);
-                BUG_ON(m->alive == inserting);
+        if (!m || (!inserting && !m->alive)) {
+                bch_err_ratelimited(c, "error marking nonexistent stripe %zu",
+                                    idx);
+                return -1;
+        }
 
-                BUG_ON(atomic_read(&m->blocks_nonempty));
+        if (inserting && m->alive) {
+                bch_err_ratelimited(c, "error marking stripe %zu: already exists",
+                                    idx);
+                return -1;
+        }
 
-                for (i = 0; i < EC_STRIPE_MAX; i++)
-                        BUG_ON(atomic_read(&m->block_sectors[i]));
+        BUG_ON(atomic_read(&m->blocks_nonempty));
 
-                if (inserting) {
-                        m->sectors      = le16_to_cpu(s.v->sectors);
-                        m->algorithm    = s.v->algorithm;
-                        m->nr_blocks    = s.v->nr_blocks;
-                        m->nr_redundant = s.v->nr_redundant;
-                }
+        for (i = 0; i < EC_STRIPE_MAX; i++)
+                BUG_ON(atomic_read(&m->block_sectors[i]));
+
+        if (inserting) {
+                m->sectors      = le16_to_cpu(s.v->sectors);
+                m->algorithm    = s.v->algorithm;
+                m->nr_blocks    = s.v->nr_blocks;
+                m->nr_redundant = s.v->nr_redundant;
+        }
 
+        if (!gc) {
                 if (inserting)
                         bch2_stripes_heap_insert(c, m, idx);
                 else
                         bch2_stripes_heap_del(c, m, idx);
-
-                bucket_set_stripe(c, s.v, inserting, fs_usage, 0, gc);
-                break;
-        }
+        } else {
+                m->alive = inserting;
         }
+
+        bucket_set_stripe(c, s.v, inserting, fs_usage, 0, gc);
+        return 0;
 }
 
-static void __bch2_mark_key(struct bch_fs *c,
-                            enum bkey_type type, struct bkey_s_c k,
-                            bool inserting, s64 sectors,
-                            struct bch_fs_usage *stats,
-                            u64 journal_seq, unsigned flags,
-                            bool gc)
+static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
+                           bool inserting, s64 sectors,
+                           struct bch_fs_usage *stats,
+                           u64 journal_seq, unsigned flags,
+                           bool gc)
 {
-        switch (type) {
-        case BKEY_TYPE_BTREE:
-                bch2_mark_extent(c, k, inserting
-                                 ?  c->opts.btree_node_size
-                                 : -c->opts.btree_node_size,
-                                 BCH_DATA_BTREE,
-                                 stats, journal_seq, flags, gc);
+        int ret = 0;
+
+        switch (k.k->type) {
+        case KEY_TYPE_btree_ptr:
+                ret = bch2_mark_extent(c, k, inserting
+                                       ?  c->opts.btree_node_size
+                                       : -c->opts.btree_node_size,
+                                       BCH_DATA_BTREE,
+                                       stats, journal_seq, flags, gc);
                 break;
-        case BKEY_TYPE_EXTENTS:
-                bch2_mark_extent(c, k, sectors, BCH_DATA_USER,
-                                 stats, journal_seq, flags, gc);
+        case KEY_TYPE_extent:
+                ret = bch2_mark_extent(c, k, sectors, BCH_DATA_USER,
+                                       stats, journal_seq, flags, gc);
                 break;
-        case BKEY_TYPE_EC:
-                bch2_mark_stripe(c, k, inserting,
-                                 stats, journal_seq, flags, gc);
+        case KEY_TYPE_stripe:
+                ret = bch2_mark_stripe(c, k, inserting,
+                                       stats, journal_seq, flags, gc);
+                break;
+        case KEY_TYPE_reservation: {
+                unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
+
+                sectors *= replicas;
+                replicas = clamp_t(unsigned, replicas,
+                                   1, ARRAY_SIZE(stats->replicas));
+
+                stats->replicas[replicas - 1].persistent_reserved += sectors;
                 break;
+        }
         default:
                 break;
         }
+
+        return ret;
 }
 
-void bch2_mark_key(struct bch_fs *c,
-                   enum bkey_type type, struct bkey_s_c k,
+int bch2_mark_key_locked(struct bch_fs *c,
+                   struct bkey_s_c k,
                    bool inserting, s64 sectors,
                    struct gc_pos pos,
                    struct bch_fs_usage *stats,
                    u64 journal_seq, unsigned flags)
 {
-        percpu_down_read_preempt_disable(&c->usage_lock);
+        int ret;
 
         if (!(flags & BCH_BUCKET_MARK_GC)) {
                 if (!stats)
                         stats = this_cpu_ptr(c->usage[0]);
 
-                __bch2_mark_key(c, type, k, inserting, sectors,
-                                stats, journal_seq, flags, false);
+                ret = __bch2_mark_key(c, k, inserting, sectors,
+                                      stats, journal_seq, flags, false);
+                if (ret)
+                        return ret;
         }
 
         if ((flags & BCH_BUCKET_MARK_GC) ||
             gc_visited(c, pos)) {
-                __bch2_mark_key(c, type, k, inserting, sectors,
-                                this_cpu_ptr(c->usage[1]),
-                                journal_seq, flags, true);
+                ret = __bch2_mark_key(c, k, inserting, sectors,
+                                      this_cpu_ptr(c->usage[1]),
+                                      journal_seq, flags, true);
+                if (ret)
+                        return ret;
         }
 
+        return 0;
+}
+
+int bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
+                  bool inserting, s64 sectors,
+                  struct gc_pos pos,
+                  struct bch_fs_usage *stats,
+                  u64 journal_seq, unsigned flags)
+{
+        int ret;
+
+        percpu_down_read_preempt_disable(&c->usage_lock);
+        ret = bch2_mark_key_locked(c, k, inserting, sectors,
+                                   pos, stats, journal_seq, flags);
         percpu_up_read_preempt_enable(&c->usage_lock);
+
+        return ret;
 }
 
@@ -878,15 +929,19 @@ void bch2_mark_update(struct btree_insert *trans,
         struct gc_pos           pos = gc_pos_btree_node(b);
         struct bkey_packed      *_k;
 
+        if (!btree_node_type_needs_gc(iter->btree_id))
+                return;
+
+        percpu_down_read_preempt_disable(&c->usage_lock);
+
         if (!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))
-                bch2_mark_key(c, btree_node_type(b), bkey_i_to_s_c(insert->k),
-                              true,
-                              bpos_min(insert->k->k.p, b->key.k.p).offset -
-                              bkey_start_offset(&insert->k->k),
-                              pos, &stats, trans->journal_res.seq, 0);
+                bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k), true,
+                        bpos_min(insert->k->k.p, b->key.k.p).offset -
+                        bkey_start_offset(&insert->k->k),
+                        pos, &stats, trans->journal_res.seq, 0);
 
         while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, b,
-                                                      KEY_TYPE_DISCARD))) {
+                                                      KEY_TYPE_discard))) {
                 struct bkey             unpacked;
                 struct bkey_s_c         k;
                 s64                     sectors = 0;
@@ -915,9 +970,8 @@ void bch2_mark_update(struct btree_insert *trans,
                         sectors = k.k->p.offset - insert->k->k.p.offset;
                         BUG_ON(sectors <= 0);
 
-                        bch2_mark_key(c, btree_node_type(b), k,
-                                      true, sectors,
-                                      pos, &stats, trans->journal_res.seq, 0);
+                        bch2_mark_key_locked(c, k, true, sectors,
+                                pos, &stats, trans->journal_res.seq, 0);
 
                         sectors = bkey_start_offset(&insert->k->k) -
                                 k.k->p.offset;
@@ -927,14 +981,15 @@ void bch2_mark_update(struct btree_insert *trans,
                         BUG_ON(sectors >= 0);
                 }
 
-                bch2_mark_key(c, btree_node_type(b), k,
-                              false, sectors,
-                              pos, &stats, trans->journal_res.seq, 0);
+                bch2_mark_key_locked(c, k, false, sectors,
+                                pos, &stats, trans->journal_res.seq, 0);
 
                 bch2_btree_node_iter_advance(&node_iter, b);
         }
 
         bch2_fs_usage_apply(c, &stats, trans->disk_res, pos);
+
+        percpu_up_read_preempt_enable(&c->usage_lock);
 }
 
 /* Disk reservations: */
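Note on the bch2_dev_usage_update() hunk: the patch replaces the single if/else that ran only on a bucket-type change with two independent updates, one for the old type and one for the new. The following is a minimal, standalone C sketch (not bcachefs code; usage_update(), enum data_type and the counter array are hypothetical stand-ins for bch2_dev_usage_update(), bucket_type() and the per-type bucket counts) illustrating why the independent updates are needed.

/*
 * Standalone illustration only -- simplified stand-ins, not the patched code.
 */
#include <stdio.h>

enum data_type { TYPE_NONE, TYPE_SB, TYPE_USER, NR_TYPES };

/* New-style update: account for the old and new bucket types independently. */
static void usage_update(unsigned buckets[NR_TYPES],
                         enum data_type old, enum data_type new)
{
        if (old != TYPE_NONE)
                buckets[old]--;
        if (new != TYPE_NONE)
                buckets[new]++;
}

int main(void)
{
        unsigned buckets[NR_TYPES] = { 0 };

        usage_update(buckets, TYPE_NONE, TYPE_SB);      /* bucket first holds superblock data */
        usage_update(buckets, TYPE_SB, TYPE_USER);      /* same bucket later reused for user data */

        /*
         * The replaced code only ran on a type change and then touched a
         * single counter, so a SB -> USER transition decremented the SB
         * count but never incremented USER; handling both sides keeps
         * every per-type count consistent.
         */
        printf("sb=%u user=%u\n", buckets[TYPE_SB], buckets[TYPE_USER]);
        return 0;
}

Compiled and run, this sketch prints sb=0 user=1, which is the accounting the new two-branch update preserves.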