diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2022-03-13 19:21:13 -0400 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@gmail.com> | 2022-03-13 19:21:13 -0400 |
commit | 3765483ff0cf9abd0243fcafe11aebd0f9beb03d (patch) | |
tree | 232ba4c4c17b2c2579782b01422ae68994c9b5bf /libbcachefs/alloc_foreground.c | |
parent | d34e731082d8fcd710c2af6377a3b7fa927c8451 (diff) |
Update bcachefs sources to f05b3c1af9 bcachefs: Improve bucket_alloc_fail tracepoint
Diffstat (limited to 'libbcachefs/alloc_foreground.c')
-rw-r--r-- | libbcachefs/alloc_foreground.c | 358 |
1 files changed, 287 insertions, 71 deletions
diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c index 9b81ed2..178d7c0 100644 --- a/libbcachefs/alloc_foreground.c +++ b/libbcachefs/alloc_foreground.c @@ -14,13 +14,18 @@ #include "bcachefs.h" #include "alloc_background.h" #include "alloc_foreground.h" +#include "btree_iter.h" +#include "btree_update.h" #include "btree_gc.h" #include "buckets.h" +#include "buckets_waiting_for_journal.h" #include "clock.h" #include "debug.h" #include "disk_groups.h" #include "ec.h" +#include "error.h" #include "io.h" +#include "journal.h" #include <linux/math64.h> #include <linux/rculist.h> @@ -78,7 +83,6 @@ void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob) percpu_down_read(&c->mark_lock); spin_lock(&ob->lock); - bch2_mark_alloc_bucket(c, ca, ob->bucket, false); ob->valid = false; ob->data_type = 0; @@ -178,39 +182,28 @@ static inline unsigned open_buckets_reserved(enum alloc_reserve reserve) } } -/** - * bch_bucket_alloc - allocate a single bucket from a specific device - * - * Returns index of bucket on success, 0 on failure - * */ -struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca, - enum alloc_reserve reserve, - bool may_alloc_partial, - struct closure *cl) +static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *ca, + enum alloc_reserve reserve, + struct bkey_alloc_unpacked a, + size_t *need_journal_commit, + struct closure *cl) { struct open_bucket *ob; - long b = 0; - spin_lock(&c->freelist_lock); + if (unlikely(ca->buckets_nouse && test_bit(a.bucket, ca->buckets_nouse))) + return NULL; - if (may_alloc_partial) { - int i; - - for (i = ca->open_buckets_partial_nr - 1; i >= 0; --i) { - ob = c->open_buckets + ca->open_buckets_partial[i]; - - if (reserve <= ob->alloc_reserve) { - array_remove_item(ca->open_buckets_partial, - ca->open_buckets_partial_nr, - i); - ob->on_partial_list = false; - ob->alloc_reserve = reserve; - spin_unlock(&c->freelist_lock); - return ob; - } - } + if (bch2_bucket_is_open(c, ca->dev_idx, a.bucket)) + return NULL; + + if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal, + c->journal.flushed_seq_ondisk, ca->dev_idx, a.bucket)) { + (*need_journal_commit)++; + return NULL; } + spin_lock(&c->freelist_lock); + if (unlikely(c->open_buckets_nr_free <= open_buckets_reserved(reserve))) { if (cl) closure_wait(&c->open_buckets_wait, cl); @@ -219,36 +212,17 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca, c->blocked_allocate_open_bucket = local_clock(); spin_unlock(&c->freelist_lock); + trace_open_bucket_alloc_fail(ca, reserve); return ERR_PTR(-OPEN_BUCKETS_EMPTY); } - if (likely(fifo_pop(&ca->free[RESERVE_NONE], b))) - goto out; - - switch (reserve) { - case RESERVE_BTREE_MOVINGGC: - case RESERVE_MOVINGGC: - if (fifo_pop(&ca->free[RESERVE_MOVINGGC], b)) - goto out; - break; - default: - break; + /* Recheck under lock: */ + if (bch2_bucket_is_open(c, ca->dev_idx, a.bucket)) { + spin_unlock(&c->freelist_lock); + return NULL; } - if (cl) - closure_wait(&c->freelist_wait, cl); - - if (!c->blocked_allocate) - c->blocked_allocate = local_clock(); - - spin_unlock(&c->freelist_lock); - - trace_bucket_alloc_fail(ca, reserve); - return ERR_PTR(-FREELIST_EMPTY); -out: - verify_not_on_freelist(c, ca, b); - ob = bch2_open_bucket_alloc(c); spin_lock(&ob->lock); @@ -257,8 +231,8 @@ out: ob->sectors_free = ca->mi.bucket_size; ob->alloc_reserve = reserve; ob->dev = ca->dev_idx; - ob->gen = *bucket_gen(ca, b); - ob->bucket = b; + ob->gen = a.gen; + ob->bucket = a.bucket; spin_unlock(&ob->lock); ca->nr_open_buckets++; @@ -280,12 +254,246 @@ out: spin_unlock(&c->freelist_lock); - bch2_wake_allocator(ca); - trace_bucket_alloc(ca, reserve); return ob; } +static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bch_dev *ca, + enum alloc_reserve reserve, u64 free_entry, + size_t *need_journal_commit, + struct closure *cl) +{ + struct bch_fs *c = trans->c; + struct btree_iter iter; + struct bkey_s_c k; + struct open_bucket *ob; + struct bkey_alloc_unpacked a; + u64 b = free_entry & ~(~0ULL << 56); + unsigned genbits = free_entry >> 56; + struct printbuf buf = PRINTBUF; + int ret; + + bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, POS(ca->dev_idx, b), BTREE_ITER_CACHED); + k = bch2_btree_iter_peek_slot(&iter); + ret = bkey_err(k); + if (ret) { + ob = ERR_PTR(ret); + goto err; + } + + a = bch2_alloc_unpack(k); + + if (bch2_fs_inconsistent_on(bucket_state(a) != BUCKET_free, c, + "non free bucket in freespace btree (state %s)\n" + " %s\n" + " at %llu (genbits %u)", + bch2_bucket_states[bucket_state(a)], + (bch2_bkey_val_to_text(&buf, c, k), buf.buf), + free_entry, genbits)) { + ob = ERR_PTR(-EIO); + goto err; + } + + if (bch2_fs_inconsistent_on(genbits != (alloc_freespace_genbits(a) >> 56), c, + "bucket in freespace btree with wrong genbits (got %u should be %llu)\n" + " %s", + genbits, alloc_freespace_genbits(a) >> 56, + (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { + ob = ERR_PTR(-EIO); + goto err; + } + + if (bch2_fs_inconsistent_on(b < ca->mi.first_bucket || b >= ca->mi.nbuckets, c, + "freespace btree has bucket outside allowed range (got %llu, valid %u-%llu)", + b, ca->mi.first_bucket, ca->mi.nbuckets)) { + ob = ERR_PTR(-EIO); + goto err; + } + + ob = __try_alloc_bucket(c, ca, reserve, a, need_journal_commit, cl); +err: + bch2_trans_iter_exit(trans, &iter); + printbuf_exit(&buf); + return ob; +} + +static struct open_bucket *try_alloc_partial_bucket(struct bch_fs *c, struct bch_dev *ca, + enum alloc_reserve reserve) +{ + struct open_bucket *ob; + int i; + + spin_lock(&c->freelist_lock); + + for (i = ca->open_buckets_partial_nr - 1; i >= 0; --i) { + ob = c->open_buckets + ca->open_buckets_partial[i]; + + if (reserve <= ob->alloc_reserve) { + array_remove_item(ca->open_buckets_partial, + ca->open_buckets_partial_nr, + i); + ob->on_partial_list = false; + ob->alloc_reserve = reserve; + spin_unlock(&c->freelist_lock); + return ob; + } + } + + spin_unlock(&c->freelist_lock); + return NULL; +} + +/* + * This path is for before the freespace btree is initialized: + * + * If ca->new_fs_bucket_idx is nonzero, we haven't yet marked superblock & + * journal buckets - journal buckets will be < ca->new_fs_bucket_idx + */ +static noinline struct open_bucket * +bch2_bucket_alloc_trans_early(struct btree_trans *trans, + struct bch_dev *ca, + enum alloc_reserve reserve, + u64 *b, + size_t *need_journal_commit, + struct closure *cl) +{ + struct btree_iter iter; + struct bkey_s_c k; + struct open_bucket *ob = NULL; + int ret; + + *b = max_t(u64, *b, ca->mi.first_bucket); + *b = max_t(u64, *b, ca->new_fs_bucket_idx); + + for_each_btree_key(trans, iter, BTREE_ID_alloc, POS(ca->dev_idx, *b), + BTREE_ITER_SLOTS, k, ret) { + struct bkey_alloc_unpacked a; + + if (bkey_cmp(k.k->p, POS(ca->dev_idx, ca->mi.nbuckets)) >= 0) + break; + + if (ca->new_fs_bucket_idx && + is_superblock_bucket(ca, k.k->p.offset)) + continue; + + a = bch2_alloc_unpack(k); + + if (bucket_state(a) != BUCKET_free) + continue; + + ob = __try_alloc_bucket(trans->c, ca, reserve, a, + need_journal_commit, cl); + if (ob) + break; + } + bch2_trans_iter_exit(trans, &iter); + + *b = iter.pos.offset; + + return ob ?: ERR_PTR(ret ?: -FREELIST_EMPTY); +} + +static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, + struct bch_dev *ca, + enum alloc_reserve reserve, + u64 *b, + size_t *need_journal_commit, + struct closure *cl) +{ + struct btree_iter iter; + struct bkey_s_c k; + struct open_bucket *ob = NULL; + int ret; + + if (unlikely(!ca->mi.freespace_initialized)) + return bch2_bucket_alloc_trans_early(trans, ca, reserve, b, + need_journal_commit, cl); + + BUG_ON(ca->new_fs_bucket_idx); + + for_each_btree_key(trans, iter, BTREE_ID_freespace, + POS(ca->dev_idx, *b), 0, k, ret) { + if (k.k->p.inode != ca->dev_idx) + break; + + for (*b = max(*b, bkey_start_offset(k.k)); + *b != k.k->p.offset && !ob; + (*b)++) { + if (btree_trans_too_many_iters(trans)) { + ob = ERR_PTR(-EINTR); + break; + } + + ob = try_alloc_bucket(trans, ca, reserve, *b, + need_journal_commit, cl); + } + if (ob) + break; + } + bch2_trans_iter_exit(trans, &iter); + + return ob ?: ERR_PTR(ret); +} + +/** + * bch_bucket_alloc - allocate a single bucket from a specific device + * + * Returns index of bucket on success, 0 on failure + * */ +struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca, + enum alloc_reserve reserve, + bool may_alloc_partial, + struct closure *cl) +{ + struct open_bucket *ob = NULL; + size_t need_journal_commit = 0; + u64 avail = dev_buckets_available(ca, reserve); + u64 b = 0; + int ret; + + if (may_alloc_partial) { + ob = try_alloc_partial_bucket(c, ca, reserve); + if (ob) + return ob; + } +again: + if (!avail) { + if (cl) { + closure_wait(&c->freelist_wait, cl); + /* recheck after putting ourself on waitlist */ + avail = dev_buckets_available(ca, reserve); + if (avail) { + closure_wake_up(&c->freelist_wait); + goto again; + } + } + + if (!c->blocked_allocate) + c->blocked_allocate = local_clock(); + + ob = ERR_PTR(-FREELIST_EMPTY); + goto err; + } + + ret = bch2_trans_do(c, NULL, NULL, 0, + PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(&trans, + ca, reserve, &b, + &need_journal_commit, cl))); + + if (need_journal_commit * 2 > avail) + bch2_journal_flush_async(&c->journal, NULL); +err: + if (!ob) + ob = ERR_PTR(ret ?: -FREELIST_EMPTY); + + if (ob == ERR_PTR(-FREELIST_EMPTY)) { + trace_bucket_alloc_fail(ca, reserve, avail, need_journal_commit); + atomic_long_inc(&c->bucket_alloc_fail); + } + + return ob; +} + static int __dev_stripe_cmp(struct dev_stripe_state *stripe, unsigned l, unsigned r) { @@ -313,7 +521,7 @@ void bch2_dev_stripe_increment(struct bch_dev *ca, struct dev_stripe_state *stripe) { u64 *v = stripe->next_alloc + ca->dev_idx; - u64 free_space = dev_buckets_available(ca); + u64 free_space = dev_buckets_available(ca, RESERVE_NONE); u64 free_space_inv = free_space ? div64_u64(1ULL << 48, free_space) : 1ULL << 48; @@ -364,6 +572,7 @@ int bch2_bucket_alloc_set(struct bch_fs *c, { struct dev_alloc_list devs_sorted = bch2_dev_alloc_list(c, stripe, devs_may_alloc); + unsigned dev; struct bch_dev *ca; int ret = -INSUFFICIENT_DEVICES; unsigned i; @@ -373,30 +582,43 @@ int bch2_bucket_alloc_set(struct bch_fs *c, for (i = 0; i < devs_sorted.nr; i++) { struct open_bucket *ob; - ca = rcu_dereference(c->devs[devs_sorted.devs[i]]); + dev = devs_sorted.devs[i]; + + rcu_read_lock(); + ca = rcu_dereference(c->devs[dev]); + if (ca) + percpu_ref_get(&ca->ref); + rcu_read_unlock(); + if (!ca) continue; - if (!ca->mi.durability && *have_cache) + if (!ca->mi.durability && *have_cache) { + percpu_ref_put(&ca->ref); continue; + } ob = bch2_bucket_alloc(c, ca, reserve, flags & BUCKET_MAY_ALLOC_PARTIAL, cl); + if (!IS_ERR(ob)) + bch2_dev_stripe_increment(ca, stripe); + percpu_ref_put(&ca->ref); + if (IS_ERR(ob)) { ret = PTR_ERR(ob); if (cl) - return ret; + break; continue; } add_new_bucket(c, ptrs, devs_may_alloc, nr_effective, have_cache, flags, ob); - bch2_dev_stripe_increment(ca, stripe); - - if (*nr_effective >= nr_replicas) - return 0; + if (*nr_effective >= nr_replicas) { + ret = 0; + break; + } } return ret; @@ -564,9 +786,6 @@ static int open_bucket_add_buckets(struct bch_fs *c, if (*nr_effective >= nr_replicas) return 0; - percpu_down_read(&c->mark_lock); - rcu_read_lock(); - retry_blocking: /* * Try nonblocking first, so that if one device is full we'll try from @@ -580,9 +799,6 @@ retry_blocking: goto retry_blocking; } - rcu_read_unlock(); - percpu_up_read(&c->mark_lock); - return ret; } @@ -863,7 +1079,7 @@ err: case -INSUFFICIENT_DEVICES: return ERR_PTR(-EROFS); default: - BUG(); + return ERR_PTR(ret); } } |