author     Kent Overstreet <kent.overstreet@gmail.com>   2022-03-13 19:21:13 -0400
committer  Kent Overstreet <kent.overstreet@gmail.com>   2022-03-13 19:21:13 -0400
commit     3765483ff0cf9abd0243fcafe11aebd0f9beb03d
tree       232ba4c4c17b2c2579782b01422ae68994c9b5bf /libbcachefs/alloc_foreground.c
parent     d34e731082d8fcd710c2af6377a3b7fa927c8451
Update bcachefs sources to f05b3c1af9 bcachefs: Improve bucket_alloc_fail tracepoint
Diffstat (limited to 'libbcachefs/alloc_foreground.c')
 libbcachefs/alloc_foreground.c | 358
 1 file changed, 287 insertions(+), 71 deletions(-)
diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c
index 9b81ed2..178d7c0 100644
--- a/libbcachefs/alloc_foreground.c
+++ b/libbcachefs/alloc_foreground.c
@@ -14,13 +14,18 @@
#include "bcachefs.h"
#include "alloc_background.h"
#include "alloc_foreground.h"
+#include "btree_iter.h"
+#include "btree_update.h"
#include "btree_gc.h"
#include "buckets.h"
+#include "buckets_waiting_for_journal.h"
#include "clock.h"
#include "debug.h"
#include "disk_groups.h"
#include "ec.h"
+#include "error.h"
#include "io.h"
+#include "journal.h"
#include <linux/math64.h>
#include <linux/rculist.h>
@@ -78,7 +83,6 @@ void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
percpu_down_read(&c->mark_lock);
spin_lock(&ob->lock);
- bch2_mark_alloc_bucket(c, ca, ob->bucket, false);
ob->valid = false;
ob->data_type = 0;
@@ -178,39 +182,28 @@ static inline unsigned open_buckets_reserved(enum alloc_reserve reserve)
}
}
-/**
- * bch_bucket_alloc - allocate a single bucket from a specific device
- *
- * Returns index of bucket on success, 0 on failure
- * */
-struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
- enum alloc_reserve reserve,
- bool may_alloc_partial,
- struct closure *cl)
+static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
+ enum alloc_reserve reserve,
+ struct bkey_alloc_unpacked a,
+ size_t *need_journal_commit,
+ struct closure *cl)
{
struct open_bucket *ob;
- long b = 0;
- spin_lock(&c->freelist_lock);
+ if (unlikely(ca->buckets_nouse && test_bit(a.bucket, ca->buckets_nouse)))
+ return NULL;
- if (may_alloc_partial) {
- int i;
-
- for (i = ca->open_buckets_partial_nr - 1; i >= 0; --i) {
- ob = c->open_buckets + ca->open_buckets_partial[i];
-
- if (reserve <= ob->alloc_reserve) {
- array_remove_item(ca->open_buckets_partial,
- ca->open_buckets_partial_nr,
- i);
- ob->on_partial_list = false;
- ob->alloc_reserve = reserve;
- spin_unlock(&c->freelist_lock);
- return ob;
- }
- }
+ if (bch2_bucket_is_open(c, ca->dev_idx, a.bucket))
+ return NULL;
+
+ if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
+ c->journal.flushed_seq_ondisk, ca->dev_idx, a.bucket)) {
+ (*need_journal_commit)++;
+ return NULL;
}
+ spin_lock(&c->freelist_lock);
+
if (unlikely(c->open_buckets_nr_free <= open_buckets_reserved(reserve))) {
if (cl)
closure_wait(&c->open_buckets_wait, cl);
@@ -219,36 +212,17 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
c->blocked_allocate_open_bucket = local_clock();
spin_unlock(&c->freelist_lock);
+
trace_open_bucket_alloc_fail(ca, reserve);
return ERR_PTR(-OPEN_BUCKETS_EMPTY);
}
- if (likely(fifo_pop(&ca->free[RESERVE_NONE], b)))
- goto out;
-
- switch (reserve) {
- case RESERVE_BTREE_MOVINGGC:
- case RESERVE_MOVINGGC:
- if (fifo_pop(&ca->free[RESERVE_MOVINGGC], b))
- goto out;
- break;
- default:
- break;
+ /* Recheck under lock: */
+ if (bch2_bucket_is_open(c, ca->dev_idx, a.bucket)) {
+ spin_unlock(&c->freelist_lock);
+ return NULL;
}
- if (cl)
- closure_wait(&c->freelist_wait, cl);
-
- if (!c->blocked_allocate)
- c->blocked_allocate = local_clock();
-
- spin_unlock(&c->freelist_lock);
-
- trace_bucket_alloc_fail(ca, reserve);
- return ERR_PTR(-FREELIST_EMPTY);
-out:
- verify_not_on_freelist(c, ca, b);
-
ob = bch2_open_bucket_alloc(c);
spin_lock(&ob->lock);
@@ -257,8 +231,8 @@ out:
ob->sectors_free = ca->mi.bucket_size;
ob->alloc_reserve = reserve;
ob->dev = ca->dev_idx;
- ob->gen = *bucket_gen(ca, b);
- ob->bucket = b;
+ ob->gen = a.gen;
+ ob->bucket = a.bucket;
spin_unlock(&ob->lock);
ca->nr_open_buckets++;
@@ -280,12 +254,246 @@ out:
spin_unlock(&c->freelist_lock);
- bch2_wake_allocator(ca);
-
trace_bucket_alloc(ca, reserve);
return ob;
}
+static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bch_dev *ca,
+ enum alloc_reserve reserve, u64 free_entry,
+ size_t *need_journal_commit,
+ struct closure *cl)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ struct open_bucket *ob;
+ struct bkey_alloc_unpacked a;
+ u64 b = free_entry & ~(~0ULL << 56);
+ unsigned genbits = free_entry >> 56;
+ struct printbuf buf = PRINTBUF;
+ int ret;
+
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, POS(ca->dev_idx, b), BTREE_ITER_CACHED);
+ k = bch2_btree_iter_peek_slot(&iter);
+ ret = bkey_err(k);
+ if (ret) {
+ ob = ERR_PTR(ret);
+ goto err;
+ }
+
+ a = bch2_alloc_unpack(k);
+
+ if (bch2_fs_inconsistent_on(bucket_state(a) != BUCKET_free, c,
+ "non free bucket in freespace btree (state %s)\n"
+ " %s\n"
+ " at %llu (genbits %u)",
+ bch2_bucket_states[bucket_state(a)],
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf),
+ free_entry, genbits)) {
+ ob = ERR_PTR(-EIO);
+ goto err;
+ }
+
+ if (bch2_fs_inconsistent_on(genbits != (alloc_freespace_genbits(a) >> 56), c,
+ "bucket in freespace btree with wrong genbits (got %u should be %llu)\n"
+ " %s",
+ genbits, alloc_freespace_genbits(a) >> 56,
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
+ ob = ERR_PTR(-EIO);
+ goto err;
+ }
+
+ if (bch2_fs_inconsistent_on(b < ca->mi.first_bucket || b >= ca->mi.nbuckets, c,
+ "freespace btree has bucket outside allowed range (got %llu, valid %u-%llu)",
+ b, ca->mi.first_bucket, ca->mi.nbuckets)) {
+ ob = ERR_PTR(-EIO);
+ goto err;
+ }
+
+ ob = __try_alloc_bucket(c, ca, reserve, a, need_journal_commit, cl);
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ printbuf_exit(&buf);
+ return ob;
+}
+
+static struct open_bucket *try_alloc_partial_bucket(struct bch_fs *c, struct bch_dev *ca,
+ enum alloc_reserve reserve)
+{
+ struct open_bucket *ob;
+ int i;
+
+ spin_lock(&c->freelist_lock);
+
+ for (i = ca->open_buckets_partial_nr - 1; i >= 0; --i) {
+ ob = c->open_buckets + ca->open_buckets_partial[i];
+
+ if (reserve <= ob->alloc_reserve) {
+ array_remove_item(ca->open_buckets_partial,
+ ca->open_buckets_partial_nr,
+ i);
+ ob->on_partial_list = false;
+ ob->alloc_reserve = reserve;
+ spin_unlock(&c->freelist_lock);
+ return ob;
+ }
+ }
+
+ spin_unlock(&c->freelist_lock);
+ return NULL;
+}
+
+/*
+ * This path is for before the freespace btree is initialized:
+ *
+ * If ca->new_fs_bucket_idx is nonzero, we haven't yet marked superblock &
+ * journal buckets - journal buckets will be < ca->new_fs_bucket_idx
+ */
+static noinline struct open_bucket *
+bch2_bucket_alloc_trans_early(struct btree_trans *trans,
+ struct bch_dev *ca,
+ enum alloc_reserve reserve,
+ u64 *b,
+ size_t *need_journal_commit,
+ struct closure *cl)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ struct open_bucket *ob = NULL;
+ int ret;
+
+ *b = max_t(u64, *b, ca->mi.first_bucket);
+ *b = max_t(u64, *b, ca->new_fs_bucket_idx);
+
+ for_each_btree_key(trans, iter, BTREE_ID_alloc, POS(ca->dev_idx, *b),
+ BTREE_ITER_SLOTS, k, ret) {
+ struct bkey_alloc_unpacked a;
+
+ if (bkey_cmp(k.k->p, POS(ca->dev_idx, ca->mi.nbuckets)) >= 0)
+ break;
+
+ if (ca->new_fs_bucket_idx &&
+ is_superblock_bucket(ca, k.k->p.offset))
+ continue;
+
+ a = bch2_alloc_unpack(k);
+
+ if (bucket_state(a) != BUCKET_free)
+ continue;
+
+ ob = __try_alloc_bucket(trans->c, ca, reserve, a,
+ need_journal_commit, cl);
+ if (ob)
+ break;
+ }
+ bch2_trans_iter_exit(trans, &iter);
+
+ *b = iter.pos.offset;
+
+ return ob ?: ERR_PTR(ret ?: -FREELIST_EMPTY);
+}
+
+static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
+ struct bch_dev *ca,
+ enum alloc_reserve reserve,
+ u64 *b,
+ size_t *need_journal_commit,
+ struct closure *cl)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ struct open_bucket *ob = NULL;
+ int ret;
+
+ if (unlikely(!ca->mi.freespace_initialized))
+ return bch2_bucket_alloc_trans_early(trans, ca, reserve, b,
+ need_journal_commit, cl);
+
+ BUG_ON(ca->new_fs_bucket_idx);
+
+ for_each_btree_key(trans, iter, BTREE_ID_freespace,
+ POS(ca->dev_idx, *b), 0, k, ret) {
+ if (k.k->p.inode != ca->dev_idx)
+ break;
+
+ for (*b = max(*b, bkey_start_offset(k.k));
+ *b != k.k->p.offset && !ob;
+ (*b)++) {
+ if (btree_trans_too_many_iters(trans)) {
+ ob = ERR_PTR(-EINTR);
+ break;
+ }
+
+ ob = try_alloc_bucket(trans, ca, reserve, *b,
+ need_journal_commit, cl);
+ }
+ if (ob)
+ break;
+ }
+ bch2_trans_iter_exit(trans, &iter);
+
+ return ob ?: ERR_PTR(ret);
+}
+
+/**
+ * bch_bucket_alloc - allocate a single bucket from a specific device
+ *
+ * Returns index of bucket on success, 0 on failure
+ * */
+struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
+ enum alloc_reserve reserve,
+ bool may_alloc_partial,
+ struct closure *cl)
+{
+ struct open_bucket *ob = NULL;
+ size_t need_journal_commit = 0;
+ u64 avail = dev_buckets_available(ca, reserve);
+ u64 b = 0;
+ int ret;
+
+ if (may_alloc_partial) {
+ ob = try_alloc_partial_bucket(c, ca, reserve);
+ if (ob)
+ return ob;
+ }
+again:
+ if (!avail) {
+ if (cl) {
+ closure_wait(&c->freelist_wait, cl);
+ /* recheck after putting ourself on waitlist */
+ avail = dev_buckets_available(ca, reserve);
+ if (avail) {
+ closure_wake_up(&c->freelist_wait);
+ goto again;
+ }
+ }
+
+ if (!c->blocked_allocate)
+ c->blocked_allocate = local_clock();
+
+ ob = ERR_PTR(-FREELIST_EMPTY);
+ goto err;
+ }
+
+ ret = bch2_trans_do(c, NULL, NULL, 0,
+ PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(&trans,
+ ca, reserve, &b,
+ &need_journal_commit, cl)));
+
+ if (need_journal_commit * 2 > avail)
+ bch2_journal_flush_async(&c->journal, NULL);
+err:
+ if (!ob)
+ ob = ERR_PTR(ret ?: -FREELIST_EMPTY);
+
+ if (ob == ERR_PTR(-FREELIST_EMPTY)) {
+ trace_bucket_alloc_fail(ca, reserve, avail, need_journal_commit);
+ atomic_long_inc(&c->bucket_alloc_fail);
+ }
+
+ return ob;
+}
+
static int __dev_stripe_cmp(struct dev_stripe_state *stripe,
unsigned l, unsigned r)
{
@@ -313,7 +521,7 @@ void bch2_dev_stripe_increment(struct bch_dev *ca,
struct dev_stripe_state *stripe)
{
u64 *v = stripe->next_alloc + ca->dev_idx;
- u64 free_space = dev_buckets_available(ca);
+ u64 free_space = dev_buckets_available(ca, RESERVE_NONE);
u64 free_space_inv = free_space
? div64_u64(1ULL << 48, free_space)
: 1ULL << 48;
@@ -364,6 +572,7 @@ int bch2_bucket_alloc_set(struct bch_fs *c,
{
struct dev_alloc_list devs_sorted =
bch2_dev_alloc_list(c, stripe, devs_may_alloc);
+ unsigned dev;
struct bch_dev *ca;
int ret = -INSUFFICIENT_DEVICES;
unsigned i;
@@ -373,30 +582,43 @@ int bch2_bucket_alloc_set(struct bch_fs *c,
for (i = 0; i < devs_sorted.nr; i++) {
struct open_bucket *ob;
- ca = rcu_dereference(c->devs[devs_sorted.devs[i]]);
+ dev = devs_sorted.devs[i];
+
+ rcu_read_lock();
+ ca = rcu_dereference(c->devs[dev]);
+ if (ca)
+ percpu_ref_get(&ca->ref);
+ rcu_read_unlock();
+
if (!ca)
continue;
- if (!ca->mi.durability && *have_cache)
+ if (!ca->mi.durability && *have_cache) {
+ percpu_ref_put(&ca->ref);
continue;
+ }
ob = bch2_bucket_alloc(c, ca, reserve,
flags & BUCKET_MAY_ALLOC_PARTIAL, cl);
+ if (!IS_ERR(ob))
+ bch2_dev_stripe_increment(ca, stripe);
+ percpu_ref_put(&ca->ref);
+
if (IS_ERR(ob)) {
ret = PTR_ERR(ob);
if (cl)
- return ret;
+ break;
continue;
}
add_new_bucket(c, ptrs, devs_may_alloc,
nr_effective, have_cache, flags, ob);
- bch2_dev_stripe_increment(ca, stripe);
-
- if (*nr_effective >= nr_replicas)
- return 0;
+ if (*nr_effective >= nr_replicas) {
+ ret = 0;
+ break;
+ }
}
return ret;
@@ -564,9 +786,6 @@ static int open_bucket_add_buckets(struct bch_fs *c,
if (*nr_effective >= nr_replicas)
return 0;
- percpu_down_read(&c->mark_lock);
- rcu_read_lock();
-
retry_blocking:
/*
* Try nonblocking first, so that if one device is full we'll try from
@@ -580,9 +799,6 @@ retry_blocking:
goto retry_blocking;
}
- rcu_read_unlock();
- percpu_up_read(&c->mark_lock);
-
return ret;
}
@@ -863,7 +1079,7 @@ err:
case -INSUFFICIENT_DEVICES:
return ERR_PTR(-EROFS);
default:
- BUG();
+ return ERR_PTR(ret);
}
}