 fs/bcachefs/alloc_foreground.c | 103
 fs/bcachefs/alloc_foreground.h |   5
 fs/bcachefs/ec.c               | 205
 fs/bcachefs/ec.h               |   6
 4 files changed, 180 insertions(+), 139 deletions(-)
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index 04c1c1b592bc..1675f0dfca8a 100644
--- a/fs/bcachefs/alloc_foreground.c
+++ b/fs/bcachefs/alloc_foreground.c
@@ -344,10 +344,9 @@ struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *c,
struct bch_devs_mask *devs)
{
struct dev_alloc_list ret = { .nr = 0 };
- struct bch_dev *ca;
unsigned i;
- for_each_member_device_rcu(ca, c, i, devs)
+ for_each_set_bit(i, devs->d, BCH_SB_MEMBERS_MAX)
ret.devs[ret.nr++] = i;
bubble_sort(ret.devs, ret.nr, dev_stripe_cmp);
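
Note: the hunk above drops the for_each_member_device_rcu() walk (and its struct bch_dev cursor) in favour of iterating the device mask's set bits directly, so the list can be built without dereferencing member devices. A minimal userspace sketch of the same shape, with for_each_set_bit() and bubble_sort() open-coded and free_space[] as a stand-in sort key in place of dev_stripe_cmp:

    /*
     * Sketch of the new bch2_dev_alloc_list() shape: walk the set bits
     * of a device mask directly.  Helpers are open-coded; the sort key
     * is illustrative, not the kernel's dev_stripe_cmp.
     */
    #include <stdio.h>

    #define MAX_DEVS 64

    struct dev_list { unsigned nr; unsigned devs[MAX_DEVS]; };

    static struct dev_list dev_alloc_list(unsigned long long mask,
                                          const unsigned *free_space)
    {
        struct dev_list ret = { .nr = 0 };
        unsigned i, j;

        /* collect every device index present in the mask */
        for (i = 0; i < MAX_DEVS; i++)
            if (mask & (1ULL << i))
                ret.devs[ret.nr++] = i;

        /* bubble sort: devices with the most free space first */
        for (i = 0; i < ret.nr; i++)
            for (j = 0; j + 1 < ret.nr - i; j++)
                if (free_space[ret.devs[j]] < free_space[ret.devs[j + 1]]) {
                    unsigned tmp    = ret.devs[j];
                    ret.devs[j]     = ret.devs[j + 1];
                    ret.devs[j + 1] = tmp;
                }
        return ret;
    }

    int main(void)
    {
        unsigned free_space[MAX_DEVS] = { [0] = 10, [2] = 30, [5] = 20 };
        struct dev_list l = dev_alloc_list(0x25ULL, free_space); /* devs 0, 2, 5 */
        unsigned i;

        for (i = 0; i < l.nr; i++)
            printf("dev %u (free %u)\n", l.devs[i], free_space[l.devs[i]]);
        return 0;
    }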
@@ -396,16 +395,16 @@ static void add_new_bucket(struct bch_fs *c,
ob_push(c, ptrs, ob);
}
-static int bch2_bucket_alloc_set(struct bch_fs *c,
- struct open_buckets *ptrs,
- struct dev_stripe_state *stripe,
- struct bch_devs_mask *devs_may_alloc,
- unsigned nr_replicas,
- unsigned *nr_effective,
- bool *have_cache,
- enum alloc_reserve reserve,
- unsigned flags,
- struct closure *cl)
+int bch2_bucket_alloc_set(struct bch_fs *c,
+ struct open_buckets *ptrs,
+ struct dev_stripe_state *stripe,
+ struct bch_devs_mask *devs_may_alloc,
+ unsigned nr_replicas,
+ unsigned *nr_effective,
+ bool *have_cache,
+ enum alloc_reserve reserve,
+ unsigned flags,
+ struct closure *cl)
{
struct dev_alloc_list devs_sorted =
bch2_dev_alloc_list(c, stripe, devs_may_alloc);
@@ -456,74 +455,6 @@ static int bch2_bucket_alloc_set(struct bch_fs *c,
/* Allocate from stripes: */
/*
- * XXX: use a higher watermark for allocating open buckets here:
- */
-static int ec_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
-{
- struct bch_devs_mask devs;
- struct open_bucket *ob;
- unsigned i, nr_have = 0, nr_data =
- min_t(unsigned, h->nr_active_devs,
- EC_STRIPE_MAX) - h->redundancy;
- bool have_cache = true;
- int ret = 0;
-
- BUG_ON(h->blocks.nr > nr_data);
- BUG_ON(h->parity.nr > h->redundancy);
-
- devs = h->devs;
-
- open_bucket_for_each(c, &h->parity, ob, i)
- __clear_bit(ob->ptr.dev, devs.d);
- open_bucket_for_each(c, &h->blocks, ob, i)
- __clear_bit(ob->ptr.dev, devs.d);
-
- percpu_down_read(&c->mark_lock);
- rcu_read_lock();
-
- if (h->parity.nr < h->redundancy) {
- nr_have = h->parity.nr;
-
- ret = bch2_bucket_alloc_set(c, &h->parity,
- &h->parity_stripe,
- &devs,
- h->redundancy,
- &nr_have,
- &have_cache,
- RESERVE_NONE,
- 0,
- NULL);
- if (ret)
- goto err;
- }
-
- if (h->blocks.nr < nr_data) {
- nr_have = h->blocks.nr;
-
- ret = bch2_bucket_alloc_set(c, &h->blocks,
- &h->block_stripe,
- &devs,
- nr_data,
- &nr_have,
- &have_cache,
- RESERVE_NONE,
- 0,
- NULL);
- if (ret)
- goto err;
- }
-
- rcu_read_unlock();
- percpu_up_read(&c->mark_lock);
-
- return bch2_ec_stripe_new_alloc(c, h);
-err:
- rcu_read_unlock();
- percpu_up_read(&c->mark_lock);
- return -1;
-}
-
-/*
* if we can't allocate a new stripe because there are already too many
* partially filled stripes, force allocating from an existing stripe even when
* it's to a device we don't want:
@@ -555,27 +486,23 @@ static void bucket_alloc_from_stripe(struct bch_fs *c,
if (ec_open_bucket(c, ptrs))
return;
- h = bch2_ec_stripe_head_get(c, target, erasure_code, nr_replicas - 1);
+ h = bch2_ec_stripe_head_get(c, target, 0, nr_replicas - 1);
if (!h)
return;
- if (!h->s && ec_stripe_alloc(c, h))
- goto out_put_head;
-
- rcu_read_lock();
devs_sorted = bch2_dev_alloc_list(c, &wp->stripe, devs_may_alloc);
- rcu_read_unlock();
for (i = 0; i < devs_sorted.nr; i++)
open_bucket_for_each(c, &h->s->blocks, ob, ec_idx)
if (ob->ptr.dev == devs_sorted.devs[i] &&
- !test_and_set_bit(ec_idx, h->s->blocks_allocated))
+ !test_and_set_bit(h->s->data_block_idx[ec_idx],
+ h->s->blocks_allocated))
goto got_bucket;
goto out_put_head;
got_bucket:
ca = bch_dev_bkey_exists(c, ob->ptr.dev);
- ob->ec_idx = ec_idx;
+ ob->ec_idx = h->s->data_block_idx[ec_idx];
ob->ec = h->s;
add_new_bucket(c, ptrs, devs_may_alloc,
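
Note: with partially allocated stripes, position ec_idx in h->s->blocks no longer equals the stripe's data block slot, hence the new data_block_idx[] indirection before test_and_set_bit() claims the slot in blocks_allocated. A userspace sketch of that claim path; plain bools stand in for the atomic bitmap, and all names here are illustrative:

    /*
     * Translate an open bucket's position to its stripe data slot via
     * data_block_idx[] before claiming the slot.  Not the kernel API.
     */
    #include <stdbool.h>
    #include <stdio.h>

    #define NR_BUCKETS 3

    struct stripe_state {
        unsigned data_block_idx[NR_BUCKETS]; /* bucket i -> stripe slot */
        bool     blocks_allocated[8];        /* claimed stripe slots */
    };

    /* claim the stripe slot backing bucket i; false if already taken */
    static bool claim_bucket(struct stripe_state *s, unsigned i)
    {
        unsigned slot = s->data_block_idx[i];

        if (s->blocks_allocated[slot])
            return false;
        s->blocks_allocated[slot] = true;   /* kernel: test_and_set_bit() */
        return true;
    }

    int main(void)
    {
        struct stripe_state s = {
            .data_block_idx   = { 1, 3, 4 },  /* slots 0, 2 already taken */
            .blocks_allocated = { true, false, true, false, false },
        };

        /* two racing claimers of the same bucket: only the first wins */
        printf("bucket 1 -> slot %u: %s\n", s.data_block_idx[1],
               claim_bucket(&s, 1) ? "claimed" : "busy");
        printf("bucket 1 -> slot %u: %s\n", s.data_block_idx[1],
               claim_bucket(&s, 1) ? "claimed" : "busy");
        return 0;
    }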
diff --git a/fs/bcachefs/alloc_foreground.h b/fs/bcachefs/alloc_foreground.h
index 687f973e4b3a..17a6869bb8cd 100644
--- a/fs/bcachefs/alloc_foreground.h
+++ b/fs/bcachefs/alloc_foreground.h
@@ -92,6 +92,11 @@ static inline void bch2_open_bucket_get(struct bch_fs *c,
}
}
+int bch2_bucket_alloc_set(struct bch_fs *, struct open_buckets *,
+ struct dev_stripe_state *, struct bch_devs_mask *,
+ unsigned, unsigned *, bool *, enum alloc_reserve,
+ unsigned, struct closure *);
+
struct write_point *bch2_alloc_sectors_start(struct bch_fs *,
unsigned, unsigned,
struct write_point_specifier,
diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c
index b1084b74778a..8d8683f8b2df 100644
--- a/fs/bcachefs/ec.c
+++ b/fs/bcachefs/ec.c
@@ -200,40 +200,6 @@ static bool extent_has_stripe_ptr(struct bkey_s_c k, u64 idx)
return false;
}
-static void ec_stripe_key_init(struct bch_fs *c,
- struct bkey_i_stripe *s,
- struct open_buckets *blocks,
- struct open_buckets *parity,
- unsigned stripe_size)
-{
- struct open_bucket *ob;
- unsigned i, u64s;
-
- bkey_stripe_init(&s->k_i);
- s->v.sectors = cpu_to_le16(stripe_size);
- s->v.algorithm = 0;
- s->v.nr_blocks = parity->nr + blocks->nr;
- s->v.nr_redundant = parity->nr;
- s->v.csum_granularity_bits = ilog2(c->sb.encoded_extent_max);
- s->v.csum_type = BCH_CSUM_CRC32C;
- s->v.pad = 0;
-
- open_bucket_for_each(c, blocks, ob, i)
- s->v.ptrs[i] = ob->ptr;
-
- open_bucket_for_each(c, parity, ob, i)
- s->v.ptrs[blocks->nr + i] = ob->ptr;
-
- while ((u64s = stripe_val_u64s(&s->v)) > BKEY_VAL_U64s_MAX) {
- BUG_ON(1 << s->v.csum_granularity_bits >=
- le16_to_cpu(s->v.sectors) ||
- s->v.csum_granularity_bits == U8_MAX);
- s->v.csum_granularity_bits++;
- }
-
- set_bkey_val_u64s(&s->k, u64s);
-}
-
/* Checksumming: */
static void ec_generate_checksums(struct ec_stripe_buf *buf)
@@ -866,6 +832,8 @@ static void ec_stripe_create(struct ec_stripe_new *s)
goto err;
}
+ BUG_ON(!s->allocated);
+
if (!percpu_ref_tryget(&c->writes))
goto err;
@@ -953,6 +921,8 @@ static void ec_stripe_set_pending(struct bch_fs *c, struct ec_stripe_head *h)
{
struct ec_stripe_new *s = h->s;
+ BUG_ON(!s->allocated && !s->err);
+
h->s = NULL;
s->pending = true;
@@ -1063,14 +1033,38 @@ static unsigned pick_blocksize(struct bch_fs *c,
return best.size;
}
-int bch2_ec_stripe_new_alloc(struct bch_fs *c, struct ec_stripe_head *h)
+static void ec_stripe_key_init(struct bch_fs *c,
+ struct bkey_i_stripe *s,
+ unsigned nr_data,
+ unsigned nr_parity,
+ unsigned stripe_size)
+{
+ unsigned u64s;
+
+ bkey_stripe_init(&s->k_i);
+ s->v.sectors = cpu_to_le16(stripe_size);
+ s->v.algorithm = 0;
+ s->v.nr_blocks = nr_data + nr_parity;
+ s->v.nr_redundant = nr_parity;
+ s->v.csum_granularity_bits = ilog2(c->sb.encoded_extent_max);
+ s->v.csum_type = BCH_CSUM_CRC32C;
+ s->v.pad = 0;
+
+ while ((u64s = stripe_val_u64s(&s->v)) > BKEY_VAL_U64s_MAX) {
+ BUG_ON(1 << s->v.csum_granularity_bits >=
+ le16_to_cpu(s->v.sectors) ||
+ s->v.csum_granularity_bits == U8_MAX);
+ s->v.csum_granularity_bits++;
+ }
+
+ set_bkey_val_u64s(&s->k, u64s);
+}
+
+static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
{
struct ec_stripe_new *s;
unsigned i;
- BUG_ON(h->parity.nr != h->redundancy);
- BUG_ON(!h->blocks.nr);
- BUG_ON(h->parity.nr + h->blocks.nr > EC_STRIPE_MAX);
lockdep_assert_held(&h->lock);
s = kzalloc(sizeof(*s), GFP_KERNEL);
@@ -1081,11 +1075,9 @@ int bch2_ec_stripe_new_alloc(struct bch_fs *c, struct ec_stripe_head *h)
atomic_set(&s->pin, 1);
s->c = c;
s->h = h;
- s->blocks = h->blocks;
- s->parity = h->parity;
-
- memset(&h->blocks, 0, sizeof(h->blocks));
- memset(&h->parity, 0, sizeof(h->parity));
+ s->nr_data = min_t(unsigned, h->nr_active_devs,
+ EC_STRIPE_MAX) - h->redundancy;
+ s->nr_parity = h->redundancy;
bch2_keylist_init(&s->keys, s->inline_keys);
@@ -1093,9 +1085,8 @@ int bch2_ec_stripe_new_alloc(struct bch_fs *c, struct ec_stripe_head *h)
s->stripe.size = h->blocksize;
memset(s->stripe.valid, 0xFF, sizeof(s->stripe.valid));
- ec_stripe_key_init(c, &s->stripe.key,
- &s->blocks, &s->parity,
- h->blocksize);
+ ec_stripe_key_init(c, &s->stripe.key, s->nr_data,
+ s->nr_parity, h->blocksize);
for (i = 0; i < s->stripe.key.v.nr_blocks; i++) {
s->stripe.data[i] = kvpmalloc(s->stripe.size << 9, GFP_KERNEL);
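
Note: ec_stripe_key_init() now sizes the key from nr_data/nr_parity alone and, as before, widens csum_granularity_bits until the stripe value fits in BKEY_VAL_U64s_MAX. A toy model of that loop; the stripe_val_u64s() cost formula below is an invented approximation, not bcachefs's exact key layout:

    /*
     * Widen checksum granularity until the stripe key value fits.
     * The size formula is illustrative only.
     */
    #include <stdio.h>

    #define BKEY_VAL_U64S_MAX 32

    static unsigned stripe_val_u64s(unsigned nr_blocks, unsigned sectors,
                                    unsigned gran_bits)
    {
        /* one checksum per granule per block, 4 bytes each, plus pointers */
        unsigned granules = (sectors + (1U << gran_bits) - 1) >> gran_bits;
        unsigned bytes    = nr_blocks * 8             /* block pointers   */
                          + nr_blocks * granules * 4; /* crc32c checksums */
        return (bytes + 7) / 8;
    }

    int main(void)
    {
        unsigned nr_blocks = 6, sectors = 256, gran_bits = 0;

        while (stripe_val_u64s(nr_blocks, sectors, gran_bits)
               > BKEY_VAL_U64S_MAX)
            gran_bits++;   /* coarser granules: fewer checksums per block */

        printf("granularity: %u sectors, val: %u u64s\n", 1U << gran_bits,
               stripe_val_u64s(nr_blocks, sectors, gran_bits));
        return 0;
    }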
@@ -1153,6 +1144,7 @@ ec_new_stripe_head_alloc(struct bch_fs *c, unsigned target,
void bch2_ec_stripe_head_put(struct bch_fs *c, struct ec_stripe_head *h)
{
if (h->s &&
+ h->s->allocated &&
bitmap_weight(h->s->blocks_allocated,
h->s->blocks.nr) == h->s->blocks.nr)
ec_stripe_set_pending(c, h);
@@ -1160,7 +1152,7 @@ void bch2_ec_stripe_head_put(struct bch_fs *c, struct ec_stripe_head *h)
mutex_unlock(&h->lock);
}
-struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
+struct ec_stripe_head *__bch2_ec_stripe_head_get(struct bch_fs *c,
unsigned target,
unsigned algo,
unsigned redundancy)
@@ -1185,6 +1177,122 @@ found:
return h;
}
+/*
+ * XXX: use a higher watermark for allocating open buckets here:
+ */
+static int new_stripe_alloc_buckets(struct bch_fs *c, struct ec_stripe_head *h)
+{
+ struct bch_devs_mask devs;
+ struct open_bucket *ob;
+ unsigned i, nr_have, nr_data =
+ min_t(unsigned, h->nr_active_devs,
+ EC_STRIPE_MAX) - h->redundancy;
+ bool have_cache = true;
+ int ret = 0;
+
+ devs = h->devs;
+
+ for_each_set_bit(i, h->s->blocks_allocated, EC_STRIPE_MAX) {
+ __clear_bit(h->s->stripe.key.v.ptrs[i].dev, devs.d);
+ --nr_data;
+ }
+
+ BUG_ON(h->s->blocks.nr > nr_data);
+ BUG_ON(h->s->parity.nr > h->redundancy);
+
+ open_bucket_for_each(c, &h->s->parity, ob, i)
+ __clear_bit(ob->ptr.dev, devs.d);
+ open_bucket_for_each(c, &h->s->blocks, ob, i)
+ __clear_bit(ob->ptr.dev, devs.d);
+
+ percpu_down_read(&c->mark_lock);
+ rcu_read_lock();
+
+ if (h->s->parity.nr < h->redundancy) {
+ nr_have = h->s->parity.nr;
+
+ ret = bch2_bucket_alloc_set(c, &h->s->parity,
+ &h->parity_stripe,
+ &devs,
+ h->redundancy,
+ &nr_have,
+ &have_cache,
+ RESERVE_NONE,
+ 0,
+ NULL);
+ if (ret)
+ goto err;
+ }
+
+ if (h->s->blocks.nr < nr_data) {
+ nr_have = h->s->blocks.nr;
+
+ ret = bch2_bucket_alloc_set(c, &h->s->blocks,
+ &h->block_stripe,
+ &devs,
+ nr_data,
+ &nr_have,
+ &have_cache,
+ RESERVE_NONE,
+ 0,
+ NULL);
+ if (ret)
+ goto err;
+ }
+err:
+ rcu_read_unlock();
+ percpu_up_read(&c->mark_lock);
+ return ret;
+}
+
+struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
+ unsigned target,
+ unsigned algo,
+ unsigned redundancy)
+{
+ struct closure cl;
+ struct ec_stripe_head *h;
+ struct open_bucket *ob;
+ unsigned i, data_idx = 0;
+
+ closure_init_stack(&cl);
+
+ h = __bch2_ec_stripe_head_get(c, target, algo, redundancy);
+ if (!h)
+ return NULL;
+
+ if (!h->s && ec_new_stripe_alloc(c, h)) {
+ bch2_ec_stripe_head_put(c, h);
+ return NULL;
+ }
+
+ if (!h->s->allocated) {
+ if (new_stripe_alloc_buckets(c, h)) {
+ bch2_ec_stripe_head_put(c, h);
+ h = NULL;
+ goto out;
+ }
+
+ open_bucket_for_each(c, &h->s->blocks, ob, i) {
+ data_idx = find_next_zero_bit(h->s->blocks_allocated,
+ h->s->nr_data, data_idx);
+ BUG_ON(data_idx >= h->s->nr_data);
+
+ h->s->stripe.key.v.ptrs[data_idx] = ob->ptr;
+ h->s->data_block_idx[i] = data_idx;
+ data_idx++;
+ }
+
+ open_bucket_for_each(c, &h->s->parity, ob, i)
+ h->s->stripe.key.v.ptrs[h->s->nr_data + i] = ob->ptr;
+
+ h->s->allocated = true;
+ }
+out:
+ closure_sync(&cl);
+ return h;
+}
+
void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca)
{
struct ec_stripe_head *h;
@@ -1195,9 +1303,6 @@ void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca)
list_for_each_entry(h, &c->ec_stripe_head_list, list) {
mutex_lock(&h->lock);
- bch2_open_buckets_stop_dev(c, ca, &h->blocks);
- bch2_open_buckets_stop_dev(c, ca, &h->parity);
-
if (!h->s)
goto unlock;
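
Note: the net effect of the ec.c changes is that bucket allocation moves out of the foreground allocator and into bch2_ec_stripe_head_get() itself, guarded by the new s->allocated flag, so callers receive either NULL or a head whose stripe is fully allocated. A condensed, runnable analogue of that lazy once-only flow; types and helpers are stubs, not the kernel API:

    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct stripe_new  { bool allocated; };
    struct stripe_head { struct stripe_new *s; };

    static int alloc_buckets(struct stripe_new *s)
    {
        (void)s;                /* stub for new_stripe_alloc_buckets() */
        return 0;
    }

    static struct stripe_head *stripe_head_get(struct stripe_head *h)
    {
        if (!h->s) {            /* first user: create the ec_stripe_new */
            h->s = calloc(1, sizeof(*h->s));
            if (!h->s)
                return NULL;
        }

        if (!h->s->allocated) { /* allocate buckets lazily, exactly once */
            if (alloc_buckets(h->s))
                return NULL;    /* kernel: put the head before failing */
            h->s->allocated = true;
        }
        return h;
    }

    int main(void)
    {
        struct stripe_head h = { 0 };

        printf("first get:  %s\n", stripe_head_get(&h) ? "ok" : "fail");
        printf("second get: %s\n", stripe_head_get(&h) ? "ok" : "fail");
        free(h.s);
        return 0;
    }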
diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h
index 6f9354f82656..d7396885792e 100644
--- a/fs/bcachefs/ec.h
+++ b/fs/bcachefs/ec.h
@@ -92,11 +92,15 @@ struct ec_stripe_new {
atomic_t pin;
int err;
- bool pending;
+ u8 nr_data;
+ u8 nr_parity;
+ bool allocated;
+ bool pending;
unsigned long blocks_allocated[BITS_TO_LONGS(EC_STRIPE_MAX)];
struct open_buckets blocks;
+ u8 data_block_idx[EC_STRIPE_MAX];
struct open_buckets parity;
struct keylist keys;
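
Note: data_block_idx[] is filled when the stripe's buckets are allocated (see the bch2_ec_stripe_head_get() hunk in ec.c): each new open bucket takes the next data slot that blocks_allocated leaves clear, so slots surviving from a half-built stripe are skipped. A userspace sketch with find_next_zero_bit() open-coded; sizes and names are illustrative:

    #include <stdbool.h>
    #include <stdio.h>

    #define NR_DATA 6

    int main(void)
    {
        /* slots 1 and 3 survive from a partially allocated stripe */
        bool blocks_allocated[NR_DATA] = { false, true, false, true };
        unsigned nr_new_buckets = 4, data_block_idx[NR_DATA];
        unsigned data_idx = 0, i;

        for (i = 0; i < nr_new_buckets; i++) {
            while (data_idx < NR_DATA && blocks_allocated[data_idx])
                data_idx++;                 /* find_next_zero_bit() */
            data_block_idx[i] = data_idx++;
            printf("bucket %u -> data slot %u\n", i, data_block_idx[i]);
        }
        return 0;
    }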