Diffstat (limited to 'libbcachefs/buckets.c')
-rw-r--r--  libbcachefs/buckets.c  284
1 file changed, 189 insertions(+), 95 deletions(-)
diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c
index 6fc93b56..4d55ef51 100644
--- a/libbcachefs/buckets.c
+++ b/libbcachefs/buckets.c
@@ -144,6 +144,7 @@ static inline struct bch_fs_usage *fs_usage_ptr(struct bch_fs *c,
unsigned journal_seq,
bool gc)
{
+ percpu_rwsem_assert_held(&c->mark_lock);
BUG_ON(!gc && !journal_seq);
return this_cpu_ptr(gc
@@ -371,8 +372,6 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
if (!journal_seq && !test_bit(BCH_FS_INITIALIZED, &c->flags))
journal_seq = 1;
- percpu_rwsem_assert_held(&c->mark_lock);
-
preempt_disable();
fs_usage = fs_usage_ptr(c, journal_seq, gc);
u = dev_usage_ptr(ca, journal_seq, gc);
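
The hunks above move the mark_lock assertion out of bch2_dev_usage_update() and into fs_usage_ptr() itself, so the locking rule is enforced for every caller of the accessor rather than at one call site. A minimal sketch of that pattern, with hypothetical names:

	#include <linux/percpu.h>
	#include <linux/percpu-rwsem.h>
	#include <linux/types.h>

	static DEFINE_STATIC_PERCPU_RWSEM(stats_lock);
	static DEFINE_PER_CPU(u64, stats);

	/* The accessor documents and (under lockdep) enforces its own locking rule: */
	static u64 *stats_ptr(void)
	{
		percpu_rwsem_assert_held(&stats_lock);
		return this_cpu_ptr(&stats);
	}
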
@@ -418,25 +417,48 @@ static inline int __update_replicas(struct bch_fs *c,
return 0;
}
-static inline int update_replicas(struct bch_fs *c,
+static inline int update_replicas(struct bch_fs *c, struct bkey_s_c k,
struct bch_replicas_entry *r, s64 sectors,
unsigned journal_seq, bool gc)
{
struct bch_fs_usage __percpu *fs_usage;
- int idx = bch2_replicas_entry_idx(c, r);
+ int idx, ret = 0;
+ char buf[200];
- if (idx < 0)
- return -1;
+ percpu_down_read(&c->mark_lock);
+
+ idx = bch2_replicas_entry_idx(c, r);
+ if (idx < 0 &&
+ (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) ||
+ fsck_err(c, "no replicas entry\n"
+ " while marking %s",
+ (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)))) {
+ percpu_up_read(&c->mark_lock);
+ ret = bch2_mark_replicas(c, r);
+ if (ret)
+ return ret;
+
+ percpu_down_read(&c->mark_lock);
+ idx = bch2_replicas_entry_idx(c, r);
+ }
+ if (idx < 0) {
+ ret = -1;
+ goto err;
+ }
preempt_disable();
fs_usage = fs_usage_ptr(c, journal_seq, gc);
fs_usage_data_type_to_base(fs_usage, r->data_type, sectors);
fs_usage->replicas[idx] += sectors;
preempt_enable();
- return 0;
+err:
+fsck_err:
+ percpu_up_read(&c->mark_lock);
+ return ret;
}
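
update_replicas() now takes mark_lock itself, and when the lookup misses it must drop the read lock before calling bch2_mark_replicas() (which needs write access), then retake the lock and repeat the lookup, because the table may have changed while the lock was dropped. The shape of that drop/retake/recheck pattern, sketched with hypothetical helpers:

	#include <linux/errno.h>
	#include <linux/percpu-rwsem.h>

	struct table {
		struct percpu_rw_semaphore lock;
		/* ... */
	};

	int table_lookup(struct table *t, int key);	/* assumed: read lock held */
	int table_insert(struct table *t, int key);	/* assumed: takes lock for write */

	static int lookup_or_insert(struct table *t, int key)
	{
		int idx, ret = 0;

		percpu_down_read(&t->lock);
		idx = table_lookup(t, key);
		if (idx < 0) {
			percpu_up_read(&t->lock);

			ret = table_insert(t, key);
			if (ret)
				return ret;

			percpu_down_read(&t->lock);
			idx = table_lookup(t, key);	/* must recheck after relocking */
		}
		if (idx < 0)
			ret = -ENOENT;
		percpu_up_read(&t->lock);
		return ret;
	}
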
static inline int update_cached_sectors(struct bch_fs *c,
+ struct bkey_s_c k,
unsigned dev, s64 sectors,
unsigned journal_seq, bool gc)
{
@@ -444,7 +466,7 @@ static inline int update_cached_sectors(struct bch_fs *c,
bch2_replicas_entry_cached(&r.e, dev);
- return update_replicas(c, &r.e, sectors, journal_seq, gc);
+ return update_replicas(c, k, &r.e, sectors, journal_seq, gc);
}
static struct replicas_delta_list *
@@ -547,6 +569,7 @@ static int bch2_mark_alloc(struct btree_trans *trans,
struct bch_dev *ca;
struct bucket *g;
struct bucket_mark old_m, m;
+ int ret = 0;
/* We don't do anything for deletions - do we?: */
if (!bkey_is_alloc(new.k))
@@ -573,6 +596,7 @@ static int bch2_mark_alloc(struct btree_trans *trans,
if (new.k->p.offset >= ca->mi.nbuckets)
return 0;
+ percpu_down_read(&c->mark_lock);
g = __bucket(ca, new.k->p.offset, gc);
u = bch2_alloc_unpack(new);
@@ -597,6 +621,7 @@ static int bch2_mark_alloc(struct btree_trans *trans,
g->gen_valid = 1;
g->stripe = u.stripe;
g->stripe_redundancy = u.stripe_redundancy;
+ percpu_up_read(&c->mark_lock);
/*
* need to know if we're getting called from the invalidate path or
@@ -605,10 +630,12 @@ static int bch2_mark_alloc(struct btree_trans *trans,
if ((flags & BTREE_TRIGGER_BUCKET_INVALIDATE) &&
old_m.cached_sectors) {
- if (update_cached_sectors(c, ca->dev_idx, -old_m.cached_sectors,
- journal_seq, gc)) {
+ ret = update_cached_sectors(c, new, ca->dev_idx,
+ -old_m.cached_sectors,
+ journal_seq, gc);
+ if (ret) {
bch2_fs_fatal_error(c, "bch2_mark_alloc(): no replicas entry while updating cached sectors");
- return -1;
+ return ret;
}
trace_invalidate(ca, bucket_to_sector(ca, new.k->p.offset),
@@ -779,43 +806,57 @@ static int mark_stripe_bucket(struct btree_trans *trans,
const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
unsigned nr_data = s->nr_blocks - s->nr_redundant;
bool parity = ptr_idx >= nr_data;
+ enum bch_data_type data_type = parity ? BCH_DATA_parity : 0;
+ s64 sectors = parity ? le16_to_cpu(s->sectors) : 0;
const struct bch_extent_ptr *ptr = s->ptrs + ptr_idx;
bool gc = flags & BTREE_TRIGGER_GC;
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
- struct bucket *g = PTR_BUCKET(ca, ptr, gc);
+ struct bucket *g;
struct bucket_mark new, old;
char buf[200];
- int ret;
+ int ret = 0;
- if (g->stripe && g->stripe != k.k->p.offset) {
+ /* XXX: doesn't handle deletion */
+
+ percpu_down_read(&c->mark_lock);
+ g = PTR_BUCKET(ca, ptr, gc);
+
+ if (g->mark.dirty_sectors ||
+ (g->stripe && g->stripe != k.k->p.offset)) {
bch2_fs_inconsistent(c,
"bucket %u:%zu gen %u: multiple stripes using same bucket\n%s",
ptr->dev, PTR_BUCKET_NR(ca, ptr), g->mark.gen,
(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf));
- return -EINVAL;
+ ret = -EINVAL;
+ goto err;
}
old = bucket_cmpxchg(g, new, ({
- ret = check_bucket_ref(c, k, ptr, 0, 0, new.gen, new.data_type,
+ ret = check_bucket_ref(c, k, ptr, sectors, data_type,
+ new.gen, new.data_type,
new.dirty_sectors, new.cached_sectors);
if (ret)
- return ret;
+ goto err;
- if (parity) {
- new.data_type = BCH_DATA_parity;
- new.dirty_sectors = le16_to_cpu(s->sectors);
- }
+ new.dirty_sectors += sectors;
+ if (data_type)
+ new.data_type = data_type;
if (journal_seq) {
new.journal_seq_valid = 1;
new.journal_seq = journal_seq;
}
+
+ new.stripe = true;
}));
g->stripe = k.k->p.offset;
g->stripe_redundancy = s->nr_redundant;
bch2_dev_usage_update(c, ca, old, new, journal_seq, gc);
+err:
+ percpu_up_read(&c->mark_lock);
+
return ret;
}
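
The bucket update in mark_stripe_bucket() goes through bucket_cmpxchg(), which recomputes and retries if another CPU changed the mark in the meantime. The loop it expands to looks roughly like this (simplified; the mark layout here is hypothetical):

	#include <linux/atomic.h>
	#include <linux/types.h>

	union mark {
		struct {
			u32 dirty_sectors;
			u32 gen;
		};
		u64 v;
	};

	static union mark mark_add_sectors(atomic64_t *m, u32 sectors)
	{
		union mark old, new;
		u64 v = atomic64_read(m);

		do {
			old.v = new.v = v;
			new.dirty_sectors += sectors;	/* recomputed on every retry */
		} while ((v = atomic64_cmpxchg(m, old.v, new.v)) != old.v);

		return old;
	}
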
@@ -856,7 +897,10 @@ static int bch2_mark_pointer(struct btree_trans *trans,
struct bucket *g = PTR_BUCKET(ca, &p.ptr, gc);
u8 bucket_data_type;
u64 v;
- int ret;
+ int ret = 0;
+
+ percpu_down_read(&c->mark_lock);
+ g = PTR_BUCKET(ca, &p.ptr, gc);
v = atomic64_read(&g->_mark.v);
do {
@@ -869,7 +913,7 @@ static int bch2_mark_pointer(struct btree_trans *trans,
&new.dirty_sectors,
&new.cached_sectors);
if (ret)
- return ret;
+ goto err;
new.data_type = bucket_data_type;
@@ -889,11 +933,14 @@ static int bch2_mark_pointer(struct btree_trans *trans,
bch2_dev_usage_update(c, ca, old, new, journal_seq, gc);
BUG_ON(!gc && bucket_became_unavailable(old, new));
+err:
+ percpu_up_read(&c->mark_lock);
- return 0;
+ return ret;
}
static int bch2_mark_stripe_ptr(struct btree_trans *trans,
+ struct bkey_s_c k,
struct bch_extent_stripe_ptr p,
enum bch_data_type data_type,
s64 sectors,
@@ -933,7 +980,7 @@ static int bch2_mark_stripe_ptr(struct btree_trans *trans,
spin_unlock(&c->ec_stripes_heap_lock);
r.e.data_type = data_type;
- update_replicas(c, &r.e, sectors, trans->journal_res.seq, gc);
+ update_replicas(c, k, &r.e, sectors, trans->journal_res.seq, gc);
return 0;
}
@@ -978,18 +1025,19 @@ static int bch2_mark_extent(struct btree_trans *trans,
stale = ret > 0;
if (p.ptr.cached) {
- if (!stale)
- if (update_cached_sectors(c, p.ptr.dev, disk_sectors,
- journal_seq, gc)) {
+ if (!stale) {
+ ret = update_cached_sectors(c, k, p.ptr.dev,
+ disk_sectors, journal_seq, gc);
+ if (ret) {
bch2_fs_fatal_error(c, "bch2_mark_extent(): no replicas entry while updating cached sectors");
- return -1;
-
+ return ret;
}
+ }
} else if (!p.has_ec) {
dirty_sectors += disk_sectors;
r.e.devs[r.e.nr_devs++] = p.ptr.dev;
} else {
- ret = bch2_mark_stripe_ptr(trans, p.ec, data_type,
+ ret = bch2_mark_stripe_ptr(trans, k, p.ec, data_type,
disk_sectors, flags);
if (ret)
return ret;
@@ -1004,12 +1052,13 @@ static int bch2_mark_extent(struct btree_trans *trans,
}
if (r.e.nr_devs) {
- if (update_replicas(c, &r.e, dirty_sectors, journal_seq, gc)) {
+ ret = update_replicas(c, k, &r.e, dirty_sectors, journal_seq, gc);
+ if (ret) {
char buf[200];
bch2_bkey_val_to_text(&PBUF(buf), c, k);
bch2_fs_fatal_error(c, "no replicas entry for %s", buf);
- return -1;
+ return ret;
}
}
@@ -1079,6 +1128,11 @@ static int bch2_mark_stripe(struct btree_trans *trans,
if (gc) {
/*
+ * This will be wrong when we bring back runtime gc: we should
+ * be unmarking the old key and then marking the new key
+ */
+
+ /*
* gc recalculates this field from stripe ptr
* references:
*/
@@ -1091,14 +1145,15 @@ static int bch2_mark_stripe(struct btree_trans *trans,
return ret;
}
- if (update_replicas(c, &m->r.e,
- ((s64) m->sectors * m->nr_redundant),
- journal_seq, gc)) {
+ ret = update_replicas(c, new, &m->r.e,
+ ((s64) m->sectors * m->nr_redundant),
+ journal_seq, gc);
+ if (ret) {
char buf[200];
bch2_bkey_val_to_text(&PBUF(buf), c, new);
bch2_fs_fatal_error(c, "no replicas entry for %s", buf);
- return -1;
+ return ret;
}
}
@@ -1123,11 +1178,15 @@ static int bch2_mark_inode(struct btree_trans *trans,
}
if (flags & BTREE_TRIGGER_GC) {
+ percpu_down_read(&c->mark_lock);
preempt_disable();
+
fs_usage = fs_usage_ptr(c, journal_seq, flags & BTREE_TRIGGER_GC);
fs_usage->nr_inodes += bkey_is_inode(new.k);
fs_usage->nr_inodes -= bkey_is_inode(old.k);
+
preempt_enable();
+ percpu_up_read(&c->mark_lock);
}
return 0;
}
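
The bch2_mark_inode() hunk above (and bch2_mark_reservation() below) follow the same nesting: fs_usage_ptr() returns a this_cpu pointer, which is only stable while preemption is off, and it now asserts mark_lock, so the critical section is always lock, preempt_disable, update, preempt_enable, unlock. In isolation, with a hypothetical counter:

	#include <linux/percpu.h>
	#include <linux/percpu-rwsem.h>
	#include <linux/preempt.h>
	#include <linux/types.h>

	static DEFINE_STATIC_PERCPU_RWSEM(usage_lock);
	static DEFINE_PER_CPU(s64, nr_inodes);

	static void nr_inodes_add(s64 n)
	{
		percpu_down_read(&usage_lock);
		preempt_disable();	/* this_cpu_ptr() result must not migrate */
		*this_cpu_ptr(&nr_inodes) += n;
		preempt_enable();
		percpu_up_read(&usage_lock);
	}
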
@@ -1146,14 +1205,18 @@ static int bch2_mark_reservation(struct btree_trans *trans,
sectors = -sectors;
sectors *= replicas;
+ percpu_down_read(&c->mark_lock);
preempt_disable();
+
fs_usage = fs_usage_ptr(c, trans->journal_res.seq, flags & BTREE_TRIGGER_GC);
replicas = clamp_t(unsigned, replicas, 1,
ARRAY_SIZE(fs_usage->persistent_reserved));
fs_usage->reserved += sectors;
fs_usage->persistent_reserved[replicas - 1] += sectors;
+
preempt_enable();
+ percpu_up_read(&c->mark_lock);
return 0;
}
@@ -1241,10 +1304,10 @@ static int bch2_mark_reflink_p(struct btree_trans *trans,
return ret;
}
-static int bch2_mark_key_locked(struct btree_trans *trans,
- struct bkey_s_c old,
- struct bkey_s_c new,
- unsigned flags)
+int bch2_mark_key(struct btree_trans *trans,
+ struct bkey_s_c old,
+ struct bkey_s_c new,
+ unsigned flags)
{
struct bkey_s_c k = flags & BTREE_TRIGGER_OVERWRITE ? old: new;
@@ -1274,22 +1337,6 @@ static int bch2_mark_key_locked(struct btree_trans *trans,
}
}
-int bch2_mark_key(struct btree_trans *trans, struct bkey_s_c new, unsigned flags)
-{
- struct bch_fs *c = trans->c;
- struct bkey deleted = KEY(0, 0, 0);
- struct bkey_s_c old = (struct bkey_s_c) { &deleted, NULL };
- int ret;
-
- deleted.p = new.k->p;
-
- percpu_down_read(&c->mark_lock);
- ret = bch2_mark_key_locked(trans, old, new, flags);
- percpu_up_read(&c->mark_lock);
-
- return ret;
-}
-
int bch2_mark_update(struct btree_trans *trans, struct btree_path *path,
struct bkey_i *new, unsigned flags)
{
@@ -1311,12 +1358,12 @@ int bch2_mark_update(struct btree_trans *trans, struct btree_path *path,
if (old.k->type == new->k.type &&
((1U << old.k->type) & BTREE_TRIGGER_WANTS_OLD_AND_NEW)) {
- ret = bch2_mark_key_locked(trans, old, bkey_i_to_s_c(new),
+ ret = bch2_mark_key(trans, old, bkey_i_to_s_c(new),
BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|flags);
} else {
- ret = bch2_mark_key_locked(trans, deleted, bkey_i_to_s_c(new),
+ ret = bch2_mark_key(trans, deleted, bkey_i_to_s_c(new),
BTREE_TRIGGER_INSERT|flags) ?:
- bch2_mark_key_locked(trans, old, deleted,
+ bch2_mark_key(trans, old, deleted,
BTREE_TRIGGER_OVERWRITE|flags);
}
@@ -1359,21 +1406,20 @@ void fs_usage_apply_warn(struct btree_trans *trans,
__WARN();
}
-void bch2_trans_fs_usage_apply(struct btree_trans *trans,
- struct replicas_delta_list *deltas)
+int bch2_trans_fs_usage_apply(struct btree_trans *trans,
+ struct replicas_delta_list *deltas)
{
struct bch_fs *c = trans->c;
static int warned_disk_usage = 0;
bool warn = false;
unsigned disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0;
- struct replicas_delta *d = deltas->d;
+ struct replicas_delta *d = deltas->d, *d2;
struct replicas_delta *top = (void *) deltas->d + deltas->used;
struct bch_fs_usage *dst;
s64 added = 0, should_not_have_added;
unsigned i;
- percpu_rwsem_assert_held(&c->mark_lock);
-
+ percpu_down_read(&c->mark_lock);
preempt_disable();
dst = fs_usage_ptr(c, trans->journal_res.seq, false);
@@ -1385,7 +1431,8 @@ void bch2_trans_fs_usage_apply(struct btree_trans *trans,
added += d->delta;
}
- BUG_ON(__update_replicas(c, dst, &d->r, d->delta));
+ if (__update_replicas(c, dst, &d->r, d->delta))
+ goto need_mark;
}
dst->nr_inodes += deltas->nr_inodes;
@@ -1420,9 +1467,19 @@ void bch2_trans_fs_usage_apply(struct btree_trans *trans,
}
preempt_enable();
+ percpu_up_read(&c->mark_lock);
if (unlikely(warn) && !xchg(&warned_disk_usage, 1))
fs_usage_apply_warn(trans, disk_res_sectors, should_not_have_added);
+ return 0;
+need_mark:
+ /* revert changes: */
+ for (d2 = deltas->d; d2 != d; d2 = replicas_delta_next(d2))
+ BUG_ON(__update_replicas(c, dst, &d2->r, -d2->delta));
+
+ preempt_enable();
+ percpu_up_read(&c->mark_lock);
+ return -1;
}
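
bch2_trans_fs_usage_apply() can now fail partway through the delta list, so the need_mark path above reverts the deltas that were already applied by replaying them with the sign flipped; since each of those applied once, reverting them cannot fail. The unwind pattern, with hypothetical types:

	#include <linux/bug.h>
	#include <linux/types.h>

	struct delta {
		int idx;
		s64 v;
	};

	int apply_one(u64 *dst, int idx, s64 v);	/* assumed: fails only on a stale idx */

	static int apply_all(u64 *dst, struct delta *d, unsigned nr)
	{
		unsigned i, j;

		for (i = 0; i < nr; i++)
			if (apply_one(dst, d[i].idx, d[i].v))
				goto revert;
		return 0;
	revert:
		/* entries 0..i-1 applied successfully, so reverting must succeed: */
		for (j = 0; j < i; j++)
			BUG_ON(apply_one(dst, d[j].idx, -d[j].v));
		return -1;
	}
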
/* trans_mark: */
@@ -1606,50 +1663,75 @@ static int bch2_trans_mark_extent(struct btree_trans *trans,
return 0;
}
-static int bch2_trans_mark_stripe_alloc_ref(struct btree_trans *trans,
- struct bkey_s_c_stripe s,
- unsigned idx, bool deleting)
+static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans,
+ struct bkey_s_c_stripe s,
+ unsigned idx, bool deleting)
{
struct bch_fs *c = trans->c;
const struct bch_extent_ptr *ptr = &s.v->ptrs[idx];
struct bkey_alloc_buf *a;
struct btree_iter iter;
struct bkey_alloc_unpacked u;
- bool parity = idx >= s.v->nr_blocks - s.v->nr_redundant;
+ enum bch_data_type data_type = idx >= s.v->nr_blocks - s.v->nr_redundant
+ ? BCH_DATA_parity : 0;
+ s64 sectors = data_type ? le16_to_cpu(s.v->sectors) : 0;
int ret = 0;
+ if (deleting)
+ sectors = -sectors;
+
a = bch2_trans_start_alloc_update(trans, &iter, ptr, &u);
if (IS_ERR(a))
return PTR_ERR(a);
- if (parity) {
- s64 sectors = le16_to_cpu(s.v->sectors);
-
- if (deleting)
- sectors = -sectors;
-
- u.dirty_sectors += sectors;
- u.data_type = u.dirty_sectors
- ? BCH_DATA_parity
- : 0;
- }
+ ret = check_bucket_ref(c, s.s_c, ptr, sectors, data_type,
+ u.gen, u.data_type,
+ u.dirty_sectors, u.cached_sectors);
+ if (ret)
+ goto err;
if (!deleting) {
- if (bch2_fs_inconsistent_on(u.stripe && u.stripe != s.k->p.offset, c,
- "bucket %llu:%llu gen %u: multiple stripes using same bucket (%u, %llu)",
+ if (bch2_fs_inconsistent_on(u.stripe ||
+ u.stripe_redundancy, c,
+ "bucket %llu:%llu gen %u data type %s dirty_sectors %u: multiple stripes using same bucket (%u, %llu)",
iter.pos.inode, iter.pos.offset, u.gen,
+ bch2_data_types[u.data_type],
+ u.dirty_sectors,
u.stripe, s.k->p.offset)) {
ret = -EIO;
goto err;
}
+ if (bch2_fs_inconsistent_on(data_type && u.dirty_sectors, c,
+ "bucket %llu:%llu gen %u data type %s dirty_sectors %u: data already in stripe bucket %llu",
+ iter.pos.inode, iter.pos.offset, u.gen,
+ bch2_data_types[u.data_type],
+ u.dirty_sectors,
+ s.k->p.offset)) {
+ ret = -EIO;
+ goto err;
+ }
+
u.stripe = s.k->p.offset;
u.stripe_redundancy = s.v->nr_redundant;
} else {
+ if (bch2_fs_inconsistent_on(u.stripe != s.k->p.offset ||
+ u.stripe_redundancy != s.v->nr_redundant, c,
+ "bucket %llu:%llu gen %u: not marked as stripe when deleting stripe %llu (got %u)",
+ iter.pos.inode, iter.pos.offset, u.gen,
+ s.k->p.offset, u.stripe)) {
+ ret = -EIO;
+ goto err;
+ }
+
u.stripe = 0;
u.stripe_redundancy = 0;
}
+ u.dirty_sectors += sectors;
+ if (data_type)
+ u.data_type = !deleting ? data_type : 0;
+
bch2_alloc_pack(c, a, u);
bch2_trans_update(trans, &iter, &a->k, 0);
err:
@@ -1664,7 +1746,7 @@ static int bch2_trans_mark_stripe(struct btree_trans *trans,
struct bkey_s_c_stripe old_s = { .k = NULL };
struct bkey_s_c_stripe new_s = { .k = NULL };
struct bch_replicas_padded r;
- unsigned i;
+ unsigned i, nr_blocks;
int ret = 0;
if (old.k->type == KEY_TYPE_stripe)
@@ -1682,18 +1764,17 @@ static int bch2_trans_mark_stripe(struct btree_trans *trans,
new_s.v->nr_blocks * sizeof(struct bch_extent_ptr)))
return 0;
+ BUG_ON(new_s.k && old_s.k &&
+ (new_s.v->nr_blocks != old_s.v->nr_blocks ||
+ new_s.v->nr_redundant != old_s.v->nr_redundant));
+
+ nr_blocks = new_s.k ? new_s.v->nr_blocks : old_s.v->nr_blocks;
+
if (new_s.k) {
s64 sectors = le16_to_cpu(new_s.v->sectors);
bch2_bkey_to_replicas(&r.e, new);
update_replicas_list(trans, &r.e, sectors * new_s.v->nr_redundant);
-
- for (i = 0; i < new_s.v->nr_blocks; i++) {
- ret = bch2_trans_mark_stripe_alloc_ref(trans, new_s,
- i, false);
- if (ret)
- return ret;
- }
}
if (old_s.k) {
@@ -1701,12 +1782,25 @@ static int bch2_trans_mark_stripe(struct btree_trans *trans,
bch2_bkey_to_replicas(&r.e, old);
update_replicas_list(trans, &r.e, sectors * old_s.v->nr_redundant);
+ }
- for (i = 0; i < old_s.v->nr_blocks; i++) {
- ret = bch2_trans_mark_stripe_alloc_ref(trans, old_s,
- i, true);
+ for (i = 0; i < nr_blocks; i++) {
+ if (new_s.k && old_s.k &&
+ !memcmp(&new_s.v->ptrs[i],
+ &old_s.v->ptrs[i],
+ sizeof(new_s.v->ptrs[i])))
+ continue;
+
+ if (new_s.k) {
+ ret = bch2_trans_mark_stripe_bucket(trans, new_s, i, false);
if (ret)
- return ret;
+ break;
+ }
+
+ if (old_s.k) {
+ ret = bch2_trans_mark_stripe_bucket(trans, old_s, i, true);
+ if (ret)
+ break;
}
}
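
The rewritten loop walks old and new stripe blocks in a single pass and skips any block whose pointer is unchanged, so only buckets that actually moved get their alloc keys rewritten. Roughly, with hypothetical types:

	#include <linux/string.h>
	#include <linux/types.h>

	struct blk {
		u64 dev;
		u64 offset;
	};

	int mark_blk(const struct blk *b, bool deleting);	/* assumed helper */

	static int mark_changed_blks(const struct blk *new, const struct blk *old,
				     unsigned nr)
	{
		unsigned i;
		int ret = 0;

		for (i = 0; i < nr && !ret; i++) {
			if (new && old && !memcmp(&new[i], &old[i], sizeof(new[i])))
				continue;	/* unchanged: nothing to re-mark */

			if (new)
				ret = mark_blk(&new[i], false);
			if (!ret && old)
				ret = mark_blk(&old[i], true);
		}
		return ret;
	}
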