Diffstat (limited to 'fs/bcachefs/buckets.c')
-rw-r--r--  fs/bcachefs/buckets.c | 448
1 file changed, 291 insertions(+), 157 deletions(-)
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index b6b3ac5111ca..6a4773a92029 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -405,7 +405,8 @@ int bch2_fs_usage_apply(struct bch_fs *c,
*/
should_not_have_added = added - (s64) (disk_res ? disk_res->sectors : 0);
if (WARN_ONCE(should_not_have_added > 0,
- "disk usage increased without a reservation")) {
+ "disk usage increased by %lli without a reservation",
+ should_not_have_added)) {
atomic64_sub(should_not_have_added, &c->sectors_available);
added -= should_not_have_added;
ret = -1;
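
For intuition, a minimal standalone model of this guard, with hypothetical numbers and simplified types (the real code operates on struct bch_fs and struct disk_reservation):

#include <stdint.h>
#include <stdio.h>

/* Toy model: usage growth beyond what the caller reserved is
 * clawed back from the global sector pool, as atomic64_sub()
 * does against c->sectors_available in the real code. */
static int64_t sectors_available = 1024;

static int fs_usage_apply(int64_t added, int64_t reserved)
{
	int64_t should_not_have_added = added - reserved;

	if (should_not_have_added > 0) {
		sectors_available -= should_not_have_added;
		added -= should_not_have_added;
		return -1;
	}
	return 0;
}

int main(void)
{
	fs_usage_apply(24, 16);	/* 8 sectors weren't reserved */
	printf("%lld\n", (long long) sectors_available);	/* prints 1016 */
	return 0;
}
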
@@ -444,12 +445,6 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
percpu_rwsem_assert_held(&c->mark_lock);
- bch2_fs_inconsistent_on(old.data_type && new.data_type &&
- old.data_type != new.data_type, c,
- "different types of data in same bucket: %s, %s",
- bch2_data_types[old.data_type],
- bch2_data_types[new.data_type]);
-
preempt_disable();
dev_usage = this_cpu_ptr(ca->usage[gc]);
@@ -504,14 +499,6 @@ void bch2_dev_usage_from_buckets(struct bch_fs *c)
}
}
-#define bucket_data_cmpxchg(c, ca, fs_usage, g, new, expr) \
-({ \
- struct bucket_mark _old = bucket_cmpxchg(g, new, expr); \
- \
- bch2_dev_usage_update(c, ca, fs_usage, _old, new, gc); \
- _old; \
-})
-
static inline void update_replicas(struct bch_fs *c,
struct bch_fs_usage *fs_usage,
struct bch_replicas_entry *r,
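
With the bucket_data_cmpxchg() wrapper deleted, the callers below invoke bucket_cmpxchg() and then bch2_dev_usage_update() explicitly. For reference, bucket_cmpxchg() in buckets.h is the usual compare-exchange retry loop over the packed bucket mark; reproduced from memory, so treat as a sketch:

#define bucket_cmpxchg(g, new, expr)				\
({								\
	struct bucket_mark _old;				\
	u64 _v = atomic64_read(&(g)->_mark.v);			\
								\
	do {							\
		(new).v.counter = _old.v.counter = _v;		\
		expr;	/* caller mutates @new here */		\
	} while ((_v = atomic64_cmpxchg(&(g)->_mark.v,		\
				_old.v.counter,			\
				(new).v.counter)) != _old.v.counter);\
	_old;							\
})
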
@@ -520,7 +507,6 @@ static inline void update_replicas(struct bch_fs *c,
int idx = bch2_replicas_entry_idx(c, r);
BUG_ON(idx < 0);
- BUG_ON(!sectors);
switch (r->data_type) {
case BCH_DATA_BTREE:
@@ -569,8 +555,12 @@ static inline void update_replicas_list(struct btree_trans *trans,
{
struct replicas_delta_list *d;
struct replicas_delta *n;
- unsigned b = replicas_entry_bytes(r) + 8;
+ unsigned b;
+
+ if (!sectors)
+ return;
+ b = replicas_entry_bytes(r) + 8;
d = replicas_deltas_realloc(trans, b);
n = (void *) d->d + d->used;
@@ -629,17 +619,18 @@ static int __bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
struct bucket *g = __bucket(ca, b, gc);
struct bucket_mark old, new;
- old = bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({
+ old = bucket_cmpxchg(g, new, ({
BUG_ON(!is_available_bucket(new));
new.owned_by_allocator = true;
- new.dirty = true;
new.data_type = 0;
new.cached_sectors = 0;
new.dirty_sectors = 0;
new.gen++;
}));
+ bch2_dev_usage_update(c, ca, fs_usage, old, new, gc);
+
if (old.cached_sectors)
update_cached_sectors(c, fs_usage, ca->dev_idx,
-((s64) old.cached_sectors));
@@ -668,10 +659,12 @@ static int __bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
struct bucket *g = __bucket(ca, b, gc);
struct bucket_mark old, new;
- old = bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({
+ old = bucket_cmpxchg(g, new, ({
new.owned_by_allocator = owned_by_allocator;
}));
+ bch2_dev_usage_update(c, ca, fs_usage, old, new, gc);
+
BUG_ON(!gc &&
!owned_by_allocator && !old.owned_by_allocator);
@@ -773,11 +766,16 @@ static int __bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
type != BCH_DATA_JOURNAL);
old = bucket_cmpxchg(g, new, ({
- new.dirty = true;
new.data_type = type;
overflow = checked_add(new.dirty_sectors, sectors);
}));
+ bch2_fs_inconsistent_on(old.data_type &&
+ old.data_type != type, c,
+ "different types of data in same bucket: %s, %s",
+ bch2_data_types[old.data_type],
+ bch2_data_types[type]);
+
bch2_fs_inconsistent_on(overflow, c,
"bucket sector count overflow: %u + %u > U16_MAX",
old.dirty_sectors, sectors);
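
checked_add() flags u16 sector-counter overflow, which the bch2_fs_inconsistent_on() above then reports. Its definition is local to buckets.c and, if memory serves, looks roughly like:

#define checked_add(a, b)					\
({								\
	unsigned _res = (unsigned) (a) + (b);			\
	bool overflow = _res > U16_MAX;				\
	(a) = _res;						\
	overflow;						\
})
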
@@ -810,23 +808,24 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
}
static s64 ptr_disk_sectors_delta(struct extent_ptr_decoded p,
- s64 delta)
+ unsigned offset, s64 delta,
+ unsigned flags)
{
- if (delta > 0) {
- /*
- * marking a new extent, which _will have size_ @delta
- *
- * in the bch2_mark_update -> BCH_EXTENT_OVERLAP_MIDDLE
- * case, we haven't actually created the key we'll be inserting
- * yet (for the split) - so we don't want to be using
- * k->size/crc.live_size here:
- */
- return __ptr_disk_sectors(p, delta);
+ if (flags & BCH_BUCKET_MARK_OVERWRITE_SPLIT) {
+ BUG_ON(offset + -delta > p.crc.live_size);
+
+ return -((s64) ptr_disk_sectors(p)) +
+ __ptr_disk_sectors(p, offset) +
+ __ptr_disk_sectors(p, p.crc.live_size -
+ offset + delta);
+ } else if (flags & BCH_BUCKET_MARK_OVERWRITE) {
+ BUG_ON(offset + -delta > p.crc.live_size);
+
+ return -((s64) ptr_disk_sectors(p)) +
+ __ptr_disk_sectors(p, p.crc.live_size +
+ delta);
} else {
- BUG_ON(-delta > p.crc.live_size);
-
- return (s64) __ptr_disk_sectors(p, p.crc.live_size + delta) -
- (s64) ptr_disk_sectors(p);
+ return ptr_disk_sectors(p);
}
}
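
The reworked helper returns the change in on-disk sectors for one pointer given the overwrite geometry. A worked example for the uncompressed case, where __ptr_disk_sectors(p, n) is just n:

/*
 * live_size = 16, offset = 4, delta = -8, OVERWRITE_SPLIT:
 *
 *   -ptr_disk_sectors(p)                  = -16   drop whole extent
 * + __ptr_disk_sectors(p, 4)              = + 4   front fragment
 * + __ptr_disk_sectors(p, 16 - 4 + (-8))  = + 4   back fragment
 *                                           ----
 *                                           - 8   net change
 *
 * A plain OVERWRITE trimming 8 sectors gives the same net result
 * via the second branch: -16 + (16 + (-8)) = -8.
 */
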
@@ -846,16 +845,35 @@ static void bucket_set_stripe(struct bch_fs *c,
struct bucket *g = PTR_BUCKET(ca, ptr, gc);
struct bucket_mark new, old;
- BUG_ON(ptr_stale(ca, ptr));
-
- old = bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({
- new.dirty = true;
+ old = bucket_cmpxchg(g, new, ({
new.stripe = enabled;
if (journal_seq) {
new.journal_seq_valid = 1;
new.journal_seq = journal_seq;
}
}));
+
+ bch2_dev_usage_update(c, ca, fs_usage, old, new, gc);
+
+ /*
+ * XXX write repair code for these, flag stripe as possibly bad
+ */
+ if (old.gen != ptr->gen)
+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
+ "stripe with stale pointer");
+#if 0
+ /*
+ * We'd like to check for these, but these checks don't work
+ * yet:
+ */
+ if (old.stripe && enabled)
+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
+ "multiple stripes using same bucket");
+
+ if (!old.stripe && !enabled)
+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
+ "deleting stripe but bucket not marked as stripe bucket");
+#endif
}
}
@@ -876,17 +894,23 @@ static bool bch2_mark_pointer(struct bch_fs *c,
do {
new.v.counter = old.v.counter = v;
- new.dirty = true;
-
/*
* Check this after reading bucket mark to guard against
* the allocator invalidating a bucket after we've already
* checked the gen
*/
- if (gen_after(new.gen, p.ptr.gen)) {
- BUG_ON(!test_bit(BCH_FS_ALLOC_READ_DONE, &c->flags));
- EBUG_ON(!p.ptr.cached &&
- test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags));
+ if (gen_after(p.ptr.gen, new.gen)) {
+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
+ "pointer gen in the future");
+ return true;
+ }
+
+ if (new.gen != p.ptr.gen) {
+ /* XXX write repair code for this */
+ if (!p.ptr.cached &&
+ test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags))
+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
+ "stale dirty pointer");
return true;
}
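
gen_after() compares 8-bit bucket generations with wraparound; if my recollection of buckets.h is right, it amounts to:

static inline int gen_cmp(u8 a, u8 b)
{
	return (s8) (a - b);	/* signed distance, wraparound-safe */
}

static inline int gen_after(u8 a, u8 b)
{
	int r = gen_cmp(a, b);

	return r > 0 ? r : 0;	/* how far a is ahead of b, else 0 */
}

So gen_after(p.ptr.gen, new.gen) fires only when the pointer claims a generation ahead of its bucket, which can only mean corruption; a pointer merely behind the bucket gen is the ordinary stale case handled next.
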
@@ -915,6 +939,14 @@ static bool bch2_mark_pointer(struct bch_fs *c,
old.v.counter,
new.v.counter)) != old.v.counter);
+ if (old.data_type && old.data_type != data_type)
+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
+ "bucket %u:%zu gen %u different types of data in same bucket: %s, %s",
+ p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
+ new.gen,
+ bch2_data_types[old.data_type],
+ bch2_data_types[data_type]);
+
bch2_fs_inconsistent_on(overflow, c,
"bucket sector count overflow: %u + %lli > U16_MAX",
!p.ptr.cached
@@ -950,7 +982,7 @@ static int bch2_mark_stripe_ptr(struct bch_fs *c,
spin_unlock(&c->ec_stripes_heap_lock);
bch_err_ratelimited(c, "pointer to nonexistent stripe %llu",
(u64) p.idx);
- return -1;
+ return -EIO;
}
BUG_ON(m->r.e.data_type != data_type);
@@ -985,7 +1017,8 @@ static int bch2_mark_stripe_ptr(struct bch_fs *c,
}
static int bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
- s64 sectors, enum bch_data_type data_type,
+ unsigned offset, s64 sectors,
+ enum bch_data_type data_type,
struct bch_fs_usage *fs_usage,
unsigned journal_seq, unsigned flags)
{
@@ -1006,12 +1039,12 @@ static int bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
s64 disk_sectors = data_type == BCH_DATA_BTREE
? sectors
- : ptr_disk_sectors_delta(p, sectors);
+ : ptr_disk_sectors_delta(p, offset, sectors, flags);
bool stale = bch2_mark_pointer(c, p, disk_sectors, data_type,
fs_usage, journal_seq, flags);
if (p.ptr.cached) {
- if (disk_sectors && !stale)
+ if (!stale)
update_cached_sectors(c, fs_usage, p.ptr.dev,
disk_sectors);
} else if (!p.ec_nr) {
@@ -1030,8 +1063,7 @@ static int bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
}
}
- if (dirty_sectors)
- update_replicas(c, fs_usage, &r.e, dirty_sectors);
+ update_replicas(c, fs_usage, &r.e, dirty_sectors);
return 0;
}
@@ -1095,7 +1127,8 @@ static int bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k,
}
int bch2_mark_key_locked(struct bch_fs *c,
- struct bkey_s_c k, s64 sectors,
+ struct bkey_s_c k,
+ unsigned offset, s64 sectors,
struct bch_fs_usage *fs_usage,
u64 journal_seq, unsigned flags)
{
@@ -1116,11 +1149,12 @@ int bch2_mark_key_locked(struct bch_fs *c,
? c->opts.btree_node_size
: -c->opts.btree_node_size;
- ret = bch2_mark_extent(c, k, sectors, BCH_DATA_BTREE,
+ ret = bch2_mark_extent(c, k, offset, sectors, BCH_DATA_BTREE,
fs_usage, journal_seq, flags);
break;
case KEY_TYPE_extent:
- ret = bch2_mark_extent(c, k, sectors, BCH_DATA_USER,
+ case KEY_TYPE_reflink_v:
+ ret = bch2_mark_extent(c, k, offset, sectors, BCH_DATA_USER,
fs_usage, journal_seq, flags);
break;
case KEY_TYPE_stripe:
@@ -1151,14 +1185,14 @@ int bch2_mark_key_locked(struct bch_fs *c,
}
int bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
- s64 sectors,
+ unsigned offset, s64 sectors,
struct bch_fs_usage *fs_usage,
u64 journal_seq, unsigned flags)
{
int ret;
percpu_down_read(&c->mark_lock);
- ret = bch2_mark_key_locked(c, k, sectors,
+ ret = bch2_mark_key_locked(c, k, offset, sectors,
fs_usage, journal_seq, flags);
percpu_up_read(&c->mark_lock);
@@ -1174,8 +1208,11 @@ inline int bch2_mark_overwrite(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
struct btree *b = iter->l[0].b;
+ unsigned offset = 0;
s64 sectors = 0;
+ flags |= BCH_BUCKET_MARK_OVERWRITE;
+
if (btree_node_is_extents(b)
? bkey_cmp(new->k.p, bkey_start_pos(old.k)) <= 0
: bkey_cmp(new->k.p, old.k->p))
@@ -1184,35 +1221,33 @@ inline int bch2_mark_overwrite(struct btree_trans *trans,
if (btree_node_is_extents(b)) {
switch (bch2_extent_overlap(&new->k, old.k)) {
case BCH_EXTENT_OVERLAP_ALL:
+ offset = 0;
sectors = -((s64) old.k->size);
break;
case BCH_EXTENT_OVERLAP_BACK:
+ offset = bkey_start_offset(&new->k) -
+ bkey_start_offset(old.k);
sectors = bkey_start_offset(&new->k) -
old.k->p.offset;
break;
case BCH_EXTENT_OVERLAP_FRONT:
+ offset = 0;
sectors = bkey_start_offset(old.k) -
new->k.p.offset;
break;
case BCH_EXTENT_OVERLAP_MIDDLE:
- sectors = old.k->p.offset - new->k.p.offset;
- BUG_ON(sectors <= 0);
-
- bch2_mark_key_locked(c, old, sectors,
- fs_usage, trans->journal_res.seq,
- BCH_BUCKET_MARK_INSERT|flags);
-
- sectors = bkey_start_offset(&new->k) -
- old.k->p.offset;
+ offset = bkey_start_offset(&new->k) -
+ bkey_start_offset(old.k);
+ sectors = -((s64) new->k.size);
+ flags |= BCH_BUCKET_MARK_OVERWRITE_SPLIT;
break;
}
BUG_ON(sectors >= 0);
}
- return bch2_mark_key_locked(c, old, sectors, fs_usage,
- trans->journal_res.seq,
- BCH_BUCKET_MARK_OVERWRITE|flags) ?: 1;
+ return bch2_mark_key_locked(c, old, offset, sectors, fs_usage,
+ trans->journal_res.seq, flags) ?: 1;
}
int bch2_mark_update(struct btree_trans *trans,
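
The (offset, sectors) pair describes which part of the old key the insert shadows. A worked example with hypothetical extent positions, in sectors:

/*
 * old = [0,16), new = [4,12) -> OVERLAP_MIDDLE:
 *	offset  = 4 - 0  = 4
 *	sectors = -8, plus BCH_BUCKET_MARK_OVERWRITE_SPLIT
 *
 * old = [0,16), new = [8,24) -> OVERLAP_BACK:
 *	offset  = 8 - 0  = 8
 *	sectors = 8 - 16 = -8
 *
 * old = [4,16), new = [0,8)  -> OVERLAP_FRONT:
 *	offset  = 0
 *	sectors = 4 - 8  = -4
 */

Note the MIDDLE case no longer re-marks the split remainder with MARK_INSERT; the single call with the SPLIT flag lets ptr_disk_sectors_delta() account for both surviving fragments at once.
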
@@ -1230,12 +1265,10 @@ int bch2_mark_update(struct btree_trans *trans,
if (!btree_node_type_needs_gc(iter->btree_id))
return 0;
- if (!(trans->flags & BTREE_INSERT_NOMARK_INSERT))
- bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k),
- bpos_min(insert->k->k.p, b->key.k.p).offset -
- bkey_start_offset(&insert->k->k),
- fs_usage, trans->journal_res.seq,
- BCH_BUCKET_MARK_INSERT|flags);
+ bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k),
+ 0, insert->k->k.size,
+ fs_usage, trans->journal_res.seq,
+ BCH_BUCKET_MARK_INSERT|flags);
if (unlikely(trans->flags & BTREE_INSERT_NOMARK_OVERWRITES))
return 0;
@@ -1280,7 +1313,8 @@ void bch2_trans_fs_usage_apply(struct btree_trans *trans,
xchg(&warned_disk_usage, 1))
return;
- pr_err("disk usage increased more than %llu sectors reserved", disk_res_sectors);
+ bch_err(c, "disk usage increased more than %llu sectors reserved",
+ disk_res_sectors);
trans_for_each_update_iter(trans, i) {
struct btree_iter *iter = i->iter;
@@ -1295,7 +1329,7 @@ void bch2_trans_fs_usage_apply(struct btree_trans *trans,
node_iter = iter->l[0].iter;
while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, b,
- KEY_TYPE_discard))) {
+ KEY_TYPE_discard))) {
struct bkey unpacked;
struct bkey_s_c k;
@@ -1321,16 +1355,18 @@ static int trans_get_key(struct btree_trans *trans,
struct btree_iter **iter,
struct bkey_s_c *k)
{
- unsigned i;
+ struct btree_insert_entry *i;
int ret;
- for (i = 0; i < trans->nr_updates; i++)
- if (!trans->updates[i].deferred &&
- trans->updates[i].iter->btree_id == btree_id &&
- !bkey_cmp(pos, trans->updates[i].iter->pos)) {
- *iter = trans->updates[i].iter;
- *k = bkey_i_to_s_c(trans->updates[i].k);
- return 0;
+ trans_for_each_update_iter(trans, i)
+ if (i->iter->btree_id == btree_id &&
+ (btree_node_type_is_extents(btree_id)
+ ? bkey_cmp(pos, bkey_start_pos(&i->k->k)) >= 0 &&
+ bkey_cmp(pos, i->k->k.p) < 0
+ : !bkey_cmp(pos, i->iter->pos))) {
+ *iter = i->iter;
+ *k = bkey_i_to_s_c(i->k);
+ return 1;
}
*iter = __bch2_trans_get_iter(trans, btree_id, pos,
@@ -1338,6 +1374,8 @@ static int trans_get_key(struct btree_trans *trans,
if (IS_ERR(*iter))
return PTR_ERR(*iter);
+ bch2_trans_iter_free_on_commit(trans, *iter);
+
*k = bch2_btree_iter_peek_slot(*iter);
ret = bkey_err(*k);
if (ret)
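
trans_get_key() now has a three-way return: negative for error, 1 if the key was found among the transaction's own pending updates, 0 if it had to be fetched from the btree. A sketch of the resulting calling pattern (fragment, hypothetical caller):

ret = trans_get_key(trans, BTREE_ID_ALLOC, pos, &iter, &k);
if (ret < 0)
	return ret;		/* hard error */
if (!ret) {
	/* key read from the btree; may lag the in-memory state */
} else {
	/* ret == 1: key is one of this transaction's own updates */
}
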
@@ -1349,8 +1387,8 @@ static void *trans_update_key(struct btree_trans *trans,
struct btree_iter *iter,
unsigned u64s)
{
+ struct btree_insert_entry *i;
struct bkey_i *new_k;
- unsigned i;
new_k = bch2_trans_kmalloc(trans, u64s * sizeof(u64));
if (IS_ERR(new_k))
@@ -1359,19 +1397,13 @@ static void *trans_update_key(struct btree_trans *trans,
bkey_init(&new_k->k);
new_k->k.p = iter->pos;
- for (i = 0; i < trans->nr_updates; i++)
- if (!trans->updates[i].deferred &&
- trans->updates[i].iter == iter) {
- trans->updates[i].k = new_k;
+ trans_for_each_update_iter(trans, i)
+ if (i->iter == iter) {
+ i->k = new_k;
return new_k;
}
- bch2_trans_update(trans, ((struct btree_insert_entry) {
- .iter = iter,
- .k = new_k,
- .triggered = true,
- }));
-
+ bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, new_k));
return new_k;
}
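
BTREE_INSERT_ENTRY() replaces the open-coded designated initializer, dropping the .triggered flag the old initializer set. As far as I recall it is a plain constructor macro in btree_update.h, along these lines:

#define BTREE_INSERT_ENTRY(_iter, _k)				\
	((struct btree_insert_entry) {				\
		.iter	= (_iter),				\
		.k	= (_k),					\
	})
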
@@ -1385,43 +1417,76 @@ static int bch2_trans_mark_pointer(struct btree_trans *trans,
struct bkey_s_c k;
struct bkey_alloc_unpacked u;
struct bkey_i_alloc *a;
+ unsigned old;
bool overflow;
int ret;
ret = trans_get_key(trans, BTREE_ID_ALLOC,
POS(p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr)),
&iter, &k);
- if (ret)
+ if (ret < 0)
return ret;
- if (k.k->type != KEY_TYPE_alloc) {
- bch_err_ratelimited(c, "pointer to nonexistent bucket %u:%zu",
- p.ptr.dev,
- PTR_BUCKET_NR(ca, &p.ptr));
- ret = -1;
- goto out;
- }
+ if (!ret) {
+ /*
+ * During journal replay, and if gc repairs alloc info at
+ * runtime, the alloc info in the btree might not be up to date
+ * yet - so, trust the in memory mark:
+ */
+ struct bucket *g;
+ struct bucket_mark m;
- u = bch2_alloc_unpack(k);
+ percpu_down_read(&c->mark_lock);
+ g = bucket(ca, iter->pos.offset);
+ m = READ_ONCE(g->mark);
+ u = alloc_mem_to_key(g, m);
+ percpu_up_read(&c->mark_lock);
+ } else {
+ /*
+ * Unless we're already updating that key:
+ */
+ if (k.k->type != KEY_TYPE_alloc) {
+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
+ "pointer to nonexistent bucket %llu:%llu",
+ iter->pos.inode, iter->pos.offset);
+ ret = -1;
+ goto out;
+ }
+
+ u = bch2_alloc_unpack(k);
+ }
if (gen_after(u.gen, p.ptr.gen)) {
ret = 1;
goto out;
}
- if (!p.ptr.cached)
+ if (u.data_type && u.data_type != data_type) {
+ bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
+ "bucket %llu:%llu gen %u different types of data in same bucket: %s, %s",
+ iter->pos.inode, iter->pos.offset,
+ u.gen,
+ bch2_data_types[u.data_type],
+ bch2_data_types[data_type]);
+ ret = -1;
+ goto out;
+ }
+
+ if (!p.ptr.cached) {
+ old = u.dirty_sectors;
overflow = checked_add(u.dirty_sectors, sectors);
- else
+ } else {
+ old = u.cached_sectors;
overflow = checked_add(u.cached_sectors, sectors);
+ }
u.data_type = u.dirty_sectors || u.cached_sectors
? data_type : 0;
bch2_fs_inconsistent_on(overflow, c,
"bucket sector count overflow: %u + %lli > U16_MAX",
- !p.ptr.cached
- ? u.dirty_sectors
- : u.cached_sectors, sectors);
+ old, sectors);
+ BUG_ON(overflow);
a = trans_update_key(trans, iter, BKEY_ALLOC_U64s_MAX);
ret = PTR_ERR_OR_ZERO(a);
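
alloc_mem_to_key() converts the in-memory bucket mark to the same unpacked form bch2_alloc_unpack() yields from the btree key, so both paths feed one struct. Reconstructed from the bucket fields used elsewhere in this file; a sketch, not verbatim:

static inline struct bkey_alloc_unpacked
alloc_mem_to_key(struct bucket *g, struct bucket_mark m)
{
	return (struct bkey_alloc_unpacked) {
		.gen		= m.gen,
		.oldest_gen	= g->oldest_gen,
		.data_type	= m.data_type,
		.dirty_sectors	= m.dirty_sectors,
		.cached_sectors	= m.cached_sectors,
		.read_time	= g->io_time[READ],
		.write_time	= g->io_time[WRITE],
	};
}
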
@@ -1440,6 +1505,7 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
struct bch_extent_stripe_ptr p,
s64 sectors, enum bch_data_type data_type)
{
+ struct bch_fs *c = trans->c;
struct bch_replicas_padded r;
struct btree_iter *iter;
struct bkey_i *new_k;
@@ -1449,17 +1515,15 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
s64 parity_sectors;
int ret = 0;
- BUG_ON(!sectors);
-
ret = trans_get_key(trans, BTREE_ID_EC, POS(0, p.idx), &iter, &k);
- if (ret)
+ if (ret < 0)
return ret;
if (k.k->type != KEY_TYPE_stripe) {
- bch_err_ratelimited(trans->c,
- "pointer to nonexistent stripe %llu",
- (u64) p.idx);
- ret = -1;
+ bch2_fs_inconsistent(c,
+ "pointer to nonexistent stripe %llu",
+ (u64) p.idx);
+ ret = -EIO;
goto out;
}
@@ -1491,8 +1555,9 @@ out:
}
static int bch2_trans_mark_extent(struct btree_trans *trans,
- struct bkey_s_c k,
- s64 sectors, enum bch_data_type data_type)
+ struct bkey_s_c k, unsigned offset,
+ s64 sectors, unsigned flags,
+ enum bch_data_type data_type)
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;
@@ -1512,7 +1577,7 @@ static int bch2_trans_mark_extent(struct btree_trans *trans,
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
s64 disk_sectors = data_type == BCH_DATA_BTREE
? sectors
- : ptr_disk_sectors_delta(p, sectors);
+ : ptr_disk_sectors_delta(p, offset, sectors, flags);
ret = bch2_trans_mark_pointer(trans, p, disk_sectors,
data_type);
@@ -1522,7 +1587,7 @@ static int bch2_trans_mark_extent(struct btree_trans *trans,
stale = ret > 0;
if (p.ptr.cached) {
- if (disk_sectors && !stale)
+ if (!stale)
update_cached_sectors_list(trans, p.ptr.dev,
disk_sectors);
} else if (!p.ec_nr) {
@@ -1540,15 +1605,92 @@ static int bch2_trans_mark_extent(struct btree_trans *trans,
}
}
- if (dirty_sectors)
- update_replicas_list(trans, &r.e, dirty_sectors);
+ update_replicas_list(trans, &r.e, dirty_sectors);
return 0;
}
-int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c k,
+static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
+ struct bkey_s_c_reflink_p p,
+ u64 idx, unsigned sectors,
+ unsigned flags)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter *iter;
+ struct bkey_i *new_k;
+ struct bkey_s_c k;
+ struct bkey_i_reflink_v *r_v;
+ s64 ret;
+
+ ret = trans_get_key(trans, BTREE_ID_REFLINK,
+ POS(0, idx), &iter, &k);
+ if (ret < 0)
+ return ret;
+
+ if (k.k->type != KEY_TYPE_reflink_v) {
+ bch2_fs_inconsistent(c,
+ "%llu:%llu len %u points to nonexistent indirect extent %llu",
+ p.k->p.inode, p.k->p.offset, p.k->size, idx);
+ ret = -EIO;
+ goto err;
+ }
+
+ if ((flags & BCH_BUCKET_MARK_OVERWRITE) &&
+ (bkey_start_offset(k.k) < idx ||
+ k.k->p.offset > idx + sectors))
+ goto out;
+
+ bch2_btree_iter_set_pos(iter, bkey_start_pos(k.k));
+ BUG_ON(iter->uptodate > BTREE_ITER_NEED_PEEK);
+
+ new_k = trans_update_key(trans, iter, k.k->u64s);
+ ret = PTR_ERR_OR_ZERO(new_k);
+ if (ret)
+ goto err;
+
+ bkey_reassemble(new_k, k);
+ r_v = bkey_i_to_reflink_v(new_k);
+
+ le64_add_cpu(&r_v->v.refcount,
+ !(flags & BCH_BUCKET_MARK_OVERWRITE) ? 1 : -1);
+
+ if (!r_v->v.refcount) {
+ r_v->k.type = KEY_TYPE_deleted;
+ set_bkey_val_u64s(&r_v->k, 0);
+ }
+out:
+ ret = k.k->p.offset - idx;
+err:
+ bch2_trans_iter_put(trans, iter);
+ return ret;
+}
+
+static int bch2_trans_mark_reflink_p(struct btree_trans *trans,
+ struct bkey_s_c_reflink_p p, unsigned offset,
s64 sectors, unsigned flags)
{
+ u64 idx = le64_to_cpu(p.v->idx) + offset;
+ s64 ret = 0;
+
+ sectors = abs(sectors);
+ BUG_ON(offset + sectors > p.k->size);
+
+ while (sectors) {
+ ret = __bch2_trans_mark_reflink_p(trans, p, idx, sectors, flags);
+ if (ret < 0)
+ break;
+
+ idx += ret;
+ sectors = max_t(s64, 0LL, sectors - ret);
+ ret = 0;
+ }
+
+ return ret;
+}
+
+int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c k,
+ unsigned offset, s64 sectors, unsigned flags)
+{
struct replicas_delta_list *d;
struct bch_fs *c = trans->c;
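
Each __bch2_trans_mark_reflink_p() call adjusts the refcount of the one indirect extent containing idx and returns how far that extent runs past idx, so the loop above advances across indirect extents until the whole referenced range is marked. A worked example with a hypothetical layout:

/*
 * Indirect extents in the reflink btree: [10,14) and [14,22).
 * Marking a reflink_p with idx = 12, sectors = 8 (an insert):
 *
 *   pass 1: extent [10,14): refcount += 1, ret = 14 - 12 = 2
 *	     idx = 14, sectors = 8 - 2 = 6
 *   pass 2: extent [14,22): refcount += 1, ret = 22 - 14 = 8
 *	     idx = 22, sectors = max(0, 6 - 8) = 0 -> done
 */
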
@@ -1558,11 +1700,12 @@ int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c k,
? c->opts.btree_node_size
: -c->opts.btree_node_size;
- return bch2_trans_mark_extent(trans, k, sectors,
- BCH_DATA_BTREE);
+ return bch2_trans_mark_extent(trans, k, offset, sectors,
+ flags, BCH_DATA_BTREE);
case KEY_TYPE_extent:
- return bch2_trans_mark_extent(trans, k, sectors,
- BCH_DATA_USER);
+ case KEY_TYPE_reflink_v:
+ return bch2_trans_mark_extent(trans, k, offset, sectors,
+ flags, BCH_DATA_USER);
case KEY_TYPE_inode:
d = replicas_deltas_realloc(trans, 0);
@@ -1584,6 +1727,10 @@ int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c k,
d->fs_usage.persistent_reserved[replicas - 1] += sectors;
return 0;
}
+ case KEY_TYPE_reflink_p:
+ return bch2_trans_mark_reflink_p(trans,
+ bkey_s_c_to_reflink_p(k),
+ offset, sectors, flags);
default:
return 0;
}
@@ -1601,19 +1748,21 @@ int bch2_trans_mark_update(struct btree_trans *trans,
if (!btree_node_type_needs_gc(iter->btree_id))
return 0;
- ret = bch2_trans_mark_key(trans,
- bkey_i_to_s_c(insert),
- bpos_min(insert->k.p, b->key.k.p).offset -
- bkey_start_offset(&insert->k),
- BCH_BUCKET_MARK_INSERT);
+ ret = bch2_trans_mark_key(trans, bkey_i_to_s_c(insert),
+ 0, insert->k.size, BCH_BUCKET_MARK_INSERT);
if (ret)
return ret;
+ if (unlikely(trans->flags & BTREE_INSERT_NOMARK_OVERWRITES))
+ return 0;
+
while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, b,
KEY_TYPE_discard))) {
struct bkey unpacked;
struct bkey_s_c k;
+ unsigned offset = 0;
s64 sectors = 0;
+ unsigned flags = BCH_BUCKET_MARK_OVERWRITE;
k = bkey_disassemble(b, _k, &unpacked);
@@ -1625,35 +1774,32 @@ int bch2_trans_mark_update(struct btree_trans *trans,
if (btree_node_is_extents(b)) {
switch (bch2_extent_overlap(&insert->k, k.k)) {
case BCH_EXTENT_OVERLAP_ALL:
+ offset = 0;
sectors = -((s64) k.k->size);
break;
case BCH_EXTENT_OVERLAP_BACK:
+ offset = bkey_start_offset(&insert->k) -
+ bkey_start_offset(k.k);
sectors = bkey_start_offset(&insert->k) -
k.k->p.offset;
break;
case BCH_EXTENT_OVERLAP_FRONT:
+ offset = 0;
sectors = bkey_start_offset(k.k) -
insert->k.p.offset;
break;
case BCH_EXTENT_OVERLAP_MIDDLE:
- sectors = k.k->p.offset - insert->k.p.offset;
- BUG_ON(sectors <= 0);
-
- ret = bch2_trans_mark_key(trans, k, sectors,
- BCH_BUCKET_MARK_INSERT);
- if (ret)
- return ret;
-
- sectors = bkey_start_offset(&insert->k) -
- k.k->p.offset;
+ offset = bkey_start_offset(&insert->k) -
+ bkey_start_offset(k.k);
+ sectors = -((s64) insert->k.size);
+ flags |= BCH_BUCKET_MARK_OVERWRITE_SPLIT;
break;
}
BUG_ON(sectors >= 0);
}
- ret = bch2_trans_mark_key(trans, k, sectors,
- BCH_BUCKET_MARK_OVERWRITE);
+ ret = bch2_trans_mark_key(trans, k, offset, sectors, flags);
if (ret)
return ret;
@@ -1761,7 +1907,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
{
struct bucket_array *buckets = NULL, *old_buckets = NULL;
unsigned long *buckets_nouse = NULL;
- unsigned long *buckets_written = NULL;
alloc_fifo free[RESERVE_NR];
alloc_fifo free_inc;
alloc_heap alloc_heap;
@@ -1790,9 +1935,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
!(buckets_nouse = kvpmalloc(BITS_TO_LONGS(nbuckets) *
sizeof(unsigned long),
GFP_KERNEL|__GFP_ZERO)) ||
- !(buckets_written = kvpmalloc(BITS_TO_LONGS(nbuckets) *
- sizeof(unsigned long),
- GFP_KERNEL|__GFP_ZERO)) ||
!init_fifo(&free[RESERVE_BTREE], btree_reserve, GFP_KERNEL) ||
!init_fifo(&free[RESERVE_MOVINGGC],
copygc_reserve, GFP_KERNEL) ||
@@ -1824,16 +1966,12 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
memcpy(buckets_nouse,
ca->buckets_nouse,
BITS_TO_LONGS(n) * sizeof(unsigned long));
- memcpy(buckets_written,
- ca->buckets_written,
- BITS_TO_LONGS(n) * sizeof(unsigned long));
}
rcu_assign_pointer(ca->buckets[0], buckets);
buckets = old_buckets;
swap(ca->buckets_nouse, buckets_nouse);
- swap(ca->buckets_written, buckets_written);
if (resize)
percpu_up_write(&c->mark_lock);
@@ -1873,8 +2011,6 @@ err:
free_fifo(&free[i]);
kvpfree(buckets_nouse,
BITS_TO_LONGS(nbuckets) * sizeof(unsigned long));
- kvpfree(buckets_written,
- BITS_TO_LONGS(nbuckets) * sizeof(unsigned long));
if (buckets)
call_rcu(&old_buckets->rcu, buckets_free_rcu);
@@ -1890,8 +2026,6 @@ void bch2_dev_buckets_free(struct bch_dev *ca)
free_fifo(&ca->free_inc);
for (i = 0; i < RESERVE_NR; i++)
free_fifo(&ca->free[i]);
- kvpfree(ca->buckets_written,
- BITS_TO_LONGS(ca->mi.nbuckets) * sizeof(unsigned long));
kvpfree(ca->buckets_nouse,
BITS_TO_LONGS(ca->mi.nbuckets) * sizeof(unsigned long));
kvpfree(rcu_dereference_protected(ca->buckets[0], 1),