summary | refs | log | tree | commit | diff
path: root/fs/bcachefs/extent_update.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/bcachefs/extent_update.c')
-rw-r--r-- fs/bcachefs/extent_update.c 379
1 files changed, 208 insertions, 171 deletions
diff --git a/fs/bcachefs/extent_update.c b/fs/bcachefs/extent_update.c
index 742b4d78cb3a..846d77dc2530 100644
--- a/fs/bcachefs/extent_update.c
+++ b/fs/bcachefs/extent_update.c
@@ -166,54 +166,72 @@ int bch2_extent_is_atomic(struct bkey_i *k, struct btree_iter *iter)
enum btree_insert_ret
bch2_extent_can_insert(struct btree_trans *trans,
- struct btree_insert_entry *insert,
+ struct btree_iter *iter,
+ struct bkey_i *insert,
unsigned *u64s)
{
- struct btree_iter_level *l = &insert->iter->l[0];
+ struct btree_iter_level *l = &iter->l[0];
struct btree_node_iter node_iter = l->iter;
- enum bch_extent_overlap overlap;
struct bkey_packed *_k;
struct bkey unpacked;
- struct bkey_s_c k;
int sectors;
- /*
- * We avoid creating whiteouts whenever possible when deleting, but
- * those optimizations mean we may potentially insert two whiteouts
- * instead of one (when we overlap with the front of one extent and the
- * back of another):
- */
- if (bkey_whiteout(&insert->k->k))
- *u64s += BKEY_U64s;
-
- _k = bch2_btree_node_iter_peek_filter(&node_iter, l->b,
- KEY_TYPE_discard);
- if (!_k)
- return BTREE_INSERT_OK;
-
- k = bkey_disassemble(l->b, _k, &unpacked);
-
- overlap = bch2_extent_overlap(&insert->k->k, k.k);
-
- /* account for having to split existing extent: */
- if (overlap == BCH_EXTENT_OVERLAP_MIDDLE)
- *u64s += _k->u64s;
-
- if (overlap == BCH_EXTENT_OVERLAP_MIDDLE &&
- (sectors = bch2_bkey_sectors_compressed(k))) {
- int flags = trans->flags & BTREE_INSERT_NOFAIL
- ? BCH_DISK_RESERVATION_NOFAIL : 0;
-
- switch (bch2_disk_reservation_add(trans->c,
- trans->disk_res,
- sectors, flags)) {
- case 0:
+ while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, l->b,
+ KEY_TYPE_discard))) {
+ struct bkey_s_c k = bkey_disassemble(l->b, _k, &unpacked);
+ enum bch_extent_overlap overlap =
+ bch2_extent_overlap(&insert->k, k.k);
+
+ if (bkey_cmp(bkey_start_pos(k.k), insert->k.p) >= 0)
break;
- case -ENOSPC:
- return BTREE_INSERT_ENOSPC;
- default:
- BUG();
+
+ overlap = bch2_extent_overlap(&insert->k, k.k);
+
+ /*
+ * If we're overwriting an existing extent, we may need to emit
+ * a whiteout - unless we're inserting a new extent at the same
+ * position:
+ */
+ if (k.k->needs_whiteout &&
+ (!bkey_whiteout(&insert->k) ||
+ bkey_cmp(k.k->p, insert->k.p)))
+ *u64s += BKEY_U64s;
+
+ /*
+ * If we're partially overwriting an existing extent which has
+ * been written out to disk, we'll need to emit a new version of
+ * that extent:
+ */
+ if (bkey_written(l->b, _k) &&
+ overlap != BCH_EXTENT_OVERLAP_ALL)
+ *u64s += _k->u64s;
+
+ /* And we may be splitting an existing extent: */
+ if (overlap == BCH_EXTENT_OVERLAP_MIDDLE)
+ *u64s += _k->u64s;
+
+ if (overlap == BCH_EXTENT_OVERLAP_MIDDLE &&
+ (sectors = bch2_bkey_sectors_compressed(k))) {
+ int flags = trans->flags & BTREE_INSERT_NOFAIL
+ ? BCH_DISK_RESERVATION_NOFAIL : 0;
+
+ switch (bch2_disk_reservation_add(trans->c,
+ trans->disk_res,
+ sectors, flags)) {
+ case 0:
+ break;
+ case -ENOSPC:
+ return BTREE_INSERT_ENOSPC;
+ default:
+ BUG();
+ }
}
+
+ if (overlap == BCH_EXTENT_OVERLAP_FRONT ||
+ overlap == BCH_EXTENT_OVERLAP_MIDDLE)
+ break;
+
+ bch2_btree_node_iter_advance(&node_iter, l->b);
}
return BTREE_INSERT_OK;
@@ -284,6 +302,52 @@ static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter,
bch2_btree_node_iter_fix(iter, l->b, &l->iter, k, 0, k->u64s);
}
+static void pack_push_whiteout(struct bch_fs *c, struct btree *b,
+ struct bpos pos)
+{ /* Build a key positioned at @pos, flag it needs_whiteout, and hand it to push_whiteout() for node @b. */
+ struct bkey_packed k;
+
+ if (!bkey_pack_pos(&k, pos, b)) { /* packing @pos for @b did not succeed; presumably @pos does not fit the node's packed key format - TODO confirm */
+ struct bkey_i tmp;
+
+ bkey_init(&tmp.k);
+ tmp.k.p = pos;
+ bkey_copy(&k, &tmp); /* fall back: copy a bkey_init()'d key carrying @pos into @k */
+ }
+
+ k.needs_whiteout = true; /* set unconditionally, on both the packed and the fallback path */
+ push_whiteout(c, b, &k);
+}
+
+static void
+extent_drop(struct bch_fs *c, struct btree_iter *iter,
+ struct bkey_packed *_k, struct bkey_s k)
+{ /* Turn extent @k (packed form @_k) in iter's level-0 node into a zero-size KEY_TYPE_deleted key, then delete it from the bset or save it in place. */
+ struct btree_iter_level *l = &iter->l[0];
+
+ if (!bkey_whiteout(k.k)) /* accounting is only adjusted when the key was not already a whiteout */
+ btree_account_key_drop(l->b, _k);
+
+ k.k->size = 0;
+ k.k->type = KEY_TYPE_deleted;
+
+ if (!btree_node_old_extent_overwrite(l->b) && /* only for nodes not flagged old_extent_overwrite */
+ k.k->needs_whiteout) {
+ pack_push_whiteout(c, l->b, k.k->p); /* whiteout is pushed at @k's position ... */
+ k.k->needs_whiteout = false; /* ... and the flag is cleared on @k afterwards */
+ }
+
+ if (_k >= btree_bset_last(l->b)->start) { /* @_k lies at or after the start of the node's last bset: remove it outright */
+ unsigned u64s = _k->u64s; /* length captured before the delete; passed to the iterator fixup below */
+
+ bch2_bset_delete(l->b, _k, _k->u64s);
+ bch2_btree_node_iter_fix(iter, l->b, &l->iter, _k, u64s, 0);
+ } else { /* otherwise keep the slot: write the modified key back and notify the iterator */
+ extent_save(l->b, _k, k.k);
+ bch2_btree_iter_fix_key_modified(iter, l->b, _k);
+ }
+}
+
static void
extent_squash(struct bch_fs *c, struct btree_iter *iter,
struct bkey_i *insert,
@@ -291,95 +355,117 @@ extent_squash(struct bch_fs *c, struct btree_iter *iter,
enum bch_extent_overlap overlap)
{
struct btree_iter_level *l = &iter->l[0];
- int u64s_delta;
+ struct bkey_on_stack tmp, split;
+
+ bkey_on_stack_init(&tmp);
+ bkey_on_stack_init(&split);
+
+ if (!btree_node_old_extent_overwrite(l->b)) {
+ if (!bkey_whiteout(&insert->k) &&
+ !bkey_cmp(k.k->p, insert->k.p)) {
+ insert->k.needs_whiteout = k.k->needs_whiteout;
+ k.k->needs_whiteout = false;
+ }
+ } else {
+ insert->k.needs_whiteout |= k.k->needs_whiteout;
+ }
switch (overlap) {
case BCH_EXTENT_OVERLAP_FRONT:
- /* insert overlaps with start of k: */
- u64s_delta = bch2_cut_front_s(insert->k.p, k);
- btree_keys_account_val_delta(l->b, _k, u64s_delta);
+ if (bkey_written(l->b, _k)) {
+ bkey_on_stack_reassemble(&tmp, c, k.s_c);
+ bch2_cut_front(insert->k.p, tmp.k);
+
+ /*
+ * needs_whiteout was propagated to new version of @k,
+ * @tmp:
+ */
+ if (!btree_node_old_extent_overwrite(l->b))
+ k.k->needs_whiteout = false;
+
+ extent_drop(c, iter, _k, k);
+ extent_bset_insert(c, iter, tmp.k);
+ } else {
+ btree_keys_account_val_delta(l->b, _k,
+ bch2_cut_front_s(insert->k.p, k));
- EBUG_ON(bkey_deleted(k.k));
- extent_save(l->b, _k, k.k);
- bch2_btree_iter_fix_key_modified(iter, l->b, _k);
+ extent_save(l->b, _k, k.k);
+ /*
+ * No need to call bset_fix_invalidated_key, start of
+ * extent changed but extents are indexed by where they
+ * end
+ */
+ bch2_btree_iter_fix_key_modified(iter, l->b, _k);
+ }
break;
-
case BCH_EXTENT_OVERLAP_BACK:
- /* insert overlaps with end of k: */
- u64s_delta = bch2_cut_back_s(bkey_start_pos(&insert->k), k);
- btree_keys_account_val_delta(l->b, _k, u64s_delta);
+ if (bkey_written(l->b, _k)) {
+ bkey_on_stack_reassemble(&tmp, c, k.s_c);
+ bch2_cut_back(bkey_start_pos(&insert->k), tmp.k);
+
+ /*
+ * @tmp has different position than @k, needs_whiteout
+ * should not be propagated:
+ */
+ if (!btree_node_old_extent_overwrite(l->b))
+ tmp.k->k.needs_whiteout = false;
+
+ extent_drop(c, iter, _k, k);
+ extent_bset_insert(c, iter, tmp.k);
+ } else {
+ /*
+ * position of @k is changing, emit a whiteout if
+ * needs_whiteout is set:
+ */
+ if (!btree_node_old_extent_overwrite(l->b) &&
+ k.k->needs_whiteout) {
+ pack_push_whiteout(c, l->b, k.k->p);
+ k.k->needs_whiteout = false;
+ }
- EBUG_ON(bkey_deleted(k.k));
- extent_save(l->b, _k, k.k);
+ btree_keys_account_val_delta(l->b, _k,
+ bch2_cut_back_s(bkey_start_pos(&insert->k), k));
+ extent_save(l->b, _k, k.k);
- /*
- * As the auxiliary tree is indexed by the end of the
- * key and we've just changed the end, update the
- * auxiliary tree.
- */
- bch2_bset_fix_invalidated_key(l->b, _k);
- bch2_btree_node_iter_fix(iter, l->b, &l->iter,
- _k, _k->u64s, _k->u64s);
+ bch2_bset_fix_invalidated_key(l->b, _k);
+ bch2_btree_node_iter_fix(iter, l->b, &l->iter,
+ _k, _k->u64s, _k->u64s);
+ }
+ break;
+ case BCH_EXTENT_OVERLAP_ALL:
+ extent_drop(c, iter, _k, k);
break;
+ case BCH_EXTENT_OVERLAP_MIDDLE:
+ bkey_on_stack_reassemble(&split, c, k.s_c);
+ bch2_cut_back(bkey_start_pos(&insert->k), split.k);
- case BCH_EXTENT_OVERLAP_ALL: {
- /* The insert key completely covers k, invalidate k */
- if (!bkey_whiteout(k.k))
- btree_account_key_drop(l->b, _k);
+ if (!btree_node_old_extent_overwrite(l->b))
+ split.k->k.needs_whiteout = false;
- k.k->size = 0;
- k.k->type = KEY_TYPE_deleted;
+ /* this is identical to BCH_EXTENT_OVERLAP_FRONT: */
+ if (bkey_written(l->b, _k)) {
+ bkey_on_stack_reassemble(&tmp, c, k.s_c);
+ bch2_cut_front(insert->k.p, tmp.k);
- if (_k >= btree_bset_last(l->b)->start) {
- unsigned u64s = _k->u64s;
+ if (!btree_node_old_extent_overwrite(l->b))
+ k.k->needs_whiteout = false;
- bch2_bset_delete(l->b, _k, _k->u64s);
- bch2_btree_node_iter_fix(iter, l->b, &l->iter,
- _k, u64s, 0);
+ extent_drop(c, iter, _k, k);
+ extent_bset_insert(c, iter, tmp.k);
} else {
+ btree_keys_account_val_delta(l->b, _k,
+ bch2_cut_front_s(insert->k.p, k));
+
extent_save(l->b, _k, k.k);
bch2_btree_iter_fix_key_modified(iter, l->b, _k);
}
- break;
- }
- case BCH_EXTENT_OVERLAP_MIDDLE: {
- struct bkey_on_stack split;
-
- bkey_on_stack_init(&split);
- bkey_on_stack_reassemble(&split, c, k.s_c);
-
- /*
- * The insert key falls 'in the middle' of k
- * The insert key splits k in 3:
- * - start only in k, preserve
- * - middle common section, invalidate in k
- * - end only in k, preserve
- *
- * We update the old key to preserve the start,
- * insert will be the new common section,
- * we manually insert the end that we are preserving.
- *
- * modify k _before_ doing the insert (which will move
- * what k points to)
- */
- split.k->k.needs_whiteout |= bkey_written(l->b, _k);
-
- bch2_cut_back(bkey_start_pos(&insert->k), split.k);
- BUG_ON(bkey_deleted(&split.k->k));
-
- u64s_delta = bch2_cut_front_s(insert->k.p, k);
- btree_keys_account_val_delta(l->b, _k, u64s_delta);
-
- BUG_ON(bkey_deleted(k.k));
- extent_save(l->b, _k, k.k);
- bch2_btree_iter_fix_key_modified(iter, l->b, _k);
-
extent_bset_insert(c, iter, split.k);
- bkey_on_stack_exit(&split, c);
break;
}
- }
+
+ bkey_on_stack_exit(&split, c);
+ bkey_on_stack_exit(&tmp, c);
}
/**
@@ -422,17 +508,13 @@ extent_squash(struct bch_fs *c, struct btree_iter *iter,
* key insertion needs to continue/be retried.
*/
void bch2_insert_fixup_extent(struct btree_trans *trans,
- struct btree_insert_entry *insert_entry)
+ struct btree_iter *iter,
+ struct bkey_i *insert)
{
struct bch_fs *c = trans->c;
- struct btree_iter *iter = insert_entry->iter;
- struct bkey_i *insert = insert_entry->k;
struct btree_iter_level *l = &iter->l[0];
struct btree_node_iter node_iter = l->iter;
- bool deleting = bkey_whiteout(&insert->k);
- bool update_journal = !deleting;
- bool update_btree = !deleting;
- struct bkey_i whiteout = *insert;
+ bool do_update = !bkey_whiteout(&insert->k);
struct bkey_packed *_k;
struct bkey unpacked;
@@ -443,7 +525,6 @@ void bch2_insert_fixup_extent(struct btree_trans *trans,
while ((_k = bch2_btree_node_iter_peek_filter(&l->iter, l->b,
KEY_TYPE_discard))) {
struct bkey_s k = __bkey_disassemble(l->b, _k, &unpacked);
- struct bpos cur_end = bpos_min(insert->k.p, k.k->p);
enum bch_extent_overlap overlap =
bch2_extent_overlap(&insert->k, k.k);
@@ -451,52 +532,17 @@ void bch2_insert_fixup_extent(struct btree_trans *trans,
break;
if (!bkey_whiteout(k.k))
- update_journal = true;
+ do_update = true;
+
+ if (!do_update) {
+ struct bpos cur_end = bpos_min(insert->k.p, k.k->p);
- if (!update_journal) {
bch2_cut_front(cur_end, insert);
- bch2_cut_front(cur_end, &whiteout);
bch2_btree_iter_set_pos_same_leaf(iter, cur_end);
- goto next;
- }
-
- /*
- * When deleting, if possible just do it by switching the type
- * of the key we're deleting, instead of creating and inserting
- * a new whiteout:
- */
- if (deleting &&
- !update_btree &&
- !bkey_cmp(insert->k.p, k.k->p) &&
- !bkey_cmp(bkey_start_pos(&insert->k), bkey_start_pos(k.k))) {
- if (!bkey_whiteout(k.k)) {
- btree_account_key_drop(l->b, _k);
- _k->type = KEY_TYPE_discard;
- reserve_whiteout(l->b, _k);
- bch2_btree_iter_fix_key_modified(iter,
- l->b, _k);
- }
- break;
- }
-
- if (k.k->needs_whiteout || bkey_written(l->b, _k)) {
- insert->k.needs_whiteout = true;
- update_btree = true;
- }
-
- if (update_btree &&
- overlap == BCH_EXTENT_OVERLAP_ALL &&
- bkey_whiteout(k.k) &&
- k.k->needs_whiteout) {
- unreserve_whiteout(l->b, _k);
- _k->needs_whiteout = false;
+ } else {
+ extent_squash(c, iter, insert, _k, k, overlap);
}
- extent_squash(c, iter, insert, _k, k, overlap);
-
- if (!update_btree)
- bch2_cut_front(cur_end, insert);
-next:
node_iter = l->iter;
if (overlap == BCH_EXTENT_OVERLAP_FRONT ||
@@ -507,24 +553,15 @@ next:
l->iter = node_iter;
bch2_btree_iter_set_pos_same_leaf(iter, insert->k.p);
- if (update_btree) {
- if (deleting)
+ if (do_update) {
+ if (insert->k.type == KEY_TYPE_deleted)
insert->k.type = KEY_TYPE_discard;
- EBUG_ON(bkey_deleted(&insert->k) || !insert->k.size);
-
- extent_bset_insert(c, iter, insert);
- }
-
- if (update_journal) {
- struct bkey_i *k = !deleting ? insert : &whiteout;
-
- if (deleting)
- k->k.type = KEY_TYPE_discard;
-
- EBUG_ON(bkey_deleted(&k->k) || !k->k.size);
+ if (!bkey_whiteout(&insert->k) ||
+ btree_node_old_extent_overwrite(l->b))
+ extent_bset_insert(c, iter, insert);
- bch2_btree_journal_key(trans, iter, k);
+ bch2_btree_journal_key(trans, iter, insert);
}
bch2_cut_front(insert->k.p, insert);