diff options
Diffstat (limited to 'libbcachefs/extent_update.c')
-rw-r--r-- | libbcachefs/extent_update.c | 410 |
1 files changed, 27 insertions, 383 deletions
diff --git a/libbcachefs/extent_update.c b/libbcachefs/extent_update.c index 846d77dc..fa6c0698 100644 --- a/libbcachefs/extent_update.c +++ b/libbcachefs/extent_update.c @@ -39,6 +39,12 @@ static int count_iters_for_insert(struct btree_trans *trans, { int ret = 0; + /* + * The extent update path requires an _additional_ iterator for each + * extent we're inserting and overwriting: + */ + *nr_iters += 1; + switch (k.k->type) { case KEY_TYPE_extent: case KEY_TYPE_reflink_v: @@ -167,402 +173,40 @@ int bch2_extent_is_atomic(struct bkey_i *k, struct btree_iter *iter) enum btree_insert_ret bch2_extent_can_insert(struct btree_trans *trans, struct btree_iter *iter, - struct bkey_i *insert, - unsigned *u64s) + struct bkey_i *insert) { struct btree_iter_level *l = &iter->l[0]; struct btree_node_iter node_iter = l->iter; struct bkey_packed *_k; + struct bkey_s_c k; struct bkey unpacked; int sectors; - while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, l->b, - KEY_TYPE_discard))) { - struct bkey_s_c k = bkey_disassemble(l->b, _k, &unpacked); - enum bch_extent_overlap overlap = - bch2_extent_overlap(&insert->k, k.k); - - if (bkey_cmp(bkey_start_pos(k.k), insert->k.p) >= 0) - break; - - overlap = bch2_extent_overlap(&insert->k, k.k); - - /* - * If we're overwriting an existing extent, we may need to emit - * a whiteout - unless we're inserting a new extent at the same - * position: - */ - if (k.k->needs_whiteout && - (!bkey_whiteout(&insert->k) || - bkey_cmp(k.k->p, insert->k.p))) - *u64s += BKEY_U64s; - - /* - * If we're partially overwriting an existing extent which has - * been written out to disk, we'll need to emit a new version of - * that extent: - */ - if (bkey_written(l->b, _k) && - overlap != BCH_EXTENT_OVERLAP_ALL) - *u64s += _k->u64s; - - /* And we may be splitting an existing extent: */ - if (overlap == BCH_EXTENT_OVERLAP_MIDDLE) - *u64s += _k->u64s; - - if (overlap == BCH_EXTENT_OVERLAP_MIDDLE && - (sectors = bch2_bkey_sectors_compressed(k))) { - int flags = trans->flags & BTREE_INSERT_NOFAIL - ? BCH_DISK_RESERVATION_NOFAIL : 0; - - switch (bch2_disk_reservation_add(trans->c, - trans->disk_res, - sectors, flags)) { - case 0: - break; - case -ENOSPC: - return BTREE_INSERT_ENOSPC; - default: - BUG(); - } - } - - if (overlap == BCH_EXTENT_OVERLAP_FRONT || - overlap == BCH_EXTENT_OVERLAP_MIDDLE) - break; - - bch2_btree_node_iter_advance(&node_iter, l->b); - } - - return BTREE_INSERT_OK; -} - -static void verify_extent_nonoverlapping(struct bch_fs *c, - struct btree *b, - struct btree_node_iter *_iter, - struct bkey_i *insert) -{ -#ifdef CONFIG_BCACHEFS_DEBUG - struct btree_node_iter iter; - struct bkey_packed *k; - struct bkey uk; - - if (!expensive_debug_checks(c)) - return; - - iter = *_iter; - k = bch2_btree_node_iter_prev_filter(&iter, b, KEY_TYPE_discard); - BUG_ON(k && - (uk = bkey_unpack_key(b, k), - bkey_cmp(uk.p, bkey_start_pos(&insert->k)) > 0)); - - iter = *_iter; - k = bch2_btree_node_iter_peek_filter(&iter, b, KEY_TYPE_discard); -#if 0 - BUG_ON(k && - (uk = bkey_unpack_key(b, k), - bkey_cmp(insert->k.p, bkey_start_pos(&uk))) > 0); -#else - if (k && - (uk = bkey_unpack_key(b, k), - bkey_cmp(insert->k.p, bkey_start_pos(&uk))) > 0) { - char buf1[100]; - char buf2[100]; - - bch2_bkey_to_text(&PBUF(buf1), &insert->k); - bch2_bkey_to_text(&PBUF(buf2), &uk); - - bch2_dump_btree_node(b); - panic("insert > next :\n" - "insert %s\n" - "next %s\n", - buf1, buf2); - } -#endif - -#endif -} - -static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter, - struct bkey_i *insert) -{ - struct btree_iter_level *l = &iter->l[0]; - struct bkey_packed *k = - bch2_btree_node_iter_bset_pos(&l->iter, l->b, bset_tree_last(l->b)); - - BUG_ON(insert->k.u64s > bch_btree_keys_u64s_remaining(c, l->b)); - - EBUG_ON(bkey_deleted(&insert->k) || !insert->k.size); - verify_extent_nonoverlapping(c, l->b, &l->iter, insert); - - if (debug_check_bkeys(c)) - bch2_bkey_debugcheck(c, l->b, bkey_i_to_s_c(insert)); - - bch2_bset_insert(l->b, &l->iter, k, insert, 0); - bch2_btree_node_iter_fix(iter, l->b, &l->iter, k, 0, k->u64s); -} - -static void pack_push_whiteout(struct bch_fs *c, struct btree *b, - struct bpos pos) -{ - struct bkey_packed k; - - if (!bkey_pack_pos(&k, pos, b)) { - struct bkey_i tmp; - - bkey_init(&tmp.k); - tmp.k.p = pos; - bkey_copy(&k, &tmp); - } - - k.needs_whiteout = true; - push_whiteout(c, b, &k); -} - -static void -extent_drop(struct bch_fs *c, struct btree_iter *iter, - struct bkey_packed *_k, struct bkey_s k) -{ - struct btree_iter_level *l = &iter->l[0]; - - if (!bkey_whiteout(k.k)) - btree_account_key_drop(l->b, _k); - - k.k->size = 0; - k.k->type = KEY_TYPE_deleted; - - if (!btree_node_old_extent_overwrite(l->b) && - k.k->needs_whiteout) { - pack_push_whiteout(c, l->b, k.k->p); - k.k->needs_whiteout = false; - } - - if (_k >= btree_bset_last(l->b)->start) { - unsigned u64s = _k->u64s; - - bch2_bset_delete(l->b, _k, _k->u64s); - bch2_btree_node_iter_fix(iter, l->b, &l->iter, _k, u64s, 0); - } else { - extent_save(l->b, _k, k.k); - bch2_btree_iter_fix_key_modified(iter, l->b, _k); - } -} - -static void -extent_squash(struct bch_fs *c, struct btree_iter *iter, - struct bkey_i *insert, - struct bkey_packed *_k, struct bkey_s k, - enum bch_extent_overlap overlap) -{ - struct btree_iter_level *l = &iter->l[0]; - struct bkey_on_stack tmp, split; - - bkey_on_stack_init(&tmp); - bkey_on_stack_init(&split); - - if (!btree_node_old_extent_overwrite(l->b)) { - if (!bkey_whiteout(&insert->k) && - !bkey_cmp(k.k->p, insert->k.p)) { - insert->k.needs_whiteout = k.k->needs_whiteout; - k.k->needs_whiteout = false; - } - } else { - insert->k.needs_whiteout |= k.k->needs_whiteout; - } - - switch (overlap) { - case BCH_EXTENT_OVERLAP_FRONT: - if (bkey_written(l->b, _k)) { - bkey_on_stack_reassemble(&tmp, c, k.s_c); - bch2_cut_front(insert->k.p, tmp.k); - - /* - * needs_whiteout was propagated to new version of @k, - * @tmp: - */ - if (!btree_node_old_extent_overwrite(l->b)) - k.k->needs_whiteout = false; - - extent_drop(c, iter, _k, k); - extent_bset_insert(c, iter, tmp.k); - } else { - btree_keys_account_val_delta(l->b, _k, - bch2_cut_front_s(insert->k.p, k)); - - extent_save(l->b, _k, k.k); - /* - * No need to call bset_fix_invalidated_key, start of - * extent changed but extents are indexed by where they - * end - */ - bch2_btree_iter_fix_key_modified(iter, l->b, _k); - } - break; - case BCH_EXTENT_OVERLAP_BACK: - if (bkey_written(l->b, _k)) { - bkey_on_stack_reassemble(&tmp, c, k.s_c); - bch2_cut_back(bkey_start_pos(&insert->k), tmp.k); - - /* - * @tmp has different position than @k, needs_whiteout - * should not be propagated: - */ - if (!btree_node_old_extent_overwrite(l->b)) - tmp.k->k.needs_whiteout = false; - - extent_drop(c, iter, _k, k); - extent_bset_insert(c, iter, tmp.k); - } else { - /* - * position of @k is changing, emit a whiteout if - * needs_whiteout is set: - */ - if (!btree_node_old_extent_overwrite(l->b) && - k.k->needs_whiteout) { - pack_push_whiteout(c, l->b, k.k->p); - k.k->needs_whiteout = false; - } - - btree_keys_account_val_delta(l->b, _k, - bch2_cut_back_s(bkey_start_pos(&insert->k), k)); - extent_save(l->b, _k, k.k); - - bch2_bset_fix_invalidated_key(l->b, _k); - bch2_btree_node_iter_fix(iter, l->b, &l->iter, - _k, _k->u64s, _k->u64s); - } - break; - case BCH_EXTENT_OVERLAP_ALL: - extent_drop(c, iter, _k, k); - break; - case BCH_EXTENT_OVERLAP_MIDDLE: - bkey_on_stack_reassemble(&split, c, k.s_c); - bch2_cut_back(bkey_start_pos(&insert->k), split.k); - - if (!btree_node_old_extent_overwrite(l->b)) - split.k->k.needs_whiteout = false; - - /* this is identical to BCH_EXTENT_OVERLAP_FRONT: */ - if (bkey_written(l->b, _k)) { - bkey_on_stack_reassemble(&tmp, c, k.s_c); - bch2_cut_front(insert->k.p, tmp.k); - - if (!btree_node_old_extent_overwrite(l->b)) - k.k->needs_whiteout = false; - - extent_drop(c, iter, _k, k); - extent_bset_insert(c, iter, tmp.k); - } else { - btree_keys_account_val_delta(l->b, _k, - bch2_cut_front_s(insert->k.p, k)); - - extent_save(l->b, _k, k.k); - bch2_btree_iter_fix_key_modified(iter, l->b, _k); - } - - extent_bset_insert(c, iter, split.k); - break; - } - - bkey_on_stack_exit(&split, c); - bkey_on_stack_exit(&tmp, c); -} + _k = bch2_btree_node_iter_peek_filter(&node_iter, l->b, + KEY_TYPE_discard); + if (!_k) + return BTREE_INSERT_OK; -/** - * bch_extent_insert_fixup - insert a new extent and deal with overlaps - * - * this may result in not actually doing the insert, or inserting some subset - * of the insert key. For cmpxchg operations this is where that logic lives. - * - * All subsets of @insert that need to be inserted are inserted using - * bch2_btree_insert_and_journal(). If @b or @res fills up, this function - * returns false, setting @iter->pos for the prefix of @insert that actually got - * inserted. - * - * BSET INVARIANTS: this function is responsible for maintaining all the - * invariants for bsets of extents in memory. things get really hairy with 0 - * size extents - * - * within one bset: - * - * bkey_start_pos(bkey_next(k)) >= k - * or bkey_start_offset(bkey_next(k)) >= k->offset - * - * i.e. strict ordering, no overlapping extents. - * - * multiple bsets (i.e. full btree node): - * - * ∀ k, j - * k.size != 0 ∧ j.size != 0 → - * ¬ (k > bkey_start_pos(j) ∧ k < j) - * - * i.e. no two overlapping keys _of nonzero size_ - * - * We can't realistically maintain this invariant for zero size keys because of - * the key merging done in bch2_btree_insert_key() - for two mergeable keys k, j - * there may be another 0 size key between them in another bset, and it will - * thus overlap with the merged key. - * - * In addition, the end of iter->pos indicates how much has been processed. - * If the end of iter->pos is not the same as the end of insert, then - * key insertion needs to continue/be retried. - */ -void bch2_insert_fixup_extent(struct btree_trans *trans, - struct btree_iter *iter, - struct bkey_i *insert) -{ - struct bch_fs *c = trans->c; - struct btree_iter_level *l = &iter->l[0]; - struct btree_node_iter node_iter = l->iter; - bool do_update = !bkey_whiteout(&insert->k); - struct bkey_packed *_k; - struct bkey unpacked; + k = bkey_disassemble(l->b, _k, &unpacked); - EBUG_ON(iter->level); - EBUG_ON(!insert->k.size); - EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k))); + /* Check if we're splitting a compressed extent: */ - while ((_k = bch2_btree_node_iter_peek_filter(&l->iter, l->b, - KEY_TYPE_discard))) { - struct bkey_s k = __bkey_disassemble(l->b, _k, &unpacked); - enum bch_extent_overlap overlap = - bch2_extent_overlap(&insert->k, k.k); + if (bkey_cmp(bkey_start_pos(&insert->k), bkey_start_pos(k.k)) > 0 && + bkey_cmp(insert->k.p, k.k->p) < 0 && + (sectors = bch2_bkey_sectors_compressed(k))) { + int flags = trans->flags & BTREE_INSERT_NOFAIL + ? BCH_DISK_RESERVATION_NOFAIL : 0; - if (bkey_cmp(bkey_start_pos(k.k), insert->k.p) >= 0) + switch (bch2_disk_reservation_add(trans->c, trans->disk_res, + sectors, flags)) { + case 0: break; - - if (!bkey_whiteout(k.k)) - do_update = true; - - if (!do_update) { - struct bpos cur_end = bpos_min(insert->k.p, k.k->p); - - bch2_cut_front(cur_end, insert); - bch2_btree_iter_set_pos_same_leaf(iter, cur_end); - } else { - extent_squash(c, iter, insert, _k, k, overlap); + case -ENOSPC: + return BTREE_INSERT_ENOSPC; + default: + BUG(); } - - node_iter = l->iter; - - if (overlap == BCH_EXTENT_OVERLAP_FRONT || - overlap == BCH_EXTENT_OVERLAP_MIDDLE) - break; } - l->iter = node_iter; - bch2_btree_iter_set_pos_same_leaf(iter, insert->k.p); - - if (do_update) { - if (insert->k.type == KEY_TYPE_deleted) - insert->k.type = KEY_TYPE_discard; - - if (!bkey_whiteout(&insert->k) || - btree_node_old_extent_overwrite(l->b)) - extent_bset_insert(c, iter, insert); - - bch2_btree_journal_key(trans, iter, insert); - } - - bch2_cut_front(insert->k.p, insert); + return BTREE_INSERT_OK; } |