diff options
Diffstat (limited to 'libbcachefs/io.c')
-rw-r--r-- | libbcachefs/io.c | 168 |
1 files changed, 127 insertions, 41 deletions
diff --git a/libbcachefs/io.c b/libbcachefs/io.c index e3ef662e..ca891b52 100644 --- a/libbcachefs/io.c +++ b/libbcachefs/io.c @@ -8,6 +8,7 @@ #include "bcachefs.h" #include "alloc_foreground.h" +#include "bkey_on_stack.h" #include "bset.h" #include "btree_update.h" #include "buckets.h" @@ -18,7 +19,7 @@ #include "disk_groups.h" #include "ec.h" #include "error.h" -#include "extents.h" +#include "extent_update.h" #include "inode.h" #include "io.h" #include "journal.h" @@ -191,8 +192,8 @@ static int sum_sector_overwrites(struct btree_trans *trans, for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, old, ret) { if (!may_allocate && - bch2_bkey_nr_ptrs_allocated(old) < - bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(new))) { + bch2_bkey_nr_ptrs_fully_allocated(old) < + bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(new))) { ret = -ENOSPC; break; } @@ -334,7 +335,7 @@ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter, /* create the biggest key we can */ bch2_key_resize(&delete.k, max_sectors); - bch2_cut_back(end, &delete.k); + bch2_cut_back(end, &delete); bch2_trans_begin_updates(trans); @@ -384,12 +385,14 @@ int bch2_fpunch(struct bch_fs *c, u64 inum, u64 start, u64 end, int bch2_write_index_default(struct bch_write_op *op) { struct bch_fs *c = op->c; + struct bkey_on_stack sk; struct keylist *keys = &op->insert_keys; struct bkey_i *k = bch2_keylist_front(keys); struct btree_trans trans; struct btree_iter *iter; int ret; + bkey_on_stack_init(&sk); bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024); iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, @@ -397,13 +400,15 @@ int bch2_write_index_default(struct bch_write_op *op) BTREE_ITER_SLOTS|BTREE_ITER_INTENT); do { - BKEY_PADDED(k) tmp; + k = bch2_keylist_front(keys); - bkey_copy(&tmp.k, bch2_keylist_front(keys)); + bkey_on_stack_realloc(&sk, c, k->k.u64s); + bkey_copy(sk.k, k); + bch2_cut_front(iter->pos, sk.k); bch2_trans_begin_updates(&trans); - ret = bch2_extent_update(&trans, iter, &tmp.k, + ret = bch2_extent_update(&trans, iter, sk.k, &op->res, op_journal_seq(op), op->new_i_size, &op->i_sectors_delta); if (ret == -EINTR) @@ -411,13 +416,12 @@ int bch2_write_index_default(struct bch_write_op *op) if (ret) break; - if (bkey_cmp(iter->pos, bch2_keylist_front(keys)->k.p) < 0) - bch2_cut_front(iter->pos, bch2_keylist_front(keys)); - else + if (bkey_cmp(iter->pos, k->k.p) >= 0) bch2_keylist_pop_front(keys); } while (!bch2_keylist_empty(keys)); bch2_trans_exit(&trans); + bkey_on_stack_exit(&sk, c); return ret; } @@ -519,16 +523,19 @@ static void __bch2_write_index(struct bch_write_op *op) for (src = keys->keys; src != keys->top; src = n) { n = bkey_next(src); - bkey_copy(dst, src); - bch2_bkey_drop_ptrs(bkey_i_to_s(dst), ptr, - test_bit(ptr->dev, op->failed.d)); + if (bkey_extent_is_direct_data(&src->k)) { + bch2_bkey_drop_ptrs(bkey_i_to_s(src), ptr, + test_bit(ptr->dev, op->failed.d)); - if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(dst))) { - ret = -EIO; - goto err; + if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(src))) { + ret = -EIO; + goto err; + } } + if (dst != src) + memmove_u64s_down(dst, src, src->u64s); dst = bkey_next(dst); } @@ -1086,7 +1093,7 @@ again: bio->bi_end_io = bch2_write_endio; bio->bi_private = &op->cl; - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + bio->bi_opf |= REQ_OP_WRITE; if (!skip_put) closure_get(bio->bi_private); @@ -1123,6 +1130,47 @@ flush_io: goto again; } +static void bch2_write_data_inline(struct bch_write_op *op, unsigned data_len) +{ + struct closure *cl = &op->cl; + struct bio *bio = &op->wbio.bio; + struct bvec_iter iter; + struct bkey_i_inline_data *id; + unsigned sectors; + int ret; + + ret = bch2_keylist_realloc(&op->insert_keys, op->inline_keys, + ARRAY_SIZE(op->inline_keys), + BKEY_U64s + DIV_ROUND_UP(data_len, 8)); + if (ret) { + op->error = ret; + goto err; + } + + sectors = bio_sectors(bio); + op->pos.offset += sectors; + + id = bkey_inline_data_init(op->insert_keys.top); + id->k.p = op->pos; + id->k.version = op->version; + id->k.size = sectors; + + iter = bio->bi_iter; + iter.bi_size = data_len; + memcpy_from_bio(id->v.data, bio, iter); + + while (data_len & 7) + id->v.data[data_len++] = '\0'; + set_bkey_val_bytes(&id->k, data_len); + bch2_keylist_push(&op->insert_keys); + + op->flags |= BCH_WRITE_WROTE_DATA_INLINE; + continue_at_nobarrier(cl, bch2_write_index, NULL); + return; +err: + bch2_write_done(&op->cl); +} + /** * bch_write - handle a write to a cache device or flash only volume * @@ -1144,22 +1192,22 @@ void bch2_write(struct closure *cl) struct bch_write_op *op = container_of(cl, struct bch_write_op, cl); struct bio *bio = &op->wbio.bio; struct bch_fs *c = op->c; + unsigned data_len; BUG_ON(!op->nr_replicas); BUG_ON(!op->write_point.v); BUG_ON(!bkey_cmp(op->pos, POS_MAX)); + op->start_time = local_clock(); + bch2_keylist_init(&op->insert_keys, op->inline_keys); + wbio_init(bio)->put_bio = false; + if (bio_sectors(bio) & (c->opts.block_size - 1)) { __bcache_io_error(c, "misaligned write"); op->error = -EIO; goto err; } - op->start_time = local_clock(); - - bch2_keylist_init(&op->insert_keys, op->inline_keys); - wbio_init(bio)->put_bio = false; - if (c->opts.nochanges || !percpu_ref_tryget(&c->writes)) { __bcache_io_error(c, "read only"); @@ -1169,12 +1217,25 @@ void bch2_write(struct closure *cl) bch2_increment_clock(c, bio_sectors(bio), WRITE); + data_len = min_t(u64, bio->bi_iter.bi_size, + op->new_i_size - (op->pos.offset << 9)); + + if (data_len <= min(block_bytes(c) / 2, 1024U)) { + bch2_write_data_inline(op, data_len); + return; + } + continue_at_nobarrier(cl, __bch2_write, NULL); return; err: if (!(op->flags & BCH_WRITE_NOPUT_RESERVATION)) bch2_disk_reservation_put(c, &op->res); - closure_return(cl); + if (op->end_io) + op->end_io(op); + if (cl->parent) + closure_return(cl); + else + closure_debug_destroy(cl); } /* Cache promotion on read */ @@ -1456,13 +1517,14 @@ static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio { struct btree_trans trans; struct btree_iter *iter; - BKEY_PADDED(k) tmp; + struct bkey_on_stack sk; struct bkey_s_c k; int ret; flags &= ~BCH_READ_LAST_FRAGMENT; flags |= BCH_READ_MUST_CLONE; + bkey_on_stack_init(&sk); bch2_trans_init(&trans, c, 0, 0); iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, @@ -1474,11 +1536,12 @@ retry: if (bkey_err(k)) goto err; - bkey_reassemble(&tmp.k, k); - k = bkey_i_to_s_c(&tmp.k); + bkey_on_stack_realloc(&sk, c, k.k->u64s); + bkey_reassemble(sk.k, k); + k = bkey_i_to_s_c(sk.k); bch2_trans_unlock(&trans); - if (!bch2_bkey_matches_ptr(c, bkey_i_to_s_c(&tmp.k), + if (!bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, rbio->pos.offset - rbio->pick.crc.offset)) { @@ -1495,6 +1558,7 @@ retry: out: bch2_rbio_done(rbio); bch2_trans_exit(&trans); + bkey_on_stack_exit(&sk, c); return; err: rbio->bio.bi_status = BLK_STS_IOERR; @@ -1507,12 +1571,14 @@ static void bch2_read_retry(struct bch_fs *c, struct bch_read_bio *rbio, { struct btree_trans trans; struct btree_iter *iter; + struct bkey_on_stack sk; struct bkey_s_c k; int ret; flags &= ~BCH_READ_LAST_FRAGMENT; flags |= BCH_READ_MUST_CLONE; + bkey_on_stack_init(&sk); bch2_trans_init(&trans, c, 0, 0); retry: bch2_trans_begin(&trans); @@ -1520,18 +1586,18 @@ retry: for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, POS(inode, bvec_iter.bi_sector), BTREE_ITER_SLOTS, k, ret) { - BKEY_PADDED(k) tmp; unsigned bytes, sectors, offset_into_extent; - bkey_reassemble(&tmp.k, k); - k = bkey_i_to_s_c(&tmp.k); + bkey_on_stack_realloc(&sk, c, k.k->u64s); + bkey_reassemble(sk.k, k); + k = bkey_i_to_s_c(sk.k); offset_into_extent = iter->pos.offset - bkey_start_offset(k.k); sectors = k.k->size - offset_into_extent; ret = bch2_read_indirect_extent(&trans, - &offset_into_extent, &tmp.k); + &offset_into_extent, sk.k); if (ret) break; @@ -1570,6 +1636,7 @@ err: rbio->bio.bi_status = BLK_STS_IOERR; out: bch2_trans_exit(&trans); + bkey_on_stack_exit(&sk, c); bch2_rbio_done(rbio); } @@ -1626,7 +1693,7 @@ static void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio) struct btree_trans trans; struct btree_iter *iter; struct bkey_s_c k; - BKEY_PADDED(k) new; + struct bkey_on_stack new; struct bch_extent_crc_unpacked new_crc; u64 data_offset = rbio->pos.offset - rbio->pick.crc.offset; int ret; @@ -1634,6 +1701,7 @@ static void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio) if (rbio->pick.crc.compression_type) return; + bkey_on_stack_init(&new); bch2_trans_init(&trans, c, 0, 0); retry: bch2_trans_begin(&trans); @@ -1644,8 +1712,9 @@ retry: if (IS_ERR_OR_NULL(k.k)) goto out; - bkey_reassemble(&new.k, k); - k = bkey_i_to_s_c(&new.k); + bkey_on_stack_realloc(&new, c, k.k->u64s); + bkey_reassemble(new.k, k); + k = bkey_i_to_s_c(new.k); if (bversion_cmp(k.k->version, rbio->version) || !bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, data_offset)) @@ -1664,10 +1733,10 @@ retry: goto out; } - if (!bch2_bkey_narrow_crcs(&new.k, new_crc)) + if (!bch2_bkey_narrow_crcs(new.k, new_crc)) goto out; - bch2_trans_update(&trans, iter, &new.k); + bch2_trans_update(&trans, iter, new.k); ret = bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL| @@ -1676,6 +1745,7 @@ retry: goto retry; out: bch2_trans_exit(&trans); + bkey_on_stack_exit(&new, c); } /* Inner part that may run in process context */ @@ -1872,6 +1942,19 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig, struct bpos pos = bkey_start_pos(k.k); int pick_ret; + if (k.k->type == KEY_TYPE_inline_data) { + struct bkey_s_c_inline_data d = bkey_s_c_to_inline_data(k); + unsigned bytes = min_t(unsigned, iter.bi_size, + bkey_val_bytes(d.k)); + + swap(iter.bi_size, bytes); + memcpy_to_bio(&orig->bio, iter, d.v->data); + swap(iter.bi_size, bytes); + bio_advance_iter(&orig->bio, &iter, bytes); + zero_fill_bio_iter(&orig->bio, iter); + goto out_read_done; + } + pick_ret = bch2_bkey_pick_read_device(c, k, failed, &pick); /* hole or reservation - just zero fill: */ @@ -2100,6 +2183,7 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode) { struct btree_trans trans; struct btree_iter *iter; + struct bkey_on_stack sk; struct bkey_s_c k; unsigned flags = BCH_READ_RETRY_IF_STALE| BCH_READ_MAY_PROMOTE| @@ -2113,6 +2197,7 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode) rbio->c = c; rbio->start_time = local_clock(); + bkey_on_stack_init(&sk); bch2_trans_init(&trans, c, 0, 0); retry: bch2_trans_begin(&trans); @@ -2121,7 +2206,6 @@ retry: POS(inode, rbio->bio.bi_iter.bi_sector), BTREE_ITER_SLOTS); while (1) { - BKEY_PADDED(k) tmp; unsigned bytes, sectors, offset_into_extent; bch2_btree_iter_set_pos(iter, @@ -2132,15 +2216,16 @@ retry: if (ret) goto err; - bkey_reassemble(&tmp.k, k); - k = bkey_i_to_s_c(&tmp.k); - offset_into_extent = iter->pos.offset - bkey_start_offset(k.k); sectors = k.k->size - offset_into_extent; + bkey_on_stack_realloc(&sk, c, k.k->u64s); + bkey_reassemble(sk.k, k); + k = bkey_i_to_s_c(sk.k); + ret = bch2_read_indirect_extent(&trans, - &offset_into_extent, &tmp.k); + &offset_into_extent, sk.k); if (ret) goto err; @@ -2172,6 +2257,7 @@ retry: } out: bch2_trans_exit(&trans); + bkey_on_stack_exit(&sk, c); return; err: if (ret == -EINTR) |