Diffstat (limited to 'libbcachefs/io.c')
-rw-r--r-- | libbcachefs/io.c | 213
1 file changed, 145 insertions, 68 deletions
diff --git a/libbcachefs/io.c b/libbcachefs/io.c
index 4d81b6e6..c5d9a0c5 100644
--- a/libbcachefs/io.c
+++ b/libbcachefs/io.c
@@ -259,6 +259,8 @@ int bch2_write_index_default(struct bch_write_op *op)
         bch2_verify_keylist_sorted(keys);
 
         bch2_trans_init(&trans, c, BTREE_ITER_MAX, 256);
+retry:
+        bch2_trans_begin(&trans);
 
         iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
                                    bkey_start_pos(&bch2_keylist_front(keys)->k),
@@ -269,7 +271,9 @@ int bch2_write_index_default(struct bch_write_op *op)
 
                 bkey_copy(&split.k, bch2_keylist_front(keys));
 
-                bch2_extent_trim_atomic(&split.k, iter);
+                ret = bch2_extent_trim_atomic(&split.k, iter);
+                if (ret)
+                        break;
 
                 bch2_trans_update(&trans,
                                   BTREE_INSERT_ENTRY(iter, &split.k));
@@ -286,6 +290,11 @@ int bch2_write_index_default(struct bch_write_op *op)
                 bch2_keylist_pop_front(keys);
         } while (!bch2_keylist_empty(keys));
 
+        if (ret == -EINTR) {
+                ret = 0;
+                goto retry;
+        }
+
         bch2_trans_exit(&trans);
 
         return ret;
@@ -426,7 +435,7 @@ static void init_append_extent(struct bch_write_op *op,
                 p.ptr.cached    = !ca->mi.durability ||
                         (op->flags & BCH_WRITE_CACHED) != 0;
                 p.ptr.offset    += ca->mi.bucket_size - ob->sectors_free;
-                bch2_extent_ptr_decoded_append(e, &p);
+                bch2_extent_ptr_decoded_append(&e->k_i, &p);
 
                 BUG_ON(crc.compressed_size > ob->sectors_free);
                 ob->sectors_free -= crc.compressed_size;
@@ -954,17 +963,13 @@ static inline bool should_promote(struct bch_fs *c, struct bkey_s_c k,
                                   struct bch_io_opts opts,
                                   unsigned flags)
 {
-        if (!bkey_extent_is_data(k.k))
-                return false;
-
         if (!(flags & BCH_READ_MAY_PROMOTE))
                 return false;
 
         if (!opts.promote_target)
                 return false;
 
-        if (bch2_extent_has_target(c, bkey_s_c_to_extent(k),
-                                   opts.promote_target))
+        if (bch2_bkey_has_target(c, k, opts.promote_target))
                 return false;
 
         if (bch2_target_congested(c, opts.promote_target)) {
@@ -1028,6 +1033,7 @@ static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
 
 noinline
 static struct promote_op *__promote_alloc(struct bch_fs *c,
+                                          enum btree_id btree_id,
                                           struct bpos pos,
                                           struct extent_ptr_decoded *pick,
                                           struct bch_io_opts opts,
@@ -1084,6 +1090,7 @@ static struct promote_op *__promote_alloc(struct bch_fs *c,
                         (struct data_opts) {
                                 .target = opts.promote_target
                         },
+                        btree_id,
                         bkey_s_c_null);
         BUG_ON(ret);
 
@@ -1121,7 +1128,11 @@ static inline struct promote_op *promote_alloc(struct bch_fs *c,
         if (!should_promote(c, k, pos, opts, flags))
                 return NULL;
 
-        promote = __promote_alloc(c, pos, pick, opts, sectors, rbio);
+        promote = __promote_alloc(c,
+                                  k.k->type == KEY_TYPE_reflink_v
+                                  ? BTREE_ID_REFLINK
+                                  : BTREE_ID_EXTENTS,
+                                  pos, pick, opts, sectors, rbio);
         if (!promote)
                 return NULL;
 
@@ -1222,17 +1233,16 @@ retry:
         k = bkey_i_to_s_c(&tmp.k);
         bch2_trans_unlock(&trans);
 
-        if (!bkey_extent_is_data(k.k) ||
-            !bch2_extent_matches_ptr(c, bkey_i_to_s_c_extent(&tmp.k),
-                                     rbio->pick.ptr,
-                                     rbio->pos.offset -
-                                     rbio->pick.crc.offset)) {
+        if (!bch2_bkey_matches_ptr(c, bkey_i_to_s_c(&tmp.k),
+                                   rbio->pick.ptr,
+                                   rbio->pos.offset -
+                                   rbio->pick.crc.offset)) {
                 /* extent we wanted to read no longer exists: */
                 rbio->hole = true;
                 goto out;
         }
 
-        ret = __bch2_read_extent(c, rbio, bvec_iter, k, failed, flags);
+        ret = __bch2_read_extent(c, rbio, bvec_iter, k, 0, failed, flags);
         if (ret == READ_RETRY)
                 goto retry;
         if (ret)
@@ -1255,26 +1265,40 @@ static void bch2_read_retry(struct bch_fs *c, struct bch_read_bio *rbio,
         struct bkey_s_c k;
         int ret;
 
-        bch2_trans_init(&trans, c, 0, 0);
-
         flags &= ~BCH_READ_LAST_FRAGMENT;
         flags |= BCH_READ_MUST_CLONE;
+
+        bch2_trans_init(&trans, c, 0, 0);
 retry:
+        bch2_trans_begin(&trans);
+
         for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS,
                            POS(inode, bvec_iter.bi_sector),
                            BTREE_ITER_SLOTS, k, ret) {
                 BKEY_PADDED(k) tmp;
-                unsigned bytes;
+                unsigned bytes, sectors, offset_into_extent;
 
                 bkey_reassemble(&tmp.k, k);
                 k = bkey_i_to_s_c(&tmp.k);
+
+                offset_into_extent = iter->pos.offset -
+                        bkey_start_offset(k.k);
+                sectors = k.k->size - offset_into_extent;
+
+                ret = bch2_read_indirect_extent(&trans, iter,
+                                        &offset_into_extent, &tmp.k);
+                if (ret)
+                        break;
+
+                sectors = min(sectors, k.k->size - offset_into_extent);
+
                 bch2_trans_unlock(&trans);
 
-                bytes = min_t(unsigned, bvec_iter.bi_size,
-                              (k.k->p.offset - bvec_iter.bi_sector) << 9);
+                bytes = min(sectors, bvec_iter_sectors(bvec_iter)) << 9;
                 swap(bvec_iter.bi_size, bytes);
 
-                ret = __bch2_read_extent(c, rbio, bvec_iter, k, failed, flags);
+                ret = __bch2_read_extent(c, rbio, bvec_iter, k,
+                                         offset_into_extent, failed, flags);
                 switch (ret) {
                 case READ_RETRY:
                         goto retry;
@@ -1355,7 +1379,6 @@ static void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio)
         struct btree_trans trans;
         struct btree_iter *iter;
         struct bkey_s_c k;
-        struct bkey_i_extent *e;
         BKEY_PADDED(k) new;
         struct bch_extent_crc_unpacked new_crc;
         u64 data_offset = rbio->pos.offset - rbio->pick.crc.offset;
@@ -1374,34 +1397,30 @@ retry:
         if (IS_ERR_OR_NULL(k.k))
                 goto out;
 
-        if (!bkey_extent_is_data(k.k))
-                goto out;
-
         bkey_reassemble(&new.k, k);
-        e = bkey_i_to_extent(&new.k);
+        k = bkey_i_to_s_c(&new.k);
 
-        if (!bch2_extent_matches_ptr(c, extent_i_to_s_c(e),
-                                     rbio->pick.ptr, data_offset) ||
-            bversion_cmp(e->k.version, rbio->version))
+        if (bversion_cmp(k.k->version, rbio->version) ||
+            !bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, data_offset))
                 goto out;
 
         /* Extent was merged? */
-        if (bkey_start_offset(&e->k) < data_offset ||
-            e->k.p.offset > data_offset + rbio->pick.crc.uncompressed_size)
+        if (bkey_start_offset(k.k) < data_offset ||
+            k.k->p.offset > data_offset + rbio->pick.crc.uncompressed_size)
                 goto out;
 
         if (bch2_rechecksum_bio(c, &rbio->bio, rbio->version,
                         rbio->pick.crc, NULL, &new_crc,
-                        bkey_start_offset(&e->k) - data_offset, e->k.size,
+                        bkey_start_offset(k.k) - data_offset, k.k->size,
                         rbio->pick.crc.csum_type)) {
                 bch_err(c, "error verifying existing checksum while narrowing checksum (memory corruption?)");
                 goto out;
         }
 
-        if (!bch2_extent_narrow_crcs(e, new_crc))
+        if (!bch2_bkey_narrow_crcs(&new.k, new_crc))
                 goto out;
 
-        bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &e->k_i));
+        bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &new.k));
         ret = bch2_trans_commit(&trans, NULL, NULL,
                                 BTREE_INSERT_ATOMIC|
                                 BTREE_INSERT_NOFAIL|
@@ -1412,15 +1431,6 @@ out:
         bch2_trans_exit(&trans);
 }
 
-static bool should_narrow_crcs(struct bkey_s_c k,
-                               struct extent_ptr_decoded *pick,
-                               unsigned flags)
-{
-        return !(flags & BCH_READ_IN_RETRY) &&
-                bkey_extent_is_data(k.k) &&
-                bch2_can_narrow_extent_crcs(bkey_s_c_to_extent(k), pick->crc);
-}
-
 /* Inner part that may run in process context */
 static void __bch2_read_endio(struct work_struct *work)
 {
@@ -1455,7 +1465,7 @@ static void __bch2_read_endio(struct work_struct *work)
                 goto nodecode;
 
         /* Adjust crc to point to subset of data we want: */
-        crc.offset     += rbio->bvec_iter.bi_sector - rbio->pos.offset;
+        crc.offset     += rbio->offset_into_extent;
         crc.live_size   = bvec_iter_sectors(rbio->bvec_iter);
 
         if (crc.compression_type != BCH_COMPRESSION_NONE) {
@@ -1564,8 +1574,51 @@ static void bch2_read_endio(struct bio *bio)
         bch2_rbio_punt(rbio, __bch2_read_endio, context, wq);
 }
 
+int bch2_read_indirect_extent(struct btree_trans *trans,
+                              struct btree_iter *extent_iter,
+                              unsigned *offset_into_extent,
+                              struct bkey_i *orig_k)
+{
+        struct btree_iter *iter;
+        struct bkey_s_c k;
+        u64 reflink_offset;
+        int ret;
+
+        if (orig_k->k.type != KEY_TYPE_reflink_p)
+                return 0;
+
+        reflink_offset = le64_to_cpu(bkey_i_to_reflink_p(orig_k)->v.idx) +
+                *offset_into_extent;
+
+        iter = __bch2_trans_get_iter(trans, BTREE_ID_REFLINK,
+                                     POS(0, reflink_offset),
+                                     BTREE_ITER_SLOTS, 1);
+        ret = PTR_ERR_OR_ZERO(iter);
+        if (ret)
+                return ret;
+
+        k = bch2_btree_iter_peek_slot(iter);
+        ret = bkey_err(k);
+        if (ret)
+                goto err;
+
+        if (k.k->type != KEY_TYPE_reflink_v) {
+                __bcache_io_error(trans->c,
+                                "pointer to nonexistent indirect extent");
+                ret = -EIO;
+                goto err;
+        }
+
+        *offset_into_extent = iter->pos.offset - bkey_start_offset(k.k);
+        bkey_reassemble(orig_k, k);
+err:
+        bch2_trans_iter_put(trans, iter);
+        return ret;
+}
+
 int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
                        struct bvec_iter iter, struct bkey_s_c k,
+                       unsigned offset_into_extent,
                        struct bch_io_failures *failed, unsigned flags)
 {
         struct extent_ptr_decoded pick;
@@ -1598,7 +1651,6 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
                 if (pick.crc.compressed_size > orig->bio.bi_vcnt * PAGE_SECTORS)
                         goto hole;
 
-                iter.bi_sector  = pos.offset;
                 iter.bi_size    = pick.crc.compressed_size << 9;
                 goto noclone;
         }
@@ -1607,13 +1659,13 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
             bio_flagged(&orig->bio, BIO_CHAIN))
                 flags |= BCH_READ_MUST_CLONE;
 
-        narrow_crcs = should_narrow_crcs(k, &pick, flags);
+        narrow_crcs = !(flags & BCH_READ_IN_RETRY) &&
+                bch2_can_narrow_extent_crcs(k, pick.crc);
 
         if (narrow_crcs && (flags & BCH_READ_USER_MAPPED))
                 flags |= BCH_READ_MUST_BOUNCE;
 
-        EBUG_ON(bkey_start_offset(k.k) > iter.bi_sector ||
-                k.k->p.offset < bvec_iter_end_sector(iter));
+        BUG_ON(offset_into_extent + bvec_iter_sectors(iter) > k.k->size);
 
         if (pick.crc.compression_type != BCH_COMPRESSION_NONE ||
             (pick.crc.csum_type != BCH_CSUM_NONE &&
@@ -1634,15 +1686,17 @@
                 (bvec_iter_sectors(iter) != pick.crc.uncompressed_size ||
                  bvec_iter_sectors(iter) != pick.crc.live_size ||
                  pick.crc.offset ||
-                 iter.bi_sector != pos.offset));
+                 offset_into_extent));
 
+                pos.offset += offset_into_extent;
                 pick.ptr.offset += pick.crc.offset +
-                        (iter.bi_sector - pos.offset);
+                        offset_into_extent;
+                offset_into_extent              = 0;
                 pick.crc.compressed_size        = bvec_iter_sectors(iter);
                 pick.crc.uncompressed_size      = bvec_iter_sectors(iter);
                 pick.crc.offset                 = 0;
                 pick.crc.live_size              = bvec_iter_sectors(iter);
-                pos.offset                      = iter.bi_sector;
+                offset_into_extent              = 0;
         }
 
         if (rbio) {
@@ -1697,6 +1751,7 @@ noclone:
         else
                 rbio->end_io    = orig->bio.bi_end_io;
         rbio->bvec_iter         = iter;
+        rbio->offset_into_extent= offset_into_extent;
         rbio->flags             = flags;
         rbio->have_ioref        = pick_ret > 0 && bch2_dev_get_ioref(ca, READ);
         rbio->narrow_crcs       = narrow_crcs;
@@ -1815,45 +1870,67 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode)
         rbio->c = c;
         rbio->start_time = local_clock();
 
-        for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS,
-                           POS(inode, rbio->bio.bi_iter.bi_sector),
-                           BTREE_ITER_SLOTS, k, ret) {
+        iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
+                                   POS(inode, rbio->bio.bi_iter.bi_sector),
+                                   BTREE_ITER_SLOTS);
+
+        while (1) {
                 BKEY_PADDED(k) tmp;
-                unsigned bytes;
+                unsigned bytes, sectors, offset_into_extent;
+
+                bch2_btree_iter_set_pos(iter,
+                                POS(inode, rbio->bio.bi_iter.bi_sector));
+
+                k = bch2_btree_iter_peek_slot(iter);
+                ret = bkey_err(k);
+                if (ret)
+                        goto err;
+
+                bkey_reassemble(&tmp.k, k);
+                k = bkey_i_to_s_c(&tmp.k);
+
+                offset_into_extent = iter->pos.offset -
+                        bkey_start_offset(k.k);
+                sectors = k.k->size - offset_into_extent;
+
+                ret = bch2_read_indirect_extent(&trans, iter,
+                                        &offset_into_extent, &tmp.k);
+                if (ret)
+                        goto err;
+
+                /*
+                 * With indirect extents, the amount of data to read is the min
+                 * of the original extent and the indirect extent:
+                 */
+                sectors = min(sectors, k.k->size - offset_into_extent);
 
                 /*
                  * Unlock the iterator while the btree node's lock is still in
                  * cache, before doing the IO:
                  */
-                bkey_reassemble(&tmp.k, k);
-                k = bkey_i_to_s_c(&tmp.k);
                 bch2_trans_unlock(&trans);
 
-                bytes = min_t(unsigned, rbio->bio.bi_iter.bi_size,
-                              (k.k->p.offset - rbio->bio.bi_iter.bi_sector) << 9);
+                bytes = min(sectors, bio_sectors(&rbio->bio)) << 9;
                 swap(rbio->bio.bi_iter.bi_size, bytes);
 
                 if (rbio->bio.bi_iter.bi_size == bytes)
                         flags |= BCH_READ_LAST_FRAGMENT;
 
-                bch2_read_extent(c, rbio, k, flags);
+                bch2_read_extent(c, rbio, k, offset_into_extent, flags);
 
                 if (flags & BCH_READ_LAST_FRAGMENT)
-                        return;
+                        break;
 
                 swap(rbio->bio.bi_iter.bi_size, bytes);
                 bio_advance(&rbio->bio, bytes);
         }
-
-        /*
-         * If we get here, it better have been because there was an error
-         * reading a btree node
-         */
-        BUG_ON(!ret);
-        bcache_io_error(c, &rbio->bio, "btree IO error: %i", ret);
-
+out:
         bch2_trans_exit(&trans);
+        return;
+err:
+        bcache_io_error(c, &rbio->bio, "btree IO error: %i", ret);
         bch2_rbio_done(rbio);
+        goto out;
 }
 
 void bch2_fs_io_exit(struct bch_fs *c)
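
The two read-path hunks above (bch2_read_retry() and bch2_read()) repeat the same pattern around the new bch2_read_indirect_extent() helper: compute how far the request lands inside the extent, let the helper swap a reflink_p key for the reflink_v it points at (updating offset_into_extent), then clamp the sector count to the minimum of the original and indirect extents and to what the bio still wants. The sketch below is not part of the commit; it restates that clamping arithmetic with simplified, hypothetical types (demo_extent, sectors_this_fragment) so the fragment-size calculation can be checked in isolation.

/* Illustrative only: simplified stand-ins for bcachefs key/iterator state. */
#include <assert.h>
#include <stdio.h>

struct demo_extent {
        unsigned long long start;       /* bkey_start_offset(k.k), in sectors */
        unsigned long long size;        /* k.k->size, in sectors */
};

/*
 * Sectors to read from one extent, mirroring the hunks in bch2_read():
 * iter_pos is where the request lands, bio_sectors is what the bio still
 * wants, indirect_size is how many sectors the resolved indirect extent
 * covers past that point (assumption for the demo).
 */
static unsigned sectors_this_fragment(struct demo_extent e,
                                      unsigned long long iter_pos,
                                      unsigned bio_sectors,
                                      unsigned indirect_size)
{
        unsigned offset_into_extent = (unsigned)(iter_pos - e.start);
        unsigned sectors = (unsigned)e.size - offset_into_extent;

        /*
         * After resolving reflink_p -> reflink_v, the readable span is the
         * min of the original extent and the indirect extent:
         */
        if (indirect_size < sectors)
                sectors = indirect_size;

        return sectors < bio_sectors ? sectors : bio_sectors;
}

int main(void)
{
        struct demo_extent e = { .start = 100, .size = 16 };

        /*
         * Request starts 4 sectors into the extent, wants 32 sectors, and
         * the indirect extent only covers 8 past that point: expect 8.
         */
        assert(sectors_this_fragment(e, 104, 32, 8) == 8);
        printf("fragment: %u sectors\n", sectors_this_fragment(e, 104, 32, 8));
        return 0;
}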