Diffstat (limited to 'libbcachefs/io.c')
-rw-r--r--	libbcachefs/io.c	213
1 file changed, 145 insertions, 68 deletions
diff --git a/libbcachefs/io.c b/libbcachefs/io.c
index 4d81b6e6..c5d9a0c5 100644
--- a/libbcachefs/io.c
+++ b/libbcachefs/io.c
@@ -259,6 +259,8 @@ int bch2_write_index_default(struct bch_write_op *op)
bch2_verify_keylist_sorted(keys);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 256);
+retry:
+ bch2_trans_begin(&trans);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
bkey_start_pos(&bch2_keylist_front(keys)->k),
@@ -269,7 +271,9 @@ int bch2_write_index_default(struct bch_write_op *op)
bkey_copy(&split.k, bch2_keylist_front(keys));
- bch2_extent_trim_atomic(&split.k, iter);
+ ret = bch2_extent_trim_atomic(&split.k, iter);
+ if (ret)
+ break;
bch2_trans_update(&trans,
BTREE_INSERT_ENTRY(iter, &split.k));
@@ -286,6 +290,11 @@ int bch2_write_index_default(struct bch_write_op *op)
bch2_keylist_pop_front(keys);
} while (!bch2_keylist_empty(keys));
+ if (ret == -EINTR) {
+ ret = 0;
+ goto retry;
+ }
+
bch2_trans_exit(&trans);
return ret;
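The hunks above add a transaction restart loop to bch2_write_index_default(): bch2_trans_begin() resets the transaction at the retry label, a failed bch2_extent_trim_atomic() breaks out of the keylist loop, and a commit that returns -EINTR (a lock restart) clears the error and jumps back to retry. A minimal standalone sketch of that control flow follows; the stub types and functions (sketch_trans, trans_begin, trans_commit) are placeholders, not the bcachefs API.

/* Sketch only: stand-ins for the btree transaction API. */
#include <errno.h>
#include <stdio.h>

struct sketch_trans { int attempts; };

static void trans_begin(struct sketch_trans *t)
{
	/* like bch2_trans_begin(): reset iterators/locks for a fresh attempt */
}

static int trans_commit(struct sketch_trans *t)
{
	/* pretend the first attempt hits a lock restart */
	return t->attempts++ < 1 ? -EINTR : 0;
}

static int write_index(struct sketch_trans *t)
{
	int ret;
retry:
	trans_begin(t);
	ret = trans_commit(t);		/* insert keys, commit */
	if (ret == -EINTR) {		/* lock restart: redo the whole transaction */
		ret = 0;
		goto retry;
	}
	return ret;
}

int main(void)
{
	struct sketch_trans t = { 0 };
	printf("ret=%d attempts=%d\n", write_index(&t), t.attempts);
	return 0;
}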
@@ -426,7 +435,7 @@ static void init_append_extent(struct bch_write_op *op,
p.ptr.cached = !ca->mi.durability ||
(op->flags & BCH_WRITE_CACHED) != 0;
p.ptr.offset += ca->mi.bucket_size - ob->sectors_free;
- bch2_extent_ptr_decoded_append(e, &p);
+ bch2_extent_ptr_decoded_append(&e->k_i, &p);
BUG_ON(crc.compressed_size > ob->sectors_free);
ob->sectors_free -= crc.compressed_size;
@@ -954,17 +963,13 @@ static inline bool should_promote(struct bch_fs *c, struct bkey_s_c k,
struct bch_io_opts opts,
unsigned flags)
{
- if (!bkey_extent_is_data(k.k))
- return false;
-
if (!(flags & BCH_READ_MAY_PROMOTE))
return false;
if (!opts.promote_target)
return false;
- if (bch2_extent_has_target(c, bkey_s_c_to_extent(k),
- opts.promote_target))
+ if (bch2_bkey_has_target(c, k, opts.promote_target))
return false;
if (bch2_target_congested(c, opts.promote_target)) {
@@ -1028,6 +1033,7 @@ static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
noinline
static struct promote_op *__promote_alloc(struct bch_fs *c,
+ enum btree_id btree_id,
struct bpos pos,
struct extent_ptr_decoded *pick,
struct bch_io_opts opts,
@@ -1084,6 +1090,7 @@ static struct promote_op *__promote_alloc(struct bch_fs *c,
(struct data_opts) {
.target = opts.promote_target
},
+ btree_id,
bkey_s_c_null);
BUG_ON(ret);
@@ -1121,7 +1128,11 @@ static inline struct promote_op *promote_alloc(struct bch_fs *c,
if (!should_promote(c, k, pos, opts, flags))
return NULL;
- promote = __promote_alloc(c, pos, pick, opts, sectors, rbio);
+ promote = __promote_alloc(c,
+ k.k->type == KEY_TYPE_reflink_v
+ ? BTREE_ID_REFLINK
+ : BTREE_ID_EXTENTS,
+ pos, pick, opts, sectors, rbio);
if (!promote)
return NULL;
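With reflink, the key being promoted may come from the reflink btree rather than the extents btree, so promote_alloc() now passes the appropriate btree id down to the data-update path, as the hunk above shows. A small sketch of that selection; the enum names here are stand-ins, not the bcachefs definitions.

/* Sketch only: which btree a promote should rewrite, chosen by key type. */
enum sketch_btree_id { SKETCH_BTREE_EXTENTS, SKETCH_BTREE_REFLINK };
enum sketch_key_type { SKETCH_KEY_extent, SKETCH_KEY_reflink_v };

static enum sketch_btree_id promote_btree_id(enum sketch_key_type type)
{
	/* an indirect extent (reflink_v) lives in the reflink btree */
	return type == SKETCH_KEY_reflink_v ? SKETCH_BTREE_REFLINK
					    : SKETCH_BTREE_EXTENTS;
}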
@@ -1222,17 +1233,16 @@ retry:
k = bkey_i_to_s_c(&tmp.k);
bch2_trans_unlock(&trans);
- if (!bkey_extent_is_data(k.k) ||
- !bch2_extent_matches_ptr(c, bkey_i_to_s_c_extent(&tmp.k),
- rbio->pick.ptr,
- rbio->pos.offset -
- rbio->pick.crc.offset)) {
+ if (!bch2_bkey_matches_ptr(c, bkey_i_to_s_c(&tmp.k),
+ rbio->pick.ptr,
+ rbio->pos.offset -
+ rbio->pick.crc.offset)) {
/* extent we wanted to read no longer exists: */
rbio->hole = true;
goto out;
}
- ret = __bch2_read_extent(c, rbio, bvec_iter, k, failed, flags);
+ ret = __bch2_read_extent(c, rbio, bvec_iter, k, 0, failed, flags);
if (ret == READ_RETRY)
goto retry;
if (ret)
@@ -1255,26 +1265,40 @@ static void bch2_read_retry(struct bch_fs *c, struct bch_read_bio *rbio,
struct bkey_s_c k;
int ret;
- bch2_trans_init(&trans, c, 0, 0);
-
flags &= ~BCH_READ_LAST_FRAGMENT;
flags |= BCH_READ_MUST_CLONE;
+
+ bch2_trans_init(&trans, c, 0, 0);
retry:
+ bch2_trans_begin(&trans);
+
for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS,
POS(inode, bvec_iter.bi_sector),
BTREE_ITER_SLOTS, k, ret) {
BKEY_PADDED(k) tmp;
- unsigned bytes;
+ unsigned bytes, sectors, offset_into_extent;
bkey_reassemble(&tmp.k, k);
k = bkey_i_to_s_c(&tmp.k);
+
+ offset_into_extent = iter->pos.offset -
+ bkey_start_offset(k.k);
+ sectors = k.k->size - offset_into_extent;
+
+ ret = bch2_read_indirect_extent(&trans, iter,
+ &offset_into_extent, &tmp.k);
+ if (ret)
+ break;
+
+ sectors = min(sectors, k.k->size - offset_into_extent);
+
bch2_trans_unlock(&trans);
- bytes = min_t(unsigned, bvec_iter.bi_size,
- (k.k->p.offset - bvec_iter.bi_sector) << 9);
+ bytes = min(sectors, bvec_iter_sectors(bvec_iter)) << 9;
swap(bvec_iter.bi_size, bytes);
- ret = __bch2_read_extent(c, rbio, bvec_iter, k, failed, flags);
+ ret = __bch2_read_extent(c, rbio, bvec_iter, k,
+ offset_into_extent, failed, flags);
switch (ret) {
case READ_RETRY:
goto retry;
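In this retry path (and again in bch2_read() further down), the read now computes how far into the extent the request starts and how many sectors remain, lets bch2_read_indirect_extent() (added later in this patch) swap in the indirect extent, then clamps the length again before shifting by 9 to get bytes. A standalone sketch of just that arithmetic, with plain integers instead of bkeys and made-up example values:

#include <stdio.h>

static unsigned min_u(unsigned a, unsigned b) { return a < b ? a : b; }

/*
 * Sectors to read this iteration:
 *  - what is left of the original extent past the request offset,
 *  - clamped to what is left of the indirect extent (if the reflink
 *    lookup swapped one in and moved offset_into_extent),
 *  - clamped to what the bio still wants.
 */
static unsigned read_sectors(unsigned orig_size, unsigned orig_offset,
			     unsigned ind_size, unsigned ind_offset,
			     unsigned bio_sectors)
{
	unsigned sectors = orig_size - orig_offset;

	sectors = min_u(sectors, ind_size - ind_offset);
	return min_u(sectors, bio_sectors);
}

int main(void)
{
	/* 16-sector extent, read starts 4 sectors in, the indirect extent has
	 * 6 sectors left at that point, the bio wants 32 sectors: reads 6. */
	printf("%u sectors\n", read_sectors(16, 4, 10, 4, 32));
	return 0;
}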
@@ -1355,7 +1379,6 @@ static void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio)
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
- struct bkey_i_extent *e;
BKEY_PADDED(k) new;
struct bch_extent_crc_unpacked new_crc;
u64 data_offset = rbio->pos.offset - rbio->pick.crc.offset;
@@ -1374,34 +1397,30 @@ retry:
if (IS_ERR_OR_NULL(k.k))
goto out;
- if (!bkey_extent_is_data(k.k))
- goto out;
-
bkey_reassemble(&new.k, k);
- e = bkey_i_to_extent(&new.k);
+ k = bkey_i_to_s_c(&new.k);
- if (!bch2_extent_matches_ptr(c, extent_i_to_s_c(e),
- rbio->pick.ptr, data_offset) ||
- bversion_cmp(e->k.version, rbio->version))
+ if (bversion_cmp(k.k->version, rbio->version) ||
+ !bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, data_offset))
goto out;
/* Extent was merged? */
- if (bkey_start_offset(&e->k) < data_offset ||
- e->k.p.offset > data_offset + rbio->pick.crc.uncompressed_size)
+ if (bkey_start_offset(k.k) < data_offset ||
+ k.k->p.offset > data_offset + rbio->pick.crc.uncompressed_size)
goto out;
if (bch2_rechecksum_bio(c, &rbio->bio, rbio->version,
rbio->pick.crc, NULL, &new_crc,
- bkey_start_offset(&e->k) - data_offset, e->k.size,
+ bkey_start_offset(k.k) - data_offset, k.k->size,
rbio->pick.crc.csum_type)) {
bch_err(c, "error verifying existing checksum while narrowing checksum (memory corruption?)");
goto out;
}
- if (!bch2_extent_narrow_crcs(e, new_crc))
+ if (!bch2_bkey_narrow_crcs(&new.k, new_crc))
goto out;
- bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &e->k_i));
+ bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &new.k));
ret = bch2_trans_commit(&trans, NULL, NULL,
BTREE_INSERT_ATOMIC|
BTREE_INSERT_NOFAIL|
@@ -1412,15 +1431,6 @@ out:
bch2_trans_exit(&trans);
}
-static bool should_narrow_crcs(struct bkey_s_c k,
- struct extent_ptr_decoded *pick,
- unsigned flags)
-{
- return !(flags & BCH_READ_IN_RETRY) &&
- bkey_extent_is_data(k.k) &&
- bch2_can_narrow_extent_crcs(bkey_s_c_to_extent(k), pick->crc);
-}
-
/* Inner part that may run in process context */
static void __bch2_read_endio(struct work_struct *work)
{
@@ -1455,7 +1465,7 @@ static void __bch2_read_endio(struct work_struct *work)
goto nodecode;
/* Adjust crc to point to subset of data we want: */
- crc.offset += rbio->bvec_iter.bi_sector - rbio->pos.offset;
+ crc.offset += rbio->offset_into_extent;
crc.live_size = bvec_iter_sectors(rbio->bvec_iter);
if (crc.compression_type != BCH_COMPRESSION_NONE) {
@@ -1564,8 +1574,51 @@ static void bch2_read_endio(struct bio *bio)
bch2_rbio_punt(rbio, __bch2_read_endio, context, wq);
}
+int bch2_read_indirect_extent(struct btree_trans *trans,
+ struct btree_iter *extent_iter,
+ unsigned *offset_into_extent,
+ struct bkey_i *orig_k)
+{
+ struct btree_iter *iter;
+ struct bkey_s_c k;
+ u64 reflink_offset;
+ int ret;
+
+ if (orig_k->k.type != KEY_TYPE_reflink_p)
+ return 0;
+
+ reflink_offset = le64_to_cpu(bkey_i_to_reflink_p(orig_k)->v.idx) +
+ *offset_into_extent;
+
+ iter = __bch2_trans_get_iter(trans, BTREE_ID_REFLINK,
+ POS(0, reflink_offset),
+ BTREE_ITER_SLOTS, 1);
+ ret = PTR_ERR_OR_ZERO(iter);
+ if (ret)
+ return ret;
+
+ k = bch2_btree_iter_peek_slot(iter);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ if (k.k->type != KEY_TYPE_reflink_v) {
+ __bcache_io_error(trans->c,
+ "pointer to nonexistent indirect extent");
+ ret = -EIO;
+ goto err;
+ }
+
+ *offset_into_extent = iter->pos.offset - bkey_start_offset(k.k);
+ bkey_reassemble(orig_k, k);
+err:
+ bch2_trans_iter_put(trans, iter);
+ return ret;
+}
+
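bch2_read_indirect_extent() above is the piece that makes reflink reads work: a KEY_TYPE_reflink_p key in the extents btree carries an index (v.idx) into the reflink btree; the function looks up that slot, expects a KEY_TYPE_reflink_v (indirect extent) there, replaces the caller's key with it, and recomputes offset_into_extent relative to the indirect extent. A simplified userspace model of that indirection follows; the types and the flat array standing in for the reflink btree are made up for illustration.

/* Sketch: resolving a reflink pointer against a flat "reflink btree". */
#include <stdio.h>

struct sketch_reflink_v {			/* indirect extent */
	unsigned long long start;		/* position in the reflink btree */
	unsigned size;				/* sectors */
	const char *data_ref;			/* stand-in for pointers/crcs */
};

struct sketch_reflink_p {			/* reflink pointer */
	unsigned long long idx;			/* start index in the reflink btree */
};

static const struct sketch_reflink_v reflink_tree[] = {
	{ .start = 0,   .size = 128, .data_ref = "indirect extent A" },
	{ .start = 128, .size = 64,  .data_ref = "indirect extent B" },
};

static const struct sketch_reflink_v *lookup_indirect(unsigned long long pos)
{
	for (unsigned i = 0; i < sizeof(reflink_tree) / sizeof(reflink_tree[0]); i++)
		if (pos >= reflink_tree[i].start &&
		    pos <  reflink_tree[i].start + reflink_tree[i].size)
			return &reflink_tree[i];
	return NULL;				/* -EIO in the real code */
}

int main(void)
{
	struct sketch_reflink_p p = { .idx = 128 };
	unsigned offset_into_extent = 10;	/* offset within the reflink_p extent */

	unsigned long long pos = p.idx + offset_into_extent;
	const struct sketch_reflink_v *v = lookup_indirect(pos);

	if (!v) {
		fprintf(stderr, "pointer to nonexistent indirect extent\n");
		return 1;
	}

	/* continue the read against the indirect extent instead */
	offset_into_extent = (unsigned)(pos - v->start);
	printf("read from %s at offset %u\n", v->data_ref, offset_into_extent);
	return 0;
}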
int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
struct bvec_iter iter, struct bkey_s_c k,
+ unsigned offset_into_extent,
struct bch_io_failures *failed, unsigned flags)
{
struct extent_ptr_decoded pick;
@@ -1598,7 +1651,6 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
if (pick.crc.compressed_size > orig->bio.bi_vcnt * PAGE_SECTORS)
goto hole;
- iter.bi_sector = pos.offset;
iter.bi_size = pick.crc.compressed_size << 9;
goto noclone;
}
@@ -1607,13 +1659,13 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
bio_flagged(&orig->bio, BIO_CHAIN))
flags |= BCH_READ_MUST_CLONE;
- narrow_crcs = should_narrow_crcs(k, &pick, flags);
+ narrow_crcs = !(flags & BCH_READ_IN_RETRY) &&
+ bch2_can_narrow_extent_crcs(k, pick.crc);
if (narrow_crcs && (flags & BCH_READ_USER_MAPPED))
flags |= BCH_READ_MUST_BOUNCE;
- EBUG_ON(bkey_start_offset(k.k) > iter.bi_sector ||
- k.k->p.offset < bvec_iter_end_sector(iter));
+ BUG_ON(offset_into_extent + bvec_iter_sectors(iter) > k.k->size);
if (pick.crc.compression_type != BCH_COMPRESSION_NONE ||
(pick.crc.csum_type != BCH_CSUM_NONE &&
@@ -1634,15 +1686,17 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
(bvec_iter_sectors(iter) != pick.crc.uncompressed_size ||
bvec_iter_sectors(iter) != pick.crc.live_size ||
pick.crc.offset ||
- iter.bi_sector != pos.offset));
+ offset_into_extent));
+ pos.offset += offset_into_extent;
pick.ptr.offset += pick.crc.offset +
- (iter.bi_sector - pos.offset);
+ offset_into_extent;
+ offset_into_extent = 0;
pick.crc.compressed_size = bvec_iter_sectors(iter);
pick.crc.uncompressed_size = bvec_iter_sectors(iter);
pick.crc.offset = 0;
pick.crc.live_size = bvec_iter_sectors(iter);
- pos.offset = iter.bi_sector;
+ offset_into_extent = 0;
}
if (rbio) {
@@ -1697,6 +1751,7 @@ noclone:
else
rbio->end_io = orig->bio.bi_end_io;
rbio->bvec_iter = iter;
+	rbio->offset_into_extent = offset_into_extent;
rbio->flags = flags;
rbio->have_ioref = pick_ret > 0 && bch2_dev_get_ioref(ca, READ);
rbio->narrow_crcs = narrow_crcs;
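Because a reflink read no longer has the bio's sector and the data key's position in the same keyspace (the data key lives at a reflink-btree offset, the bio at a file offset), the read bio now carries offset_into_extent explicitly, and __bch2_read_endio() uses it to adjust crc.offset instead of deriving it from bi_sector minus the read position. A tiny sketch of the old and new expressions for a plain (non-reflink) extent, where the two are assumed to coincide; the values are made up.

#include <assert.h>

int main(void)
{
	unsigned long long extent_start = 100;	/* extent start, file sectors */
	unsigned long long bi_sector    = 104;	/* where the bio starts reading */
	unsigned offset_into_extent     = 4;	/* carried on the rbio in the new code */

	/* old: derived from the bio position relative to the extent */
	unsigned old_adj = (unsigned)(bi_sector - extent_start);
	/* new: taken from rbio->offset_into_extent, which stays correct when
	 * the data key actually lives in the reflink btree */
	unsigned new_adj = offset_into_extent;

	assert(old_adj == new_adj);
	return 0;
}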
@@ -1815,45 +1870,67 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode)
rbio->c = c;
rbio->start_time = local_clock();
- for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS,
- POS(inode, rbio->bio.bi_iter.bi_sector),
- BTREE_ITER_SLOTS, k, ret) {
+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
+ POS(inode, rbio->bio.bi_iter.bi_sector),
+ BTREE_ITER_SLOTS);
+
+ while (1) {
BKEY_PADDED(k) tmp;
- unsigned bytes;
+ unsigned bytes, sectors, offset_into_extent;
+
+ bch2_btree_iter_set_pos(iter,
+ POS(inode, rbio->bio.bi_iter.bi_sector));
+
+ k = bch2_btree_iter_peek_slot(iter);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ bkey_reassemble(&tmp.k, k);
+ k = bkey_i_to_s_c(&tmp.k);
+
+ offset_into_extent = iter->pos.offset -
+ bkey_start_offset(k.k);
+ sectors = k.k->size - offset_into_extent;
+
+ ret = bch2_read_indirect_extent(&trans, iter,
+ &offset_into_extent, &tmp.k);
+ if (ret)
+ goto err;
+
+ /*
+ * With indirect extents, the amount of data to read is the min
+ * of the original extent and the indirect extent:
+ */
+ sectors = min(sectors, k.k->size - offset_into_extent);
/*
* Unlock the iterator while the btree node's lock is still in
* cache, before doing the IO:
*/
- bkey_reassemble(&tmp.k, k);
- k = bkey_i_to_s_c(&tmp.k);
bch2_trans_unlock(&trans);
- bytes = min_t(unsigned, rbio->bio.bi_iter.bi_size,
- (k.k->p.offset - rbio->bio.bi_iter.bi_sector) << 9);
+ bytes = min(sectors, bio_sectors(&rbio->bio)) << 9;
swap(rbio->bio.bi_iter.bi_size, bytes);
if (rbio->bio.bi_iter.bi_size == bytes)
flags |= BCH_READ_LAST_FRAGMENT;
- bch2_read_extent(c, rbio, k, flags);
+ bch2_read_extent(c, rbio, k, offset_into_extent, flags);
if (flags & BCH_READ_LAST_FRAGMENT)
- return;
+ break;
swap(rbio->bio.bi_iter.bi_size, bytes);
bio_advance(&rbio->bio, bytes);
}
-
- /*
- * If we get here, it better have been because there was an error
- * reading a btree node
- */
- BUG_ON(!ret);
- bcache_io_error(c, &rbio->bio, "btree IO error: %i", ret);
-
+out:
bch2_trans_exit(&trans);
+ return;
+err:
+ bcache_io_error(c, &rbio->bio, "btree IO error: %i", ret);
bch2_rbio_done(rbio);
+ goto out;
}
void bch2_fs_io_exit(struct bch_fs *c)
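The rewritten bch2_read() loop above drops for_each_btree_key() for an explicit peek-slot loop, so it can re-position the iterator each pass, resolve indirect extents before issuing IO, and clamp each fragment to both extents and the remaining bio. A compressed standalone sketch of that loop shape follows; everything here is schematic (8-sector stub extents, no reflink lookup, no locking or error handling), not the bcachefs API.

#include <stdbool.h>
#include <stdio.h>

struct sketch_req {
	unsigned long long sector;	/* current file sector */
	unsigned sectors_left;		/* sectors still wanted by the bio */
};

/* stub for "peek the slot covering this sector": aligned 8-sector extents */
static void peek_extent(unsigned long long sector,
			unsigned long long *start, unsigned *size)
{
	*start = sector & ~7ULL;
	*size = 8;
}

static void sketch_read(struct sketch_req *req)
{
	while (1) {
		unsigned long long start;
		unsigned size, offset_into_extent, sectors;

		/* re-position and peek the extent covering the current sector */
		peek_extent(req->sector, &start, &size);

		offset_into_extent = (unsigned)(req->sector - start);
		sectors = size - offset_into_extent;

		/* (the reflink lookup would go here and could shrink sectors) */

		if (sectors > req->sectors_left)
			sectors = req->sectors_left;

		printf("read %u sectors at extent offset %u\n",
		       sectors, offset_into_extent);

		bool last_fragment = sectors == req->sectors_left;
		if (last_fragment)
			break;

		/* advance the bio and go around again */
		req->sector += sectors;
		req->sectors_left -= sectors;
	}
}

int main(void)
{
	struct sketch_req req = { .sector = 5, .sectors_left = 20 };
	sketch_read(&req);
	return 0;
}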