Diffstat (limited to 'libbcachefs/io.c')

 libbcachefs/io.c | 168 ++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 127 insertions(+), 41 deletions(-)
diff --git a/libbcachefs/io.c b/libbcachefs/io.c
index e3ef662e..ca891b52 100644
--- a/libbcachefs/io.c
+++ b/libbcachefs/io.c
@@ -8,6 +8,7 @@
#include "bcachefs.h"
#include "alloc_foreground.h"
+#include "bkey_on_stack.h"
#include "bset.h"
#include "btree_update.h"
#include "buckets.h"
@@ -18,7 +19,7 @@
#include "disk_groups.h"
#include "ec.h"
#include "error.h"
-#include "extents.h"
+#include "extent_update.h"
#include "inode.h"
#include "io.h"
#include "journal.h"
@@ -191,8 +192,8 @@ static int sum_sector_overwrites(struct btree_trans *trans,
for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, old, ret) {
if (!may_allocate &&
- bch2_bkey_nr_ptrs_allocated(old) <
- bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(new))) {
+ bch2_bkey_nr_ptrs_fully_allocated(old) <
+ bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(new))) {
ret = -ENOSPC;
break;
}
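
The renamed helpers tighten the -ENOSPC check above: when the write is not allowed to allocate, an overwrite may only proceed if the key being overwritten already carries at least as many fully written pointers as the new key wants. A minimal sketch of the guard, using a toy key type with precomputed counts in place of the real pointer-walking helpers:

    #include <errno.h>
    #include <stdbool.h>

    /* Toy model of a key: just the two counts the check cares about.
     * (Illustrative only; the real helpers walk the extent's pointers.) */
    struct toy_key {
        unsigned nr_ptrs_fully_allocated;  /* pointers fully written out */
        unsigned nr_ptrs;                  /* pointers the key carries */
    };

    static int check_overwrite(const struct toy_key *old,
                               const struct toy_key *new,
                               bool may_allocate)
    {
        /* Without a reservation, every pointer the new key needs must
         * already be backed by a fully allocated one in the old key: */
        if (!may_allocate &&
            old->nr_ptrs_fully_allocated < new->nr_ptrs)
            return -ENOSPC;
        return 0;
    }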
@@ -334,7 +335,7 @@ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter,
/* create the biggest key we can */
bch2_key_resize(&delete.k, max_sectors);
- bch2_cut_back(end, &delete.k);
+ bch2_cut_back(end, &delete);
bch2_trans_begin_updates(trans);
@@ -384,12 +385,14 @@ int bch2_fpunch(struct bch_fs *c, u64 inum, u64 start, u64 end,
int bch2_write_index_default(struct bch_write_op *op)
{
struct bch_fs *c = op->c;
+ struct bkey_on_stack sk;
struct keylist *keys = &op->insert_keys;
struct bkey_i *k = bch2_keylist_front(keys);
struct btree_trans trans;
struct btree_iter *iter;
int ret;
+ bkey_on_stack_init(&sk);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
@@ -397,13 +400,15 @@ int bch2_write_index_default(struct bch_write_op *op)
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
do {
- BKEY_PADDED(k) tmp;
+ k = bch2_keylist_front(keys);
- bkey_copy(&tmp.k, bch2_keylist_front(keys));
+ bkey_on_stack_realloc(&sk, c, k->k.u64s);
+ bkey_copy(sk.k, k);
+ bch2_cut_front(iter->pos, sk.k);
bch2_trans_begin_updates(&trans);
- ret = bch2_extent_update(&trans, iter, &tmp.k,
+ ret = bch2_extent_update(&trans, iter, sk.k,
&op->res, op_journal_seq(op),
op->new_i_size, &op->i_sectors_delta);
if (ret == -EINTR)
@@ -411,13 +416,12 @@ int bch2_write_index_default(struct bch_write_op *op)
if (ret)
break;
- if (bkey_cmp(iter->pos, bch2_keylist_front(keys)->k.p) < 0)
- bch2_cut_front(iter->pos, bch2_keylist_front(keys));
- else
+ if (bkey_cmp(iter->pos, k->k.p) >= 0)
bch2_keylist_pop_front(keys);
} while (!bch2_keylist_empty(keys));
bch2_trans_exit(&trans);
+ bkey_on_stack_exit(&sk, c);
return ret;
}
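
This function, like the read paths later in the patch, replaces the fixed-size BKEY_PADDED(k) stack buffer with bkey_on_stack, which follows a consistent lifecycle everywhere it appears: bkey_on_stack_init() before use, bkey_on_stack_realloc() to guarantee capacity before each copy, and bkey_on_stack_exit() when done. A minimal model of such a small-buffer key holder, with illustrative names and sizes rather than the real bcachefs definitions (the real realloc and exit helpers also take the struct bch_fs, as the calls above show):

    #include <stdlib.h>

    #define INLINE_U64S 12            /* illustrative inline capacity */

    struct key_on_stack {
        unsigned long long *k;        /* points at onstack or the heap */
        size_t size;                  /* current capacity, in u64s */
        unsigned long long onstack[INLINE_U64S];
    };

    static void key_on_stack_init(struct key_on_stack *s)
    {
        s->k = s->onstack;
        s->size = INLINE_U64S;
    }

    static void key_on_stack_exit(struct key_on_stack *s)
    {
        if (s->k != s->onstack)
            free(s->k);
        s->k = s->onstack;
        s->size = INLINE_U64S;
    }

    /* Grow only when the key will not fit; small keys never allocate. */
    static void key_on_stack_realloc(struct key_on_stack *s, size_t u64s)
    {
        if (u64s <= s->size)
            return;
        key_on_stack_exit(s);         /* drop any previous heap buffer */
        s->k = malloc(u64s * sizeof(*s->k));  /* OOM handling omitted */
        s->size = u64s;
    }

The point of the pattern is visible in the loop above: most keys fit the inline buffer, so the common case costs nothing, while oversized keys no longer risk overflowing a fixed BKEY_PADDED buffer.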
@@ -519,16 +523,19 @@ static void __bch2_write_index(struct bch_write_op *op)
for (src = keys->keys; src != keys->top; src = n) {
n = bkey_next(src);
- bkey_copy(dst, src);
- bch2_bkey_drop_ptrs(bkey_i_to_s(dst), ptr,
- test_bit(ptr->dev, op->failed.d));
+ if (bkey_extent_is_direct_data(&src->k)) {
+ bch2_bkey_drop_ptrs(bkey_i_to_s(src), ptr,
+ test_bit(ptr->dev, op->failed.d));
- if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(dst))) {
- ret = -EIO;
- goto err;
+ if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(src))) {
+ ret = -EIO;
+ goto err;
+ }
}
+ if (dst != src)
+ memmove_u64s_down(dst, src, src->u64s);
dst = bkey_next(dst);
}
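
bch2_bkey_drop_ptrs() shrinks a key in place, so once one key in the packed keylist loses a pointer, every later key must slide down over the gap; that is what the dst/src walk with memmove_u64s_down() does, replacing the old approach of copying every key into a second buffer. A standalone sketch of the same compaction idiom over packed variable-length records:

    #include <string.h>

    typedef unsigned long long u64;

    /* Packed variable-length records laid end to end, as in a bcachefs
     * keylist: each record begins with its own length in u64s.
     * Illustrative layout, not the real struct bkey_i. */
    struct rec {
        u64 u64s;                     /* total record length, in u64s */
        /* payload follows */
    };

    static struct rec *rec_next(struct rec *r)
    {
        return (struct rec *)((u64 *)r + r->u64s);
    }

    /* Stand-in for bch2_bkey_drop_ptrs(): trims one trailing u64 from
     * records longer than the header, to exercise the compaction. */
    static void shrink_in_place(struct rec *r)
    {
        if (r->u64s > 1)
            r->u64s--;
    }

    static struct rec *compact(struct rec *start, struct rec *top)
    {
        struct rec *src, *dst = start, *n;

        for (src = start; src != top; src = n) {
            n = rec_next(src);        /* next record, at its old offset */
            shrink_in_place(src);
            if (dst != src)           /* memmove: ranges may overlap */
                memmove(dst, src, src->u64s * sizeof(u64));
            dst = rec_next(dst);
        }
        return dst;                   /* new, possibly lower, top */
    }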
@@ -1086,7 +1093,7 @@ again:
bio->bi_end_io = bch2_write_endio;
bio->bi_private = &op->cl;
- bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+ bio->bi_opf |= REQ_OP_WRITE;
if (!skip_put)
closure_get(bio->bi_private);
@@ -1123,6 +1130,47 @@ flush_io:
goto again;
}
+static void bch2_write_data_inline(struct bch_write_op *op, unsigned data_len)
+{
+ struct closure *cl = &op->cl;
+ struct bio *bio = &op->wbio.bio;
+ struct bvec_iter iter;
+ struct bkey_i_inline_data *id;
+ unsigned sectors;
+ int ret;
+
+ ret = bch2_keylist_realloc(&op->insert_keys, op->inline_keys,
+ ARRAY_SIZE(op->inline_keys),
+ BKEY_U64s + DIV_ROUND_UP(data_len, 8));
+ if (ret) {
+ op->error = ret;
+ goto err;
+ }
+
+ sectors = bio_sectors(bio);
+ op->pos.offset += sectors;
+
+ id = bkey_inline_data_init(op->insert_keys.top);
+ id->k.p = op->pos;
+ id->k.version = op->version;
+ id->k.size = sectors;
+
+ iter = bio->bi_iter;
+ iter.bi_size = data_len;
+ memcpy_from_bio(id->v.data, bio, iter);
+
+ while (data_len & 7)
+ id->v.data[data_len++] = '\0';
+ set_bkey_val_bytes(&id->k, data_len);
+ bch2_keylist_push(&op->insert_keys);
+
+ op->flags |= BCH_WRITE_WROTE_DATA_INLINE;
+ continue_at_nobarrier(cl, bch2_write_index, NULL);
+ return;
+err:
+ bch2_write_done(&op->cl);
+}
+
/**
* bch_write - handle a write to a cache device or flash only volume
*
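
The new bch2_write_data_inline() above stores a small write's payload directly in the key's value instead of allocating an extent on disk. Key values are sized in u64s, hence the keylist reservation of BKEY_U64s + DIV_ROUND_UP(data_len, 8) and the loop zero-padding the payload to an 8-byte boundary before set_bkey_val_bytes(). A small standalone demonstration of that size arithmetic:

    #include <stdio.h>

    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    /* Zero-pad a payload to the next 8-byte boundary, as the
     * `while (data_len & 7)` loop in bch2_write_data_inline() does. */
    static unsigned pad_inline(unsigned char *data, unsigned data_len)
    {
        while (data_len & 7)
            data[data_len++] = '\0';
        return data_len;
    }

    int main(void)
    {
        unsigned char buf[16] = "hello, inline";   /* 13 payload bytes */
        unsigned val_bytes = pad_inline(buf, 13);

        /* 13 bytes round up to 16 value bytes, i.e. 2 u64s: */
        printf("val bytes: %u, val u64s: %u\n",
               val_bytes, DIV_ROUND_UP(13u, 8u));
        return 0;
    }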
@@ -1144,22 +1192,22 @@ void bch2_write(struct closure *cl)
struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
struct bio *bio = &op->wbio.bio;
struct bch_fs *c = op->c;
+ unsigned data_len;
BUG_ON(!op->nr_replicas);
BUG_ON(!op->write_point.v);
BUG_ON(!bkey_cmp(op->pos, POS_MAX));
+ op->start_time = local_clock();
+ bch2_keylist_init(&op->insert_keys, op->inline_keys);
+ wbio_init(bio)->put_bio = false;
+
if (bio_sectors(bio) & (c->opts.block_size - 1)) {
__bcache_io_error(c, "misaligned write");
op->error = -EIO;
goto err;
}
- op->start_time = local_clock();
-
- bch2_keylist_init(&op->insert_keys, op->inline_keys);
- wbio_init(bio)->put_bio = false;
-
if (c->opts.nochanges ||
!percpu_ref_tryget(&c->writes)) {
__bcache_io_error(c, "read only");
@@ -1169,12 +1217,25 @@ void bch2_write(struct closure *cl)
bch2_increment_clock(c, bio_sectors(bio), WRITE);
+ data_len = min_t(u64, bio->bi_iter.bi_size,
+ op->new_i_size - (op->pos.offset << 9));
+
+ if (data_len <= min(block_bytes(c) / 2, 1024U)) {
+ bch2_write_data_inline(op, data_len);
+ return;
+ }
+
continue_at_nobarrier(cl, __bch2_write, NULL);
return;
err:
if (!(op->flags & BCH_WRITE_NOPUT_RESERVATION))
bch2_disk_reservation_put(c, &op->res);
- closure_return(cl);
+ if (op->end_io)
+ op->end_io(op);
+ if (cl->parent)
+ closure_return(cl);
+ else
+ closure_debug_destroy(cl);
}
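
Two behavioral changes land in this hunk. First, sufficiently small writes are diverted to the inline path: data_len is the payload clamped to the bytes that actually extend the file (op->new_i_size minus the byte offset of op->pos, whose .offset counts 512-byte sectors, hence the << 9), and the cutoff is half a block, capped at 1024 bytes. A sketch of the cutoff, assuming block_bytes is the filesystem block size in bytes:

    #include <stdbool.h>

    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    /* Mirrors the new check in bch2_write(): inline the write when the
     * payload fits in half a block, never more than 1024 bytes. */
    static bool write_is_inline(unsigned data_len, unsigned block_bytes)
    {
        return data_len <= MIN(block_bytes / 2, 1024u);
    }

So with 4096-byte blocks the inline cutoff is 1024 bytes, and with 512-byte blocks it drops to 256. Second, the error path no longer unconditionally calls closure_return(): it first invokes the op's end_io callback when one is set, then returns to the parent closure if there is one and otherwise tears the closure down with closure_debug_destroy().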
/* Cache promotion on read */
@@ -1456,13 +1517,14 @@ static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio
{
struct btree_trans trans;
struct btree_iter *iter;
- BKEY_PADDED(k) tmp;
+ struct bkey_on_stack sk;
struct bkey_s_c k;
int ret;
flags &= ~BCH_READ_LAST_FRAGMENT;
flags |= BCH_READ_MUST_CLONE;
+ bkey_on_stack_init(&sk);
bch2_trans_init(&trans, c, 0, 0);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
@@ -1474,11 +1536,12 @@ retry:
if (bkey_err(k))
goto err;
- bkey_reassemble(&tmp.k, k);
- k = bkey_i_to_s_c(&tmp.k);
+ bkey_on_stack_realloc(&sk, c, k.k->u64s);
+ bkey_reassemble(sk.k, k);
+ k = bkey_i_to_s_c(sk.k);
bch2_trans_unlock(&trans);
- if (!bch2_bkey_matches_ptr(c, bkey_i_to_s_c(&tmp.k),
+ if (!bch2_bkey_matches_ptr(c, k,
rbio->pick.ptr,
rbio->pos.offset -
rbio->pick.crc.offset)) {
@@ -1495,6 +1558,7 @@ retry:
out:
bch2_rbio_done(rbio);
bch2_trans_exit(&trans);
+ bkey_on_stack_exit(&sk, c);
return;
err:
rbio->bio.bi_status = BLK_STS_IOERR;
@@ -1507,12 +1571,14 @@ static void bch2_read_retry(struct bch_fs *c, struct bch_read_bio *rbio,
{
struct btree_trans trans;
struct btree_iter *iter;
+ struct bkey_on_stack sk;
struct bkey_s_c k;
int ret;
flags &= ~BCH_READ_LAST_FRAGMENT;
flags |= BCH_READ_MUST_CLONE;
+ bkey_on_stack_init(&sk);
bch2_trans_init(&trans, c, 0, 0);
retry:
bch2_trans_begin(&trans);
@@ -1520,18 +1586,18 @@ retry:
for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS,
POS(inode, bvec_iter.bi_sector),
BTREE_ITER_SLOTS, k, ret) {
- BKEY_PADDED(k) tmp;
unsigned bytes, sectors, offset_into_extent;
- bkey_reassemble(&tmp.k, k);
- k = bkey_i_to_s_c(&tmp.k);
+ bkey_on_stack_realloc(&sk, c, k.k->u64s);
+ bkey_reassemble(sk.k, k);
+ k = bkey_i_to_s_c(sk.k);
offset_into_extent = iter->pos.offset -
bkey_start_offset(k.k);
sectors = k.k->size - offset_into_extent;
ret = bch2_read_indirect_extent(&trans,
- &offset_into_extent, &tmp.k);
+ &offset_into_extent, sk.k);
if (ret)
break;
@@ -1570,6 +1636,7 @@ err:
rbio->bio.bi_status = BLK_STS_IOERR;
out:
bch2_trans_exit(&trans);
+ bkey_on_stack_exit(&sk, c);
bch2_rbio_done(rbio);
}
@@ -1626,7 +1693,7 @@ static void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio)
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
- BKEY_PADDED(k) new;
+ struct bkey_on_stack new;
struct bch_extent_crc_unpacked new_crc;
u64 data_offset = rbio->pos.offset - rbio->pick.crc.offset;
int ret;
@@ -1634,6 +1701,7 @@ static void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio)
if (rbio->pick.crc.compression_type)
return;
+ bkey_on_stack_init(&new);
bch2_trans_init(&trans, c, 0, 0);
retry:
bch2_trans_begin(&trans);
@@ -1644,8 +1712,9 @@ retry:
if (IS_ERR_OR_NULL(k.k))
goto out;
- bkey_reassemble(&new.k, k);
- k = bkey_i_to_s_c(&new.k);
+ bkey_on_stack_realloc(&new, c, k.k->u64s);
+ bkey_reassemble(new.k, k);
+ k = bkey_i_to_s_c(new.k);
if (bversion_cmp(k.k->version, rbio->version) ||
!bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, data_offset))
@@ -1664,10 +1733,10 @@ retry:
goto out;
}
- if (!bch2_bkey_narrow_crcs(&new.k, new_crc))
+ if (!bch2_bkey_narrow_crcs(new.k, new_crc))
goto out;
- bch2_trans_update(&trans, iter, &new.k);
+ bch2_trans_update(&trans, iter, new.k);
ret = bch2_trans_commit(&trans, NULL, NULL,
BTREE_INSERT_ATOMIC|
BTREE_INSERT_NOFAIL|
@@ -1676,6 +1745,7 @@ retry:
goto retry;
out:
bch2_trans_exit(&trans);
+ bkey_on_stack_exit(&new, c);
}
/* Inner part that may run in process context */
@@ -1872,6 +1942,19 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
struct bpos pos = bkey_start_pos(k.k);
int pick_ret;
+ if (k.k->type == KEY_TYPE_inline_data) {
+ struct bkey_s_c_inline_data d = bkey_s_c_to_inline_data(k);
+ unsigned bytes = min_t(unsigned, iter.bi_size,
+ bkey_val_bytes(d.k));
+
+ swap(iter.bi_size, bytes);
+ memcpy_to_bio(&orig->bio, iter, d.v->data);
+ swap(iter.bi_size, bytes);
+ bio_advance_iter(&orig->bio, &iter, bytes);
+ zero_fill_bio_iter(&orig->bio, iter);
+ goto out_read_done;
+ }
+
pick_ret = bch2_bkey_pick_read_device(c, k, failed, &pick);
/* hole or reservation - just zero fill: */
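
With inline data on disk, the read side can now serve KEY_TYPE_inline_data keys without touching any device. The bi_size of a local copy of the bvec iterator is temporarily swapped down to the copyable byte count so memcpy_to_bio() stops at the end of the key's value, then the iterator advances past the copied bytes and whatever remains of the request is zero-filled. A standalone sketch of that bounded-copy-then-zero-fill idiom, using a toy window in place of a struct bvec_iter:

    #include <stdio.h>
    #include <string.h>

    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    /* Toy stand-in for a bvec iterator: a window into one flat buffer. */
    struct win {
        unsigned char *buf;
        unsigned size;
    };

    static void read_inline(struct win *iter,
                            const unsigned char *val, unsigned val_bytes)
    {
        unsigned bytes = MIN(iter->size, val_bytes);

        memcpy(iter->buf, val, bytes);     /* bounded copy from the value */
        iter->buf  += bytes;               /* advance the iterator */
        iter->size -= bytes;
        memset(iter->buf, 0, iter->size);  /* zero-fill the tail */
    }

    int main(void)
    {
        unsigned char page[32];
        struct win iter = { page, sizeof(page) };

        read_inline(&iter, (const unsigned char *)"inline!", 7);
        printf("%.7s then %u zero bytes\n", page, iter.size);
        return 0;
    }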
@@ -2100,6 +2183,7 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode)
{
struct btree_trans trans;
struct btree_iter *iter;
+ struct bkey_on_stack sk;
struct bkey_s_c k;
unsigned flags = BCH_READ_RETRY_IF_STALE|
BCH_READ_MAY_PROMOTE|
@@ -2113,6 +2197,7 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode)
rbio->c = c;
rbio->start_time = local_clock();
+ bkey_on_stack_init(&sk);
bch2_trans_init(&trans, c, 0, 0);
retry:
bch2_trans_begin(&trans);
@@ -2121,7 +2206,6 @@ retry:
POS(inode, rbio->bio.bi_iter.bi_sector),
BTREE_ITER_SLOTS);
while (1) {
- BKEY_PADDED(k) tmp;
unsigned bytes, sectors, offset_into_extent;
bch2_btree_iter_set_pos(iter,
@@ -2132,15 +2216,16 @@ retry:
if (ret)
goto err;
- bkey_reassemble(&tmp.k, k);
- k = bkey_i_to_s_c(&tmp.k);
-
offset_into_extent = iter->pos.offset -
bkey_start_offset(k.k);
sectors = k.k->size - offset_into_extent;
+ bkey_on_stack_realloc(&sk, c, k.k->u64s);
+ bkey_reassemble(sk.k, k);
+ k = bkey_i_to_s_c(sk.k);
+
ret = bch2_read_indirect_extent(&trans,
- &offset_into_extent, &tmp.k);
+ &offset_into_extent, sk.k);
if (ret)
goto err;
@@ -2172,6 +2257,7 @@ retry:
}
out:
bch2_trans_exit(&trans);
+ bkey_on_stack_exit(&sk, c);
return;
err:
if (ret == -EINTR)