Diffstat (limited to 'libbcachefs')
-rw-r--r--  libbcachefs/data_update.c     139
-rw-r--r--  libbcachefs/data_update.h       5
-rw-r--r--  libbcachefs/fs-io-buffered.c  128
-rw-r--r--  libbcachefs/fs-io-buffered.h    5
-rw-r--r--  libbcachefs/fs.c                7
-rw-r--r--  libbcachefs/io_read.c           1
-rw-r--r--  libbcachefs/journal.c          10
-rw-r--r--  libbcachefs/migrate.c           8
-rw-r--r--  libbcachefs/move.c            186
-rw-r--r--  libbcachefs/move.h              2
-rw-r--r--  libbcachefs/super.c             8
11 files changed, 172 insertions, 327 deletions
diff --git a/libbcachefs/data_update.c b/libbcachefs/data_update.c
index f23951a1..155c1ad4 100644
--- a/libbcachefs/data_update.c
+++ b/libbcachefs/data_update.c
@@ -522,6 +522,15 @@ void bch2_data_update_exit(struct data_update *update)
struct bch_fs *c = update->op.c;
struct bkey_s_c k = bkey_i_to_s_c(update->k.k);
+ if (update->b)
+ atomic_dec(&update->b->count);
+
+ if (update->ctxt) {
+ scoped_guard(mutex, &update->ctxt->lock)
+ list_del(&update->io_list);
+ wake_up(&update->ctxt->wait);
+ }
+
bch2_bio_free_pages_pool(c, &update->op.wbio.bio);
kfree(update->bvecs);
update->bvecs = NULL;
@@ -866,8 +875,11 @@ int bch2_data_update_init(struct btree_trans *trans,
: BCH_DATA_UPDATE_rebalance;
m->btree_id = btree_id;
m->data_opts = data_opts;
+
m->ctxt = ctxt;
m->stats = ctxt ? ctxt->stats : NULL;
+ INIT_LIST_HEAD(&m->read_list);
+ INIT_LIST_HEAD(&m->io_list);
bch2_write_op_init(&m->op, c, *io_opts);
m->op.pos = bkey_start_pos(k.k);
@@ -927,74 +939,81 @@ int bch2_data_update_init(struct btree_trans *trans,
ptr_bit <<= 1;
}
- unsigned durability_required = max(0, (int) (io_opts->data_replicas - durability_have));
+ if (!data_opts.scrub) {
+ unsigned durability_required = max(0, (int) (io_opts->data_replicas - durability_have));
- /*
- * If current extent durability is less than io_opts.data_replicas,
- * we're not trying to rereplicate the extent up to data_replicas here -
- * unless extra_replicas was specified
- *
- * Increasing replication is an explicit operation triggered by
- * rereplicate, currently, so that users don't get an unexpected -ENOSPC
- */
- m->op.nr_replicas = min(durability_removing, durability_required) +
- m->data_opts.extra_replicas;
-
- /*
- * If device(s) were set to durability=0 after data was written to them
- * we can end up with a durability=0 extent, and the normal algorithm
- * that tries not to increase durability doesn't work:
- */
- if (!(durability_have + durability_removing))
- m->op.nr_replicas = max((unsigned) m->op.nr_replicas, 1);
+ /*
+ * If current extent durability is less than io_opts.data_replicas,
+ * we're not trying to rereplicate the extent up to data_replicas here -
+ * unless extra_replicas was specified
+ *
+ * Increasing replication is an explicit operation triggered by
+ * rereplicate, currently, so that users don't get an unexpected -ENOSPC
+ */
+ m->op.nr_replicas = min(durability_removing, durability_required) +
+ m->data_opts.extra_replicas;
- m->op.nr_replicas_required = m->op.nr_replicas;
+ /*
+ * If device(s) were set to durability=0 after data was written to them
+ * we can end up with a durability=0 extent, and the normal algorithm
+ * that tries not to increase durability doesn't work:
+ */
+ if (!(durability_have + durability_removing))
+ m->op.nr_replicas = max((unsigned) m->op.nr_replicas, 1);
- /*
- * It might turn out that we don't need any new replicas, if the
- * replicas or durability settings have been changed since the extent
- * was written:
- */
- if (!m->op.nr_replicas) {
- m->data_opts.kill_ptrs |= m->data_opts.rewrite_ptrs;
- m->data_opts.rewrite_ptrs = 0;
- /* if iter == NULL, it's just a promote */
- if (iter)
- ret = bch2_extent_drop_ptrs(trans, iter, k, io_opts, &m->data_opts);
- if (!ret)
- ret = bch_err_throw(c, data_update_done_no_writes_needed);
- goto out_bkey_buf_exit;
- }
+ m->op.nr_replicas_required = m->op.nr_replicas;
- /*
- * Check if the allocation will succeed, to avoid getting an error later
- * in bch2_write() -> bch2_alloc_sectors_start() and doing a useless
- * read:
- *
- * This guards against
- * - BCH_WRITE_alloc_nowait allocations failing (promotes)
- * - Destination target full
- * - Device(s) in destination target offline
- * - Insufficient durability available in destination target
- * (i.e. trying to move a durability=2 replica to a target with a
- * single durability=2 device)
- */
- ret = can_write_extent(c, m);
- if (ret)
- goto out_bkey_buf_exit;
+ /*
+ * It might turn out that we don't need any new replicas, if the
+ * replicas or durability settings have been changed since the extent
+ * was written:
+ */
+ if (!m->op.nr_replicas) {
+ m->data_opts.kill_ptrs |= m->data_opts.rewrite_ptrs;
+ m->data_opts.rewrite_ptrs = 0;
+ /* if iter == NULL, it's just a promote */
+ if (iter)
+ ret = bch2_extent_drop_ptrs(trans, iter, k, io_opts, &m->data_opts);
+ if (!ret)
+ ret = bch_err_throw(c, data_update_done_no_writes_needed);
+ goto out;
+ }
- if (reserve_sectors) {
- ret = bch2_disk_reservation_add(c, &m->op.res, reserve_sectors,
- m->data_opts.extra_replicas
- ? 0
- : BCH_DISK_RESERVATION_NOFAIL);
+ /*
+ * Check if the allocation will succeed, to avoid getting an error later
+ * in bch2_write() -> bch2_alloc_sectors_start() and doing a useless
+ * read:
+ *
+ * This guards against
+ * - BCH_WRITE_alloc_nowait allocations failing (promotes)
+ * - Destination target full
+ * - Device(s) in destination target offline
+ * - Insufficient durability available in destination target
+ * (i.e. trying to move a durability=2 replica to a target with a
+ * single durability=2 device)
+ */
+ ret = can_write_extent(c, m);
if (ret)
- goto out_bkey_buf_exit;
+ goto out;
+
+ if (reserve_sectors) {
+ ret = bch2_disk_reservation_add(c, &m->op.res, reserve_sectors,
+ m->data_opts.extra_replicas
+ ? 0
+ : BCH_DISK_RESERVATION_NOFAIL);
+ if (ret)
+ goto out;
+ }
+ } else {
+ if (unwritten) {
+ ret = bch_err_throw(c, data_update_done_unwritten);
+ goto out;
+ }
}
if (!bkey_get_dev_refs(c, k)) {
ret = bch_err_throw(c, data_update_done_no_dev_refs);
- goto out_put_disk_res;
+ goto out;
}
if (c->opts.nocow_enabled &&
@@ -1021,10 +1040,8 @@ out_nocow_unlock:
bkey_nocow_unlock(c, k);
out_put_dev_refs:
bkey_put_dev_refs(c, k);
-out_put_disk_res:
+out:
bch2_disk_reservation_put(c, &m->op.res);
-out_bkey_buf_exit:
- bch2_bkey_buf_exit(&m->k, c);
return ret;
}
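
The re-indented block above wraps the replica-count arithmetic in "if (!data_opts.scrub)"; the arithmetic itself is unchanged but easy to lose in the diff noise. A minimal standalone sketch of what it computes (local names, not bcachefs API):

#include <stdio.h>

/*
 * We only allocate as many new replicas as we're dropping, capped by
 * what the io options still require, plus explicitly requested extras.
 */
static unsigned nr_new_replicas(unsigned data_replicas,      /* io_opts->data_replicas */
                                unsigned durability_have,     /* durability staying put */
                                unsigned durability_removing, /* durability being dropped */
                                unsigned extra_replicas)      /* data_opts.extra_replicas */
{
        unsigned required = data_replicas > durability_have
                ? data_replicas - durability_have : 0;
        unsigned nr = (durability_removing < required
                       ? durability_removing : required) + extra_replicas;

        /* a durability=0 extent would otherwise never get a replica: */
        if (!(durability_have + durability_removing) && nr < 1)
                nr = 1;
        return nr;
}

int main(void)
{
        /* moving one durability=1 replica of a 2x-replicated extent: */
        printf("%u\n", nr_new_replicas(2, 1, 1, 0));    /* 1 */
        /* replicas lowered to 1 since the extent was written: */
        printf("%u\n", nr_new_replicas(1, 1, 1, 0));    /* 0: no writes needed */
        return 0;
}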
diff --git a/libbcachefs/data_update.h b/libbcachefs/data_update.h
index 3b0ba6f6..0e93b518 100644
--- a/libbcachefs/data_update.h
+++ b/libbcachefs/data_update.h
@@ -43,6 +43,11 @@ struct data_update {
enum btree_id btree_id;
struct bkey_buf k;
struct data_update_opts data_opts;
+
+ /* associated with @ctxt */
+ struct list_head read_list;
+ struct list_head io_list;
+ struct move_bucket *b;
struct moving_context *ctxt;
struct bch_move_stats *stats;
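
The two list_heads added here are intrusive links: a data_update threads itself onto the moving_context's reads and ios lists by embedding the nodes directly in the struct, which is what lets bch2_data_update_exit() unlink it in the hunk above. A userspace sketch of the idiom, assuming nothing beyond standard C:

#include <stdio.h>

struct list_head { struct list_head *prev, *next; };

static void INIT_LIST_HEAD(struct list_head *h) { h->prev = h->next = h; }

static void list_add_tail(struct list_head *n, struct list_head *h)
{
        n->prev = h->prev;
        n->next = h;
        h->prev->next = n;
        h->prev = n;
}

static void list_del(struct list_head *n)
{
        n->prev->next = n->next;
        n->next->prev = n->prev;
        INIT_LIST_HEAD(n);
}

/* one link per list the update can be on, as in struct data_update: */
struct update { struct list_head read_list, io_list; };

int main(void)
{
        struct list_head reads, ios;
        struct update u;

        INIT_LIST_HEAD(&reads);
        INIT_LIST_HEAD(&ios);
        INIT_LIST_HEAD(&u.read_list);
        INIT_LIST_HEAD(&u.io_list);

        /* mirrors bch2_move_extent(): join both lists */
        list_add_tail(&u.read_list, &reads);
        list_add_tail(&u.io_list, &ios);

        /* mirrors bch2_data_update_exit(): leave the io list */
        list_del(&u.io_list);
        printf("ios empty: %d\n", ios.next == &ios);    /* 1 */
        return 0;
}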
diff --git a/libbcachefs/fs-io-buffered.c b/libbcachefs/fs-io-buffered.c
index fe684adc..bfa1307b 100644
--- a/libbcachefs/fs-io-buffered.c
+++ b/libbcachefs/fs-io-buffered.c
@@ -729,134 +729,6 @@ int bch2_writepages(struct address_space *mapping, struct writeback_control *wbc
/* buffered writes: */
-int bch2_write_begin(const struct kiocb *iocb, struct address_space *mapping,
- loff_t pos, unsigned len,
- struct folio **foliop, void **fsdata)
-{
- struct bch_inode_info *inode = to_bch_ei(mapping->host);
- struct bch_fs *c = inode->v.i_sb->s_fs_info;
- struct bch2_folio_reservation *res;
- struct folio *folio;
- unsigned offset;
- int ret = -ENOMEM;
-
- res = kmalloc(sizeof(*res), GFP_KERNEL);
- if (!res)
- return -ENOMEM;
-
- bch2_folio_reservation_init(c, inode, res);
- *fsdata = res;
-
- bch2_pagecache_add_get(inode);
-
- folio = __filemap_get_folio(mapping, pos >> PAGE_SHIFT,
- FGP_WRITEBEGIN | fgf_set_order(len),
- mapping_gfp_mask(mapping));
- if (IS_ERR(folio))
- goto err_unlock;
-
- offset = pos - folio_pos(folio);
- len = min_t(size_t, len, folio_end_pos(folio) - pos);
-
- if (folio_test_uptodate(folio))
- goto out;
-
- /* If we're writing entire folio, don't need to read it in first: */
- if (!offset && len == folio_size(folio))
- goto out;
-
- if (!offset && pos + len >= inode->v.i_size) {
- folio_zero_segment(folio, len, folio_size(folio));
- flush_dcache_folio(folio);
- goto out;
- }
-
- if (folio_pos(folio) >= inode->v.i_size) {
- folio_zero_segments(folio, 0, offset, offset + len, folio_size(folio));
- flush_dcache_folio(folio);
- goto out;
- }
-readpage:
- ret = bch2_read_single_folio(folio, mapping);
- if (ret)
- goto err;
-out:
- ret = bch2_folio_set(c, inode_inum(inode), &folio, 1);
- if (ret)
- goto err;
-
- ret = bch2_folio_reservation_get(c, inode, folio, res, offset, len);
- if (ret) {
- if (!folio_test_uptodate(folio)) {
- /*
- * If the folio hasn't been read in, we won't know if we
- * actually need a reservation - we don't actually need
- * to read here, we just need to check if the folio is
- * fully backed by uncompressed data:
- */
- goto readpage;
- }
-
- goto err;
- }
-
- *foliop = folio;
- return 0;
-err:
- folio_unlock(folio);
- folio_put(folio);
-err_unlock:
- bch2_pagecache_add_put(inode);
- kfree(res);
- *fsdata = NULL;
- return bch2_err_class(ret);
-}
-
-int bch2_write_end(const struct kiocb *iocb, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct folio *folio, void *fsdata)
-{
- struct bch_inode_info *inode = to_bch_ei(mapping->host);
- struct bch_fs *c = inode->v.i_sb->s_fs_info;
- struct bch2_folio_reservation *res = fsdata;
- unsigned offset = pos - folio_pos(folio);
-
- BUG_ON(offset + copied > folio_size(folio));
-
- if (unlikely(copied < len && !folio_test_uptodate(folio))) {
- /*
- * The folio needs to be read in, but that would destroy
- * our partial write - simplest thing is to just force
- * userspace to redo the write:
- */
- folio_zero_range(folio, 0, folio_size(folio));
- flush_dcache_folio(folio);
- copied = 0;
- }
-
- scoped_guard(spinlock, &inode->v.i_lock)
- if (pos + copied > inode->v.i_size)
- i_size_write(&inode->v, pos + copied);
-
- if (copied) {
- if (!folio_test_uptodate(folio))
- folio_mark_uptodate(folio);
-
- bch2_set_folio_dirty(c, inode, folio, res, offset, copied);
-
- inode->ei_last_dirtied = (unsigned long) current;
- }
-
- folio_unlock(folio);
- folio_put(folio);
- bch2_pagecache_add_put(inode);
-
- bch2_folio_reservation_put(c, inode, res);
- kfree(res);
-
- return copied;
-}
-
static noinline void folios_trunc(folios *fs, struct folio **fi)
{
while (fs->data + fs->nr > fi) {
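
Most of the removed bch2_write_begin() was deciding whether a folio must be read in before a partial write can proceed; with these hooks gone, buffered writes presumably flow entirely through bch2_write_iter(), which the header below still declares. The decision the removed code implemented, reduced to a standalone predicate (byte quantities, illustrative names):

#include <stdbool.h>
#include <stdio.h>

static bool need_readpage(unsigned offset,      /* write offset within folio */
                          unsigned len,         /* write length, clamped to folio */
                          unsigned fsize,       /* folio_size() */
                          bool uptodate,        /* folio_test_uptodate() */
                          long long fpos,       /* folio_pos() */
                          long long isize)      /* i_size */
{
        if (uptodate)
                return false;
        if (!offset && len == fsize)            /* overwriting the whole folio */
                return false;
        if (!offset && fpos + len >= isize)     /* tail past EOF gets zeroed */
                return false;
        if (fpos >= isize)                      /* folio entirely past EOF */
                return false;
        return true;                            /* partial write: read it first */
}

int main(void)
{
        /* 100 bytes at offset 50 of a 4096-byte folio inside i_size: */
        printf("%d\n", need_readpage(50, 100, 4096, false, 0, 1 << 20)); /* 1 */
        return 0;
}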
diff --git a/libbcachefs/fs-io-buffered.h b/libbcachefs/fs-io-buffered.h
index 14de91c2..df59398b 100644
--- a/libbcachefs/fs-io-buffered.h
+++ b/libbcachefs/fs-io-buffered.h
@@ -10,11 +10,6 @@ int bch2_read_folio(struct file *, struct folio *);
int bch2_writepages(struct address_space *, struct writeback_control *);
void bch2_readahead(struct readahead_control *);
-int bch2_write_begin(const struct kiocb *, struct address_space *, loff_t pos,
- unsigned len, struct folio **, void **);
-int bch2_write_end(const struct kiocb *, struct address_space *, loff_t,
- unsigned len, unsigned copied, struct folio *, void *);
-
ssize_t bch2_write_iter(struct kiocb *, struct iov_iter *);
void bch2_fs_fs_io_buffered_exit(struct bch_fs *);
diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c
index d6a2031e..9b309ea6 100644
--- a/libbcachefs/fs.c
+++ b/libbcachefs/fs.c
@@ -44,6 +44,7 @@
#include <linux/siphash.h>
#include <linux/statfs.h>
#include <linux/string.h>
+#include <linux/version.h>
#include <linux/xattr.h>
static struct kmem_cache *bch2_inode_cache;
@@ -1585,6 +1586,10 @@ static const __maybe_unused unsigned bch_flags_to_xflags[] = {
[__BCH_INODE_noatime] = FS_XFLAG_NOATIME,
};
+#if LINUX_VERSION_CODE < KERNEL_VERSION(6,17,0)
+#define file_kattr fileattr
+#endif
+
static int bch2_fileattr_get(struct dentry *dentry,
struct file_kattr *fa)
{
@@ -1803,8 +1808,6 @@ static const struct address_space_operations bch_address_space_operations = {
.writepages = bch2_writepages,
.readahead = bch2_readahead,
.dirty_folio = filemap_dirty_folio,
- .write_begin = bch2_write_begin,
- .write_end = bch2_write_end,
.invalidate_folio = bch2_invalidate_folio,
.release_folio = bch2_release_folio,
#ifdef CONFIG_MIGRATION
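
The file_kattr shim above keeps one tree compiling across the 6.17 rename of struct fileattr; the real code keys off LINUX_VERSION_CODE / KERNEL_VERSION, mirrored here with illustrative macros and type names:

#include <stdio.h>

#define KVERSION(a, b)  ((a) << 8 | (b))
#define HDR_VERSION     KVERSION(6, 16)         /* pretend pre-6.17 headers */

/* what the old header provides: */
struct fileattr_demo { unsigned flags; };

#if HDR_VERSION < KVERSION(6, 17)
/* new-style code below compiles against the old type: */
#define file_kattr_demo fileattr_demo
#endif

static void show(struct file_kattr_demo *fa)
{
        printf("flags: %u\n", fa->flags);
}

int main(void)
{
        struct file_kattr_demo fa = { .flags = 3 };

        show(&fa);
        return 0;
}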
diff --git a/libbcachefs/io_read.c b/libbcachefs/io_read.c
index 8b4cda1d..ca480b8f 100644
--- a/libbcachefs/io_read.c
+++ b/libbcachefs/io_read.c
@@ -327,6 +327,7 @@ static struct bch_read_bio *__promote_alloc(struct btree_trans *trans,
return &op->write.rbio;
err_remove_list:
+ bch2_bkey_buf_exit(&op->write.k, c);
async_object_list_del(c, promote, op->list_idx);
err_remove_hash:
BUG_ON(rhashtable_remove_fast(&c->promote_table, &op->hash,
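
This one-liner plugs a leak: the bkey_buf backing the promote's write op wasn't freed when __promote_alloc() unwound through err_remove_list. The unwind-label idiom being patched, in miniature (names and steps are illustrative):

#include <errno.h>
#include <stdlib.h>

struct op { char *key_buf, *list_slot; };

static int hash_insert(void) { return -1; }     /* pretend failure */

static int op_init(struct op *op)
{
        int ret;

        op->key_buf = malloc(64);               /* like bch2_bkey_buf_* */
        if (!op->key_buf)
                return -ENOMEM;

        op->list_slot = malloc(16);             /* like async_object_list_add */
        if (!op->list_slot) {
                ret = -ENOMEM;
                goto err_free_key;
        }

        ret = hash_insert();
        if (ret)
                goto err_remove_list;
        return 0;

err_remove_list:
        free(op->list_slot);
        /* fall through: the io_read.c fix restores the next release */
err_free_key:
        free(op->key_buf);
        return ret;
}

int main(void)
{
        struct op op;

        return op_init(&op) ? 1 : 0;
}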
diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c
index 6505c79f..9058df47 100644
--- a/libbcachefs/journal.c
+++ b/libbcachefs/journal.c
@@ -1126,6 +1126,12 @@ static int bch2_set_nr_journal_buckets_iter(struct bch_dev *ca, unsigned nr,
ob[nr_got] = bch2_bucket_alloc(c, ca, watermark,
BCH_DATA_journal, cl);
ret = PTR_ERR_OR_ZERO(ob[nr_got]);
+
+ if (ret == -BCH_ERR_bucket_alloc_blocked)
+ ret = bch_err_throw(c, freelist_empty);
+ if (ret == -BCH_ERR_freelist_empty) /* don't wake if we're actually out of buckets */
+ closure_wake_up(&c->freelist_wait);
+
if (ret)
break;
@@ -1258,9 +1264,7 @@ static int bch2_set_nr_journal_buckets_loop(struct bch_fs *c, struct bch_dev *ca
}
ret = bch2_set_nr_journal_buckets_iter(ca, nr, new_fs, &cl);
-
- if (ret == -BCH_ERR_bucket_alloc_blocked ||
- ret == -BCH_ERR_open_buckets_empty)
+ if (ret == -BCH_ERR_open_buckets_empty)
ret = 0; /* wait and retry */
bch2_disk_reservation_put(c, &disk_res);
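
The two journal.c hunks cooperate: the allocation loop now rewrites bucket_alloc_blocked into freelist_empty and wakes freelist waiters, so the outer loop stops treating bucket_alloc_blocked as retryable and only swallows open_buckets_empty. A toy model of the resulting control flow, with made-up error values:

#include <stdio.h>

enum {
        OK                       = 0,
        ERR_open_buckets_empty   = -1,
        ERR_freelist_empty       = -2,
        ERR_bucket_alloc_blocked = -3,
};

/* what the iter function now does on the way out: */
static int iter_result(int alloc_ret)
{
        if (alloc_ret == ERR_bucket_alloc_blocked)
                alloc_ret = ERR_freelist_empty;
        return alloc_ret;
}

int main(void)
{
        int ret = iter_result(ERR_bucket_alloc_blocked);

        /* the loop now only swallows open_buckets_empty: */
        if (ret == ERR_open_buckets_empty)
                ret = 0;        /* wait and retry */
        printf("ret = %d\n", ret);      /* -2: propagated to the caller */
        return 0;
}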
diff --git a/libbcachefs/migrate.c b/libbcachefs/migrate.c
index 92edff50..139a6587 100644
--- a/libbcachefs/migrate.c
+++ b/libbcachefs/migrate.c
@@ -19,6 +19,7 @@
#include "migrate.h"
#include "move.h"
#include "progress.h"
+#include "rebalance.h"
#include "replicas.h"
#include "super-io.h"
@@ -79,7 +80,12 @@ static int bch2_dev_usrdata_drop_key(struct btree_trans *trans,
if (ret)
return ret;
- ret = drop_dev_ptrs(c, bkey_i_to_s(n), dev_idx, flags, err, false);
+ enum set_needs_rebalance_ctx ctx = SET_NEEDS_REBALANCE_opt_change;
+ struct bch_inode_opts opts;
+
+ ret = bch2_extent_get_apply_io_opts_one(trans, &opts, iter, k, ctx) ?:
+ bch2_bkey_set_needs_rebalance(c, &opts, n, ctx, 0) ?:
+ drop_dev_ptrs(c, bkey_i_to_s(n), dev_idx, flags, err, false);
if (ret)
return ret;
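
The replacement chains three fallible calls with GCC's a ?: b extension, used throughout bcachefs: evaluate left to right, stopping at the first nonzero (error) return. A minimal demonstration of the short-circuiting:

#include <stdio.h>

static int step1(void) { return 0; }
static int step2(void) { return -22; }  /* -EINVAL: short-circuits step3 */
static int step3(void) { return 0; }

int main(void)
{
        int ret = step1() ?: step2() ?: step3();

        printf("ret = %d\n", ret);      /* -22; step3 never ran */
        return 0;
}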
diff --git a/libbcachefs/move.c b/libbcachefs/move.c
index 9a440d3f..63c8f57b 100644
--- a/libbcachefs/move.c
+++ b/libbcachefs/move.c
@@ -105,46 +105,11 @@ trace_io_move_evacuate_bucket2(struct bch_fs *c, struct bpos bucket, int gen)
printbuf_exit(&buf);
}
-struct moving_io {
- struct list_head read_list;
- struct list_head io_list;
- struct move_bucket *b;
- struct closure cl;
- bool read_completed;
-
- unsigned read_sectors;
- unsigned write_sectors;
-
- struct data_update write;
-};
-
-static void move_free(struct moving_io *io)
-{
- struct moving_context *ctxt = io->write.ctxt;
- struct bch_fs *c = io->write.op.c;
-
- if (io->b)
- atomic_dec(&io->b->count);
-
- scoped_guard(mutex, &ctxt->lock)
- list_del(&io->io_list);
- wake_up(&ctxt->wait);
-
- if (!io->write.data_opts.scrub) {
- bch2_data_update_exit(&io->write);
- } else {
- bch2_bio_free_pages_pool(c, &io->write.op.wbio.bio);
- kfree(io->write.bvecs);
- bch2_bkey_buf_exit(&io->write.k, c);
- }
- kfree(io);
-}
-
static void move_write_done(struct bch_write_op *op)
{
- struct moving_io *io = container_of(op, struct moving_io, write.op);
+ struct data_update *u = container_of(op, struct data_update, op);
struct bch_fs *c = op->c;
- struct moving_context *ctxt = io->write.ctxt;
+ struct moving_context *ctxt = u->ctxt;
if (op->error) {
if (trace_io_move_write_fail_enabled()) {
@@ -157,24 +122,25 @@ static void move_write_done(struct bch_write_op *op)
ctxt->write_error = true;
}
- atomic_sub(io->write_sectors, &ctxt->write_sectors);
+ atomic_sub(u->k.k->k.size, &ctxt->write_sectors);
atomic_dec(&ctxt->write_ios);
- move_free(io);
+ bch2_data_update_exit(u);
+ kfree(u);
closure_put(&ctxt->cl);
}
-static void move_write(struct moving_io *io)
+static void move_write(struct data_update *u)
{
- struct bch_fs *c = io->write.op.c;
- struct moving_context *ctxt = io->write.ctxt;
- struct bch_read_bio *rbio = &io->write.rbio;
+ struct bch_fs *c = u->op.c;
+ struct moving_context *ctxt = u->ctxt;
+ struct bch_read_bio *rbio = &u->rbio;
if (ctxt->stats) {
if (rbio->bio.bi_status)
- atomic64_add(io->write.rbio.bvec_iter.bi_size >> 9,
+ atomic64_add(u->rbio.bvec_iter.bi_size >> 9,
&ctxt->stats->sectors_error_uncorrected);
else if (rbio->saw_error)
- atomic64_add(io->write.rbio.bvec_iter.bi_size >> 9,
+ atomic64_add(u->rbio.bvec_iter.bi_size >> 9,
&ctxt->stats->sectors_error_corrected);
}
@@ -184,7 +150,7 @@ static void move_write(struct moving_io *io)
* that userspace still gets the appropriate error.
*/
if (unlikely(rbio->ret == -BCH_ERR_data_read_csum_err &&
- (bch2_bkey_extent_flags(bkey_i_to_s_c(io->write.k.k)) & BIT_ULL(BCH_EXTENT_FLAG_poisoned)))) {
+ (bch2_bkey_extent_flags(bkey_i_to_s_c(u->k.k)) & BIT_ULL(BCH_EXTENT_FLAG_poisoned)))) {
struct bch_extent_crc_unpacked crc = rbio->pick.crc;
struct nonce nonce = extent_nonce(rbio->version, crc);
@@ -193,40 +159,41 @@ static void move_write(struct moving_io *io)
rbio->ret = 0;
}
- if (unlikely(rbio->ret || io->write.data_opts.scrub)) {
- move_free(io);
+ if (unlikely(rbio->ret || u->data_opts.scrub)) {
+ bch2_data_update_exit(u);
+ kfree(u);
return;
}
if (trace_io_move_write_enabled()) {
CLASS(printbuf, buf)();
- bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(io->write.k.k));
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(u->k.k));
trace_io_move_write(c, buf.buf);
}
- closure_get(&io->write.ctxt->cl);
- atomic_add(io->write_sectors, &io->write.ctxt->write_sectors);
- atomic_inc(&io->write.ctxt->write_ios);
+ closure_get(&ctxt->cl);
+ atomic_add(u->k.k->k.size, &ctxt->write_sectors);
+ atomic_inc(&ctxt->write_ios);
- bch2_data_update_read_done(&io->write);
+ bch2_data_update_read_done(u);
}
-struct moving_io *bch2_moving_ctxt_next_pending_write(struct moving_context *ctxt)
+struct data_update *bch2_moving_ctxt_next_pending_write(struct moving_context *ctxt)
{
- struct moving_io *io =
- list_first_entry_or_null(&ctxt->reads, struct moving_io, read_list);
+ struct data_update *u =
+ list_first_entry_or_null(&ctxt->reads, struct data_update, read_list);
- return io && io->read_completed ? io : NULL;
+ return u && u->read_done ? u : NULL;
}
static void move_read_endio(struct bio *bio)
{
- struct moving_io *io = container_of(bio, struct moving_io, write.rbio.bio);
- struct moving_context *ctxt = io->write.ctxt;
+ struct data_update *u = container_of(bio, struct data_update, rbio.bio);
+ struct moving_context *ctxt = u->ctxt;
- atomic_sub(io->read_sectors, &ctxt->read_sectors);
+ atomic_sub(u->k.k->k.size, &ctxt->read_sectors);
atomic_dec(&ctxt->read_ios);
- io->read_completed = true;
+ u->read_done = true;
wake_up(&ctxt->wait);
closure_put(&ctxt->cl);
@@ -234,12 +201,12 @@ static void move_read_endio(struct bio *bio)
void bch2_moving_ctxt_do_pending_writes(struct moving_context *ctxt)
{
- struct moving_io *io;
+ struct data_update *u;
- while ((io = bch2_moving_ctxt_next_pending_write(ctxt))) {
+ while ((u = bch2_moving_ctxt_next_pending_write(ctxt))) {
bch2_trans_unlock_long(ctxt->trans);
- list_del(&io->read_list);
- move_write(io);
+ list_del(&u->read_list);
+ move_write(u);
}
}
@@ -355,64 +322,44 @@ int bch2_move_extent(struct moving_context *ctxt,
}
}
- struct moving_io *io = allocate_dropping_locks(trans, ret,
- kzalloc(sizeof(struct moving_io), _gfp));
- if (!io && !ret)
+ struct data_update *u = allocate_dropping_locks(trans, ret,
+ kzalloc(sizeof(struct data_update), _gfp));
+ if (!u && !ret)
ret = bch_err_throw(c, ENOMEM_move_extent);
if (ret)
goto err;
- INIT_LIST_HEAD(&io->io_list);
- io->write.ctxt = ctxt;
- io->read_sectors = k.k->size;
- io->write_sectors = k.k->size;
-
- if (!data_opts.scrub) {
- ret = bch2_data_update_init(trans, iter, ctxt, &io->write, ctxt->wp,
- &io_opts, data_opts, iter->btree_id, k);
- if (ret)
- goto err;
-
- io->write.op.end_io = move_write_done;
- } else {
- bch2_bkey_buf_init(&io->write.k);
- bch2_bkey_buf_reassemble(&io->write.k, c, k);
-
- io->write.op.c = c;
- io->write.data_opts = data_opts;
-
- bch2_trans_unlock(trans);
-
- ret = bch2_data_update_bios_init(&io->write, c, &io_opts);
- if (ret)
- goto err;
- }
+ ret = bch2_data_update_init(trans, iter, ctxt, u, ctxt->wp,
+ &io_opts, data_opts, iter->btree_id, k);
+ if (ret)
+ goto err;
- io->write.rbio.bio.bi_end_io = move_read_endio;
- io->write.rbio.bio.bi_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0);
+ u->op.end_io = move_write_done;
+ u->rbio.bio.bi_end_io = move_read_endio;
+ u->rbio.bio.bi_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0);
if (ctxt->rate)
bch2_ratelimit_increment(ctxt->rate, k.k->size);
if (ctxt->stats) {
atomic64_inc(&ctxt->stats->keys_moved);
- atomic64_add(k.k->size, &ctxt->stats->sectors_moved);
+ atomic64_add(u->k.k->k.size, &ctxt->stats->sectors_moved);
}
if (bucket_in_flight) {
- io->b = bucket_in_flight;
- atomic_inc(&io->b->count);
+ u->b = bucket_in_flight;
+ atomic_inc(&u->b->count);
}
if (trace_io_move_read_enabled())
trace_io_move_read2(c, k);
scoped_guard(mutex, &ctxt->lock) {
- atomic_add(io->read_sectors, &ctxt->read_sectors);
+ atomic_add(u->k.k->k.size, &ctxt->read_sectors);
atomic_inc(&ctxt->read_ios);
- list_add_tail(&io->read_list, &ctxt->reads);
- list_add_tail(&io->io_list, &ctxt->ios);
+ list_add_tail(&u->read_list, &ctxt->reads);
+ list_add_tail(&u->io_list, &ctxt->ios);
}
/*
@@ -420,8 +367,8 @@ int bch2_move_extent(struct moving_context *ctxt,
* ctxt when doing wakeup
*/
closure_get(&ctxt->cl);
- __bch2_read_extent(trans, &io->write.rbio,
- io->write.rbio.bio.bi_iter,
+ __bch2_read_extent(trans, &u->rbio,
+ u->rbio.bio.bi_iter,
bkey_start_pos(k.k),
iter->btree_id, k, 0,
NULL,
@@ -429,23 +376,22 @@ int bch2_move_extent(struct moving_context *ctxt,
data_opts.scrub ? data_opts.read_dev : -1);
return 0;
err:
- bch2_bkey_buf_exit(&io->write.k, c);
- kfree(io);
-
- if (bch2_err_matches(ret, EROFS) ||
- bch2_err_matches(ret, BCH_ERR_transaction_restart))
- return ret;
+ if (!bch2_err_matches(ret, EROFS) &&
+ !bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
+ count_event(c, io_move_start_fail);
- count_event(c, io_move_start_fail);
-
- if (trace_io_move_start_fail_enabled()) {
- CLASS(printbuf, buf)();
- bch2_bkey_val_to_text(&buf, c, k);
- prt_str(&buf, ": ");
- prt_str(&buf, bch2_err_str(ret));
- trace_io_move_start_fail(c, buf.buf);
+ if (trace_io_move_start_fail_enabled()) {
+ CLASS(printbuf, buf)();
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(u->k.k));
+ prt_str(&buf, ": ");
+ prt_str(&buf, bch2_err_str(ret));
+ trace_io_move_start_fail(c, buf.buf);
+ }
}
+ bch2_bkey_buf_exit(&u->k, c);
+ kfree(u);
+
if (bch2_err_matches(ret, BCH_ERR_data_update_done))
return 0;
return ret;
@@ -1301,9 +1247,9 @@ static void bch2_moving_ctxt_to_text(struct printbuf *out, struct bch_fs *c, str
guard(printbuf_indent)(out);
scoped_guard(mutex, &ctxt->lock) {
- struct moving_io *io;
- list_for_each_entry(io, &ctxt->ios, io_list)
- bch2_data_update_inflight_to_text(out, &io->write);
+ struct data_update *u;
+ list_for_each_entry(u, &ctxt->ios, io_list)
+ bch2_data_update_inflight_to_text(out, u);
}
}
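
With struct moving_io folded into struct data_update, move_read_endio() and move_write_done() recover the update from the embedded bio or write op with a single container_of(), since the bio nests inside the rbio which nests inside the update. A userspace sketch of that recovery:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct bio      { int status; };
struct read_bio { struct bio bio; };
struct update   { struct read_bio rbio; unsigned sectors; };

/* mirrors move_read_endio(): handed only the bio */
static void endio(struct bio *bio)
{
        struct update *u = container_of(bio, struct update, rbio.bio);

        printf("update covers %u sectors\n", u->sectors);
}

int main(void)
{
        struct update u = { .sectors = 8 };

        endio(&u.rbio.bio);
        return 0;
}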
diff --git a/libbcachefs/move.h b/libbcachefs/move.h
index 754b0ad4..62831014 100644
--- a/libbcachefs/move.h
+++ b/libbcachefs/move.h
@@ -81,7 +81,7 @@ void bch2_moving_ctxt_exit(struct moving_context *);
void bch2_moving_ctxt_init(struct moving_context *, struct bch_fs *,
struct bch_ratelimit *, struct bch_move_stats *,
struct write_point_specifier, bool);
-struct moving_io *bch2_moving_ctxt_next_pending_write(struct moving_context *);
+struct data_update *bch2_moving_ctxt_next_pending_write(struct moving_context *);
void bch2_moving_ctxt_do_pending_writes(struct moving_context *);
void bch2_moving_ctxt_flush_all(struct moving_context *);
void bch2_move_ctxt_wait_for_io(struct moving_context *);
diff --git a/libbcachefs/super.c b/libbcachefs/super.c
index 4b873694..5cd308a6 100644
--- a/libbcachefs/super.c
+++ b/libbcachefs/super.c
@@ -2012,13 +2012,9 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags,
*/
bch2_dev_put(ca);
- if (!bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_failed, flags, NULL)) {
- prt_printf(err, "Cannot remove without losing data\n");
- ret = bch_err_throw(c, device_state_not_allowed);
+ ret = __bch2_dev_set_state(c, ca, BCH_MEMBER_STATE_failed, flags, err);
+ if (ret)
goto err;
- }
-
- __bch2_dev_read_only(c, ca);
ret = fast_device_removal
? bch2_dev_data_drop_by_backpointers(c, ca->dev_idx, flags, err)