Diffstat (limited to 'libbcachefs')
-rw-r--r--  libbcachefs/data_update.c    | 139
-rw-r--r--  libbcachefs/data_update.h    |   5
-rw-r--r--  libbcachefs/fs-io-buffered.c | 128
-rw-r--r--  libbcachefs/fs-io-buffered.h |   5
-rw-r--r--  libbcachefs/fs.c             |   7
-rw-r--r--  libbcachefs/io_read.c        |   1
-rw-r--r--  libbcachefs/journal.c        |  10
-rw-r--r--  libbcachefs/migrate.c        |   8
-rw-r--r--  libbcachefs/move.c           | 186
-rw-r--r--  libbcachefs/move.h           |   2
-rw-r--r--  libbcachefs/super.c          |   8
11 files changed, 172 insertions, 327 deletions
diff --git a/libbcachefs/data_update.c b/libbcachefs/data_update.c
index f23951a1..155c1ad4 100644
--- a/libbcachefs/data_update.c
+++ b/libbcachefs/data_update.c
@@ -522,6 +522,15 @@ void bch2_data_update_exit(struct data_update *update)
 	struct bch_fs *c = update->op.c;
 	struct bkey_s_c k = bkey_i_to_s_c(update->k.k);
 
+	if (update->b)
+		atomic_dec(&update->b->count);
+
+	if (update->ctxt) {
+		scoped_guard(mutex, &update->ctxt->lock)
+			list_del(&update->io_list);
+		wake_up(&update->ctxt->wait);
+	}
+
 	bch2_bio_free_pages_pool(c, &update->op.wbio.bio);
 	kfree(update->bvecs);
 	update->bvecs = NULL;
@@ -866,8 +875,11 @@ int bch2_data_update_init(struct btree_trans *trans,
 		: BCH_DATA_UPDATE_rebalance;
 	m->btree_id	= btree_id;
 	m->data_opts	= data_opts;
+	m->ctxt		= ctxt;
 	m->stats	= ctxt ? ctxt->stats : NULL;
 
+	INIT_LIST_HEAD(&m->read_list);
+	INIT_LIST_HEAD(&m->io_list);
 
 	bch2_write_op_init(&m->op, c, *io_opts);
 	m->op.pos	= bkey_start_pos(k.k);
@@ -927,74 +939,81 @@ int bch2_data_update_init(struct btree_trans *trans,
 		ptr_bit <<= 1;
 	}
 
-	unsigned durability_required = max(0, (int) (io_opts->data_replicas - durability_have));
+	if (!data_opts.scrub) {
+		unsigned durability_required = max(0, (int) (io_opts->data_replicas - durability_have));
 
-	/*
-	 * If current extent durability is less than io_opts.data_replicas,
-	 * we're not trying to rereplicate the extent up to data_replicas here -
-	 * unless extra_replicas was specified
-	 *
-	 * Increasing replication is an explicit operation triggered by
-	 * rereplicate, currently, so that users don't get an unexpected -ENOSPC
-	 */
-	m->op.nr_replicas = min(durability_removing, durability_required) +
-		m->data_opts.extra_replicas;
-
-	/*
-	 * If device(s) were set to durability=0 after data was written to them
-	 * we can end up with a duribilty=0 extent, and the normal algorithm
-	 * that tries not to increase durability doesn't work:
-	 */
-	if (!(durability_have + durability_removing))
-		m->op.nr_replicas = max((unsigned) m->op.nr_replicas, 1);
+		/*
+		 * If current extent durability is less than io_opts.data_replicas,
+		 * we're not trying to rereplicate the extent up to data_replicas here -
+		 * unless extra_replicas was specified
+		 *
+		 * Increasing replication is an explicit operation triggered by
+		 * rereplicate, currently, so that users don't get an unexpected -ENOSPC
+		 */
+		m->op.nr_replicas = min(durability_removing, durability_required) +
+			m->data_opts.extra_replicas;
 
-	m->op.nr_replicas_required = m->op.nr_replicas;
+		/*
+		 * If device(s) were set to durability=0 after data was written to them
+		 * we can end up with a duribilty=0 extent, and the normal algorithm
+		 * that tries not to increase durability doesn't work:
+		 */
+		if (!(durability_have + durability_removing))
+			m->op.nr_replicas = max((unsigned) m->op.nr_replicas, 1);
 
-	/*
-	 * It might turn out that we don't need any new replicas, if the
-	 * replicas or durability settings have been changed since the extent
-	 * was written:
-	 */
-	if (!m->op.nr_replicas) {
-		m->data_opts.kill_ptrs |= m->data_opts.rewrite_ptrs;
-		m->data_opts.rewrite_ptrs = 0;
-		/* if iter == NULL, it's just a promote */
-		if (iter)
-			ret = bch2_extent_drop_ptrs(trans, iter, k, io_opts, &m->data_opts);
-		if (!ret)
-			ret = bch_err_throw(c, data_update_done_no_writes_needed);
-		goto out_bkey_buf_exit;
-	}
+		m->op.nr_replicas_required = m->op.nr_replicas;
 
-	/*
-	 * Check if the allocation will succeed, to avoid getting an error later
-	 * in bch2_write() -> bch2_alloc_sectors_start() and doing a useless
-	 * read:
-	 *
-	 * This guards against
-	 * - BCH_WRITE_alloc_nowait allocations failing (promotes)
-	 * - Destination target full
-	 * - Device(s) in destination target offline
-	 * - Insufficient durability available in destination target
-	 *   (i.e. trying to move a durability=2 replica to a target with a
-	 *   single durability=2 device)
-	 */
-	ret = can_write_extent(c, m);
-	if (ret)
-		goto out_bkey_buf_exit;
+		/*
+		 * It might turn out that we don't need any new replicas, if the
+		 * replicas or durability settings have been changed since the extent
+		 * was written:
+		 */
+		if (!m->op.nr_replicas) {
+			m->data_opts.kill_ptrs |= m->data_opts.rewrite_ptrs;
+			m->data_opts.rewrite_ptrs = 0;
+			/* if iter == NULL, it's just a promote */
+			if (iter)
+				ret = bch2_extent_drop_ptrs(trans, iter, k, io_opts, &m->data_opts);
+			if (!ret)
+				ret = bch_err_throw(c, data_update_done_no_writes_needed);
+			goto out;
+		}
 
-	if (reserve_sectors) {
-		ret = bch2_disk_reservation_add(c, &m->op.res, reserve_sectors,
-				m->data_opts.extra_replicas
-				? 0
-				: BCH_DISK_RESERVATION_NOFAIL);
+		/*
+		 * Check if the allocation will succeed, to avoid getting an error later
+		 * in bch2_write() -> bch2_alloc_sectors_start() and doing a useless
+		 * read:
+		 *
+		 * This guards against
+		 * - BCH_WRITE_alloc_nowait allocations failing (promotes)
+		 * - Destination target full
+		 * - Device(s) in destination target offline
+		 * - Insufficient durability available in destination target
+		 *   (i.e. trying to move a durability=2 replica to a target with a
+		 *   single durability=2 device)
+		 */
+		ret = can_write_extent(c, m);
 		if (ret)
-			goto out_bkey_buf_exit;
+			goto out;
+
+		if (reserve_sectors) {
+			ret = bch2_disk_reservation_add(c, &m->op.res, reserve_sectors,
+					m->data_opts.extra_replicas
+					? 0
+					: BCH_DISK_RESERVATION_NOFAIL);
+			if (ret)
+				goto out;
+		}
+	} else {
+		if (unwritten) {
+			ret = bch_err_throw(c, data_update_done_unwritten);
+			goto out;
+		}
 	}
 
 	if (!bkey_get_dev_refs(c, k)) {
 		ret = bch_err_throw(c, data_update_done_no_dev_refs);
-		goto out_put_disk_res;
+		goto out;
 	}
 
 	if (c->opts.nocow_enabled &&
@@ -1021,10 +1040,8 @@ out_nocow_unlock:
 	bkey_nocow_unlock(c, k);
 out_put_dev_refs:
 	bkey_put_dev_refs(c, k);
-out_put_disk_res:
+out:
 	bch2_disk_reservation_put(c, &m->op.res);
-out_bkey_buf_exit:
-	bch2_bkey_buf_exit(&m->k, c);
 	return ret;
 }
diff --git a/libbcachefs/data_update.h b/libbcachefs/data_update.h
index 3b0ba6f6..0e93b518 100644
--- a/libbcachefs/data_update.h
+++ b/libbcachefs/data_update.h
@@ -43,6 +43,11 @@ struct data_update {
 	enum btree_id		btree_id;
 	struct bkey_buf		k;
 	struct data_update_opts	data_opts;
+
+	/* associated with @ctxt */
+	struct list_head	read_list;
+	struct list_head	io_list;
+	struct move_bucket	*b;
 	struct moving_context	*ctxt;
 	struct bch_move_stats	*stats;
 
diff --git a/libbcachefs/fs-io-buffered.c b/libbcachefs/fs-io-buffered.c
index fe684adc..bfa1307b 100644
--- a/libbcachefs/fs-io-buffered.c
+++ b/libbcachefs/fs-io-buffered.c
@@ -729,134 +729,6 @@ int bch2_writepages(struct address_space *mapping, struct writeback_control *wbc
 
 /* buffered writes: */
 
-int bch2_write_begin(const struct kiocb *iocb, struct address_space *mapping,
-		     loff_t pos, unsigned len,
-		     struct folio **foliop, void **fsdata)
-{
-	struct bch_inode_info *inode = to_bch_ei(mapping->host);
-	struct bch_fs *c = inode->v.i_sb->s_fs_info;
-	struct bch2_folio_reservation *res;
-	struct folio *folio;
-	unsigned offset;
-	int ret = -ENOMEM;
-
-	res = kmalloc(sizeof(*res), GFP_KERNEL);
-	if (!res)
-		return -ENOMEM;
-
-	bch2_folio_reservation_init(c, inode, res);
-	*fsdata = res;
-
-	bch2_pagecache_add_get(inode);
-
-	folio = __filemap_get_folio(mapping, pos >> PAGE_SHIFT,
-				    FGP_WRITEBEGIN | fgf_set_order(len),
-				    mapping_gfp_mask(mapping));
-	if (IS_ERR(folio))
-		goto err_unlock;
-
-	offset = pos - folio_pos(folio);
-	len = min_t(size_t, len, folio_end_pos(folio) - pos);
-
-	if (folio_test_uptodate(folio))
-		goto out;
-
-	/* If we're writing entire folio, don't need to read it in first: */
-	if (!offset && len == folio_size(folio))
-		goto out;
-
-	if (!offset && pos + len >= inode->v.i_size) {
-		folio_zero_segment(folio, len, folio_size(folio));
-		flush_dcache_folio(folio);
-		goto out;
-	}
-
-	if (folio_pos(folio) >= inode->v.i_size) {
-		folio_zero_segments(folio, 0, offset, offset + len, folio_size(folio));
-		flush_dcache_folio(folio);
-		goto out;
-	}
-readpage:
-	ret = bch2_read_single_folio(folio, mapping);
-	if (ret)
-		goto err;
-out:
-	ret = bch2_folio_set(c, inode_inum(inode), &folio, 1);
-	if (ret)
-		goto err;
-
-	ret = bch2_folio_reservation_get(c, inode, folio, res, offset, len);
-	if (ret) {
-		if (!folio_test_uptodate(folio)) {
-			/*
-			 * If the folio hasn't been read in, we won't know if we
-			 * actually need a reservation - we don't actually need
-			 * to read here, we just need to check if the folio is
-			 * fully backed by uncompressed data:
-			 */
-			goto readpage;
-		}
-
-		goto err;
-	}
-
-	*foliop = folio;
-	return 0;
-err:
-	folio_unlock(folio);
-	folio_put(folio);
-err_unlock:
-	bch2_pagecache_add_put(inode);
-	kfree(res);
-	*fsdata = NULL;
-	return bch2_err_class(ret);
-}
-
-int bch2_write_end(const struct kiocb *iocb, struct address_space *mapping,
-		   loff_t pos, unsigned len, unsigned copied,
-		   struct folio *folio, void *fsdata)
-{
-	struct bch_inode_info *inode = to_bch_ei(mapping->host);
-	struct bch_fs *c = inode->v.i_sb->s_fs_info;
-	struct bch2_folio_reservation *res = fsdata;
-	unsigned offset = pos - folio_pos(folio);
-
-	BUG_ON(offset + copied > folio_size(folio));
-
-	if (unlikely(copied < len && !folio_test_uptodate(folio))) {
-		/*
-		 * The folio needs to be read in, but that would destroy
-		 * our partial write - simplest thing is to just force
-		 * userspace to redo the write:
-		 */
-		folio_zero_range(folio, 0, folio_size(folio));
-		flush_dcache_folio(folio);
-		copied = 0;
-	}
-
-	scoped_guard(spinlock, &inode->v.i_lock)
-		if (pos + copied > inode->v.i_size)
-			i_size_write(&inode->v, pos + copied);
-
-	if (copied) {
-		if (!folio_test_uptodate(folio))
-			folio_mark_uptodate(folio);
-
-		bch2_set_folio_dirty(c, inode, folio, res, offset, copied);
-
-		inode->ei_last_dirtied = (unsigned long) current;
-	}
-
-	folio_unlock(folio);
-	folio_put(folio);
-	bch2_pagecache_add_put(inode);
-
-	bch2_folio_reservation_put(c, inode, res);
-	kfree(res);
-
-	return copied;
-}
-
 static noinline void folios_trunc(folios *fs, struct folio **fi)
 {
 	while (fs->data + fs->nr > fi) {
diff --git a/libbcachefs/fs-io-buffered.h b/libbcachefs/fs-io-buffered.h
index 14de91c2..df59398b 100644
--- a/libbcachefs/fs-io-buffered.h
+++ b/libbcachefs/fs-io-buffered.h
@@ -10,11 +10,6 @@ int bch2_read_folio(struct file *, struct folio *);
 int bch2_writepages(struct address_space *, struct writeback_control *);
 void bch2_readahead(struct readahead_control *);
 
-int bch2_write_begin(const struct kiocb *, struct address_space *, loff_t pos,
-		     unsigned len, struct folio **, void **);
-int bch2_write_end(const struct kiocb *, struct address_space *, loff_t,
-		   unsigned len, unsigned copied, struct folio *, void *);
-
 ssize_t bch2_write_iter(struct kiocb *, struct iov_iter *);
 
 void bch2_fs_fs_io_buffered_exit(struct bch_fs *);
diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c
index d6a2031e..9b309ea6 100644
--- a/libbcachefs/fs.c
+++ b/libbcachefs/fs.c
@@ -44,6 +44,7 @@
 #include <linux/siphash.h>
 #include <linux/statfs.h>
 #include <linux/string.h>
+#include <linux/version.h>
 #include <linux/xattr.h>
 
 static struct kmem_cache *bch2_inode_cache;
@@ -1585,6 +1586,10 @@ static const __maybe_unused unsigned bch_flags_to_xflags[] = {
 	[__BCH_INODE_noatime]	= FS_XFLAG_NOATIME,
 };
 
+#if LINUX_VERSION_CODE < KERNEL_VERSION(6,17,0)
+#define file_kattr fileattr
+#endif
+
 static int bch2_fileattr_get(struct dentry *dentry,
 			     struct file_kattr *fa)
 {
@@ -1803,8 +1808,6 @@ static const struct address_space_operations bch_address_space_operations = {
 	.writepages	= bch2_writepages,
 	.readahead	= bch2_readahead,
 	.dirty_folio	= filemap_dirty_folio,
-	.write_begin	= bch2_write_begin,
-	.write_end	= bch2_write_end,
 	.invalidate_folio = bch2_invalidate_folio,
 	.release_folio	= bch2_release_folio,
 #ifdef CONFIG_MIGRATION
diff --git a/libbcachefs/io_read.c b/libbcachefs/io_read.c
index 8b4cda1d..ca480b8f 100644
--- a/libbcachefs/io_read.c
+++ b/libbcachefs/io_read.c
@@ -327,6 +327,7 @@ static struct bch_read_bio *__promote_alloc(struct btree_trans *trans,
 
 	return &op->write.rbio;
 err_remove_list:
+	bch2_bkey_buf_exit(&op->write.k, c);
 	async_object_list_del(c, promote, op->list_idx);
 err_remove_hash:
 	BUG_ON(rhashtable_remove_fast(&c->promote_table, &op->hash,
diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c
index 6505c79f..9058df47 100644
--- a/libbcachefs/journal.c
+++ b/libbcachefs/journal.c
@@ -1126,6 +1126,12 @@ static int bch2_set_nr_journal_buckets_iter(struct bch_dev *ca, unsigned nr,
 		ob[nr_got] = bch2_bucket_alloc(c, ca, watermark,
 					       BCH_DATA_journal, cl);
 		ret = PTR_ERR_OR_ZERO(ob[nr_got]);
+
+		if (ret == -BCH_ERR_bucket_alloc_blocked)
+			ret = bch_err_throw(c, freelist_empty);
+		if (ret == -BCH_ERR_freelist_empty)	/* don't if we're actually out of buckets */
+			closure_wake_up(&c->freelist_wait);
+
 		if (ret)
 			break;
@@ -1258,9 +1264,7 @@ static int bch2_set_nr_journal_buckets_loop(struct bch_fs *c, struct bch_dev *ca
 		}
 
 		ret = bch2_set_nr_journal_buckets_iter(ca, nr, new_fs, &cl);
-
-		if (ret == -BCH_ERR_bucket_alloc_blocked ||
-		    ret == -BCH_ERR_open_buckets_empty)
+		if (ret == -BCH_ERR_open_buckets_empty)
 			ret = 0; /* wait and retry */
 
 		bch2_disk_reservation_put(c, &disk_res);
diff --git a/libbcachefs/migrate.c b/libbcachefs/migrate.c
index 92edff50..139a6587 100644
--- a/libbcachefs/migrate.c
+++ b/libbcachefs/migrate.c
@@ -19,6 +19,7 @@
 #include "migrate.h"
 #include "move.h"
 #include "progress.h"
+#include "rebalance.h"
 #include "replicas.h"
 #include "super-io.h"
 
@@ -79,7 +80,12 @@ static int bch2_dev_usrdata_drop_key(struct btree_trans *trans,
 	if (ret)
 		return ret;
 
-	ret = drop_dev_ptrs(c, bkey_i_to_s(n), dev_idx, flags, err, false);
+	enum set_needs_rebalance_ctx ctx = SET_NEEDS_REBALANCE_opt_change;
+	struct bch_inode_opts opts;
+
+	ret =   bch2_extent_get_apply_io_opts_one(trans, &opts, iter, k, ctx) ?:
+		bch2_bkey_set_needs_rebalance(c, &opts, n, ctx, 0) ?:
+		drop_dev_ptrs(c, bkey_i_to_s(n), dev_idx, flags, err, false);
 	if (ret)
 		return ret;
 
diff --git a/libbcachefs/move.c b/libbcachefs/move.c
index 9a440d3f..63c8f57b 100644
--- a/libbcachefs/move.c
+++ b/libbcachefs/move.c
@@ -105,46 +105,11 @@ trace_io_move_evacuate_bucket2(struct bch_fs *c, struct bpos bucket, int gen)
 	printbuf_exit(&buf);
 }
 
-struct moving_io {
-	struct list_head	read_list;
-	struct list_head	io_list;
-	struct move_bucket	*b;
-	struct closure		cl;
-	bool			read_completed;
-
-	unsigned		read_sectors;
-	unsigned		write_sectors;
-
-	struct data_update	write;
-};
-
-static void move_free(struct moving_io *io)
-{
-	struct moving_context *ctxt = io->write.ctxt;
-	struct bch_fs *c = io->write.op.c;
-
-	if (io->b)
-		atomic_dec(&io->b->count);
-
-	scoped_guard(mutex, &ctxt->lock)
-		list_del(&io->io_list);
-	wake_up(&ctxt->wait);
-
-	if (!io->write.data_opts.scrub) {
-		bch2_data_update_exit(&io->write);
-	} else {
-		bch2_bio_free_pages_pool(c, &io->write.op.wbio.bio);
-		kfree(io->write.bvecs);
-		bch2_bkey_buf_exit(&io->write.k, c);
-	}
-	kfree(io);
-}
-
 static void move_write_done(struct bch_write_op *op)
 {
-	struct moving_io *io = container_of(op, struct moving_io, write.op);
+	struct data_update *u = container_of(op, struct data_update, op);
 	struct bch_fs *c = op->c;
-	struct moving_context *ctxt = io->write.ctxt;
+	struct moving_context *ctxt = u->ctxt;
 
 	if (op->error) {
 		if (trace_io_move_write_fail_enabled()) {
@@ -157,24 +122,25 @@ static void move_write_done(struct bch_write_op *op)
 		ctxt->write_error = true;
 	}
 
-	atomic_sub(io->write_sectors, &ctxt->write_sectors);
+	atomic_sub(u->k.k->k.size, &ctxt->write_sectors);
 	atomic_dec(&ctxt->write_ios);
-	move_free(io);
+	bch2_data_update_exit(u);
+	kfree(u);
 	closure_put(&ctxt->cl);
 }
 
-static void move_write(struct moving_io *io)
+static void move_write(struct data_update *u)
 {
-	struct bch_fs *c = io->write.op.c;
-	struct moving_context *ctxt = io->write.ctxt;
-	struct bch_read_bio *rbio = &io->write.rbio;
+	struct bch_fs *c = u->op.c;
+	struct moving_context *ctxt = u->ctxt;
+	struct bch_read_bio *rbio = &u->rbio;
 
 	if (ctxt->stats) {
 		if (rbio->bio.bi_status)
-			atomic64_add(io->write.rbio.bvec_iter.bi_size >> 9,
+			atomic64_add(u->rbio.bvec_iter.bi_size >> 9,
 				     &ctxt->stats->sectors_error_uncorrected);
 		else if (rbio->saw_error)
-			atomic64_add(io->write.rbio.bvec_iter.bi_size >> 9,
+			atomic64_add(u->rbio.bvec_iter.bi_size >> 9,
 				     &ctxt->stats->sectors_error_corrected);
 	}
 
@@ -184,7 +150,7 @@ static void move_write(struct moving_io *io)
 	 * that userspace still gets the appropriate error.
 	 */
 	if (unlikely(rbio->ret == -BCH_ERR_data_read_csum_err &&
-		     (bch2_bkey_extent_flags(bkey_i_to_s_c(io->write.k.k)) & BIT_ULL(BCH_EXTENT_FLAG_poisoned)))) {
+		     (bch2_bkey_extent_flags(bkey_i_to_s_c(u->k.k)) & BIT_ULL(BCH_EXTENT_FLAG_poisoned)))) {
 		struct bch_extent_crc_unpacked crc = rbio->pick.crc;
 		struct nonce nonce = extent_nonce(rbio->version, crc);
 
@@ -193,40 +159,41 @@ static void move_write(struct moving_io *io)
 		rbio->ret = 0;
 	}
 
-	if (unlikely(rbio->ret || io->write.data_opts.scrub)) {
-		move_free(io);
+	if (unlikely(rbio->ret || u->data_opts.scrub)) {
+		bch2_data_update_exit(u);
+		kfree(u);
 		return;
 	}
 
 	if (trace_io_move_write_enabled()) {
 		CLASS(printbuf, buf)();
-		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(io->write.k.k));
+		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(u->k.k));
 		trace_io_move_write(c, buf.buf);
 	}
 
-	closure_get(&io->write.ctxt->cl);
-	atomic_add(io->write_sectors, &io->write.ctxt->write_sectors);
-	atomic_inc(&io->write.ctxt->write_ios);
+	closure_get(&ctxt->cl);
+	atomic_add(u->k.k->k.size, &ctxt->write_sectors);
+	atomic_inc(&ctxt->write_ios);
 
-	bch2_data_update_read_done(&io->write);
+	bch2_data_update_read_done(u);
 }
 
-struct moving_io *bch2_moving_ctxt_next_pending_write(struct moving_context *ctxt)
+struct data_update *bch2_moving_ctxt_next_pending_write(struct moving_context *ctxt)
 {
-	struct moving_io *io =
-		list_first_entry_or_null(&ctxt->reads, struct moving_io, read_list);
+	struct data_update *u =
+		list_first_entry_or_null(&ctxt->reads, struct data_update, read_list);
 
-	return io && io->read_completed ? io : NULL;
+	return u && u->read_done ? u : NULL;
 }
 
 static void move_read_endio(struct bio *bio)
 {
-	struct moving_io *io = container_of(bio, struct moving_io, write.rbio.bio);
-	struct moving_context *ctxt = io->write.ctxt;
+	struct data_update *u = container_of(bio, struct data_update, rbio.bio);
+	struct moving_context *ctxt = u->ctxt;
 
-	atomic_sub(io->read_sectors, &ctxt->read_sectors);
+	atomic_sub(u->k.k->k.size, &ctxt->read_sectors);
 	atomic_dec(&ctxt->read_ios);
-	io->read_completed = true;
+	u->read_done = true;
 
 	wake_up(&ctxt->wait);
 	closure_put(&ctxt->cl);
@@ -234,12 +201,12 @@ static void move_read_endio(struct bio *bio)
 
 void bch2_moving_ctxt_do_pending_writes(struct moving_context *ctxt)
 {
-	struct moving_io *io;
+	struct data_update *u;
 
-	while ((io = bch2_moving_ctxt_next_pending_write(ctxt))) {
+	while ((u = bch2_moving_ctxt_next_pending_write(ctxt))) {
 		bch2_trans_unlock_long(ctxt->trans);
-		list_del(&io->read_list);
-		move_write(io);
+		list_del(&u->read_list);
+		move_write(u);
 	}
 }
 
@@ -355,64 +322,44 @@ int bch2_move_extent(struct moving_context *ctxt,
 		}
 	}
 
-	struct moving_io *io = allocate_dropping_locks(trans, ret,
-				kzalloc(sizeof(struct moving_io), _gfp));
-	if (!io && !ret)
+	struct data_update *u = allocate_dropping_locks(trans, ret,
+				kzalloc(sizeof(struct data_update), _gfp));
+	if (!u && !ret)
 		ret = bch_err_throw(c, ENOMEM_move_extent);
 
 	if (ret)
 		goto err;
 
-	INIT_LIST_HEAD(&io->io_list);
-	io->write.ctxt		= ctxt;
-	io->read_sectors	= k.k->size;
-	io->write_sectors	= k.k->size;
-
-	if (!data_opts.scrub) {
-		ret = bch2_data_update_init(trans, iter, ctxt, &io->write, ctxt->wp,
-					    &io_opts, data_opts, iter->btree_id, k);
-		if (ret)
-			goto err;
-
-		io->write.op.end_io = move_write_done;
-	} else {
-		bch2_bkey_buf_init(&io->write.k);
-		bch2_bkey_buf_reassemble(&io->write.k, c, k);
-
-		io->write.op.c = c;
-		io->write.data_opts = data_opts;
-
-		bch2_trans_unlock(trans);
-
-		ret = bch2_data_update_bios_init(&io->write, c, &io_opts);
-		if (ret)
-			goto err;
-	}
+	ret = bch2_data_update_init(trans, iter, ctxt, u, ctxt->wp,
+				    &io_opts, data_opts, iter->btree_id, k);
+	if (ret)
+		goto err;
 
-	io->write.rbio.bio.bi_end_io = move_read_endio;
-	io->write.rbio.bio.bi_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0);
+	u->op.end_io		= move_write_done;
+	u->rbio.bio.bi_end_io	= move_read_endio;
+	u->rbio.bio.bi_ioprio	= IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0);
 
 	if (ctxt->rate)
 		bch2_ratelimit_increment(ctxt->rate, k.k->size);
 
 	if (ctxt->stats) {
 		atomic64_inc(&ctxt->stats->keys_moved);
-		atomic64_add(k.k->size, &ctxt->stats->sectors_moved);
+		atomic64_add(u->k.k->k.size, &ctxt->stats->sectors_moved);
 	}
 
 	if (bucket_in_flight) {
-		io->b = bucket_in_flight;
-		atomic_inc(&io->b->count);
+		u->b = bucket_in_flight;
+		atomic_inc(&u->b->count);
 	}
 
 	if (trace_io_move_read_enabled())
 		trace_io_move_read2(c, k);
 
 	scoped_guard(mutex, &ctxt->lock) {
-		atomic_add(io->read_sectors, &ctxt->read_sectors);
+		atomic_add(u->k.k->k.size, &ctxt->read_sectors);
 		atomic_inc(&ctxt->read_ios);
-		list_add_tail(&io->read_list, &ctxt->reads);
-		list_add_tail(&io->io_list, &ctxt->ios);
+		list_add_tail(&u->read_list, &ctxt->reads);
+		list_add_tail(&u->io_list, &ctxt->ios);
 	}
 
 	/*
@@ -420,8 +367,8 @@ int bch2_move_extent(struct moving_context *ctxt,
 	 * ctxt when doing wakeup
 	 */
 	closure_get(&ctxt->cl);
-	__bch2_read_extent(trans, &io->write.rbio,
-			   io->write.rbio.bio.bi_iter,
+	__bch2_read_extent(trans, &u->rbio,
+			   u->rbio.bio.bi_iter,
 			   bkey_start_pos(k.k),
 			   iter->btree_id, k, 0,
 			   NULL,
@@ -429,23 +376,22 @@ int bch2_move_extent(struct moving_context *ctxt,
 			   data_opts.scrub ?  data_opts.read_dev : -1);
 	return 0;
 err:
-	bch2_bkey_buf_exit(&io->write.k, c);
-	kfree(io);
-
-	if (bch2_err_matches(ret, EROFS) ||
-	    bch2_err_matches(ret, BCH_ERR_transaction_restart))
-		return ret;
+	if (!bch2_err_matches(ret, EROFS) &&
+	    !bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
+		count_event(c, io_move_start_fail);
 
-	count_event(c, io_move_start_fail);
-
-	if (trace_io_move_start_fail_enabled()) {
-		CLASS(printbuf, buf)();
-		bch2_bkey_val_to_text(&buf, c, k);
-		prt_str(&buf, ": ");
-		prt_str(&buf, bch2_err_str(ret));
-		trace_io_move_start_fail(c, buf.buf);
+		if (trace_io_move_start_fail_enabled()) {
+			CLASS(printbuf, buf)();
+			bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(u->k.k));
+			prt_str(&buf, ": ");
+			prt_str(&buf, bch2_err_str(ret));
+			trace_io_move_start_fail(c, buf.buf);
+		}
 	}
+
+	bch2_bkey_buf_exit(&u->k, c);
+	kfree(u);
+
 	if (bch2_err_matches(ret, BCH_ERR_data_update_done))
 		return 0;
 	return ret;
@@ -1301,9 +1247,9 @@ static void bch2_moving_ctxt_to_text(struct printbuf *out, struct bch_fs *c, str
 	guard(printbuf_indent)(out);
 
 	scoped_guard(mutex, &ctxt->lock) {
-		struct moving_io *io;
-		list_for_each_entry(io, &ctxt->ios, io_list)
-			bch2_data_update_inflight_to_text(out, &io->write);
+		struct data_update *u;
+		list_for_each_entry(u, &ctxt->ios, io_list)
+			bch2_data_update_inflight_to_text(out, u);
 	}
 }
 
diff --git a/libbcachefs/move.h b/libbcachefs/move.h
index 754b0ad4..62831014 100644
--- a/libbcachefs/move.h
+++ b/libbcachefs/move.h
@@ -81,7 +81,7 @@ void bch2_moving_ctxt_exit(struct moving_context *);
 void bch2_moving_ctxt_init(struct moving_context *, struct bch_fs *,
 			   struct bch_ratelimit *, struct bch_move_stats *,
 			   struct write_point_specifier, bool);
-struct moving_io *bch2_moving_ctxt_next_pending_write(struct moving_context *);
+struct data_update *bch2_moving_ctxt_next_pending_write(struct moving_context *);
 void bch2_moving_ctxt_do_pending_writes(struct moving_context *);
 void bch2_moving_ctxt_flush_all(struct moving_context *);
 void bch2_move_ctxt_wait_for_io(struct moving_context *);
diff --git a/libbcachefs/super.c b/libbcachefs/super.c
index 4b873694..5cd308a6 100644
--- a/libbcachefs/super.c
+++ b/libbcachefs/super.c
@@ -2012,13 +2012,9 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags,
 	 */
 	bch2_dev_put(ca);
 
-	if (!bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_failed, flags, NULL)) {
-		prt_printf(err, "Cannot remove without losing data\n");
-		ret = bch_err_throw(c, device_state_not_allowed);
+	ret = __bch2_dev_set_state(c, ca, BCH_MEMBER_STATE_failed, flags, err);
+	if (ret)
 		goto err;
-	}
-
-	__bch2_dev_read_only(c, ca);
 
 	ret = fast_device_removal
 		? bch2_dev_data_drop_by_backpointers(c, ca->dev_idx, flags, err)