-rw-r--r-- drivers/md/dm-crypt.c                                | 42
-rw-r--r-- drivers/md/dm-io.c                                   |  1
-rw-r--r-- drivers/md/dm-linear.c                               |  5
-rw-r--r-- drivers/md/dm-ps-io-affinity.c                       |  2
-rw-r--r-- drivers/md/dm-raid.c                                 |  2
-rw-r--r-- drivers/md/dm-raid1.c                                |  5
-rw-r--r-- drivers/md/dm-stripe.c                               |  5
-rw-r--r-- drivers/md/dm-table.c                                | 29
-rw-r--r-- drivers/md/dm.c                                      | 31
-rw-r--r-- drivers/md/persistent-data/dm-transaction-manager.c | 54
-rw-r--r-- include/linux/device-mapper.h                        |  3
-rw-r--r-- include/uapi/linux/dm-ioctl.h                        |  4
12 files changed, 120 insertions(+), 63 deletions(-)
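Annotation (not part of the patch): the dm-crypt hunks below move the integrity-tag cursor, tag_offset, off the stack of crypt_convert() and into struct convert_context. crypt_convert() can suspend (BLK_STS_DEV_RESOURCE, -EINPROGRESS) and be re-entered from a workqueue, and a stack-local cursor would restart at 0 on re-entry and index the wrong integrity tags. A minimal standalone sketch of that resumable-loop pattern, with illustrative names rather than the kernel's:

#include <stdio.h>

/* Illustrative resumable-loop pattern: the cursor lives in the context,
 * not on the stack, so a re-entered conversion continues where it stopped. */
struct ctx {
	unsigned int tag_offset;	/* persists across re-entry */
	unsigned int remaining;		/* blocks still to process */
};

/* Returns 0 when done, 1 when the caller must re-enter later. */
static int convert(struct ctx *c, unsigned int budget)
{
	while (c->remaining) {
		printf("block uses tag %u\n", c->tag_offset);
		c->tag_offset++;	/* advance the persisted cursor */
		c->remaining--;
		if (!--budget)
			return 1;	/* out of resources; resume later */
	}
	return 0;
}

int main(void)
{
	struct ctx c = { .tag_offset = 0, .remaining = 5 };

	while (convert(&c, 2))	/* each re-entry picks up at the old cursor */
		;
	return 0;
}

The same reasoning explains why the clone bio's bi_sector is now set once at allocation time instead of at each submission site: per-io state is initialized in one place and reused on every resubmission path.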
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 1ae2c71bb383..02a2919f4e5a 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -59,6 +59,7 @@ struct convert_context {
 	struct bio *bio_out;
 	struct bvec_iter iter_out;
 	atomic_t cc_pending;
+	unsigned int tag_offset;
 	u64 cc_sector;
 	union {
 		struct skcipher_request *req;
@@ -1187,7 +1188,7 @@ static int dm_crypt_integrity_io_alloc(struct dm_crypt_io *io, struct bio *bio)
 	tag_len = io->cc->tuple_size * (bio_sectors(bio) >> io->cc->sector_shift);
 
-	bip->bip_iter.bi_sector = io->cc->start + io->sector;
+	bip->bip_iter.bi_sector = bio->bi_iter.bi_sector;
 
 	ret = bio_integrity_add_page(bio, virt_to_page(io->integrity_metadata),
 				     tag_len, offset_in_page(io->integrity_metadata));
@@ -1256,6 +1257,7 @@ static void crypt_convert_init(struct crypt_config *cc,
 	if (bio_out)
 		ctx->iter_out = bio_out->bi_iter;
 	ctx->cc_sector = sector + cc->iv_offset;
+	ctx->tag_offset = 0;
 	init_completion(&ctx->restart);
 }
 
@@ -1588,7 +1590,6 @@ static void crypt_free_req(struct crypt_config *cc, void *req, struct bio *base_
 static blk_status_t crypt_convert(struct crypt_config *cc,
 			 struct convert_context *ctx, bool atomic, bool reset_pending)
 {
-	unsigned int tag_offset = 0;
 	unsigned int sector_step = cc->sector_size >> SECTOR_SHIFT;
 	int r;
 
@@ -1611,9 +1612,9 @@ static blk_status_t crypt_convert(struct crypt_config *cc,
 		atomic_inc(&ctx->cc_pending);
 
 		if (crypt_integrity_aead(cc))
-			r = crypt_convert_block_aead(cc, ctx, ctx->r.req_aead, tag_offset);
+			r = crypt_convert_block_aead(cc, ctx, ctx->r.req_aead, ctx->tag_offset);
 		else
-			r = crypt_convert_block_skcipher(cc, ctx, ctx->r.req, tag_offset);
+			r = crypt_convert_block_skcipher(cc, ctx, ctx->r.req, ctx->tag_offset);
 
 		switch (r) {
 		/*
@@ -1633,8 +1634,8 @@ static blk_status_t crypt_convert(struct crypt_config *cc,
 					 * exit and continue processing in a workqueue
 					 */
 					ctx->r.req = NULL;
+					ctx->tag_offset++;
 					ctx->cc_sector += sector_step;
-					tag_offset++;
 					return BLK_STS_DEV_RESOURCE;
 				}
 			} else {
@@ -1648,8 +1649,8 @@ static blk_status_t crypt_convert(struct crypt_config *cc,
 		 */
 		case -EINPROGRESS:
 			ctx->r.req = NULL;
+			ctx->tag_offset++;
 			ctx->cc_sector += sector_step;
-			tag_offset++;
 			continue;
 		/*
 		 * The request was already processed (synchronously).
@@ -1657,7 +1658,7 @@ static blk_status_t crypt_convert(struct crypt_config *cc,
 		case 0:
 			atomic_dec(&ctx->cc_pending);
 			ctx->cc_sector += sector_step;
-			tag_offset++;
+			ctx->tag_offset++;
 			if (!atomic)
 				cond_resched();
 			continue;
@@ -1719,6 +1720,7 @@ retry:
 	clone->bi_private = io;
 	clone->bi_end_io = crypt_endio;
 	clone->bi_ioprio = io->base_bio->bi_ioprio;
+	clone->bi_iter.bi_sector = cc->start + io->sector;
 
 	remaining_size = size;
@@ -1909,7 +1911,6 @@ static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp)
 			crypt_dec_pending(io);
 			return 1;
 		}
-		clone->bi_iter.bi_sector = cc->start + io->sector;
 		crypt_convert_init(cc, &io->ctx, clone, clone, io->sector);
 		io->saved_bi_iter = clone->bi_iter;
 		dm_submit_bio_remap(io->base_bio, clone);
@@ -1925,13 +1926,13 @@ static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp)
 	clone = bio_alloc_clone(cc->dev->bdev, io->base_bio, gfp, &cc->bs);
 	if (!clone)
 		return 1;
+
+	clone->bi_iter.bi_sector = cc->start + io->sector;
 	clone->bi_private = io;
 	clone->bi_end_io = crypt_endio;
 
 	crypt_inc_pending(io);
 
-	clone->bi_iter.bi_sector = cc->start + io->sector;
-
 	if (dm_crypt_integrity_io_alloc(io, clone)) {
 		crypt_dec_pending(io);
 		bio_put(clone);
@@ -2039,8 +2040,6 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async)
 	/* crypt_convert should have filled the clone bio */
 	BUG_ON(io->ctx.iter_out.bi_size);
 
-	clone->bi_iter.bi_sector = cc->start + io->sector;
-
 	if ((likely(!async) && test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags)) ||
 	    test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags)) {
 		dm_submit_bio_remap(io->base_bio, clone);
@@ -2092,13 +2091,12 @@ static void kcryptd_crypt_write_continue(struct work_struct *work)
 	struct crypt_config *cc = io->cc;
 	struct convert_context *ctx = &io->ctx;
 	int crypt_finished;
-	sector_t sector = io->sector;
 	blk_status_t r;
 
 	wait_for_completion(&ctx->restart);
 	reinit_completion(&ctx->restart);
 
-	r = crypt_convert(cc, &io->ctx, true, false);
+	r = crypt_convert(cc, &io->ctx, false, false);
 	if (r)
 		io->error = r;
 	crypt_finished = atomic_dec_and_test(&ctx->cc_pending);
@@ -2109,10 +2107,8 @@ static void kcryptd_crypt_write_continue(struct work_struct *work)
 	}
 
 	/* Encryption was already finished, submit io now */
-	if (crypt_finished) {
+	if (crypt_finished)
 		kcryptd_crypt_write_io_submit(io, 0);
-		io->sector = sector;
-	}
 
 	crypt_dec_pending(io);
 }
@@ -2123,14 +2119,13 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
 	struct convert_context *ctx = &io->ctx;
 	struct bio *clone;
 	int crypt_finished;
-	sector_t sector = io->sector;
 	blk_status_t r;
 
 	/*
 	 * Prevent io from disappearing until this function completes.
 	 */
 	crypt_inc_pending(io);
-	crypt_convert_init(cc, ctx, NULL, io->base_bio, sector);
+	crypt_convert_init(cc, ctx, NULL, io->base_bio, io->sector);
 
 	clone = crypt_alloc_buffer(io, io->base_bio->bi_iter.bi_size);
 	if (unlikely(!clone)) {
@@ -2147,8 +2142,6 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
 		io->ctx.iter_in = clone->bi_iter;
 	}
 
-	sector += bio_sectors(clone);
-
 	crypt_inc_pending(io);
 	r = crypt_convert(cc, ctx,
 			  test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags), true);
@@ -2172,10 +2165,8 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
 	}
 
 	/* Encryption was already finished, submit io now */
-	if (crypt_finished) {
+	if (crypt_finished)
 		kcryptd_crypt_write_io_submit(io, 0);
-		io->sector = sector;
-	}
 
 dec:
 	crypt_dec_pending(io);
@@ -2203,7 +2194,7 @@ static void kcryptd_crypt_read_continue(struct work_struct *work)
 	wait_for_completion(&io->ctx.restart);
 	reinit_completion(&io->ctx.restart);
 
-	r = crypt_convert(cc, &io->ctx, true, false);
+	r = crypt_convert(cc, &io->ctx, false, false);
 	if (r)
 		io->error = r;
 
@@ -2221,7 +2212,6 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
 	crypt_inc_pending(io);
 
 	if (io->ctx.aead_recheck) {
-		io->ctx.cc_sector = io->sector + cc->iv_offset;
 		r = crypt_convert(cc, &io->ctx,
 				  test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags), true);
 	} else {
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index d7a8e2f40db3..c37668790577 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -379,6 +379,7 @@ static void do_region(const blk_opf_t opf, unsigned int region,
 
 		atomic_inc(&io->count);
 		submit_bio(bio);
+		WARN_ON_ONCE(opf & REQ_ATOMIC && remaining);
 	} while (remaining);
 }
 
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 49fb0f684193..66318aba4bdb 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -199,9 +199,10 @@ static size_t linear_dax_recovery_write(struct dm_target *ti, pgoff_t pgoff,
 
 static struct target_type linear_target = {
 	.name = "linear",
-	.version = {1, 4, 0},
+	.version = {1, 5, 0},
 	.features = DM_TARGET_PASSES_INTEGRITY | DM_TARGET_NOWAIT |
-		    DM_TARGET_ZONED_HM | DM_TARGET_PASSES_CRYPTO,
+		    DM_TARGET_ZONED_HM | DM_TARGET_PASSES_CRYPTO |
+		    DM_TARGET_ATOMIC_WRITES,
 	.report_zones = linear_report_zones,
 	.module = THIS_MODULE,
 	.ctr = linear_ctr,
diff --git a/drivers/md/dm-ps-io-affinity.c b/drivers/md/dm-ps-io-affinity.c
index 461ee6b2044d..716807e511ee 100644
--- a/drivers/md/dm-ps-io-affinity.c
+++ b/drivers/md/dm-ps-io-affinity.c
@@ -116,7 +116,7 @@ static int ioa_create(struct path_selector *ps, unsigned int argc, char **argv)
 	if (!s)
 		return -ENOMEM;
 
-	s->path_map = kzalloc(nr_cpu_ids * sizeof(struct path_info *),
+	s->path_map = kcalloc(nr_cpu_ids, sizeof(struct path_info *),
 			      GFP_KERNEL);
 	if (!s->path_map)
 		goto free_selector;
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 1e0d3b9b75d6..6adc55fd90d3 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -3196,7 +3196,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	if (reshape_sectors || rs_is_raid1(rs)) {
 		/*
 		 * We can only prepare for a reshape here, because the
-		 * raid set needs to run to provide the repective reshape
+		 * raid set needs to run to provide the respective reshape
 		 * check functions via its MD personality instance.
 		 *
 		 * So do the reshape check after md_run() succeeded.
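Annotation (not part of the patch): one hardening fix above is easy to miss — dm-ps-io-affinity swaps an open-coded kzalloc(n * size, ...) for kcalloc(n, size, ...), whose multiplication is overflow-checked. A userspace sketch of the same idea, using a hypothetical helper name:

#include <stdint.h>
#include <stdlib.h>

/* Hypothetical helper mirroring kcalloc()'s overflow-checked allocation:
 * kzalloc(n * size) can silently wrap and under-allocate; kcalloc(n, size)
 * fails cleanly instead. */
static void *zalloc_array(size_t n, size_t size)
{
	if (size && n > SIZE_MAX / size)
		return NULL;		/* n * size would overflow */
	return calloc(n, size);		/* zero-initialized, like kzalloc */
}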
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 9511dae5b556..8c6f1f7e6456 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -656,7 +656,7 @@ static void do_write(struct mirror_set *ms, struct bio *bio)
 	unsigned int i;
 	struct dm_io_region io[MAX_NR_MIRRORS], *dest = io;
 	struct mirror *m;
-	blk_opf_t op_flags = bio->bi_opf & (REQ_FUA | REQ_PREFLUSH);
+	blk_opf_t op_flags = bio->bi_opf & (REQ_FUA | REQ_PREFLUSH | REQ_ATOMIC);
 	struct dm_io_request io_req = {
 		.bi_opf = REQ_OP_WRITE | op_flags,
 		.mem.type = DM_IO_BIO,
@@ -1483,8 +1483,9 @@ static int mirror_iterate_devices(struct dm_target *ti,
 
 static struct target_type mirror_target = {
 	.name = "mirror",
-	.version = {1, 14, 0},
+	.version = {1, 15, 0},
 	.module = THIS_MODULE,
+	.features = DM_TARGET_ATOMIC_WRITES,
 	.ctr = mirror_ctr,
 	.dtr = mirror_dtr,
 	.map = mirror_map,
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 4112071de0be..3786ac67cefe 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -465,8 +465,9 @@ static void stripe_io_hints(struct dm_target *ti,
 
 static struct target_type stripe_target = {
 	.name = "striped",
-	.version = {1, 6, 0},
-	.features = DM_TARGET_PASSES_INTEGRITY | DM_TARGET_NOWAIT,
+	.version = {1, 7, 0},
+	.features = DM_TARGET_PASSES_INTEGRITY | DM_TARGET_NOWAIT |
+		    DM_TARGET_ATOMIC_WRITES,
 	.module = THIS_MODULE,
 	.ctr = stripe_ctr,
 	.dtr = stripe_dtr,
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index bd8b796ae683..0ef5203387b2 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1806,6 +1806,32 @@ static bool dm_table_supports_secure_erase(struct dm_table *t)
 	return true;
 }
 
+static int device_not_atomic_write_capable(struct dm_target *ti,
+			struct dm_dev *dev, sector_t start,
+			sector_t len, void *data)
+{
+	return !bdev_can_atomic_write(dev->bdev);
+}
+
+static bool dm_table_supports_atomic_writes(struct dm_table *t)
+{
+	for (unsigned int i = 0; i < t->num_targets; i++) {
+		struct dm_target *ti = dm_table_get_target(t, i);
+
+		if (!dm_target_supports_atomic_writes(ti->type))
+			return false;
+
+		if (!ti->type->iterate_devices)
+			return false;
+
+		if (ti->type->iterate_devices(ti,
+			device_not_atomic_write_capable, NULL)) {
+			return false;
+		}
+	}
+	return true;
+}
+
 int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
 			      struct queue_limits *limits)
 {
@@ -1854,6 +1880,9 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
 			return r;
 	}
 
+	if (dm_table_supports_atomic_writes(t))
+		limits->features |= BLK_FEAT_ATOMIC_WRITES;
+
 	r = queue_limits_set(q, limits);
 	if (r)
 		return r;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 12ecf07a3841..4d1e42891d24 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1479,12 +1479,12 @@ static void setup_split_accounting(struct clone_info *ci, unsigned int len)
 
 static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci,
 				struct dm_target *ti, unsigned int num_bios,
-				unsigned *len, gfp_t gfp_flag)
+				unsigned *len)
 {
 	struct bio *bio;
-	int try = (gfp_flag & GFP_NOWAIT) ? 0 : 1;
+	int try;
 
-	for (; try < 2; try++) {
+	for (try = 0; try < 2; try++) {
 		int bio_nr;
 
 		if (try && num_bios > 1)
@@ -1508,8 +1508,7 @@ static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci,
 }
 
 static unsigned int __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
-					  unsigned int num_bios, unsigned int *len,
-					  gfp_t gfp_flag)
+					  unsigned int num_bios, unsigned int *len)
 {
 	struct bio_list blist = BIO_EMPTY_LIST;
 	struct bio *clone;
@@ -1526,7 +1525,7 @@ static unsigned int __send_duplicate_bios(struct clone_info *ci, struct dm_targe
 	 * Using alloc_multiple_bios(), even if num_bios is 1, to consistently
 	 * support allocating using GFP_NOWAIT with GFP_NOIO fallback.
 	 */
-	alloc_multiple_bios(&blist, ci, ti, num_bios, len, gfp_flag);
+	alloc_multiple_bios(&blist, ci, ti, num_bios, len);
 	while ((clone = bio_list_pop(&blist))) {
 		if (num_bios > 1)
 			dm_tio_set_flag(clone_to_tio(clone), DM_TIO_IS_DUPLICATE_BIO);
@@ -1564,7 +1563,7 @@ static void __send_empty_flush(struct clone_info *ci)
 
 			atomic_add(ti->num_flush_bios, &ci->io->io_count);
 			bios = __send_duplicate_bios(ci, ti, ti->num_flush_bios,
-						     NULL, GFP_NOWAIT);
+						     NULL);
 			atomic_sub(ti->num_flush_bios - bios, &ci->io->io_count);
 		}
 	} else {
@@ -1612,7 +1611,7 @@ static void __send_abnormal_io(struct clone_info *ci, struct dm_target *ti,
 			      __max_io_len(ti, ci->sector, max_granularity, max_sectors));
 
 	atomic_add(num_bios, &ci->io->io_count);
-	bios = __send_duplicate_bios(ci, ti, num_bios, &len, GFP_NOIO);
+	bios = __send_duplicate_bios(ci, ti, num_bios, &len);
 	/*
 	 * alloc_io() takes one extra reference for submission, so the
 	 * reference won't reach 0 without the following (+1) subtraction
@@ -1746,6 +1745,9 @@ static blk_status_t __split_and_process_bio(struct clone_info *ci)
 
 	ci->submit_as_polled = !!(ci->bio->bi_opf & REQ_POLLED);
 	len = min_t(sector_t, max_io_len(ti, ci->sector), ci->sector_count);
+	if (ci->bio->bi_opf & REQ_ATOMIC && len != ci->sector_count)
+		return BLK_STS_IOERR;
+
 	setup_split_accounting(ci, len);
 
 	if (unlikely(ci->bio->bi_opf & REQ_NOWAIT)) {
@@ -1849,7 +1851,7 @@ static blk_status_t __send_zone_reset_all_emulated(struct clone_info *ci,
 			 * not go crazy with the clone allocation.
 			 */
 			alloc_multiple_bios(&blist, ci, ti, min(nr_reset, 32),
-					    NULL, GFP_NOIO);
+					    NULL);
 		}
 
 		/* Get a clone and change it to a regular reset operation. */
@@ -1881,7 +1883,7 @@ static void __send_zone_reset_all_native(struct clone_info *ci,
 	unsigned int bios;
 
 	atomic_add(1, &ci->io->io_count);
-	bios = __send_duplicate_bios(ci, ti, 1, NULL, GFP_NOIO);
+	bios = __send_duplicate_bios(ci, ti, 1, NULL);
 	atomic_sub(1 - bios, &ci->io->io_count);
 
 	ci->sector_count = 0;
@@ -1969,6 +1971,15 @@ static void dm_split_and_process_bio(struct mapped_device *md,
 
 	/* Only support nowait for normal IO */
 	if (unlikely(bio->bi_opf & REQ_NOWAIT) && !is_abnormal) {
+		/*
+		 * Don't support NOWAIT for FLUSH because it may allocate
+		 * multiple bios and there's no easy way how to undo the
+		 * allocations.
+		 */
+		if (bio->bi_opf & REQ_PREFLUSH) {
+			bio_wouldblock_error(bio);
+			return;
+		}
 		io = alloc_io(md, bio, GFP_NOWAIT);
 		if (unlikely(!io)) {
 			/* Unable to do anything without dm_io. */
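Annotation (not part of the patch): the __split_and_process_bio() hunk above encodes the core atomic-write rule — an atomic bio must map to a single target in one piece, so if the computed clone length would split it, DM fails the bio rather than silently breaking the all-or-nothing guarantee. A toy model of that decision, with illustrative types rather than the kernel's:

#include <stdbool.h>
#include <stdio.h>

/* Toy model: an atomic write may not be split across mapping boundaries. */
struct toy_bio {
	bool		atomic;		/* REQ_ATOMIC analogue */
	unsigned int	sectors;	/* total sectors in the bio */
};

/* max_len: how many sectors the first target mapping can accept. */
static const char *map_bio(const struct toy_bio *bio, unsigned int max_len)
{
	unsigned int len = bio->sectors < max_len ? bio->sectors : max_len;

	if (bio->atomic && len != bio->sectors)
		return "IOERR (atomic bio would be split)";
	return "mapped";
}

int main(void)
{
	struct toy_bio b = { .atomic = true, .sectors = 16 };

	printf("%s\n", map_bio(&b, 8));		/* would split -> IOERR */
	printf("%s\n", map_bio(&b, 32));	/* fits whole -> mapped */
	return 0;
}

The WARN_ON_ONCE added to dm-io's do_region() earlier in the series is the belt-and-braces counterpart: if an atomic request ever does get carved into multiple bios there, it is flagged loudly.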
diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c
index c7ba4e6cbbc7..98c745d90f48 100644
--- a/drivers/md/persistent-data/dm-transaction-manager.c
+++ b/drivers/md/persistent-data/dm-transaction-manager.c
@@ -13,6 +13,7 @@
 #include <linux/export.h>
 #include <linux/mutex.h>
 #include <linux/hash.h>
+#include <linux/rbtree.h>
 #include <linux/slab.h>
 #include <linux/device-mapper.h>
 
@@ -77,7 +78,7 @@ static void prefetch_issue(struct prefetch_set *p, struct dm_block_manager *bm)
 /*----------------------------------------------------------------*/
 
 struct shadow_info {
-	struct hlist_node hlist;
+	struct rb_node node;
 	dm_block_t where;
 };
 
@@ -95,7 +96,7 @@ struct dm_transaction_manager {
 	struct dm_space_map *sm;
 
 	spinlock_t lock;
-	struct hlist_head buckets[DM_HASH_SIZE];
+	struct rb_root buckets[DM_HASH_SIZE];
 
 	struct prefetch_set prefetches;
 };
@@ -106,14 +107,22 @@ static int is_shadow(struct dm_transaction_manager *tm, dm_block_t b)
 {
 	int r = 0;
 	unsigned int bucket = dm_hash_block(b, DM_HASH_MASK);
-	struct shadow_info *si;
+	struct rb_node **node;
 
 	spin_lock(&tm->lock);
-	hlist_for_each_entry(si, tm->buckets + bucket, hlist)
-		if (si->where == b) {
+	node = &tm->buckets[bucket].rb_node;
+	while (*node) {
+		struct shadow_info *si =
+			rb_entry(*node, struct shadow_info, node);
+		if (b == si->where) {
 			r = 1;
 			break;
 		}
+		if (b < si->where)
+			node = &si->node.rb_left;
+		else
+			node = &si->node.rb_right;
+	}
 	spin_unlock(&tm->lock);
 
 	return r;
@@ -130,30 +139,41 @@ static void insert_shadow(struct dm_transaction_manager *tm, dm_block_t b)
 	si = kmalloc(sizeof(*si), GFP_NOIO);
 	if (si) {
+		struct rb_node **node, *parent;
 		si->where = b;
 		bucket = dm_hash_block(b, DM_HASH_MASK);
+
 		spin_lock(&tm->lock);
-		hlist_add_head(&si->hlist, tm->buckets + bucket);
+		node = &tm->buckets[bucket].rb_node;
+		parent = NULL;
+		while (*node) {
+			struct shadow_info *si =
+				rb_entry(*node, struct shadow_info, node);
+			parent = *node;
+			if (b < si->where)
+				node = &si->node.rb_left;
+			else
+				node = &si->node.rb_right;
+		}
+		rb_link_node(&si->node, parent, node);
+		rb_insert_color(&si->node, &tm->buckets[bucket]);
 		spin_unlock(&tm->lock);
 	}
 }
 
 static void wipe_shadow_table(struct dm_transaction_manager *tm)
 {
-	struct shadow_info *si;
-	struct hlist_node *tmp;
-	struct hlist_head *bucket;
-	int i;
+	unsigned int i;
 
 	spin_lock(&tm->lock);
 	for (i = 0; i < DM_HASH_SIZE; i++) {
-		bucket = tm->buckets + i;
-		hlist_for_each_entry_safe(si, tmp, bucket, hlist)
+		while (!RB_EMPTY_ROOT(&tm->buckets[i])) {
+			struct shadow_info *si =
+				rb_entry(tm->buckets[i].rb_node, struct shadow_info, node);
+			rb_erase(&si->node, &tm->buckets[i]);
 			kfree(si);
-
-		INIT_HLIST_HEAD(bucket);
+		}
 	}
-
 	spin_unlock(&tm->lock);
 }
@@ -162,7 +182,7 @@ static void wipe_shadow_table(struct dm_transaction_manager *tm)
 static struct dm_transaction_manager *dm_tm_create(struct dm_block_manager *bm,
 						   struct dm_space_map *sm)
 {
-	int i;
+	unsigned int i;
 	struct dm_transaction_manager *tm;
 
 	tm = kmalloc(sizeof(*tm), GFP_KERNEL);
@@ -176,7 +196,7 @@ static struct dm_transaction_manager *dm_tm_create(struct dm_block_manager *bm,
 
 	spin_lock_init(&tm->lock);
 	for (i = 0; i < DM_HASH_SIZE; i++)
-		INIT_HLIST_HEAD(tm->buckets + i);
+		tm->buckets[i] = RB_ROOT;
 
 	prefetch_init(&tm->prefetches);
 
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 8321f65897f3..bcc6d7b69470 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -299,6 +299,9 @@ struct target_type {
 #define dm_target_supports_mixed_zoned_model(type) (false)
 #endif
 
+#define DM_TARGET_ATOMIC_WRITES 0x00000400
+#define dm_target_supports_atomic_writes(type) ((type)->features & DM_TARGET_ATOMIC_WRITES)
+
 struct dm_target {
 	struct dm_table *table;
 	struct target_type *type;
diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h
index 1990b5700f69..b08c7378164d 100644
--- a/include/uapi/linux/dm-ioctl.h
+++ b/include/uapi/linux/dm-ioctl.h
@@ -286,9 +286,9 @@ enum {
 #define DM_DEV_SET_GEOMETRY	_IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
 
 #define DM_VERSION_MAJOR	4
-#define DM_VERSION_MINOR	48
+#define DM_VERSION_MINOR	49
 #define DM_VERSION_PATCHLEVEL	0
-#define DM_VERSION_EXTRA	"-ioctl (2023-03-01)"
+#define DM_VERSION_EXTRA	"-ioctl (2025-01-17)"
 
 /* Status bits */
 #define DM_READONLY_FLAG	(1 << 0) /* In/Out */
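Annotation (not part of the patch): the dm-table hunk gates the new queue feature the conservative way — BLK_FEAT_ATOMIC_WRITES is advertised only if every target in the table opts in via DM_TARGET_ATOMIC_WRITES and every underlying device passes bdev_can_atomic_write(). A compact model of that all-or-nothing stacking check, with illustrative structures rather than the kernel API:

#include <stdbool.h>
#include <stddef.h>

/* Illustrative model of dm_table_supports_atomic_writes(): the feature
 * stacks only if no target and no underlying device vetoes it. */
struct toy_dev { bool can_atomic_write; };

struct toy_target {
	bool		 atomic_writes_feature;	/* DM_TARGET_ATOMIC_WRITES analogue */
	struct toy_dev	*devs;
	size_t		 ndevs;
};

static bool table_supports_atomic_writes(struct toy_target *t, size_t ntargets)
{
	for (size_t i = 0; i < ntargets; i++) {
		if (!t[i].atomic_writes_feature)
			return false;		/* target never opted in */
		for (size_t j = 0; j < t[i].ndevs; j++)
			if (!t[i].devs[j].can_atomic_write)
				return false;	/* device is not capable */
	}
	return true;	/* safe to advertise the queue feature */
}

This is why only linear, striped, and mirror bump their target versions here: each had to opt in explicitly before any table containing it could pass the check.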