diff options
author | Stephen Rothwell <sfr@canb.auug.org.au> | 2014-05-16 13:17:49 +1000 |
---|---|---|
committer | Stephen Rothwell <sfr@canb.auug.org.au> | 2014-05-16 13:17:49 +1000 |
commit | b1abb8068c7faed87c7c66505a1b9031c660125d (patch) | |
tree | d595927734c1b39b11dead44e26e9532321bbaba /drivers | |
parent | 4b0ff7fcb8a0e95a90f1a0aa54c70ad8cc7bcd4b (diff) | |
parent | 1cda12f8292fa0b28e358323630c6c1cad11bcce (diff) |
Merge remote-tracking branch 'device-mapper/for-next'
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/md/dm-crypt.c | 61 | ||||
-rw-r--r-- | drivers/md/dm-mpath.c | 2 | ||||
-rw-r--r-- | drivers/md/dm-snap.c | 67 | ||||
-rw-r--r-- | drivers/md/dm-thin.c | 89 | ||||
-rw-r--r-- | drivers/md/dm.c | 69 |
5 files changed, 187 insertions, 101 deletions
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 784695d22fde..53b213226c01 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -19,7 +19,6 @@ #include <linux/crypto.h> #include <linux/workqueue.h> #include <linux/backing-dev.h> -#include <linux/percpu.h> #include <linux/atomic.h> #include <linux/scatterlist.h> #include <asm/page.h> @@ -43,6 +42,7 @@ struct convert_context { struct bvec_iter iter_out; sector_t cc_sector; atomic_t cc_pending; + struct ablkcipher_request *req; }; /* @@ -111,15 +111,7 @@ struct iv_tcw_private { enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID }; /* - * Duplicated per-CPU state for cipher. - */ -struct crypt_cpu { - struct ablkcipher_request *req; -}; - -/* - * The fields in here must be read only after initialization, - * changing state should be in crypt_cpu. + * The fields in here must be read only after initialization. */ struct crypt_config { struct dm_dev *dev; @@ -150,12 +142,6 @@ struct crypt_config { sector_t iv_offset; unsigned int iv_size; - /* - * Duplicated per cpu state. Access through - * per_cpu_ptr() only. - */ - struct crypt_cpu __percpu *cpu; - /* ESSIV: struct crypto_cipher *essiv_tfm */ void *iv_private; struct crypto_ablkcipher **tfms; @@ -192,11 +178,6 @@ static void clone_init(struct dm_crypt_io *, struct bio *); static void kcryptd_queue_crypt(struct dm_crypt_io *io); static u8 *iv_of_dmreq(struct crypt_config *cc, struct dm_crypt_request *dmreq); -static struct crypt_cpu *this_crypt_config(struct crypt_config *cc) -{ - return this_cpu_ptr(cc->cpu); -} - /* * Use this to access cipher attributes that are the same for each CPU. */ @@ -903,16 +884,15 @@ static void kcryptd_async_done(struct crypto_async_request *async_req, static void crypt_alloc_req(struct crypt_config *cc, struct convert_context *ctx) { - struct crypt_cpu *this_cc = this_crypt_config(cc); unsigned key_index = ctx->cc_sector & (cc->tfms_count - 1); - if (!this_cc->req) - this_cc->req = mempool_alloc(cc->req_pool, GFP_NOIO); + if (!ctx->req) + ctx->req = mempool_alloc(cc->req_pool, GFP_NOIO); - ablkcipher_request_set_tfm(this_cc->req, cc->tfms[key_index]); - ablkcipher_request_set_callback(this_cc->req, + ablkcipher_request_set_tfm(ctx->req, cc->tfms[key_index]); + ablkcipher_request_set_callback(ctx->req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, - kcryptd_async_done, dmreq_of_req(cc, this_cc->req)); + kcryptd_async_done, dmreq_of_req(cc, ctx->req)); } /* @@ -921,7 +901,6 @@ static void crypt_alloc_req(struct crypt_config *cc, static int crypt_convert(struct crypt_config *cc, struct convert_context *ctx) { - struct crypt_cpu *this_cc = this_crypt_config(cc); int r; atomic_set(&ctx->cc_pending, 1); @@ -932,7 +911,7 @@ static int crypt_convert(struct crypt_config *cc, atomic_inc(&ctx->cc_pending); - r = crypt_convert_block(cc, ctx, this_cc->req); + r = crypt_convert_block(cc, ctx, ctx->req); switch (r) { /* async */ @@ -941,7 +920,7 @@ static int crypt_convert(struct crypt_config *cc, reinit_completion(&ctx->restart); /* fall through*/ case -EINPROGRESS: - this_cc->req = NULL; + ctx->req = NULL; ctx->cc_sector++; continue; @@ -1040,6 +1019,7 @@ static struct dm_crypt_io *crypt_io_alloc(struct crypt_config *cc, io->sector = sector; io->error = 0; io->base_io = NULL; + io->ctx.req = NULL; atomic_set(&io->io_pending, 0); return io; @@ -1065,6 +1045,8 @@ static void crypt_dec_pending(struct dm_crypt_io *io) if (!atomic_dec_and_test(&io->io_pending)) return; + if (io->ctx.req) + mempool_free(io->ctx.req, cc->req_pool); mempool_free(io, cc->io_pool); if (likely(!base_io)) @@ -1492,8 +1474,6 @@ static int crypt_wipe_key(struct crypt_config *cc) static void crypt_dtr(struct dm_target *ti) { struct crypt_config *cc = ti->private; - struct crypt_cpu *cpu_cc; - int cpu; ti->private = NULL; @@ -1505,13 +1485,6 @@ static void crypt_dtr(struct dm_target *ti) if (cc->crypt_queue) destroy_workqueue(cc->crypt_queue); - if (cc->cpu) - for_each_possible_cpu(cpu) { - cpu_cc = per_cpu_ptr(cc->cpu, cpu); - if (cpu_cc->req) - mempool_free(cpu_cc->req, cc->req_pool); - } - crypt_free_tfms(cc); if (cc->bs) @@ -1530,9 +1503,6 @@ static void crypt_dtr(struct dm_target *ti) if (cc->dev) dm_put_device(ti, cc->dev); - if (cc->cpu) - free_percpu(cc->cpu); - kzfree(cc->cipher); kzfree(cc->cipher_string); @@ -1588,13 +1558,6 @@ static int crypt_ctr_cipher(struct dm_target *ti, if (tmp) DMWARN("Ignoring unexpected additional cipher options"); - cc->cpu = __alloc_percpu(sizeof(*(cc->cpu)), - __alignof__(struct crypt_cpu)); - if (!cc->cpu) { - ti->error = "Cannot allocate per cpu state"; - goto bad_mem; - } - /* * For compatibility with the original dm-crypt mapping format, if * only the cipher name is supplied, use cbc-plain. diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index aa009e865871..fa0f6cbd6a41 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1566,8 +1566,8 @@ static int multipath_ioctl(struct dm_target *ti, unsigned int cmd, } if (m->pg_init_required) __pg_init_all_paths(m); - spin_unlock_irqrestore(&m->lock, flags); dm_table_run_md_queue_async(m->ti->table); + spin_unlock_irqrestore(&m->lock, flags); } return r ? : __blkdev_driver_ioctl(bdev, mode, cmd, arg); diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index ebddef5237e4..e5a84c890e8f 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -2141,6 +2141,11 @@ static int origin_write_extent(struct dm_snapshot *merging_snap, * Origin: maps a linear range of a device, with hooks for snapshotting. */ +struct dm_origin { + struct dm_dev *dev; + unsigned split_boundary; +}; + /* * Construct an origin mapping: <dev_path> * The context for an origin is merely a 'struct dm_dev *' @@ -2149,41 +2154,65 @@ static int origin_write_extent(struct dm_snapshot *merging_snap, static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv) { int r; - struct dm_dev *dev; + struct dm_origin *o; if (argc != 1) { ti->error = "origin: incorrect number of arguments"; return -EINVAL; } - r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &dev); + o = kmalloc(sizeof(struct dm_origin), GFP_KERNEL); + if (!o) { + ti->error = "Cannot allocate private origin structure"; + r = -ENOMEM; + goto bad_alloc; + } + + r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &o->dev); if (r) { ti->error = "Cannot get target device"; - return r; + goto bad_open; } - ti->private = dev; + ti->private = o; ti->num_flush_bios = 1; return 0; + +bad_open: + kfree(o); +bad_alloc: + return r; } static void origin_dtr(struct dm_target *ti) { - struct dm_dev *dev = ti->private; - dm_put_device(ti, dev); + struct dm_origin *o = ti->private; + dm_put_device(ti, o->dev); + kfree(o); } static int origin_map(struct dm_target *ti, struct bio *bio) { - struct dm_dev *dev = ti->private; - bio->bi_bdev = dev->bdev; + struct dm_origin *o = ti->private; + unsigned available_sectors; - if (bio->bi_rw & REQ_FLUSH) + bio->bi_bdev = o->dev->bdev; + + if (unlikely(bio->bi_rw & REQ_FLUSH)) return DM_MAPIO_REMAPPED; + if (bio_rw(bio) != WRITE) + return DM_MAPIO_REMAPPED; + + available_sectors = o->split_boundary - + ((unsigned)bio->bi_iter.bi_sector & (o->split_boundary - 1)); + + if (bio_sectors(bio) > available_sectors) + dm_accept_partial_bio(bio, available_sectors); + /* Only tell snapshots if this is a write */ - return (bio_rw(bio) == WRITE) ? do_origin(dev, bio) : DM_MAPIO_REMAPPED; + return do_origin(o->dev, bio); } /* @@ -2192,15 +2221,15 @@ static int origin_map(struct dm_target *ti, struct bio *bio) */ static void origin_resume(struct dm_target *ti) { - struct dm_dev *dev = ti->private; + struct dm_origin *o = ti->private; - ti->max_io_len = get_origin_minimum_chunksize(dev->bdev); + o->split_boundary = get_origin_minimum_chunksize(o->dev->bdev); } static void origin_status(struct dm_target *ti, status_type_t type, unsigned status_flags, char *result, unsigned maxlen) { - struct dm_dev *dev = ti->private; + struct dm_origin *o = ti->private; switch (type) { case STATUSTYPE_INFO: @@ -2208,7 +2237,7 @@ static void origin_status(struct dm_target *ti, status_type_t type, break; case STATUSTYPE_TABLE: - snprintf(result, maxlen, "%s", dev->name); + snprintf(result, maxlen, "%s", o->dev->name); break; } } @@ -2216,13 +2245,13 @@ static void origin_status(struct dm_target *ti, status_type_t type, static int origin_merge(struct dm_target *ti, struct bvec_merge_data *bvm, struct bio_vec *biovec, int max_size) { - struct dm_dev *dev = ti->private; - struct request_queue *q = bdev_get_queue(dev->bdev); + struct dm_origin *o = ti->private; + struct request_queue *q = bdev_get_queue(o->dev->bdev); if (!q->merge_bvec_fn) return max_size; - bvm->bi_bdev = dev->bdev; + bvm->bi_bdev = o->dev->bdev; return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); } @@ -2230,9 +2259,9 @@ static int origin_merge(struct dm_target *ti, struct bvec_merge_data *bvm, static int origin_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data) { - struct dm_dev *dev = ti->private; + struct dm_origin *o = ti->private; - return fn(ti, dev, 0, ti->len, data); + return fn(ti, o->dev, 0, ti->len, data); } static struct target_type origin_target = { diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 13abade76ad9..716729702873 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -27,6 +27,7 @@ #define MAPPING_POOL_SIZE 1024 #define PRISON_CELLS 1024 #define COMMIT_PERIOD HZ +#define NO_SPACE_TIMEOUT (HZ * 60) DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle, "A percentage of time allocated for copy on write"); @@ -175,6 +176,7 @@ struct pool { struct workqueue_struct *wq; struct work_struct worker; struct delayed_work waker; + struct delayed_work no_space_timeout; unsigned long last_commit_jiffies; unsigned ref_count; @@ -935,7 +937,7 @@ static int commit(struct pool *pool) { int r; - if (get_pool_mode(pool) != PM_WRITE) + if (get_pool_mode(pool) >= PM_READ_ONLY) return -EINVAL; r = dm_pool_commit_metadata(pool->pmd); @@ -1590,49 +1592,79 @@ static void do_waker(struct work_struct *ws) queue_delayed_work(pool->wq, &pool->waker, COMMIT_PERIOD); } +/* + * We're holding onto IO to allow userland time to react. After the + * timeout either the pool will have been resized (and thus back in + * PM_WRITE mode), or we degrade to PM_READ_ONLY and start erroring IO. + */ +static void do_no_space_timeout(struct work_struct *ws) +{ + struct pool *pool = container_of(to_delayed_work(ws), struct pool, + no_space_timeout); + + if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space) + set_pool_mode(pool, PM_READ_ONLY); +} + /*----------------------------------------------------------------*/ -struct noflush_work { +struct pool_work { struct work_struct worker; - struct thin_c *tc; + struct completion complete; +}; + +static struct pool_work *to_pool_work(struct work_struct *ws) +{ + return container_of(ws, struct pool_work, worker); +} + +static void pool_work_complete(struct pool_work *pw) +{ + complete(&pw->complete); +} + +static void pool_work_wait(struct pool_work *pw, struct pool *pool, + void (*fn)(struct work_struct *)) +{ + INIT_WORK_ONSTACK(&pw->worker, fn); + init_completion(&pw->complete); + queue_work(pool->wq, &pw->worker); + wait_for_completion(&pw->complete); +} - atomic_t complete; - wait_queue_head_t wait; +/*----------------------------------------------------------------*/ + +struct noflush_work { + struct pool_work pw; + struct thin_c *tc; }; -static void complete_noflush_work(struct noflush_work *w) +static struct noflush_work *to_noflush(struct work_struct *ws) { - atomic_set(&w->complete, 1); - wake_up(&w->wait); + return container_of(to_pool_work(ws), struct noflush_work, pw); } static void do_noflush_start(struct work_struct *ws) { - struct noflush_work *w = container_of(ws, struct noflush_work, worker); + struct noflush_work *w = to_noflush(ws); w->tc->requeue_mode = true; requeue_io(w->tc); - complete_noflush_work(w); + pool_work_complete(&w->pw); } static void do_noflush_stop(struct work_struct *ws) { - struct noflush_work *w = container_of(ws, struct noflush_work, worker); + struct noflush_work *w = to_noflush(ws); w->tc->requeue_mode = false; - complete_noflush_work(w); + pool_work_complete(&w->pw); } static void noflush_work(struct thin_c *tc, void (*fn)(struct work_struct *)) { struct noflush_work w; - INIT_WORK_ONSTACK(&w.worker, fn); w.tc = tc; - atomic_set(&w.complete, 0); - init_waitqueue_head(&w.wait); - - queue_work(tc->pool->wq, &w.worker); - - wait_event(w.wait, atomic_read(&w.complete)); + pool_work_wait(&w.pw, tc->pool, fn); } /*----------------------------------------------------------------*/ @@ -1715,6 +1747,9 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) pool->process_discard = process_discard; pool->process_prepared_mapping = process_prepared_mapping; pool->process_prepared_discard = process_prepared_discard_passdown; + + if (!pool->pf.error_if_no_space) + queue_delayed_work(pool->wq, &pool->no_space_timeout, NO_SPACE_TIMEOUT); break; case PM_WRITE: @@ -2100,6 +2135,7 @@ static struct pool *pool_create(struct mapped_device *pool_md, INIT_WORK(&pool->worker, do_worker); INIT_DELAYED_WORK(&pool->waker, do_waker); + INIT_DELAYED_WORK(&pool->no_space_timeout, do_no_space_timeout); spin_lock_init(&pool->lock); bio_list_init(&pool->deferred_flush_bios); INIT_LIST_HEAD(&pool->prepared_mappings); @@ -2662,6 +2698,7 @@ static void pool_postsuspend(struct dm_target *ti) struct pool *pool = pt->pool; cancel_delayed_work(&pool->waker); + cancel_delayed_work(&pool->no_space_timeout); flush_workqueue(pool->wq); (void) commit(pool); } @@ -3430,6 +3467,19 @@ static int thin_iterate_devices(struct dm_target *ti, return 0; } +static int thin_merge(struct dm_target *ti, struct bvec_merge_data *bvm, + struct bio_vec *biovec, int max_size) +{ + struct thin_c *tc = ti->private; + struct request_queue *q = bdev_get_queue(tc->pool_dev->bdev); + + if (!q->merge_bvec_fn) + return max_size; + + bvm->bi_bdev = tc->pool_dev->bdev; + return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); +} + static struct target_type thin_target = { .name = "thin", .version = {1, 12, 0}, @@ -3442,6 +3492,7 @@ static struct target_type thin_target = { .postsuspend = thin_postsuspend, .status = thin_status, .iterate_devices = thin_iterate_devices, + .merge = thin_merge }; /*----------------------------------------------------------------*/ diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 6a71bc7c9133..bafed5e097ca 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1110,6 +1110,46 @@ int dm_set_target_max_io_len(struct dm_target *ti, sector_t len) } EXPORT_SYMBOL_GPL(dm_set_target_max_io_len); +/* + * A target may call dm_accept_partial_bio only from the map routine. It is + * allowed for all bio types except REQ_FLUSH. + * + * dm_accept_partial_bio informs the dm that the target only wants to process + * additional n_sectors sectors of the bio and the rest of the data should be + * sent in a next bio. + * + * A diagram that explains the arithmetics: + * +--------------------+---------------+-------+ + * | 1 | 2 | 3 | + * +--------------------+---------------+-------+ + * + * <-------------- *tio->len_ptr ---------------> + * <------- bi_size -------> + * <-- n_sectors --> + * + * Region 1 was already iterated over with bio_advance or similar function. + * (it may be empty if the target doesn't use bio_advance) + * Region 2 is the remaining bio size that the target wants to process. + * (it may be empty if region 1 is non-empty, although there is no reason + * to make it empty) + * The target requires that region 3 is to be sent in the next bio. + * + * If the target wants to receive multiple copies of the bio (via num_*bios, etc), + * the partially processed part (the sum of regions 1+2) must be the same for all + * copies of the bio. + */ +void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors) +{ + struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone); + unsigned bi_size = bio->bi_iter.bi_size >> SECTOR_SHIFT; + BUG_ON(bio->bi_rw & REQ_FLUSH); + BUG_ON(bi_size > *tio->len_ptr); + BUG_ON(n_sectors > bi_size); + *tio->len_ptr -= bi_size - n_sectors; + bio->bi_iter.bi_size = n_sectors << SECTOR_SHIFT; +} +EXPORT_SYMBOL_GPL(dm_accept_partial_bio); + static void __map_bio(struct dm_target_io *tio) { int r; @@ -1152,10 +1192,10 @@ struct clone_info { struct bio *bio; struct dm_io *io; sector_t sector; - sector_t sector_count; + unsigned sector_count; }; -static void bio_setup_sector(struct bio *bio, sector_t sector, sector_t len) +static void bio_setup_sector(struct bio *bio, sector_t sector, unsigned len) { bio->bi_iter.bi_sector = sector; bio->bi_iter.bi_size = to_bytes(len); @@ -1200,11 +1240,13 @@ static struct dm_target_io *alloc_tio(struct clone_info *ci, static void __clone_and_map_simple_bio(struct clone_info *ci, struct dm_target *ti, - unsigned target_bio_nr, sector_t len) + unsigned target_bio_nr, unsigned *len) { struct dm_target_io *tio = alloc_tio(ci, ti, ci->bio->bi_max_vecs, target_bio_nr); struct bio *clone = &tio->clone; + tio->len_ptr = len; + /* * Discard requests require the bio's inline iovecs be initialized. * ci->bio->bi_max_vecs is BIO_INLINE_VECS anyway, for both flush @@ -1212,13 +1254,13 @@ static void __clone_and_map_simple_bio(struct clone_info *ci, */ __bio_clone_fast(clone, ci->bio); if (len) - bio_setup_sector(clone, ci->sector, len); + bio_setup_sector(clone, ci->sector, *len); __map_bio(tio); } static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti, - unsigned num_bios, sector_t len) + unsigned num_bios, unsigned *len) { unsigned target_bio_nr; @@ -1233,13 +1275,13 @@ static int __send_empty_flush(struct clone_info *ci) BUG_ON(bio_has_data(ci->bio)); while ((ti = dm_table_get_target(ci->map, target_nr++))) - __send_duplicate_bios(ci, ti, ti->num_flush_bios, 0); + __send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL); return 0; } static void __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti, - sector_t sector, unsigned len) + sector_t sector, unsigned *len) { struct bio *bio = ci->bio; struct dm_target_io *tio; @@ -1254,7 +1296,8 @@ static void __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti for (target_bio_nr = 0; target_bio_nr < num_target_bios; target_bio_nr++) { tio = alloc_tio(ci, ti, 0, target_bio_nr); - clone_bio(tio, bio, sector, len); + tio->len_ptr = len; + clone_bio(tio, bio, sector, *len); __map_bio(tio); } } @@ -1283,7 +1326,7 @@ static int __send_changing_extent_only(struct clone_info *ci, is_split_required_fn is_split_required) { struct dm_target *ti; - sector_t len; + unsigned len; unsigned num_bios; do { @@ -1302,11 +1345,11 @@ static int __send_changing_extent_only(struct clone_info *ci, return -EOPNOTSUPP; if (is_split_required && !is_split_required(ti)) - len = min(ci->sector_count, max_io_len_target_boundary(ci->sector, ti)); + len = min((sector_t)ci->sector_count, max_io_len_target_boundary(ci->sector, ti)); else - len = min(ci->sector_count, max_io_len(ci->sector, ti)); + len = min((sector_t)ci->sector_count, max_io_len(ci->sector, ti)); - __send_duplicate_bios(ci, ti, num_bios, len); + __send_duplicate_bios(ci, ti, num_bios, &len); ci->sector += len; } while (ci->sector_count -= len); @@ -1345,7 +1388,7 @@ static int __split_and_process_non_flush(struct clone_info *ci) len = min_t(sector_t, max_io_len(ci->sector, ti), ci->sector_count); - __clone_and_map_data_bio(ci, ti, ci->sector, len); + __clone_and_map_data_bio(ci, ti, ci->sector, &len); ci->sector += len; ci->sector_count -= len; |