summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
authorStephen Rothwell <sfr@canb.auug.org.au>2014-08-18 06:21:28 +1000
committerStephen Rothwell <sfr@canb.auug.org.au>2014-08-18 06:21:28 +1000
commit55273aa7539c97543cfd4310c876c4a207ad4c64 (patch)
treeb3d2ddc16b8265972ae6ddaa66e07eea8248bb47 /drivers
parent473f9639819684765e78d298f192b8030fe1290c (diff)
parent465a3d61c229058640ae479c51ef62678308203d (diff)
Merge remote-tracking branch 'device-mapper/for-next'
Diffstat (limited to 'drivers')
-rw-r--r--drivers/md/dm-crypt.c368
1 files changed, 186 insertions, 182 deletions
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 2785007e0e46..ba2fad80a302 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -18,9 +18,11 @@
#include <linux/slab.h>
#include <linux/crypto.h>
#include <linux/workqueue.h>
+#include <linux/kthread.h>
#include <linux/backing-dev.h>
#include <linux/atomic.h>
#include <linux/scatterlist.h>
+#include <linux/rbtree.h>
#include <asm/page.h>
#include <asm/unaligned.h>
#include <crypto/hash.h>
@@ -55,10 +57,10 @@ struct dm_crypt_io {
struct convert_context ctx;
- atomic_t io_pending;
int error;
sector_t sector;
- struct dm_crypt_io *base_io;
+
+ struct rb_node rb_node;
} CRYPTO_MINALIGN_ATTR;
struct dm_crypt_request {
@@ -121,14 +123,18 @@ struct crypt_config {
* pool for per bio private data, crypto requests and
* encryption requeusts/buffer pages
*/
- mempool_t *io_pool;
mempool_t *req_pool;
mempool_t *page_pool;
struct bio_set *bs;
+ struct mutex bio_alloc_lock;
struct workqueue_struct *io_queue;
struct workqueue_struct *crypt_queue;
+ struct task_struct *write_thread;
+ wait_queue_head_t write_thread_wait;
+ struct rb_root write_tree;
+
char *cipher;
char *cipher_string;
@@ -172,9 +178,6 @@ struct crypt_config {
};
#define MIN_IOS 16
-#define MIN_POOL_PAGES 32
-
-static struct kmem_cache *_crypt_io_pool;
static void clone_init(struct dm_crypt_io *, struct bio *);
static void kcryptd_queue_crypt(struct dm_crypt_io *io);
@@ -952,57 +955,71 @@ static int crypt_convert(struct crypt_config *cc,
return 0;
}
+static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone);
+
/*
* Generate a new unfragmented bio with the given size
* This should never violate the device limitations
- * May return a smaller bio when running out of pages, indicated by
- * *out_of_pages set to 1.
+ *
+ * This function may be called concurrently. If we allocate from the mempool
+ * concurrently, there is a possibility of deadlock. For example, if we have
+ * mempool of 256 pages, two processes, each wanting 256, pages allocate from
+ * the mempool concurrently, it may deadlock in a situation where both processes
+ * have allocated 128 pages and the mempool is exhausted.
+ *
+ * In order to avoid this scenario we allocate the pages under a mutex.
+ *
+ * In order to not degrade performance with excessive locking, we try
+ * non-blocking allocation without a mutex first and if it fails, we fallback
+ * to a blocking allocation with a mutex.
*/
-static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size,
- unsigned *out_of_pages)
+static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size)
{
struct crypt_config *cc = io->cc;
struct bio *clone;
unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
- gfp_t gfp_mask = GFP_NOIO | __GFP_HIGHMEM;
- unsigned i, len;
+ gfp_t gfp_mask = GFP_NOWAIT | __GFP_HIGHMEM;
+ unsigned i, len, remaining_size;
struct page *page;
+ struct bio_vec *bvec;
+
+retry:
+ if (unlikely(gfp_mask & __GFP_WAIT))
+ mutex_lock(&cc->bio_alloc_lock);
clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, cc->bs);
if (!clone)
- return NULL;
+ goto return_clone;
clone_init(io, clone);
- *out_of_pages = 0;
+
+ remaining_size = size;
for (i = 0; i < nr_iovecs; i++) {
page = mempool_alloc(cc->page_pool, gfp_mask);
if (!page) {
- *out_of_pages = 1;
- break;
+ BUG_ON(gfp_mask & __GFP_WAIT);
+ crypt_free_buffer_pages(cc, clone);
+ bio_put(clone);
+ gfp_mask |= __GFP_WAIT;
+ goto retry;
}
- /*
- * If additional pages cannot be allocated without waiting,
- * return a partially-allocated bio. The caller will then try
- * to allocate more bios while submitting this partial bio.
- */
- gfp_mask = (gfp_mask | __GFP_NOWARN) & ~__GFP_WAIT;
+ len = (remaining_size > PAGE_SIZE) ? PAGE_SIZE : remaining_size;
- len = (size > PAGE_SIZE) ? PAGE_SIZE : size;
+ bvec = &clone->bi_io_vec[clone->bi_vcnt++];
+ bvec->bv_page = page;
+ bvec->bv_len = len;
+ bvec->bv_offset = 0;
- if (!bio_add_page(clone, page, len, 0)) {
- mempool_free(page, cc->page_pool);
- break;
- }
+ clone->bi_iter.bi_size += len;
- size -= len;
+ remaining_size -= len;
}
- if (!clone->bi_iter.bi_size) {
- bio_put(clone);
- return NULL;
- }
+return_clone:
+ if (unlikely(gfp_mask & __GFP_WAIT))
+ mutex_unlock(&cc->bio_alloc_lock);
return clone;
}
@@ -1026,43 +1043,23 @@ static void crypt_io_init(struct dm_crypt_io *io, struct crypt_config *cc,
io->base_bio = bio;
io->sector = sector;
io->error = 0;
- io->base_io = NULL;
io->ctx.req = NULL;
- atomic_set(&io->io_pending, 0);
-}
-
-static void crypt_inc_pending(struct dm_crypt_io *io)
-{
- atomic_inc(&io->io_pending);
}
/*
* One of the bios was finished. Check for completion of
* the whole request and correctly clean up the buffer.
- * If base_io is set, wait for the last fragment to complete.
*/
-static void crypt_dec_pending(struct dm_crypt_io *io)
+static void crypt_end_io(struct dm_crypt_io *io)
{
struct crypt_config *cc = io->cc;
struct bio *base_bio = io->base_bio;
- struct dm_crypt_io *base_io = io->base_io;
int error = io->error;
- if (!atomic_dec_and_test(&io->io_pending))
- return;
-
if (io->ctx.req)
crypt_free_req(cc, io->ctx.req, base_bio);
- if (io != dm_per_bio_data(base_bio, cc->per_bio_data_size))
- mempool_free(io, cc->io_pool);
-
- if (likely(!base_io))
- bio_endio(base_bio, error);
- else {
- if (error && !base_io->error)
- base_io->error = error;
- crypt_dec_pending(base_io);
- }
+
+ bio_endio(base_bio, error);
}
/*
@@ -1107,7 +1104,7 @@ static void crypt_endio(struct bio *clone, int error)
if (unlikely(error))
io->error = error;
- crypt_dec_pending(io);
+ crypt_end_io(io);
}
static void clone_init(struct dm_crypt_io *io, struct bio *clone)
@@ -1135,8 +1132,6 @@ static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp)
if (!clone)
return 1;
- crypt_inc_pending(io);
-
clone_init(io, clone);
clone->bi_iter.bi_sector = cc->start + io->sector;
@@ -1144,42 +1139,100 @@ static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp)
return 0;
}
+static void kcryptd_io_read_work(struct work_struct *work)
+{
+ struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work);
+
+ if (kcryptd_io_read(io, GFP_NOIO))
+ io->error = -ENOMEM;
+}
+
+static void kcryptd_queue_read(struct dm_crypt_io *io)
+{
+ struct crypt_config *cc = io->cc;
+
+ INIT_WORK(&io->work, kcryptd_io_read_work);
+ queue_work(cc->io_queue, &io->work);
+}
+
static void kcryptd_io_write(struct dm_crypt_io *io)
{
struct bio *clone = io->ctx.bio_out;
+
generic_make_request(clone);
}
-static void kcryptd_io(struct work_struct *work)
+#define crypt_io_from_node(node) rb_entry((node), struct dm_crypt_io, rb_node)
+
+static int dmcrypt_write(void *data)
{
- struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work);
+ struct crypt_config *cc = data;
+ struct dm_crypt_io *io;
- if (bio_data_dir(io->base_bio) == READ) {
- crypt_inc_pending(io);
- if (kcryptd_io_read(io, GFP_NOIO))
- io->error = -ENOMEM;
- crypt_dec_pending(io);
- } else
- kcryptd_io_write(io);
-}
+ while (1) {
+ struct rb_root write_tree;
+ struct blk_plug plug;
-static void kcryptd_queue_io(struct dm_crypt_io *io)
-{
- struct crypt_config *cc = io->cc;
+ DECLARE_WAITQUEUE(wait, current);
- INIT_WORK(&io->work, kcryptd_io);
- queue_work(cc->io_queue, &io->work);
+ spin_lock_irq(&cc->write_thread_wait.lock);
+continue_locked:
+
+ if (!RB_EMPTY_ROOT(&cc->write_tree))
+ goto pop_from_list;
+
+ __set_current_state(TASK_INTERRUPTIBLE);
+ __add_wait_queue(&cc->write_thread_wait, &wait);
+
+ spin_unlock_irq(&cc->write_thread_wait.lock);
+
+ if (unlikely(kthread_should_stop())) {
+ set_task_state(current, TASK_RUNNING);
+ remove_wait_queue(&cc->write_thread_wait, &wait);
+ break;
+ }
+
+ schedule();
+
+ set_task_state(current, TASK_RUNNING);
+ spin_lock_irq(&cc->write_thread_wait.lock);
+ __remove_wait_queue(&cc->write_thread_wait, &wait);
+ goto continue_locked;
+
+pop_from_list:
+ write_tree = cc->write_tree;
+ cc->write_tree = RB_ROOT;
+ spin_unlock_irq(&cc->write_thread_wait.lock);
+
+ BUG_ON(rb_parent(write_tree.rb_node));
+
+ /*
+ * Note: we cannot walk the tree here with rb_next because
+ * the structures may be freed when kcryptd_io_write is called.
+ */
+ blk_start_plug(&plug);
+ do {
+ io = crypt_io_from_node(rb_first(&write_tree));
+ rb_erase(&io->rb_node, &write_tree);
+ kcryptd_io_write(io);
+ } while (!RB_EMPTY_ROOT(&write_tree));
+ blk_finish_plug(&plug);
+ }
+ return 0;
}
-static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async)
+static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io)
{
struct bio *clone = io->ctx.bio_out;
struct crypt_config *cc = io->cc;
+ unsigned long flags;
+ sector_t sector;
+ struct rb_node **rbp, *parent;
if (unlikely(io->error < 0)) {
crypt_free_buffer_pages(cc, clone);
bio_put(clone);
- crypt_dec_pending(io);
+ crypt_end_io(io);
return;
}
@@ -1188,109 +1241,62 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async)
clone->bi_iter.bi_sector = cc->start + io->sector;
- if (async)
- kcryptd_queue_io(io);
- else
- generic_make_request(clone);
+ spin_lock_irqsave(&cc->write_thread_wait.lock, flags);
+ rbp = &cc->write_tree.rb_node;
+ parent = NULL;
+ sector = io->sector;
+ while (*rbp) {
+ parent = *rbp;
+
+ if (sector < crypt_io_from_node(parent)->sector)
+ rbp = &(*rbp)->rb_left;
+ else
+ rbp = &(*rbp)->rb_right;
+ }
+ rb_link_node(&io->rb_node, parent, rbp);
+ rb_insert_color(&io->rb_node, &cc->write_tree);
+
+ wake_up_locked(&cc->write_thread_wait);
+ spin_unlock_irqrestore(&cc->write_thread_wait.lock, flags);
}
static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
{
struct crypt_config *cc = io->cc;
struct bio *clone;
- struct dm_crypt_io *new_io;
int crypt_finished;
- unsigned out_of_pages = 0;
- unsigned remaining = io->base_bio->bi_iter.bi_size;
sector_t sector = io->sector;
int r;
- /*
- * Prevent io from disappearing until this function completes.
- */
- crypt_inc_pending(io);
crypt_convert_init(cc, &io->ctx, NULL, io->base_bio, sector);
- /*
- * The allocated buffers can be smaller than the whole bio,
- * so repeat the whole process until all the data can be handled.
- */
- while (remaining) {
- clone = crypt_alloc_buffer(io, remaining, &out_of_pages);
- if (unlikely(!clone)) {
- io->error = -ENOMEM;
- break;
- }
-
- io->ctx.bio_out = clone;
- io->ctx.iter_out = clone->bi_iter;
-
- remaining -= clone->bi_iter.bi_size;
- sector += bio_sectors(clone);
-
- crypt_inc_pending(io);
-
- r = crypt_convert(cc, &io->ctx);
- if (r < 0)
- io->error = -EIO;
-
- crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending);
-
- /* Encryption was already finished, submit io now */
- if (crypt_finished) {
- kcryptd_crypt_write_io_submit(io, 0);
-
- /*
- * If there was an error, do not try next fragments.
- * For async, error is processed in async handler.
- */
- if (unlikely(r < 0))
- break;
+ clone = crypt_alloc_buffer(io, io->base_bio->bi_iter.bi_size);
+ if (unlikely(!clone)) {
+ io->error = -EIO;
+ crypt_end_io(io);
+ return;
+ }
- io->sector = sector;
- }
+ io->ctx.bio_out = clone;
+ io->ctx.iter_out = clone->bi_iter;
- /*
- * Out of memory -> run queues
- * But don't wait if split was due to the io size restriction
- */
- if (unlikely(out_of_pages))
- congestion_wait(BLK_RW_ASYNC, HZ/100);
+ sector += bio_sectors(clone);
- /*
- * With async crypto it is unsafe to share the crypto context
- * between fragments, so switch to a new dm_crypt_io structure.
- */
- if (unlikely(!crypt_finished && remaining)) {
- new_io = mempool_alloc(cc->io_pool, GFP_NOIO);
- crypt_io_init(new_io, io->cc, io->base_bio, sector);
- crypt_inc_pending(new_io);
- crypt_convert_init(cc, &new_io->ctx, NULL,
- io->base_bio, sector);
- new_io->ctx.iter_in = io->ctx.iter_in;
-
- /*
- * Fragments after the first use the base_io
- * pending count.
- */
- if (!io->base_io)
- new_io->base_io = io;
- else {
- new_io->base_io = io->base_io;
- crypt_inc_pending(io->base_io);
- crypt_dec_pending(io);
- }
+ r = crypt_convert(cc, &io->ctx);
+ if (r)
+ io->error = -EIO;
+ crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending);
- io = new_io;
- }
+ /* Encryption was already finished, submit io now */
+ if (crypt_finished) {
+ kcryptd_crypt_write_io_submit(io);
+ io->sector = sector;
}
-
- crypt_dec_pending(io);
}
static void kcryptd_crypt_read_done(struct dm_crypt_io *io)
{
- crypt_dec_pending(io);
+ crypt_end_io(io);
}
static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
@@ -1298,8 +1304,6 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
struct crypt_config *cc = io->cc;
int r = 0;
- crypt_inc_pending(io);
-
crypt_convert_init(cc, &io->ctx, io->base_bio, io->base_bio,
io->sector);
@@ -1309,8 +1313,6 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
if (atomic_dec_and_test(&io->ctx.cc_pending))
kcryptd_crypt_read_done(io);
-
- crypt_dec_pending(io);
}
static void kcryptd_async_done(struct crypto_async_request *async_req,
@@ -1340,7 +1342,7 @@ static void kcryptd_async_done(struct crypto_async_request *async_req,
if (bio_data_dir(io->base_bio) == READ)
kcryptd_crypt_read_done(io);
else
- kcryptd_crypt_write_io_submit(io, 1);
+ kcryptd_crypt_write_io_submit(io);
}
static void kcryptd_crypt(struct work_struct *work)
@@ -1487,6 +1489,9 @@ static void crypt_dtr(struct dm_target *ti)
if (!cc)
return;
+ if (cc->write_thread)
+ kthread_stop(cc->write_thread);
+
if (cc->io_queue)
destroy_workqueue(cc->io_queue);
if (cc->crypt_queue)
@@ -1501,8 +1506,6 @@ static void crypt_dtr(struct dm_target *ti)
mempool_destroy(cc->page_pool);
if (cc->req_pool)
mempool_destroy(cc->req_pool);
- if (cc->io_pool)
- mempool_destroy(cc->io_pool);
if (cc->iv_gen_ops && cc->iv_gen_ops->dtr)
cc->iv_gen_ops->dtr(cc);
@@ -1715,19 +1718,13 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
if (ret < 0)
goto bad;
- ret = -ENOMEM;
- cc->io_pool = mempool_create_slab_pool(MIN_IOS, _crypt_io_pool);
- if (!cc->io_pool) {
- ti->error = "Cannot allocate crypt io mempool";
- goto bad;
- }
-
cc->dmreq_start = sizeof(struct ablkcipher_request);
cc->dmreq_start += crypto_ablkcipher_reqsize(any_tfm(cc));
cc->dmreq_start = ALIGN(cc->dmreq_start, crypto_tfm_ctx_alignment());
cc->dmreq_start += crypto_ablkcipher_alignmask(any_tfm(cc)) &
~(crypto_tfm_ctx_alignment() - 1);
+ ret = -ENOMEM;
cc->req_pool = mempool_create_kmalloc_pool(MIN_IOS, cc->dmreq_start +
sizeof(struct dm_crypt_request) + cc->iv_size);
if (!cc->req_pool) {
@@ -1739,7 +1736,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
sizeof(struct dm_crypt_io) + cc->dmreq_start +
sizeof(struct dm_crypt_request) + cc->iv_size;
- cc->page_pool = mempool_create_page_pool(MIN_POOL_PAGES, 0);
+ cc->page_pool = mempool_create_page_pool(BIO_MAX_PAGES, 0);
if (!cc->page_pool) {
ti->error = "Cannot allocate page mempool";
goto bad;
@@ -1751,6 +1748,8 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad;
}
+ mutex_init(&cc->bio_alloc_lock);
+
ret = -EINVAL;
if (sscanf(argv[2], "%llu%c", &tmpll, &dummy) != 1) {
ti->error = "Invalid iv_offset sector";
@@ -1801,12 +1800,24 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
}
cc->crypt_queue = alloc_workqueue("kcryptd",
- WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 1);
+ WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND, num_online_cpus());
if (!cc->crypt_queue) {
ti->error = "Couldn't create kcryptd queue";
goto bad;
}
+ init_waitqueue_head(&cc->write_thread_wait);
+ cc->write_tree = RB_ROOT;
+
+ cc->write_thread = kthread_create(dmcrypt_write, cc, "dmcrypt_write");
+ if (IS_ERR(cc->write_thread)) {
+ ret = PTR_ERR(cc->write_thread);
+ cc->write_thread = NULL;
+ ti->error = "Couldn't spawn write thread";
+ goto bad;
+ }
+ wake_up_process(cc->write_thread);
+
ti->num_flush_bios = 1;
ti->discard_zeroes_data_unsupported = true;
@@ -1841,7 +1852,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio)
if (bio_data_dir(io->base_bio) == READ) {
if (kcryptd_io_read(io, GFP_NOWAIT))
- kcryptd_queue_io(io);
+ kcryptd_queue_read(io);
} else
kcryptd_queue_crypt(io);
@@ -1987,15 +1998,9 @@ static int __init dm_crypt_init(void)
{
int r;
- _crypt_io_pool = KMEM_CACHE(dm_crypt_io, 0);
- if (!_crypt_io_pool)
- return -ENOMEM;
-
r = dm_register_target(&crypt_target);
- if (r < 0) {
+ if (r < 0)
DMERR("register failed %d", r);
- kmem_cache_destroy(_crypt_io_pool);
- }
return r;
}
@@ -2003,7 +2008,6 @@ static int __init dm_crypt_init(void)
static void __exit dm_crypt_exit(void)
{
dm_unregister_target(&crypt_target);
- kmem_cache_destroy(_crypt_io_pool);
}
module_init(dm_crypt_init);