From 880641bb9da2473e9ecf6c708d993b29928c1b3c Mon Sep 17 00:00:00 2001 From: Jeff Moyer Date: Mon, 5 Mar 2012 14:59:12 -0800 Subject: aio: wake up waiters when freeing unused kiocbs Bart Van Assche reported a hung fio process either when hot-removing storage or when interrupting the fio process itself. The (pruned) call trace for the latter looks like so: fio D 0000000000000001 0 6849 6848 0x00000004 ffff880092541b88 0000000000000046 ffff880000000000 ffff88012fa11dc0 ffff88012404be70 ffff880092541fd8 ffff880092541fd8 ffff880092541fd8 ffff880128b894d0 ffff88012404be70 ffff880092541b88 000000018106f24d Call Trace: schedule+0x3f/0x60 io_schedule+0x8f/0xd0 wait_for_all_aios+0xc0/0x100 exit_aio+0x55/0xc0 mmput+0x2d/0x110 exit_mm+0x10d/0x130 do_exit+0x671/0x860 do_group_exit+0x44/0xb0 get_signal_to_deliver+0x218/0x5a0 do_signal+0x65/0x700 do_notify_resume+0x65/0x80 int_signal+0x12/0x17 The problem lies with the allocation batching code. It will opportunistically allocate kiocbs, and then trim back the list of iocbs when there is not enough room in the completion ring to hold all of the events. In the case above, what happens is that the pruning back of events ends up freeing the last active request while the context is marked as dead, so the pruning path is responsible for waking up the waiters. Unfortunately, the code does not check for this condition, so we end up with a hung task. 
Signed-off-by: Jeff Moyer Reported-by: Bart Van Assche Tested-by: Bart Van Assche Cc: stable@vger.kernel.org [3.2.x only] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/aio.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/aio.c') diff --git a/fs/aio.c b/fs/aio.c index 969beb0e2231..67e4b9047cc9 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -490,6 +490,8 @@ static void kiocb_batch_free(struct kioctx *ctx, struct kiocb_batch *batch) kmem_cache_free(kiocb_cachep, req); ctx->reqs_active--; } + if (unlikely(!ctx->reqs_active && ctx->dead)) + wake_up_all(&ctx->wait); spin_unlock_irq(&ctx->ctx_lock); } -- cgit v1.2.3 From 86b62a2cb4fc09037bbce2959d2992962396fd7f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 7 Mar 2012 05:16:35 +0000 Subject: aio: fix io_setup/io_destroy race Have ioctx_alloc() return an extra reference, so that the caller drops it on success and does not have to bother with re-grabbing it on the failure exit. The current code is obviously broken - io_destroy() from another thread that managed to guess the address io_setup() would've returned would free the ioctx right under us; it gets especially interesting if the aio_context_t * we pass to io_setup() points to a PROT_READ mapping, so put_user() fails and we end up doing io_destroy() on a kioctx another thread has just freed... 
Signed-off-by: Al Viro Acked-by: Benjamin LaHaise Reviewed-by: Jeff Moyer Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- fs/aio.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/aio.c') diff --git a/fs/aio.c b/fs/aio.c index 67e4b9047cc9..f6578cb22d00 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -273,7 +273,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) mm = ctx->mm = current->mm; atomic_inc(&mm->mm_count); - atomic_set(&ctx->users, 1); + atomic_set(&ctx->users, 2); spin_lock_init(&ctx->ctx_lock); spin_lock_init(&ctx->ring_info.ring_lock); init_waitqueue_head(&ctx->wait); @@ -1338,10 +1338,10 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp) ret = PTR_ERR(ioctx); if (!IS_ERR(ioctx)) { ret = put_user(ioctx->user_id, ctxp); - if (!ret) + if (!ret) { + put_ioctx(ioctx); return 0; - - get_ioctx(ioctx); /* io_destroy() expects us to hold a ref */ + } io_destroy(ioctx); } -- cgit v1.2.3 From c7b285550544c22bc005ec20978472c9ac7138c6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 8 Mar 2012 17:51:19 +0000 Subject: aio: fix the "too late munmap()" race Current code has put_ioctx() called asynchronously from aio_fput_routine(); that's done *after* we have killed the request that used to pin the ioctx, so there's nothing to stop io_destroy() waiting in wait_for_all_aios() from progressing. As a result, we can end up with the async call of put_ioctx() being the last one and possibly happening during exit_mmap() or elf_core_dump(), neither of which expects a stray munmap() being done to them... We do need to prevent _freeing_ the ioctx until aio_fput_routine() is done with it, but that's all we care about - neither io_destroy() nor exit_aio() will progress past wait_for_all_aios() until aio_fput_routine() does really_put_req(), so the ioctx teardown won't be done until then and we don't care about the contents of the ioctx past that point. 
Since the actual freeing of these suckers is RCU-delayed, we don't need to bump the ioctx refcount when a request goes onto the list for async removal. All we need is rcu_read_lock held just over the ->ctx_lock-protected area in aio_fput_routine(). Signed-off-by: Al Viro Reviewed-by: Jeff Moyer Acked-by: Benjamin LaHaise Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- fs/aio.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'fs/aio.c') diff --git a/fs/aio.c b/fs/aio.c index f6578cb22d00..b9d64d89a043 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -228,12 +228,6 @@ static void __put_ioctx(struct kioctx *ctx) call_rcu(&ctx->rcu_head, ctx_rcu_free); } -static inline void get_ioctx(struct kioctx *kioctx) -{ - BUG_ON(atomic_read(&kioctx->users) <= 0); - atomic_inc(&kioctx->users); -} - static inline int try_get_ioctx(struct kioctx *kioctx) { return atomic_inc_not_zero(&kioctx->users); @@ -609,11 +603,16 @@ static void aio_fput_routine(struct work_struct *data) fput(req->ki_filp); /* Link the iocb into the context's free list */ + rcu_read_lock(); spin_lock_irq(&ctx->ctx_lock); really_put_req(ctx, req); + /* + * at that point ctx might've been killed, but actual + * freeing is RCU'd + */ spin_unlock_irq(&ctx->ctx_lock); + rcu_read_unlock(); - put_ioctx(ctx); spin_lock_irq(&fput_lock); } spin_unlock_irq(&fput_lock); @@ -644,7 +643,6 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req) * this function will be executed w/out any aio kthread wakeup. */ if (unlikely(!fput_atomic(req->ki_filp))) { - get_ioctx(ctx); spin_lock(&fput_lock); list_add(&req->ki_list, &fput_head); spin_unlock(&fput_lock); -- cgit v1.2.3