summaryrefslogtreecommitdiff
path: root/fs/io-wq.c
diff options
context:
space:
mode:
authorJens Axboe <axboe@kernel.dk>2021-03-05 12:59:30 -0700
committerJens Axboe <axboe@kernel.dk>2021-03-06 10:57:01 -0700
commit886d0137f104a440d9dfa1d16efc1db06c9a2c02 (patch)
treec6a5fd1829769e9a4112ce6ab37dd55f9f4f55c7 /fs/io-wq.c
parenta38fd8748464831584a19438cbb3082b5a2dab15 (diff)
io-wq: fix race in freeing 'wq' and worker access
Ran into a use-after-free on the main io-wq struct, wq. It has a worker ref and completion event, but the manager itself isn't holding a reference. This can lead to a race where the manager thinks there are no workers and exits, but a worker is being added. That leads to the following trace: BUG: KASAN: use-after-free in io_wqe_worker+0x4c0/0x5e0 Read of size 8 at addr ffff888108baa8a0 by task iou-wrk-3080422/3080425 CPU: 5 PID: 3080425 Comm: iou-wrk-3080422 Not tainted 5.12.0-rc1+ #110 Hardware name: Micro-Star International Co., Ltd. MS-7C60/TRX40 PRO 10G (MS-7C60), BIOS 1.60 05/13/2020 Call Trace: dump_stack+0x90/0xbe print_address_description.constprop.0+0x67/0x28d ? io_wqe_worker+0x4c0/0x5e0 kasan_report.cold+0x7b/0xd4 ? io_wqe_worker+0x4c0/0x5e0 __asan_load8+0x6d/0xa0 io_wqe_worker+0x4c0/0x5e0 ? io_worker_handle_work+0xc00/0xc00 ? recalc_sigpending+0xe5/0x120 ? io_worker_handle_work+0xc00/0xc00 ? io_worker_handle_work+0xc00/0xc00 ret_from_fork+0x1f/0x30 Allocated by task 3080422: kasan_save_stack+0x23/0x60 __kasan_kmalloc+0x80/0xa0 kmem_cache_alloc_node_trace+0xa0/0x480 io_wq_create+0x3b5/0x600 io_uring_alloc_task_context+0x13c/0x380 io_uring_add_task_file+0x109/0x140 __x64_sys_io_uring_enter+0x45f/0x660 do_syscall_64+0x32/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae Freed by task 3080422: kasan_save_stack+0x23/0x60 kasan_set_track+0x20/0x40 kasan_set_free_info+0x24/0x40 __kasan_slab_free+0xe8/0x120 kfree+0xa8/0x400 io_wq_put+0x14a/0x220 io_wq_put_and_exit+0x9a/0xc0 io_uring_clean_tctx+0x101/0x140 __io_uring_files_cancel+0x36e/0x3c0 do_exit+0x169/0x1340 __x64_sys_exit+0x34/0x40 do_syscall_64+0x32/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae Have the manager itself hold a reference, and now both drop points drop and complete if we hit zero, and the manager can unconditionally do a wait_for_completion() instead of having a race between reading the ref count and waiting if it was non-zero. Fixes: fb3a1f6c745c ("io-wq: have manager wait for all workers to exit") Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'fs/io-wq.c')
-rw-r--r--fs/io-wq.c16
1 files changed, 8 insertions, 8 deletions
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 28868eb4cd09..1bfdb86336e4 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -722,9 +722,9 @@ static int io_wq_manager(void *data)
io_wq_for_each_worker(wq->wqes[node], io_wq_worker_wake, NULL);
rcu_read_unlock();
- /* we might not ever have created any workers */
- if (atomic_read(&wq->worker_refs))
- wait_for_completion(&wq->worker_done);
+ if (atomic_dec_and_test(&wq->worker_refs))
+ complete(&wq->worker_done);
+ wait_for_completion(&wq->worker_done);
spin_lock_irq(&wq->hash->wait.lock);
for_each_node(node)
@@ -774,7 +774,8 @@ static int io_wq_fork_manager(struct io_wq *wq)
if (wq->manager)
return 0;
- reinit_completion(&wq->worker_done);
+ init_completion(&wq->worker_done);
+ atomic_set(&wq->worker_refs, 1);
tsk = create_io_thread(io_wq_manager, wq, NUMA_NO_NODE);
if (!IS_ERR(tsk)) {
wq->manager = get_task_struct(tsk);
@@ -782,6 +783,9 @@ static int io_wq_fork_manager(struct io_wq *wq)
return 0;
}
+ if (atomic_dec_and_test(&wq->worker_refs))
+ complete(&wq->worker_done);
+
return PTR_ERR(tsk);
}
@@ -1018,13 +1022,9 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
init_completion(&wq->exited);
refcount_set(&wq->refs, 1);
- init_completion(&wq->worker_done);
- atomic_set(&wq->worker_refs, 0);
-
ret = io_wq_fork_manager(wq);
if (!ret)
return wq;
-
err:
io_wq_put_hash(data->hash);
cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);