author     Linus Torvalds <torvalds@linux-foundation.org>  2021-09-06 09:26:07 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>  2021-09-06 09:26:07 -0700
commit     60f8fbaa954452104a1914e21c5cc109f7bf276a (patch)
tree       8212a8c7d3efc455028f95a4520424db85d9e50f  /fs/io_uring.c
parent     20fbb11fe4ea99e02d77824613f1438bea456683 (diff)
parent     2fc2a7a62eb58650e71b4550cf6fa6cc0a75b2d2 (diff)
Merge tag 'for-5.15/io_uring-2021-09-04' of git://git.kernel.dk/linux-block
Pull io_uring fixes from Jens Axboe:
 "As sometimes happens, two reports came in around the merge window open that led to some fixes. Hence this one is a bit bigger than the usual followup fixes, but most of it will be going towards stable, outside of the fixes that are addressing regressions from this merge window.

  In detail:

   - postgres is a heavy user of signals between tasks, and if we're unlucky this can interfere with io-wq worker creation. Make sure we're resilient against unrelated signal handling. This set of changes also includes hardening against allocation failures, which could previously have led to stalls.

   - Some use cases that end up having a mix of bounded and unbounded work would have starvation issues related to that. Split the pending work lists to handle that better.

   - Completion trace int -> unsigned -> long fix

   - Fix issue with REGISTER_IOWQ_MAX_WORKERS and SQPOLL

   - Fix regression with hash wait lock in this merge window

   - Fix retry issued on block devices (Ming)

   - Fix regression with links in this merge window (Pavel)

   - Fix race with multi-shot poll and completions (Xiaoguang)

   - Ensure regular file IO doesn't inadvertently skip completion batching (Pavel)

   - Ensure submissions are flushed after running task_work (Pavel)"

* tag 'for-5.15/io_uring-2021-09-04' of git://git.kernel.dk/linux-block:
  io_uring: io_uring_complete() trace should take an integer
  io_uring: fix possible poll event lost in multi shot mode
  io_uring: prolong tctx_task_work() with flushing
  io_uring: don't disable kiocb_done() CQE batching
  io_uring: ensure IORING_REGISTER_IOWQ_MAX_WORKERS works with SQPOLL
  io-wq: make worker creation resilient against signals
  io-wq: get rid of FIXED worker flag
  io-wq: only exit on fatal signals
  io-wq: split bounded and unbounded work into separate lists
  io-wq: fix queue stalling race
  io_uring: don't submit half-prepared drain request
  io_uring: fix queueing half-created requests
  io-wq: ensure that hash wait lock is IRQ disabling
  io_uring: retry in case of short read on block device
  io_uring: IORING_OP_WRITE needs hash_reg_file set
  io-wq: fix race between adding work and activating a free worker
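The largest hunk in the diff below reworks io_register_iowq_max_workers() so that IORING_REGISTER_IOWQ_MAX_WORKERS also works on SQPOLL rings. As a rough sketch of what that opcode looks like from userspace (raw syscall form; assumes 5.15-era <linux/io_uring.h> uapi headers, an already created ring file descriptor ring_fd, and the made-up helper name cap_iowq_workers):

#define _GNU_SOURCE		/* for syscall() */
#include <linux/io_uring.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdio.h>

static int cap_iowq_workers(int ring_fd, unsigned int bounded, unsigned int unbounded)
{
	__u32 counts[2] = { bounded, unbounded };	/* [0] = bounded, [1] = unbounded */

	/* nr_args must be 2; a 0 entry leaves that limit unchanged */
	int ret = syscall(__NR_io_uring_register, ring_fd,
			  IORING_REGISTER_IOWQ_MAX_WORKERS, counts, 2);
	if (ret < 0) {
		perror("io_uring_register(IOWQ_MAX_WORKERS)");
		return -1;
	}
	/* on success the kernel writes the previous limits back into counts[] */
	printf("old limits: bounded=%u unbounded=%u\n", counts[0], counts[1]);
	return 0;
}

The two counts map onto the bounded and unbounded io-wq pools, the same split that the "bounded and unbounded work into separate lists" io-wq change above is about; on success the previous limits are reported back through the same array, as the copy_to_user() in the hunk shows.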
Diffstat (limited to 'fs/io_uring.c')
-rw-r--r--  fs/io_uring.c | 76
1 file changed, 66 insertions(+), 10 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 6f35b1285865..d816c09c88a5 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1021,6 +1021,7 @@ static const struct io_op_def io_op_defs[] = {
},
[IORING_OP_WRITE] = {
.needs_file = 1,
+ .hash_reg_file = 1,
.unbound_nonreg_file = 1,
.pollout = 1,
.plug = 1,
@@ -1851,6 +1852,17 @@ static void io_req_complete_failed(struct io_kiocb *req, long res)
io_req_complete_post(req, res, 0);
}
+static void io_req_complete_fail_submit(struct io_kiocb *req)
+{
+ /*
+ * We don't submit, fail them all, for that replace hardlinks with
+ * normal links. Extra REQ_F_LINK is tolerated.
+ */
+ req->flags &= ~REQ_F_HARDLINK;
+ req->flags |= REQ_F_LINK;
+ io_req_complete_failed(req, req->result);
+}
+
/*
* Don't initialise the fields below on every allocation, but do that in
* advance and keep them valid across allocations.
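The io_req_complete_fail_submit() helper added above downgrades REQ_F_HARDLINK to REQ_F_LINK before failing the request, so a submit-time failure tears down the rest of the link chain the way a normal link would. For reference, a minimal liburing-based sketch of those link semantics (illustrative only, not part of this commit; assumes liburing is installed):

#include <liburing.h>
#include <stdio.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	char buf[64];

	if (io_uring_queue_init(8, &ring, 0) < 0)
		return 1;

	/* first link: read from an invalid fd, so it completes with -EBADF */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_read(sqe, /* bad fd */ -1, buf, sizeof(buf), 0);
	io_uring_sqe_set_flags(sqe, IOSQE_IO_LINK);

	/* second link: a nop; completes with -ECANCELED because the link broke */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_nop(sqe);

	io_uring_submit(&ring);

	for (int i = 0; i < 2; i++) {
		io_uring_wait_cqe(&ring, &cqe);
		printf("cqe %d: res=%d\n", i, cqe->res);
		io_uring_cqe_seen(&ring, cqe);
	}
	io_uring_queue_exit(&ring);
	return 0;
}

With IOSQE_IO_HARDLINK instead of IOSQE_IO_LINK the nop would still run after the failed read, which is exactly why the kernel strips the hardlink flag when it decides to fail the whole chain.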
@@ -2119,6 +2131,9 @@ static void tctx_task_work(struct callback_head *cb)
while (1) {
struct io_wq_work_node *node;
+ if (!tctx->task_list.first && locked && ctx->submit_state.compl_nr)
+ io_submit_flush_completions(ctx);
+
spin_lock_irq(&tctx->task_lock);
node = tctx->task_list.first;
INIT_WQ_LIST(&tctx->task_list);
@@ -2673,7 +2688,7 @@ static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
{
if (__io_complete_rw_common(req, res))
return;
- __io_req_complete(req, 0, req->result, io_put_rw_kbuf(req));
+ __io_req_complete(req, issue_flags, req->result, io_put_rw_kbuf(req));
}
static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
@@ -3410,6 +3425,12 @@ static inline int io_iter_do_read(struct io_kiocb *req, struct iov_iter *iter)
return -EINVAL;
}
+static bool need_read_all(struct io_kiocb *req)
+{
+ return req->flags & REQ_F_ISREG ||
+ S_ISBLK(file_inode(req->file)->i_mode);
+}
+
static int io_read(struct io_kiocb *req, unsigned int issue_flags)
{
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
@@ -3464,7 +3485,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
} else if (ret == -EIOCBQUEUED) {
goto out_free;
} else if (ret <= 0 || ret == io_size || !force_nonblock ||
- (req->flags & REQ_F_NOWAIT) || !(req->flags & REQ_F_ISREG)) {
+ (req->flags & REQ_F_NOWAIT) || !need_read_all(req)) {
/* read all, failed, already did sync or don't want to retry */
goto done;
}
@@ -5249,7 +5270,7 @@ static void io_poll_remove_double(struct io_kiocb *req)
}
}
-static bool io_poll_complete(struct io_kiocb *req, __poll_t mask)
+static bool __io_poll_complete(struct io_kiocb *req, __poll_t mask)
__must_hold(&req->ctx->completion_lock)
{
struct io_ring_ctx *ctx = req->ctx;
@@ -5271,10 +5292,19 @@ static bool io_poll_complete(struct io_kiocb *req, __poll_t mask)
if (flags & IORING_CQE_F_MORE)
ctx->cq_extra++;
- io_commit_cqring(ctx);
return !(flags & IORING_CQE_F_MORE);
}
+static inline bool io_poll_complete(struct io_kiocb *req, __poll_t mask)
+ __must_hold(&req->ctx->completion_lock)
+{
+ bool done;
+
+ done = __io_poll_complete(req, mask);
+ io_commit_cqring(req->ctx);
+ return done;
+}
+
static void io_poll_task_func(struct io_kiocb *req, bool *locked)
{
struct io_ring_ctx *ctx = req->ctx;
@@ -5285,7 +5315,7 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked)
} else {
bool done;
- done = io_poll_complete(req, req->result);
+ done = __io_poll_complete(req, req->result);
if (done) {
io_poll_remove_double(req);
hash_del(&req->hash_node);
@@ -5293,6 +5323,7 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked)
req->result = 0;
add_wait_queue(req->poll.head, &req->poll.wait);
}
+ io_commit_cqring(ctx);
spin_unlock(&ctx->completion_lock);
io_cqring_ev_posted(ctx);
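The two poll hunks above split io_poll_complete() so that the CQE is committed (io_commit_cqring()) only after the multi-shot request has been re-added to the wait queue; a consumer reacting immediately to the completion can then no longer race ahead of the re-arm and have the next event dropped. A hedged sketch of the multi-shot poll pattern this protects (assumes a liburing recent enough to ship io_uring_prep_poll_multishot(); fd is any pollable descriptor):

#include <liburing.h>
#include <poll.h>
#include <stdio.h>

static void watch_readable(struct io_uring *ring, int fd)
{
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
	struct io_uring_cqe *cqe;

	/* one SQE, many CQEs: the kernel re-arms the poll after each event */
	io_uring_prep_poll_multishot(sqe, fd, POLLIN);
	io_uring_submit(ring);

	for (;;) {
		if (io_uring_wait_cqe(ring, &cqe) < 0)
			break;
		printf("poll events: res=0x%x more=%d\n", cqe->res,
		       !!(cqe->flags & IORING_CQE_F_MORE));
		/* if IORING_CQE_F_MORE is clear the poll is no longer armed */
		if (!(cqe->flags & IORING_CQE_F_MORE)) {
			io_uring_cqe_seen(ring, cqe);
			break;
		}
		io_uring_cqe_seen(ring, cqe);
	}
}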
@@ -6398,6 +6429,11 @@ static bool io_drain_req(struct io_kiocb *req)
int ret;
u32 seq;
+ if (req->flags & REQ_F_FAIL) {
+ io_req_complete_fail_submit(req);
+ return true;
+ }
+
/*
* If we need to drain a request in the middle of a link, drain the
* head request and the next request/link after the current link.
@@ -6914,7 +6950,7 @@ static inline void io_queue_sqe(struct io_kiocb *req)
if (likely(!(req->flags & (REQ_F_FORCE_ASYNC | REQ_F_FAIL)))) {
__io_queue_sqe(req);
} else if (req->flags & REQ_F_FAIL) {
- io_req_complete_failed(req, req->result);
+ io_req_complete_fail_submit(req);
} else {
int ret = io_req_prep_async(req);
@@ -10498,26 +10534,46 @@ static int io_unregister_iowq_aff(struct io_ring_ctx *ctx)
static int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
void __user *arg)
{
- struct io_uring_task *tctx = current->io_uring;
+ struct io_uring_task *tctx = NULL;
+ struct io_sq_data *sqd = NULL;
__u32 new_count[2];
int i, ret;
- if (!tctx || !tctx->io_wq)
- return -EINVAL;
if (copy_from_user(new_count, arg, sizeof(new_count)))
return -EFAULT;
for (i = 0; i < ARRAY_SIZE(new_count); i++)
if (new_count[i] > INT_MAX)
return -EINVAL;
+ if (ctx->flags & IORING_SETUP_SQPOLL) {
+ sqd = ctx->sq_data;
+ if (sqd) {
+ mutex_lock(&sqd->lock);
+ tctx = sqd->thread->io_uring;
+ }
+ } else {
+ tctx = current->io_uring;
+ }
+
+ ret = -EINVAL;
+ if (!tctx || !tctx->io_wq)
+ goto err;
+
ret = io_wq_max_workers(tctx->io_wq, new_count);
if (ret)
- return ret;
+ goto err;
+
+ if (sqd)
+ mutex_unlock(&sqd->lock);
if (copy_to_user(arg, new_count, sizeof(new_count)))
return -EFAULT;
return 0;
+err:
+ if (sqd)
+ mutex_unlock(&sqd->lock);
+ return ret;
}
static bool io_register_op_must_quiesce(int op)