author     Linus Torvalds <torvalds@linux-foundation.org>  2021-09-06 09:26:07 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>  2021-09-06 09:26:07 -0700
commit     60f8fbaa954452104a1914e21c5cc109f7bf276a (patch)
tree       8212a8c7d3efc455028f95a4520424db85d9e50f  /fs/io_uring.c
parent     20fbb11fe4ea99e02d77824613f1438bea456683 (diff)
parent     2fc2a7a62eb58650e71b4550cf6fa6cc0a75b2d2 (diff)
Merge tag 'for-5.15/io_uring-2021-09-04' of git://git.kernel.dk/linux-block
Pull io_uring fixes from Jens Axboe:
 "As sometimes happens, two reports came in around the merge window open that led to some fixes. Hence this one is a bit bigger than the usual followup fixes, but most of it will be going towards stable, outside of the fixes that are addressing regressions from this merge window.

  In detail:

   - postgres is a heavy user of signals between tasks, and if we're unlucky this can interfere with io-wq worker creation. Make sure we're resilient against unrelated signal handling. This set of changes also includes hardening against allocation failures, which could previously have led to stalls.

   - Some use cases that end up having a mix of bounded and unbounded work would have starvation issues related to that. Split the pending work lists to handle that better.

   - Completion trace int -> unsigned -> long fix

   - Fix issue with REGISTER_IOWQ_MAX_WORKERS and SQPOLL

   - Fix regression with hash wait lock in this merge window

   - Fix retry issued on block devices (Ming)

   - Fix regression with links in this merge window (Pavel)

   - Fix race with multi-shot poll and completions (Xiaoguang)

   - Ensure regular file IO doesn't inadvertently skip completion batching (Pavel)

   - Ensure submissions are flushed after running task_work (Pavel)"

* tag 'for-5.15/io_uring-2021-09-04' of git://git.kernel.dk/linux-block:
  io_uring: io_uring_complete() trace should take an integer
  io_uring: fix possible poll event lost in multi shot mode
  io_uring: prolong tctx_task_work() with flushing
  io_uring: don't disable kiocb_done() CQE batching
  io_uring: ensure IORING_REGISTER_IOWQ_MAX_WORKERS works with SQPOLL
  io-wq: make worker creation resilient against signals
  io-wq: get rid of FIXED worker flag
  io-wq: only exit on fatal signals
  io-wq: split bounded and unbounded work into separate lists
  io-wq: fix queue stalling race
  io_uring: don't submit half-prepared drain request
  io_uring: fix queueing half-created requests
  io-wq: ensure that hash wait lock is IRQ disabling
  io_uring: retry in case of short read on block device
  io_uring: IORING_OP_WRITE needs hash_reg_file set
  io-wq: fix race between adding work and activating a free worker
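The largest hunk in the diff below reworks io_register_iowq_max_workers() so that IORING_REGISTER_IOWQ_MAX_WORKERS also works on SQPOLL rings. As a rough sketch of what that opcode looks like from userspace (raw syscall form; assumes 5.15-era <linux/io_uring.h> uapi headers, an already created ring file descriptor ring_fd, and the made-up helper name cap_iowq_workers):

#define _GNU_SOURCE		/* for syscall() */
#include <linux/io_uring.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdio.h>

static int cap_iowq_workers(int ring_fd, unsigned int bounded, unsigned int unbounded)
{
	__u32 counts[2] = { bounded, unbounded };	/* [0] = bounded, [1] = unbounded */

	/* nr_args must be 2; a 0 entry leaves that limit unchanged */
	int ret = syscall(__NR_io_uring_register, ring_fd,
			  IORING_REGISTER_IOWQ_MAX_WORKERS, counts, 2);
	if (ret < 0) {
		perror("io_uring_register(IOWQ_MAX_WORKERS)");
		return -1;
	}
	/* on success the kernel writes the previous limits back into counts[] */
	printf("old limits: bounded=%u unbounded=%u\n", counts[0], counts[1]);
	return 0;
}

The two counts map onto the bounded and unbounded io-wq pools, the same split that the "bounded and unbounded work into separate lists" io-wq change above is about; on success the previous limits are reported back through the same array, as the copy_to_user() in the hunk shows.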
Diffstat (limited to 'fs/io_uring.c')
-rw-r--r--  fs/io_uring.c | 76
1 file changed, 66 insertions(+), 10 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 6f35b1285865..d816c09c88a5 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1021,6 +1021,7 @@ static const struct io_op_def io_op_defs[] = {
},
[IORING_OP_WRITE] = {
.needs_file = 1,
+ .hash_reg_file = 1,
.unbound_nonreg_file = 1,
.pollout = 1,
.plug = 1,
@@ -1851,6 +1852,17 @@ static void io_req_complete_failed(struct io_kiocb *req, long res)
io_req_complete_post(req, res, 0);
}
+static void io_req_complete_fail_submit(struct io_kiocb *req)
+{
+ /*
+ * We don't submit, fail them all, for that replace hardlinks with
+ * normal links. Extra REQ_F_LINK is tolerated.
+ */
+ req->flags &= ~REQ_F_HARDLINK;
+ req->flags |= REQ_F_LINK;
+ io_req_complete_failed(req, req->result);
+}
+
/*
* Don't initialise the fields below on every allocation, but do that in
* advance and keep them valid across allocations.
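The io_req_complete_fail_submit() helper added above downgrades REQ_F_HARDLINK to REQ_F_LINK before failing the request, so a submit-time failure tears down the rest of the link chain the way a normal link would. For reference, a minimal liburing-based sketch of those link semantics (illustrative only, not part of this commit; assumes liburing is installed):

#include <liburing.h>
#include <stdio.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	char buf[64];

	if (io_uring_queue_init(8, &ring, 0) < 0)
		return 1;

	/* first link: read from an invalid fd, so it completes with -EBADF */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_read(sqe, /* bad fd */ -1, buf, sizeof(buf), 0);
	io_uring_sqe_set_flags(sqe, IOSQE_IO_LINK);

	/* second link: a nop; completes with -ECANCELED because the link broke */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_nop(sqe);

	io_uring_submit(&ring);

	for (int i = 0; i < 2; i++) {
		io_uring_wait_cqe(&ring, &cqe);
		printf("cqe %d: res=%d\n", i, cqe->res);
		io_uring_cqe_seen(&ring, cqe);
	}
	io_uring_queue_exit(&ring);
	return 0;
}

With IOSQE_IO_HARDLINK instead of IOSQE_IO_LINK the nop would still run after the failed read, which is exactly why the kernel strips the hardlink flag when it decides to fail the whole chain.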
@@ -2119,6 +2131,9 @@ static void tctx_task_work(struct callback_head *cb)
while (1) {
struct io_wq_work_node *node;
+ if (!tctx->task_list.first && locked && ctx->submit_state.compl_nr)
+ io_submit_flush_completions(ctx);
+
spin_lock_irq(&tctx->task_lock);
node = tctx->task_list.first;
INIT_WQ_LIST(&tctx->task_list);
@@ -2673,7 +2688,7 @@ static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
{
if (__io_complete_rw_common(req, res))
return;
- __io_req_complete(req, 0, req->result, io_put_rw_kbuf(req));
+ __io_req_complete(req, issue_flags, req->result, io_put_rw_kbuf(req));
}
static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
@@ -3410,6 +3425,12 @@ static inline int io_iter_do_read(struct io_kiocb *req, struct iov_iter *iter)
return -EINVAL;
}
+static bool need_read_all(struct io_kiocb *req)
+{
+ return req->flags & REQ_F_ISREG ||
+ S_ISBLK(file_inode(req->file)->i_mode);
+}
+
static int io_read(struct io_kiocb *req, unsigned int issue_flags)
{
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
@@ -3464,7 +3485,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
} else if (ret == -EIOCBQUEUED) {
goto out_free;
} else if (ret <= 0 || ret == io_size || !force_nonblock ||
- (req->flags & REQ_F_NOWAIT) || !(req->flags & REQ_F_ISREG)) {
+ (req->flags & REQ_F_NOWAIT) || !need_read_all(req)) {
/* read all, failed, already did sync or don't want to retry */
goto done;
}
@@ -5249,7 +5270,7 @@ static void io_poll_remove_double(struct io_kiocb *req)
}
}
-static bool io_poll_complete(struct io_kiocb *req, __poll_t mask)
+static bool __io_poll_complete(struct io_kiocb *req, __poll_t mask)
__must_hold(&req->ctx->completion_lock)
{
struct io_ring_ctx *ctx = req->ctx;
@@ -5271,10 +5292,19 @@ static bool io_poll_complete(struct io_kiocb *req, __poll_t mask)
if (flags & IORING_CQE_F_MORE)
ctx->cq_extra++;
- io_commit_cqring(ctx);
return !(flags & IORING_CQE_F_MORE);
}
+static inline bool io_poll_complete(struct io_kiocb *req, __poll_t mask)
+ __must_hold(&req->ctx->completion_lock)
+{
+ bool done;
+
+ done = __io_poll_complete(req, mask);
+ io_commit_cqring(req->ctx);
+ return done;
+}
+
static void io_poll_task_func(struct io_kiocb *req, bool *locked)
{
struct io_ring_ctx *ctx = req->ctx;
@@ -5285,7 +5315,7 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked)
} else {
bool done;
- done = io_poll_complete(req, req->result);
+ done = __io_poll_complete(req, req->result);
if (done) {
io_poll_remove_double(req);
hash_del(&req->hash_node);
@@ -5293,6 +5323,7 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked)
req->result = 0;
add_wait_queue(req->poll.head, &req->poll.wait);
}
+ io_commit_cqring(ctx);
spin_unlock(&ctx->completion_lock);
io_cqring_ev_posted(ctx);
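The two poll hunks above split io_poll_complete() so that the CQE is committed (io_commit_cqring()) only after the multi-shot request has been re-added to the wait queue; a consumer reacting immediately to the completion can then no longer race ahead of the re-arm and have the next event dropped. A hedged sketch of the multi-shot poll pattern this protects (assumes a liburing recent enough to ship io_uring_prep_poll_multishot(); fd is any pollable descriptor):

#include <liburing.h>
#include <poll.h>
#include <stdio.h>

static void watch_readable(struct io_uring *ring, int fd)
{
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
	struct io_uring_cqe *cqe;

	/* one SQE, many CQEs: the kernel re-arms the poll after each event */
	io_uring_prep_poll_multishot(sqe, fd, POLLIN);
	io_uring_submit(ring);

	for (;;) {
		if (io_uring_wait_cqe(ring, &cqe) < 0)
			break;
		printf("poll events: res=0x%x more=%d\n", cqe->res,
		       !!(cqe->flags & IORING_CQE_F_MORE));
		/* if IORING_CQE_F_MORE is clear the poll is no longer armed */
		if (!(cqe->flags & IORING_CQE_F_MORE)) {
			io_uring_cqe_seen(ring, cqe);
			break;
		}
		io_uring_cqe_seen(ring, cqe);
	}
}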
@@ -6398,6 +6429,11 @@ static bool io_drain_req(struct io_kiocb *req)
int ret;
u32 seq;
+ if (req->flags & REQ_F_FAIL) {
+ io_req_complete_fail_submit(req);
+ return true;
+ }
+
/*
* If we need to drain a request in the middle of a link, drain the
* head request and the next request/link after the current link.
@@ -6914,7 +6950,7 @@ static inline void io_queue_sqe(struct io_kiocb *req)
if (likely(!(req->flags & (REQ_F_FORCE_ASYNC | REQ_F_FAIL)))) {
__io_queue_sqe(req);
} else if (req->flags & REQ_F_FAIL) {
- io_req_complete_failed(req, req->result);
+ io_req_complete_fail_submit(req);
} else {
int ret = io_req_prep_async(req);
@@ -10498,26 +10534,46 @@ static int io_unregister_iowq_aff(struct io_ring_ctx *ctx)
static int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
void __user *arg)
{
- struct io_uring_task *tctx = current->io_uring;
+ struct io_uring_task *tctx = NULL;
+ struct io_sq_data *sqd = NULL;
__u32 new_count[2];
int i, ret;
- if (!tctx || !tctx->io_wq)
- return -EINVAL;
if (copy_from_user(new_count, arg, sizeof(new_count)))
return -EFAULT;
for (i = 0; i < ARRAY_SIZE(new_count); i++)
if (new_count[i] > INT_MAX)
return -EINVAL;
+ if (ctx->flags & IORING_SETUP_SQPOLL) {
+ sqd = ctx->sq_data;
+ if (sqd) {
+ mutex_lock(&sqd->lock);
+ tctx = sqd->thread->io_uring;
+ }
+ } else {
+ tctx = current->io_uring;
+ }
+
+ ret = -EINVAL;
+ if (!tctx || !tctx->io_wq)
+ goto err;
+
ret = io_wq_max_workers(tctx->io_wq, new_count);
if (ret)
- return ret;
+ goto err;
+
+ if (sqd)
+ mutex_unlock(&sqd->lock);
if (copy_to_user(arg, new_count, sizeof(new_count)))
return -EFAULT;
return 0;
+err:
+ if (sqd)
+ mutex_unlock(&sqd->lock);
+ return ret;
}
static bool io_register_op_must_quiesce(int op)