From 00b0db562485fbb259cd4054346208ad0885d662 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 11 Aug 2023 13:53:43 +0100 Subject: io_uring: open code io_fill_cqe_req() io_fill_cqe_req() is only called from one place, open code it, and rename __io_fill_cqe_req(). Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/f432ce75bb1c94cadf0bd2add4d6aa510bd1fb36.1691757663.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/rw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'io_uring/rw.c') diff --git a/io_uring/rw.c b/io_uring/rw.c index 1bce2208b65c..9b51afdae505 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -1064,7 +1064,7 @@ int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin) continue; req->cqe.flags = io_put_kbuf(req, 0); - if (unlikely(!__io_fill_cqe_req(ctx, req))) { + if (unlikely(!io_fill_cqe_req(ctx, req))) { spin_lock(&ctx->completion_lock); io_req_cqe_overflow(req); spin_unlock(&ctx->completion_lock); -- cgit v1.2.3 From 54927baf6c195fb512ac38b26a041ca44edb2e29 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 24 Aug 2023 23:53:28 +0100 Subject: io_uring: reorder cqring_flush and wakeups Unlike in the past, io_commit_cqring_flush() doesn't do anything that may need io_cqring_wake() to be issued after, all requests it completes will go via task_work. Do io_commit_cqring_flush() after io_cqring_wake() to clean up __io_cq_unlock_post(). Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/ed32dcfeec47e6c97bd6b18c152ddce5b218403f.1692916914.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 14 +++----------- io_uring/rw.c | 2 +- 2 files changed, 4 insertions(+), 12 deletions(-) (limited to 'io_uring/rw.c') diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index cfc2dc8c4b2f..7c1ef5b6628d 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -629,19 +629,11 @@ static inline void io_cq_lock(struct io_ring_ctx *ctx) static inline void __io_cq_unlock_post(struct io_ring_ctx *ctx) { io_commit_cqring(ctx); - - if (ctx->task_complete) { - /* - * ->task_complete implies that only current might be waiting - * for CQEs, and obviously, we currently don't. No one is - * waiting, wakeups are futile, skip them. - */ - io_commit_cqring_flush(ctx); - } else { + if (!ctx->task_complete) { spin_unlock(&ctx->completion_lock); - io_commit_cqring_flush(ctx); io_cqring_wake(ctx); } + io_commit_cqring_flush(ctx); } static void io_cq_unlock_post(struct io_ring_ctx *ctx) @@ -649,8 +641,8 @@ static void io_cq_unlock_post(struct io_ring_ctx *ctx) { io_commit_cqring(ctx); spin_unlock(&ctx->completion_lock); - io_commit_cqring_flush(ctx); io_cqring_wake(ctx); + io_commit_cqring_flush(ctx); } /* Returns true if there are no backlogged entries after the flush */ diff --git a/io_uring/rw.c b/io_uring/rw.c index 9b51afdae505..20140d3505f1 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -985,9 +985,9 @@ copy_iov: static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx) { - io_commit_cqring_flush(ctx); if (ctx->flags & IORING_SETUP_SQPOLL) io_cqring_wake(ctx); + io_commit_cqring_flush(ctx); } void io_rw_fail(struct io_kiocb *req) -- cgit v1.2.3 From ec26c225f06f5993f8891fa6c79fab3c92981181 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 24 Aug 2023 23:53:29 +0100 Subject: io_uring: merge iopoll and normal completion paths io_do_iopoll() and io_submit_flush_completions() are pretty similar, both filling CQEs and then free a list of requests. Don't duplicate it and make iopoll use __io_submit_flush_completions(), which also helps with inlining and other optimisations. For that, we need to first find all completed iopoll requests and splice them from the iopoll list and then pass it down. This adds one extra list traversal, which should be fine as requests will stay hot in cache. CQ locking is already conditional, introduce ->lockless_cq and skip locking for IOPOLL as it's protected by ->uring_lock. We also add a wakeup optimisation for IOPOLL to __io_cq_unlock_post(), so it works just like io_cqring_ev_posted_iopoll(). Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/3840473f5e8a960de35b77292026691880f6bdbc.1692916914.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 1 + io_uring/io_uring.c | 18 ++++++++++++------ io_uring/io_uring.h | 2 +- io_uring/rw.c | 24 +++++------------------- 4 files changed, 19 insertions(+), 26 deletions(-) (limited to 'io_uring/rw.c') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 9795eda529f7..c0c03d8059df 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -205,6 +205,7 @@ struct io_ring_ctx { unsigned int has_evfd: 1; /* all CQEs should be posted only by the submitter task */ unsigned int task_complete: 1; + unsigned int lockless_cq: 1; unsigned int syscall_iopoll: 1; unsigned int poll_activated: 1; unsigned int drain_disabled: 1; diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 7c1ef5b6628d..e8321903e3f3 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -147,7 +147,6 @@ static bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx, bool cancel_all); static void io_queue_sqe(struct io_kiocb *req); -static void __io_submit_flush_completions(struct io_ring_ctx *ctx); struct kmem_cache *req_cachep; @@ -616,7 +615,7 @@ void __io_commit_cqring_flush(struct io_ring_ctx *ctx) static inline void __io_cq_lock(struct io_ring_ctx *ctx) { - if (!ctx->task_complete) + if (!ctx->lockless_cq) spin_lock(&ctx->completion_lock); } @@ -630,8 +629,11 @@ static inline void __io_cq_unlock_post(struct io_ring_ctx *ctx) { io_commit_cqring(ctx); if (!ctx->task_complete) { - spin_unlock(&ctx->completion_lock); - io_cqring_wake(ctx); + if (!ctx->lockless_cq) + spin_unlock(&ctx->completion_lock); + /* IOPOLL rings only need to wake up if it's also SQPOLL */ + if (!ctx->syscall_iopoll) + io_cqring_wake(ctx); } io_commit_cqring_flush(ctx); } @@ -1485,7 +1487,8 @@ void io_queue_next(struct io_kiocb *req) io_req_task_queue(nxt); } -void io_free_batch_list(struct io_ring_ctx *ctx, struct io_wq_work_node *node) +static void io_free_batch_list(struct io_ring_ctx *ctx, + struct io_wq_work_node *node) __must_hold(&ctx->uring_lock) { do { @@ -1522,7 +1525,7 @@ void io_free_batch_list(struct io_ring_ctx *ctx, struct io_wq_work_node *node) } while (node); } -static void __io_submit_flush_completions(struct io_ring_ctx *ctx) +void __io_submit_flush_completions(struct io_ring_ctx *ctx) __must_hold(&ctx->uring_lock) { struct io_submit_state *state = &ctx->submit_state; @@ -3836,6 +3839,9 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p, !(ctx->flags & IORING_SETUP_SQPOLL)) ctx->task_complete = true; + if (ctx->task_complete || (ctx->flags & IORING_SETUP_IOPOLL)) + ctx->lockless_cq = true; + /* * lazy poll_wq activation relies on ->task_complete for synchronisation * purposes, see io_activate_pollwq() diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index 2960e35b32a5..07fd185064d2 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -72,7 +72,7 @@ int io_ring_add_registered_file(struct io_uring_task *tctx, struct file *file, int io_poll_issue(struct io_kiocb *req, struct io_tw_state *ts); int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr); int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin); -void io_free_batch_list(struct io_ring_ctx *ctx, struct io_wq_work_node *node); +void __io_submit_flush_completions(struct io_ring_ctx *ctx); int io_req_prep_async(struct io_kiocb *req); struct io_wq_work *io_wq_free_work(struct io_wq_work *work); diff --git a/io_uring/rw.c b/io_uring/rw.c index 20140d3505f1..0a1e515f0510 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -983,13 +983,6 @@ copy_iov: return ret; } -static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx) -{ - if (ctx->flags & IORING_SETUP_SQPOLL) - io_cqring_wake(ctx); - io_commit_cqring_flush(ctx); -} - void io_rw_fail(struct io_kiocb *req) { int res; @@ -1060,24 +1053,17 @@ int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin) if (!smp_load_acquire(&req->iopoll_completed)) break; nr_events++; - if (unlikely(req->flags & REQ_F_CQE_SKIP)) - continue; - req->cqe.flags = io_put_kbuf(req, 0); - if (unlikely(!io_fill_cqe_req(ctx, req))) { - spin_lock(&ctx->completion_lock); - io_req_cqe_overflow(req); - spin_unlock(&ctx->completion_lock); - } } - if (unlikely(!nr_events)) return 0; - io_commit_cqring(ctx); - io_cqring_ev_posted_iopoll(ctx); pos = start ? start->next : ctx->iopoll_list.first; wq_list_cut(&ctx->iopoll_list, prev, start); - io_free_batch_list(ctx, pos); + + if (WARN_ON_ONCE(!wq_list_empty(&ctx->submit_state.compl_reqs))) + return 0; + ctx->submit_state.compl_reqs.first = pos; + __io_submit_flush_completions(ctx); return nr_events; } -- cgit v1.2.3