Diffstat (limited to 'fs/io_uring.c')
-rw-r--r--	fs/io_uring.c	682
1 file changed, 354 insertions(+), 328 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 9f1c682d7caf..d3ee4fc532fa 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -112,7 +112,8 @@
IOSQE_IO_DRAIN | IOSQE_CQE_SKIP_SUCCESS)
#define IO_REQ_CLEAN_FLAGS (REQ_F_BUFFER_SELECTED | REQ_F_NEED_CLEANUP | \
- REQ_F_POLLED | REQ_F_CREDS | REQ_F_ASYNC_DATA)
+ REQ_F_POLLED | REQ_F_INFLIGHT | REQ_F_CREDS | \
+ REQ_F_ASYNC_DATA)
#define IO_REQ_CLEAN_SLOW_FLAGS (REQ_F_REFCOUNT | REQ_F_LINK | REQ_F_HARDLINK |\
IO_REQ_CLEAN_FLAGS)
@@ -297,8 +298,8 @@ struct io_buffer_list {
/* below is for ring provided buffers */
__u16 buf_nr_pages;
__u16 nr_entries;
- __u32 head;
- __u32 mask;
+ __u16 head;
+ __u16 mask;
};
struct io_buffer {
@@ -540,6 +541,7 @@ struct io_uring_task {
const struct io_ring_ctx *last;
struct io_wq *io_wq;
struct percpu_counter inflight;
+ atomic_t inflight_tracked;
atomic_t in_idle;
spinlock_t task_lock;
@@ -781,12 +783,6 @@ struct io_msg {
u32 len;
};
-struct io_nop {
- struct file *file;
- u64 extra1;
- u64 extra2;
-};
-
struct io_async_connect {
struct sockaddr_storage address;
};
@@ -848,6 +844,7 @@ enum {
REQ_F_SINGLE_POLL_BIT,
REQ_F_DOUBLE_POLL_BIT,
REQ_F_PARTIAL_IO_BIT,
+ REQ_F_CQE32_INIT_BIT,
REQ_F_APOLL_MULTISHOT_BIT,
/* keep async read/write and isreg together and in order */
REQ_F_SUPPORT_NOWAIT_BIT,
@@ -917,6 +914,8 @@ enum {
REQ_F_PARTIAL_IO = BIT(REQ_F_PARTIAL_IO_BIT),
/* fast poll multishot mode */
REQ_F_APOLL_MULTISHOT = BIT(REQ_F_APOLL_MULTISHOT_BIT),
+ /* ->extra1 and ->extra2 are initialised */
+ REQ_F_CQE32_INIT = BIT(REQ_F_CQE32_INIT_BIT),
};
struct async_poll {
@@ -991,7 +990,6 @@ struct io_kiocb {
struct io_msg msg;
struct io_xattr xattr;
struct io_socket sock;
- struct io_nop nop;
struct io_uring_cmd uring_cmd;
};
@@ -1118,7 +1116,6 @@ static const struct io_op_def io_op_defs[] = {
[IORING_OP_NOP] = {
.audit_skip = 1,
.iopoll = 1,
- .buffer_select = 1,
},
[IORING_OP_READV] = {
.needs_file = 1,
@@ -1355,8 +1352,6 @@ static void io_clean_op(struct io_kiocb *req);
static inline struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
unsigned issue_flags);
static struct file *io_file_get_normal(struct io_kiocb *req, int fd);
-static void io_drop_inflight_file(struct io_kiocb *req);
-static bool io_assign_file(struct io_kiocb *req, unsigned int issue_flags);
static void io_queue_sqe(struct io_kiocb *req);
static void io_rsrc_put_work(struct work_struct *work);
@@ -1366,7 +1361,9 @@ static int io_req_prep_async(struct io_kiocb *req);
static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
unsigned int issue_flags, u32 slot_index);
-static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags);
+static int __io_close_fixed(struct io_kiocb *req, unsigned int issue_flags,
+ unsigned int offset);
+static inline int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags);
static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer);
static void io_eventfd_signal(struct io_ring_ctx *ctx);
@@ -1726,9 +1723,16 @@ static void io_kbuf_recycle(struct io_kiocb *req, unsigned issue_flags)
if (!(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)))
return;
- /* don't recycle if we already did IO to this buffer */
- if (req->flags & REQ_F_PARTIAL_IO)
+ /*
+ * For legacy provided buffer mode, don't recycle if we already did
+ * IO to this buffer. For ring-mapped provided buffer mode, we should
+ * increment ring->head to explicitly monopolize the buffer to avoid
+ * multiple use.
+ */
+ if ((req->flags & REQ_F_BUFFER_SELECTED) &&
+ (req->flags & REQ_F_PARTIAL_IO))
return;
+
/*
* We don't need to recycle for REQ_F_BUFFER_RING, we can just clear
* the flag and hence ensure that bl->head doesn't get incremented.
@@ -1736,8 +1740,13 @@ static void io_kbuf_recycle(struct io_kiocb *req, unsigned issue_flags)
*/
if (req->flags & REQ_F_BUFFER_RING) {
if (req->buf_list) {
- req->buf_index = req->buf_list->bgid;
- req->flags &= ~REQ_F_BUFFER_RING;
+ if (req->flags & REQ_F_PARTIAL_IO) {
+ req->buf_list->head++;
+ req->buf_list = NULL;
+ } else {
+ req->buf_index = req->buf_list->bgid;
+ req->flags &= ~REQ_F_BUFFER_RING;
+ }
}
return;
}
@@ -1757,9 +1766,29 @@ static bool io_match_task(struct io_kiocb *head, struct task_struct *task,
bool cancel_all)
__must_hold(&req->ctx->timeout_lock)
{
+ struct io_kiocb *req;
+
if (task && head->task != task)
return false;
- return cancel_all;
+ if (cancel_all)
+ return true;
+
+ io_for_each_link(req, head) {
+ if (req->flags & REQ_F_INFLIGHT)
+ return true;
+ }
+ return false;
+}
+
+static bool io_match_linked(struct io_kiocb *head)
+{
+ struct io_kiocb *req;
+
+ io_for_each_link(req, head) {
+ if (req->flags & REQ_F_INFLIGHT)
+ return true;
+ }
+ return false;
}
/*
@@ -1769,9 +1798,24 @@ static bool io_match_task(struct io_kiocb *head, struct task_struct *task,
static bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task,
bool cancel_all)
{
+ bool matched;
+
if (task && head->task != task)
return false;
- return cancel_all;
+ if (cancel_all)
+ return true;
+
+ if (head->flags & REQ_F_LINK_TIMEOUT) {
+ struct io_ring_ctx *ctx = head->ctx;
+
+ /* protect against races with linked timeouts */
+ spin_lock_irq(&ctx->timeout_lock);
+ matched = io_match_linked(head);
+ spin_unlock_irq(&ctx->timeout_lock);
+ } else {
+ matched = io_match_linked(head);
+ }
+ return matched;
}
static inline bool req_has_async_data(struct io_kiocb *req)
@@ -1927,6 +1971,14 @@ static inline bool io_req_ffs_set(struct io_kiocb *req)
return req->flags & REQ_F_FIXED_FILE;
}
+static inline void io_req_track_inflight(struct io_kiocb *req)
+{
+ if (!(req->flags & REQ_F_INFLIGHT)) {
+ req->flags |= REQ_F_INFLIGHT;
+ atomic_inc(&current->io_uring->inflight_tracked);
+ }
+}
+
static struct io_kiocb *__io_prep_linked_timeout(struct io_kiocb *req)
{
if (WARN_ON_ONCE(!req->link))
@@ -2395,94 +2447,66 @@ static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
return true;
}
-static inline bool __io_fill_cqe(struct io_ring_ctx *ctx, u64 user_data,
- s32 res, u32 cflags)
+static inline bool __io_fill_cqe_req(struct io_ring_ctx *ctx,
+ struct io_kiocb *req)
{
struct io_uring_cqe *cqe;
- /*
- * If we can't get a cq entry, userspace overflowed the
- * submission (by quite a lot). Increment the overflow count in
- * the ring.
- */
- cqe = io_get_cqe(ctx);
- if (likely(cqe)) {
- WRITE_ONCE(cqe->user_data, user_data);
- WRITE_ONCE(cqe->res, res);
- WRITE_ONCE(cqe->flags, cflags);
- return true;
- }
- return io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0);
-}
+ if (!(ctx->flags & IORING_SETUP_CQE32)) {
+ trace_io_uring_complete(req->ctx, req, req->cqe.user_data,
+ req->cqe.res, req->cqe.flags, 0, 0);
-static inline bool __io_fill_cqe_req_filled(struct io_ring_ctx *ctx,
- struct io_kiocb *req)
-{
- struct io_uring_cqe *cqe;
+ /*
+ * If we can't get a cq entry, userspace overflowed the
+ * submission (by quite a lot). Increment the overflow count in
+ * the ring.
+ */
+ cqe = io_get_cqe(ctx);
+ if (likely(cqe)) {
+ memcpy(cqe, &req->cqe, sizeof(*cqe));
+ return true;
+ }
- trace_io_uring_complete(req->ctx, req, req->cqe.user_data,
- req->cqe.res, req->cqe.flags, 0, 0);
+ return io_cqring_event_overflow(ctx, req->cqe.user_data,
+ req->cqe.res, req->cqe.flags,
+ 0, 0);
+ } else {
+ u64 extra1 = 0, extra2 = 0;
- /*
- * If we can't get a cq entry, userspace overflowed the
- * submission (by quite a lot). Increment the overflow count in
- * the ring.
- */
- cqe = io_get_cqe(ctx);
- if (likely(cqe)) {
- memcpy(cqe, &req->cqe, sizeof(*cqe));
- return true;
- }
- return io_cqring_event_overflow(ctx, req->cqe.user_data,
- req->cqe.res, req->cqe.flags, 0, 0);
-}
+ if (req->flags & REQ_F_CQE32_INIT) {
+ extra1 = req->extra1;
+ extra2 = req->extra2;
+ }
-static inline bool __io_fill_cqe32_req_filled(struct io_ring_ctx *ctx,
- struct io_kiocb *req)
-{
- struct io_uring_cqe *cqe;
- u64 extra1 = req->extra1;
- u64 extra2 = req->extra2;
+ trace_io_uring_complete(req->ctx, req, req->cqe.user_data,
+ req->cqe.res, req->cqe.flags, extra1, extra2);
- trace_io_uring_complete(req->ctx, req, req->cqe.user_data,
- req->cqe.res, req->cqe.flags, extra1, extra2);
+ /*
+ * If we can't get a cq entry, userspace overflowed the
+ * submission (by quite a lot). Increment the overflow count in
+ * the ring.
+ */
+ cqe = io_get_cqe(ctx);
+ if (likely(cqe)) {
+ memcpy(cqe, &req->cqe, sizeof(struct io_uring_cqe));
+ WRITE_ONCE(cqe->big_cqe[0], extra1);
+ WRITE_ONCE(cqe->big_cqe[1], extra2);
+ return true;
+ }
- /*
- * If we can't get a cq entry, userspace overflowed the
- * submission (by quite a lot). Increment the overflow count in
- * the ring.
- */
- cqe = io_get_cqe(ctx);
- if (likely(cqe)) {
- memcpy(cqe, &req->cqe, sizeof(struct io_uring_cqe));
- cqe->big_cqe[0] = extra1;
- cqe->big_cqe[1] = extra2;
- return true;
+ return io_cqring_event_overflow(ctx, req->cqe.user_data,
+ req->cqe.res, req->cqe.flags,
+ extra1, extra2);
}
-
- return io_cqring_event_overflow(ctx, req->cqe.user_data, req->cqe.res,
- req->cqe.flags, extra1, extra2);
}
-static inline bool __io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags)
-{
- trace_io_uring_complete(req->ctx, req, req->cqe.user_data, res, cflags, 0, 0);
- return __io_fill_cqe(req->ctx, req->cqe.user_data, res, cflags);
-}
-
-static inline void __io_fill_cqe32_req(struct io_kiocb *req, s32 res, u32 cflags,
- u64 extra1, u64 extra2)
+static noinline bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data,
+ s32 res, u32 cflags)
{
- struct io_ring_ctx *ctx = req->ctx;
struct io_uring_cqe *cqe;
- if (WARN_ON_ONCE(!(ctx->flags & IORING_SETUP_CQE32)))
- return;
- if (req->flags & REQ_F_CQE_SKIP)
- return;
-
- trace_io_uring_complete(ctx, req, req->cqe.user_data, res, cflags,
- extra1, extra2);
+ ctx->cq_extra++;
+ trace_io_uring_complete(ctx, NULL, user_data, res, cflags, 0, 0);
/*
* If we can't get a cq entry, userspace overflowed the
@@ -2491,23 +2515,17 @@ static inline void __io_fill_cqe32_req(struct io_kiocb *req, s32 res, u32 cflags
*/
cqe = io_get_cqe(ctx);
if (likely(cqe)) {
- WRITE_ONCE(cqe->user_data, req->cqe.user_data);
+ WRITE_ONCE(cqe->user_data, user_data);
WRITE_ONCE(cqe->res, res);
WRITE_ONCE(cqe->flags, cflags);
- WRITE_ONCE(cqe->big_cqe[0], extra1);
- WRITE_ONCE(cqe->big_cqe[1], extra2);
- return;
- }
- io_cqring_event_overflow(ctx, req->cqe.user_data, res, cflags, extra1, extra2);
-}
-
-static noinline bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data,
- s32 res, u32 cflags)
-{
- ctx->cq_extra++;
- trace_io_uring_complete(ctx, NULL, user_data, res, cflags, 0, 0);
- return __io_fill_cqe(ctx, user_data, res, cflags);
+ if (ctx->flags & IORING_SETUP_CQE32) {
+ WRITE_ONCE(cqe->big_cqe[0], 0);
+ WRITE_ONCE(cqe->big_cqe[1], 0);
+ }
+ return true;
+ }
+ return io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0);
}
static void __io_req_complete_put(struct io_kiocb *req)
@@ -2544,16 +2562,11 @@ static void __io_req_complete_put(struct io_kiocb *req)
static void __io_req_complete_post(struct io_kiocb *req, s32 res,
u32 cflags)
{
- if (!(req->flags & REQ_F_CQE_SKIP))
- __io_fill_cqe_req(req, res, cflags);
- __io_req_complete_put(req);
-}
-
-static void __io_req_complete_post32(struct io_kiocb *req, s32 res,
- u32 cflags, u64 extra1, u64 extra2)
-{
- if (!(req->flags & REQ_F_CQE_SKIP))
- __io_fill_cqe32_req(req, res, cflags, extra1, extra2);
+ if (!(req->flags & REQ_F_CQE_SKIP)) {
+ req->cqe.res = res;
+ req->cqe.flags = cflags;
+ __io_fill_cqe_req(req->ctx, req);
+ }
__io_req_complete_put(req);
}
@@ -2568,18 +2581,6 @@ static void io_req_complete_post(struct io_kiocb *req, s32 res, u32 cflags)
io_cqring_ev_posted(ctx);
}
-static void io_req_complete_post32(struct io_kiocb *req, s32 res,
- u32 cflags, u64 extra1, u64 extra2)
-{
- struct io_ring_ctx *ctx = req->ctx;
-
- spin_lock(&ctx->completion_lock);
- __io_req_complete_post32(req, res, cflags, extra1, extra2);
- io_commit_cqring(ctx);
- spin_unlock(&ctx->completion_lock);
- io_cqring_ev_posted(ctx);
-}
-
static inline void io_req_complete_state(struct io_kiocb *req, s32 res,
u32 cflags)
{
@@ -2597,19 +2598,6 @@ static inline void __io_req_complete(struct io_kiocb *req, unsigned issue_flags,
io_req_complete_post(req, res, cflags);
}
-static inline void __io_req_complete32(struct io_kiocb *req,
- unsigned int issue_flags, s32 res,
- u32 cflags, u64 extra1, u64 extra2)
-{
- if (issue_flags & IO_URING_F_COMPLETE_DEFER) {
- io_req_complete_state(req, res, cflags);
- req->extra1 = extra1;
- req->extra2 = extra2;
- } else {
- io_req_complete_post32(req, res, cflags, extra1, extra2);
- }
-}
-
static inline void io_req_complete(struct io_kiocb *req, s32 res)
{
if (res < 0)
@@ -2988,8 +2976,6 @@ static void __io_req_task_work_add(struct io_kiocb *req,
unsigned long flags;
bool running;
- io_drop_inflight_file(req);
-
spin_lock_irqsave(&tctx->task_lock, flags);
wq_list_add_tail(&req->io_task_work.node, list);
running = tctx->task_running;
@@ -3158,12 +3144,8 @@ static void __io_submit_flush_completions(struct io_ring_ctx *ctx)
struct io_kiocb *req = container_of(node, struct io_kiocb,
comp_list);
- if (!(req->flags & REQ_F_CQE_SKIP)) {
- if (!(ctx->flags & IORING_SETUP_CQE32))
- __io_fill_cqe_req_filled(ctx, req);
- else
- __io_fill_cqe32_req_filled(ctx, req);
- }
+ if (!(req->flags & REQ_F_CQE_SKIP))
+ __io_fill_cqe_req(ctx, req);
}
io_commit_cqring(ctx);
@@ -3282,7 +3264,9 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
nr_events++;
if (unlikely(req->flags & REQ_F_CQE_SKIP))
continue;
- __io_fill_cqe_req(req, req->cqe.res, io_put_kbuf(req, 0));
+
+ req->cqe.flags = io_put_kbuf(req, 0);
+ __io_fill_cqe_req(req->ctx, req);
}
if (unlikely(!nr_events))
@@ -3633,6 +3617,20 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
int ret;
kiocb->ki_pos = READ_ONCE(sqe->off);
+ /* used for fixed read/write too - just read unconditionally */
+ req->buf_index = READ_ONCE(sqe->buf_index);
+
+ if (req->opcode == IORING_OP_READ_FIXED ||
+ req->opcode == IORING_OP_WRITE_FIXED) {
+ struct io_ring_ctx *ctx = req->ctx;
+ u16 index;
+
+ if (unlikely(req->buf_index >= ctx->nr_user_bufs))
+ return -EFAULT;
+ index = array_index_nospec(req->buf_index, ctx->nr_user_bufs);
+ req->imu = ctx->user_bufs[index];
+ io_req_set_rsrc_node(req, ctx, 0);
+ }
ioprio = READ_ONCE(sqe->ioprio);
if (ioprio) {
@@ -3645,12 +3643,9 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
kiocb->ki_ioprio = get_current_ioprio();
}
- req->imu = NULL;
req->rw.addr = READ_ONCE(sqe->addr);
req->rw.len = READ_ONCE(sqe->len);
req->rw.flags = READ_ONCE(sqe->rw_flags);
- /* used for fixed read/write too - just read unconditionally */
- req->buf_index = READ_ONCE(sqe->buf_index);
return 0;
}
@@ -3782,20 +3777,9 @@ static int __io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter
static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter,
unsigned int issue_flags)
{
- struct io_mapped_ubuf *imu = req->imu;
- u16 index, buf_index = req->buf_index;
-
- if (likely(!imu)) {
- struct io_ring_ctx *ctx = req->ctx;
-
- if (unlikely(buf_index >= ctx->nr_user_bufs))
- return -EFAULT;
- io_req_set_rsrc_node(req, ctx, issue_flags);
- index = array_index_nospec(buf_index, ctx->nr_user_bufs);
- imu = READ_ONCE(ctx->user_bufs[index]);
- req->imu = imu;
- }
- return __io_import_fixed(req, rw, iter, imu);
+ if (WARN_ON_ONCE(!req->imu))
+ return -EFAULT;
+ return __io_import_fixed(req, rw, iter, req->imu);
}
static int io_buffer_add_list(struct io_ring_ctx *ctx,
@@ -3832,19 +3816,17 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len,
{
struct io_uring_buf_ring *br = bl->buf_ring;
struct io_uring_buf *buf;
- __u32 head = bl->head;
+ __u16 head = bl->head;
- if (unlikely(smp_load_acquire(&br->tail) == head)) {
- io_ring_submit_unlock(req->ctx, issue_flags);
+ if (unlikely(smp_load_acquire(&br->tail) == head))
return NULL;
- }
head &= bl->mask;
if (head < IO_BUFFER_LIST_BUF_PER_PAGE) {
buf = &br->bufs[head];
} else {
int off = head & (IO_BUFFER_LIST_BUF_PER_PAGE - 1);
- int index = head / IO_BUFFER_LIST_BUF_PER_PAGE - 1;
+ int index = head / IO_BUFFER_LIST_BUF_PER_PAGE;
buf = page_address(bl->buf_pages[index]);
buf += off;
}
@@ -3854,7 +3836,7 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len,
req->buf_list = bl;
req->buf_index = buf->bid;
- if (issue_flags & IO_URING_F_UNLOCKED) {
+ if (issue_flags & IO_URING_F_UNLOCKED || !file_can_poll(req->file)) {
/*
* If we came in unlocked, we have no choice but to consume the
* buffer here. This does mean it'll be pinned until the IO
@@ -4176,6 +4158,16 @@ static inline int io_rw_prep_async(struct io_kiocb *req, int rw)
return 0;
}
+static int io_readv_prep_async(struct io_kiocb *req)
+{
+ return io_rw_prep_async(req, READ);
+}
+
+static int io_writev_prep_async(struct io_kiocb *req)
+{
+ return io_rw_prep_async(req, WRITE);
+}
+
/*
* This is our waitqueue callback handler, registered through __folio_lock_async()
* when we initially tried to do the IO with the iocb armed our waitqueue.
@@ -5025,10 +5017,18 @@ void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
req->uring_cmd.task_work_cb = task_work_cb;
req->io_task_work.func = io_uring_cmd_work;
- io_req_task_prio_work_add(req);
+ io_req_task_work_add(req);
}
EXPORT_SYMBOL_GPL(io_uring_cmd_complete_in_task);
+static inline void io_req_set_cqe32_extra(struct io_kiocb *req,
+ u64 extra1, u64 extra2)
+{
+ req->extra1 = extra1;
+ req->extra2 = extra2;
+ req->flags |= REQ_F_CQE32_INIT;
+}
+
/*
* Called by consumers of io_uring_cmd, if they originally returned
* -EIOCBQUEUED upon receiving the command.
@@ -5039,10 +5039,10 @@ void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2)
if (ret < 0)
req_set_fail(req);
+
if (req->ctx->flags & IORING_SETUP_CQE32)
- __io_req_complete32(req, 0, ret, 0, res2, 0);
- else
- io_req_complete(req, ret);
+ io_req_set_cqe32_extra(req, res2, 0);
+ io_req_complete(req, ret);
}
EXPORT_SYMBOL_GPL(io_uring_cmd_done);
@@ -5103,42 +5103,6 @@ static int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
return 0;
}
-static int io_shutdown_prep(struct io_kiocb *req,
- const struct io_uring_sqe *sqe)
-{
-#if defined(CONFIG_NET)
- if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
- sqe->buf_index || sqe->splice_fd_in))
- return -EINVAL;
-
- req->shutdown.how = READ_ONCE(sqe->len);
- return 0;
-#else
- return -EOPNOTSUPP;
-#endif
-}
-
-static int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
-{
-#if defined(CONFIG_NET)
- struct socket *sock;
- int ret;
-
- if (issue_flags & IO_URING_F_NONBLOCK)
- return -EAGAIN;
-
- sock = sock_from_file(req->file);
- if (unlikely(!sock))
- return -ENOTSOCK;
-
- ret = __sys_shutdown_sock(sock, req->shutdown.how);
- io_req_complete(req, ret);
- return 0;
-#else
- return -EOPNOTSUPP;
-#endif
-}
-
static int __io_splice_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
@@ -5240,14 +5204,6 @@ done:
static int io_nop_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- /*
- * If the ring is setup with CQE32, relay back addr/addr
- */
- if (req->ctx->flags & IORING_SETUP_CQE32) {
- req->nop.extra1 = READ_ONCE(sqe->addr);
- req->nop.extra2 = READ_ONCE(sqe->addr2);
- }
-
return 0;
}
@@ -5256,23 +5212,7 @@ static int io_nop_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
*/
static int io_nop(struct io_kiocb *req, unsigned int issue_flags)
{
- unsigned int cflags;
- void __user *buf;
-
- if (req->flags & REQ_F_BUFFER_SELECT) {
- size_t len = 1;
-
- buf = io_buffer_select(req, &len, issue_flags);
- if (!buf)
- return -ENOBUFS;
- }
-
- cflags = io_put_kbuf(req, issue_flags);
- if (!(req->ctx->flags & IORING_SETUP_CQE32))
- __io_req_complete(req, issue_flags, 0, cflags);
- else
- __io_req_complete32(req, issue_flags, 0, cflags,
- req->nop.extra1, req->nop.extra2);
+ __io_req_complete(req, issue_flags, 0, 0);
return 0;
}
@@ -5445,15 +5385,11 @@ static int io_file_bitmap_get(struct io_ring_ctx *ctx)
unsigned long nr = ctx->nr_user_files;
int ret;
- if (table->alloc_hint >= nr)
- table->alloc_hint = 0;
-
do {
ret = find_next_zero_bit(table->bitmap, nr, table->alloc_hint);
- if (ret != nr) {
- table->alloc_hint = ret + 1;
+ if (ret != nr)
return ret;
- }
+
if (!table->alloc_hint)
break;
@@ -5464,6 +5400,10 @@ static int io_file_bitmap_get(struct io_ring_ctx *ctx)
return -ENFILE;
}
+/*
+ * Note when io_fixed_fd_install() returns error value, it will ensure
+ * fput() is called correspondingly.
+ */
static int io_fixed_fd_install(struct io_kiocb *req, unsigned int issue_flags,
struct file *file, unsigned int file_slot)
{
@@ -5471,26 +5411,24 @@ static int io_fixed_fd_install(struct io_kiocb *req, unsigned int issue_flags,
struct io_ring_ctx *ctx = req->ctx;
int ret;
+ io_ring_submit_lock(ctx, issue_flags);
+
if (alloc_slot) {
- io_ring_submit_lock(ctx, issue_flags);
ret = io_file_bitmap_get(ctx);
- if (unlikely(ret < 0)) {
- io_ring_submit_unlock(ctx, issue_flags);
- return ret;
- }
-
+ if (unlikely(ret < 0))
+ goto err;
file_slot = ret;
} else {
file_slot--;
}
ret = io_install_fixed_file(req, file, issue_flags, file_slot);
- if (alloc_slot) {
- io_ring_submit_unlock(ctx, issue_flags);
- if (!ret)
- return file_slot;
- }
-
+ if (!ret && alloc_slot)
+ ret = file_slot;
+err:
+ io_ring_submit_unlock(ctx, issue_flags);
+ if (unlikely(ret < 0))
+ fput(file);
return ret;
}
@@ -5990,7 +5928,7 @@ static int io_close(struct io_kiocb *req, unsigned int issue_flags)
struct files_struct *files = current->files;
struct io_close *close = &req->close;
struct fdtable *fdt;
- struct file *file = NULL;
+ struct file *file;
int ret = -EBADF;
if (req->close.file_slot) {
@@ -6008,7 +5946,6 @@ static int io_close(struct io_kiocb *req, unsigned int issue_flags)
lockdep_is_held(&files->file_lock));
if (!file || file->f_op == &io_uring_fops) {
spin_unlock(&files->file_lock);
- file = NULL;
goto err;
}
@@ -6018,21 +5955,16 @@ static int io_close(struct io_kiocb *req, unsigned int issue_flags)
return -EAGAIN;
}
- ret = __close_fd_get_file(close->fd, &file);
+ file = __close_fd_get_file(close->fd);
spin_unlock(&files->file_lock);
- if (ret < 0) {
- if (ret == -ENOENT)
- ret = -EBADF;
+ if (!file)
goto err;
- }
/* No ->flush() or already async, safely close from here */
ret = filp_close(file, current->files);
err:
if (ret < 0)
req_set_fail(req);
- if (file)
- fput(file);
__io_req_complete(req, issue_flags, ret, 0);
return 0;
}
@@ -6063,6 +5995,34 @@ static int io_sync_file_range(struct io_kiocb *req, unsigned int issue_flags)
}
#if defined(CONFIG_NET)
+static int io_shutdown_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
+{
+ if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
+ sqe->buf_index || sqe->splice_fd_in))
+ return -EINVAL;
+
+ req->shutdown.how = READ_ONCE(sqe->len);
+ return 0;
+}
+
+static int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
+{
+ struct socket *sock;
+ int ret;
+
+ if (issue_flags & IO_URING_F_NONBLOCK)
+ return -EAGAIN;
+
+ sock = sock_from_file(req->file);
+ if (unlikely(!sock))
+ return -ENOTSOCK;
+
+ ret = __sys_shutdown_sock(sock, req->shutdown.how);
+ io_req_complete(req, ret);
+ return 0;
+}
+
static bool io_net_retry(struct socket *sock, int flags)
{
if (!(flags & MSG_WAITALL))
@@ -6674,8 +6634,8 @@ static int io_socket(struct io_kiocb *req, unsigned int issue_flags)
fd_install(fd, file);
ret = fd;
} else {
- ret = io_install_fixed_file(req, file, issue_flags,
- sock->file_slot - 1);
+ ret = io_fixed_fd_install(req, issue_flags, file,
+ sock->file_slot);
}
__io_req_complete(req, issue_flags, ret, 0);
return 0;
@@ -6767,6 +6727,7 @@ IO_NETOP_PREP_ASYNC(recvmsg);
IO_NETOP_PREP_ASYNC(connect);
IO_NETOP_PREP(accept);
IO_NETOP_PREP(socket);
+IO_NETOP_PREP(shutdown);
IO_NETOP_FN(send);
IO_NETOP_FN(recv);
#endif /* CONFIG_NET */
@@ -6905,10 +6866,6 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked)
if (!req->cqe.res) {
struct poll_table_struct pt = { ._key = req->apoll_events };
- unsigned flags = locked ? 0 : IO_URING_F_UNLOCKED;
-
- if (unlikely(!io_assign_file(req, flags)))
- return -EBADF;
req->cqe.res = vfs_poll(req->file, &pt) & req->apoll_events;
}
@@ -7390,7 +7347,7 @@ static __poll_t io_poll_parse_events(const struct io_uring_sqe *sqe,
return demangle_poll(events) | (events & (EPOLLEXCLUSIVE|EPOLLONESHOT));
}
-static int io_poll_update_prep(struct io_kiocb *req,
+static int io_poll_remove_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
struct io_poll_update *upd = &req->poll_update;
@@ -7454,7 +7411,7 @@ static int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)
return 0;
}
-static int io_poll_update(struct io_kiocb *req, unsigned int issue_flags)
+static int io_poll_remove(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_cancel_data cd = { .data = req->poll_update.old_user_data, };
struct io_ring_ctx *ctx = req->ctx;
@@ -7698,8 +7655,9 @@ static int io_timeout_remove(struct io_kiocb *req, unsigned int issue_flags)
return 0;
}
-static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
- bool is_timeout_link)
+static int __io_timeout_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe,
+ bool is_timeout_link)
{
struct io_timeout_data *data;
unsigned flags;
@@ -7754,6 +7712,18 @@ static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
return 0;
}
+static int io_timeout_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
+{
+ return __io_timeout_prep(req, sqe, false);
+}
+
+static int io_link_timeout_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
+{
+ return __io_timeout_prep(req, sqe, true);
+}
+
static int io_timeout(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_ring_ctx *ctx = req->ctx;
@@ -7970,7 +7940,7 @@ done:
return 0;
}
-static int io_rsrc_update_prep(struct io_kiocb *req,
+static int io_files_update_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
@@ -7986,6 +7956,41 @@ static int io_rsrc_update_prep(struct io_kiocb *req,
return 0;
}
+static int io_files_update_with_index_alloc(struct io_kiocb *req,
+ unsigned int issue_flags)
+{
+ __s32 __user *fds = u64_to_user_ptr(req->rsrc_update.arg);
+ unsigned int done;
+ struct file *file;
+ int ret, fd;
+
+ for (done = 0; done < req->rsrc_update.nr_args; done++) {
+ if (copy_from_user(&fd, &fds[done], sizeof(fd))) {
+ ret = -EFAULT;
+ break;
+ }
+
+ file = fget(fd);
+ if (!file) {
+ ret = -EBADF;
+ break;
+ }
+ ret = io_fixed_fd_install(req, issue_flags, file,
+ IORING_FILE_INDEX_ALLOC);
+ if (ret < 0)
+ break;
+ if (copy_to_user(&fds[done], &ret, sizeof(ret))) {
+ __io_close_fixed(req, issue_flags, ret);
+ ret = -EFAULT;
+ break;
+ }
+ }
+
+ if (done)
+ return done;
+ return ret;
+}
+
static int io_files_update(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_ring_ctx *ctx = req->ctx;
@@ -7999,10 +8004,14 @@ static int io_files_update(struct io_kiocb *req, unsigned int issue_flags)
up.resv = 0;
up.resv2 = 0;
- io_ring_submit_lock(ctx, issue_flags);
- ret = __io_register_rsrc_update(ctx, IORING_RSRC_FILE,
- &up, req->rsrc_update.nr_args);
- io_ring_submit_unlock(ctx, issue_flags);
+ if (req->rsrc_update.offset == IORING_FILE_INDEX_ALLOC) {
+ ret = io_files_update_with_index_alloc(req, issue_flags);
+ } else {
+ io_ring_submit_lock(ctx, issue_flags);
+ ret = __io_register_rsrc_update(ctx, IORING_RSRC_FILE,
+ &up, req->rsrc_update.nr_args);
+ io_ring_submit_unlock(ctx, issue_flags);
+ }
if (ret < 0)
req_set_fail(req);
@@ -8025,7 +8034,7 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
case IORING_OP_POLL_ADD:
return io_poll_add_prep(req, sqe);
case IORING_OP_POLL_REMOVE:
- return io_poll_update_prep(req, sqe);
+ return io_poll_remove_prep(req, sqe);
case IORING_OP_FSYNC:
return io_fsync_prep(req, sqe);
case IORING_OP_SYNC_FILE_RANGE:
@@ -8039,13 +8048,13 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
case IORING_OP_CONNECT:
return io_connect_prep(req, sqe);
case IORING_OP_TIMEOUT:
- return io_timeout_prep(req, sqe, false);
+ return io_timeout_prep(req, sqe);
case IORING_OP_TIMEOUT_REMOVE:
return io_timeout_remove_prep(req, sqe);
case IORING_OP_ASYNC_CANCEL:
return io_async_cancel_prep(req, sqe);
case IORING_OP_LINK_TIMEOUT:
- return io_timeout_prep(req, sqe, true);
+ return io_link_timeout_prep(req, sqe);
case IORING_OP_ACCEPT:
return io_accept_prep(req, sqe);
case IORING_OP_FALLOCATE:
@@ -8055,7 +8064,7 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
case IORING_OP_CLOSE:
return io_close_prep(req, sqe);
case IORING_OP_FILES_UPDATE:
- return io_rsrc_update_prep(req, sqe);
+ return io_files_update_prep(req, sqe);
case IORING_OP_STATX:
return io_statx_prep(req, sqe);
case IORING_OP_FADVISE:
@@ -8123,9 +8132,9 @@ static int io_req_prep_async(struct io_kiocb *req)
switch (req->opcode) {
case IORING_OP_READV:
- return io_rw_prep_async(req, READ);
+ return io_readv_prep_async(req);
case IORING_OP_WRITEV:
- return io_rw_prep_async(req, WRITE);
+ return io_writev_prep_async(req);
case IORING_OP_SENDMSG:
return io_sendmsg_prep_async(req);
case IORING_OP_RECVMSG:
@@ -8264,6 +8273,11 @@ static void io_clean_op(struct io_kiocb *req)
kfree(req->apoll);
req->apoll = NULL;
}
+ if (req->flags & REQ_F_INFLIGHT) {
+ struct io_uring_task *tctx = req->task->io_uring;
+
+ atomic_dec(&tctx->inflight_tracked);
+ }
if (req->flags & REQ_F_CREDS)
put_cred(req->creds);
if (req->flags & REQ_F_ASYNC_DATA) {
@@ -8288,6 +8302,7 @@ static bool io_assign_file(struct io_kiocb *req, unsigned int issue_flags)
static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
{
+ const struct io_op_def *def = &io_op_defs[req->opcode];
const struct cred *creds = NULL;
int ret;
@@ -8297,7 +8312,7 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
if (unlikely((req->flags & REQ_F_CREDS) && req->creds != current_cred()))
creds = override_creds(req->creds);
- if (!io_op_defs[req->opcode].audit_skip)
+ if (!def->audit_skip)
audit_uring_entry(req->opcode);
switch (req->opcode) {
@@ -8321,7 +8336,7 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
ret = io_poll_add(req, issue_flags);
break;
case IORING_OP_POLL_REMOVE:
- ret = io_poll_update(req, issue_flags);
+ ret = io_poll_remove(req, issue_flags);
break;
case IORING_OP_SYNC_FILE_RANGE:
ret = io_sync_file_range(req, issue_flags);
@@ -8436,7 +8451,7 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
break;
}
- if (!io_op_defs[req->opcode].audit_skip)
+ if (!def->audit_skip)
audit_uring_exit(!ret, ret);
if (creds)
@@ -8569,19 +8584,6 @@ out:
return file;
}
-/*
- * Drop the file for requeue operations. Only used of req->file is the
- * io_uring descriptor itself.
- */
-static void io_drop_inflight_file(struct io_kiocb *req)
-{
- if (unlikely(req->flags & REQ_F_INFLIGHT)) {
- fput(req->file);
- req->file = NULL;
- req->flags &= ~REQ_F_INFLIGHT;
- }
-}
-
static struct file *io_file_get_normal(struct io_kiocb *req, int fd)
{
struct file *file = fget(fd);
@@ -8590,7 +8592,7 @@ static struct file *io_file_get_normal(struct io_kiocb *req, int fd)
/* we don't allow fixed io_uring files */
if (file && file->f_op == &io_uring_fops)
- req->flags |= REQ_F_INFLIGHT;
+ io_req_track_inflight(req);
return file;
}
@@ -8688,6 +8690,7 @@ static void io_queue_async(struct io_kiocb *req, int ret)
* Queued up for async execution, worker will release
* submit reference when the iocb is actually submitted.
*/
+ io_kbuf_recycle(req, 0);
io_queue_iowq(req, NULL);
break;
case IO_APOLL_OK:
@@ -8788,6 +8791,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
const struct io_uring_sqe *sqe)
__must_hold(&ctx->uring_lock)
{
+ const struct io_op_def *def;
unsigned int sqe_flags;
int personality;
u8 opcode;
@@ -8805,12 +8809,13 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
req->opcode = 0;
return -EINVAL;
}
+ def = &io_op_defs[opcode];
if (unlikely(sqe_flags & ~SQE_COMMON_FLAGS)) {
/* enforce forwards compatibility on users */
if (sqe_flags & ~SQE_VALID_FLAGS)
return -EINVAL;
if (sqe_flags & IOSQE_BUFFER_SELECT) {
- if (!io_op_defs[opcode].buffer_select)
+ if (!def->buffer_select)
return -EOPNOTSUPP;
req->buf_index = READ_ONCE(sqe->buf_group);
}
@@ -8836,12 +8841,12 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
}
}
- if (!io_op_defs[opcode].ioprio && sqe->ioprio)
+ if (!def->ioprio && sqe->ioprio)
return -EINVAL;
- if (!io_op_defs[opcode].iopoll && (ctx->flags & IORING_SETUP_IOPOLL))
+ if (!def->iopoll && (ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
- if (io_op_defs[opcode].needs_file) {
+ if (def->needs_file) {
struct io_submit_state *state = &ctx->submit_state;
req->cqe.fd = READ_ONCE(sqe->fd);
@@ -8850,7 +8855,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
* Plug now if we have more than 2 IO left after this, and the
* target is potentially a read/write to block based storage.
*/
- if (state->need_plug && io_op_defs[opcode].plug) {
+ if (state->need_plug && def->plug) {
state->plug_started = true;
state->need_plug = false;
blk_start_plug_nr_ios(&state->plug, state->submit_nr);
@@ -9658,8 +9663,7 @@ static inline void io_file_bitmap_set(struct io_file_table *table, int bit)
{
WARN_ON_ONCE(test_bit(bit, table->bitmap));
__set_bit(bit, table->bitmap);
- if (bit == table->alloc_hint)
- table->alloc_hint++;
+ table->alloc_hint = bit + 1;
}
static inline void io_file_bitmap_clear(struct io_file_table *table, int bit)
@@ -9702,11 +9706,19 @@ static void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
{
+ unsigned nr = ctx->nr_user_files;
int ret;
if (!ctx->file_data)
return -ENXIO;
+
+ /*
+ * Quiesce may unlock ->uring_lock, and while it's not held
+ * prevent new requests using the table.
+ */
+ ctx->nr_user_files = 0;
ret = io_rsrc_ref_quiesce(ctx->file_data, ctx);
+ ctx->nr_user_files = nr;
if (!ret)
__io_sqe_files_unregister(ctx);
return ret;
@@ -10113,21 +10125,19 @@ static int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx,
static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
unsigned int issue_flags, u32 slot_index)
+ __must_hold(&req->ctx->uring_lock)
{
struct io_ring_ctx *ctx = req->ctx;
bool needs_switch = false;
struct io_fixed_file *file_slot;
- int ret = -EBADF;
+ int ret;
- io_ring_submit_lock(ctx, issue_flags);
if (file->f_op == &io_uring_fops)
- goto err;
- ret = -ENXIO;
+ return -EBADF;
if (!ctx->file_data)
- goto err;
- ret = -EINVAL;
+ return -ENXIO;
if (slot_index >= ctx->nr_user_files)
- goto err;
+ return -EINVAL;
slot_index = array_index_nospec(slot_index, ctx->nr_user_files);
file_slot = io_fixed_file_slot(&ctx->file_table, slot_index);
@@ -10158,15 +10168,14 @@ static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
err:
if (needs_switch)
io_rsrc_node_switch(ctx, ctx->file_data);
- io_ring_submit_unlock(ctx, issue_flags);
if (ret)
fput(file);
return ret;
}
-static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags)
+static int __io_close_fixed(struct io_kiocb *req, unsigned int issue_flags,
+ unsigned int offset)
{
- unsigned int offset = req->close.file_slot - 1;
struct io_ring_ctx *ctx = req->ctx;
struct io_fixed_file *file_slot;
struct file *file;
@@ -10203,6 +10212,11 @@ out:
return ret;
}
+static inline int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags)
+{
+ return __io_close_fixed(req, issue_flags, req->close.file_slot - 1);
+}
+
static int __io_sqe_files_update(struct io_ring_ctx *ctx,
struct io_uring_rsrc_update2 *up,
unsigned nr_args)
@@ -10351,6 +10365,7 @@ static __cold int io_uring_alloc_task_context(struct task_struct *task,
xa_init(&tctx->xa);
init_waitqueue_head(&tctx->wait);
atomic_set(&tctx->in_idle, 0);
+ atomic_set(&tctx->inflight_tracked, 0);
task->io_uring = tctx;
spin_lock_init(&tctx->task_lock);
INIT_WQ_LIST(&tctx->task_list);
@@ -10601,12 +10616,19 @@ static void __io_sqe_buffers_unregister(struct io_ring_ctx *ctx)
static int io_sqe_buffers_unregister(struct io_ring_ctx *ctx)
{
+ unsigned nr = ctx->nr_user_bufs;
int ret;
if (!ctx->buf_data)
return -ENXIO;
+ /*
+ * Quiesce may unlock ->uring_lock, and while it's not held
+ * prevent new requests using the table.
+ */
+ ctx->nr_user_bufs = 0;
ret = io_rsrc_ref_quiesce(ctx->buf_data, ctx);
+ ctx->nr_user_bufs = nr;
if (!ret)
__io_sqe_buffers_unregister(ctx);
return ret;
@@ -11046,6 +11068,7 @@ static void io_destroy_buffers(struct io_ring_ctx *ctx)
xa_for_each(&ctx->io_bl_xa, index, bl) {
xa_erase(&ctx->io_bl_xa, bl->bgid);
__io_remove_buffers(ctx, bl, -1U);
+ kfree(bl);
}
while (!list_empty(&ctx->io_buffers_pages)) {
@@ -11581,7 +11604,7 @@ static __cold void io_uring_clean_tctx(struct io_uring_task *tctx)
static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked)
{
if (tracked)
- return 0;
+ return atomic_read(&tctx->inflight_tracked);
return percpu_counter_sum(&tctx->inflight);
}
@@ -11957,14 +11980,14 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
return -EINVAL;
fd = array_index_nospec(fd, IO_RINGFD_REG_MAX);
f.file = tctx->registered_rings[fd];
- if (unlikely(!f.file))
- return -EBADF;
+ f.flags = 0;
} else {
f = fdget(fd);
- if (unlikely(!f.file))
- return -EBADF;
}
+ if (unlikely(!f.file))
+ return -EBADF;
+
ret = -EOPNOTSUPP;
if (unlikely(f.file->f_op != &io_uring_fops))
goto out_fput;
@@ -12062,8 +12085,7 @@ iopoll_locked:
out:
percpu_ref_put(&ctx->refs);
out_fput:
- if (!(flags & IORING_ENTER_REGISTERED_RING))
- fdput(f);
+ fdput(f);
return ret;
}
@@ -12913,6 +12935,10 @@ static int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
if (!is_power_of_2(reg.ring_entries))
return -EINVAL;
+ /* cannot disambiguate full vs empty due to head/tail size */
+ if (reg.ring_entries >= 65536)
+ return -EINVAL;
+
if (unlikely(reg.bgid < BGID_ARRAY && !ctx->io_bl)) {
int ret = io_init_bl_list(ctx);
if (ret)