Diffstat (limited to 'io_uring/net.c')
-rw-r--r--  io_uring/net.c  438
1 file changed, 320 insertions, 118 deletions
diff --git a/io_uring/net.c b/io_uring/net.c
index 32fc3da04e41..caa6a803cb72 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -55,28 +55,22 @@ struct io_sr_msg {
struct user_msghdr __user *umsg;
void __user *buf;
};
+ unsigned len;
+ unsigned done_io;
unsigned msg_flags;
- unsigned flags;
- size_t len;
- size_t done_io;
-};
-
-struct io_sendzc {
- struct file *file;
- void __user *buf;
- size_t len;
- u16 slot_idx;
- unsigned msg_flags;
- unsigned flags;
- unsigned addr_len;
+ u16 flags;
+ /* initialised and used only by !msg send variants */
+ u16 addr_len;
void __user *addr;
+ /* used only for send zerocopy */
+ struct io_kiocb *notif;
};
#define IO_APOLL_MULTI_POLLED (REQ_F_APOLL_MULTISHOT | REQ_F_POLLED)
int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- struct io_shutdown *shutdown = io_kiocb_to_cmd(req);
+ struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
sqe->buf_index || sqe->splice_fd_in))
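The io_kiocb_to_cmd() change repeated throughout this diff adds an explicit type argument to the accessor. A rough sketch of the idea behind the type-safe variant (illustrative only; the real macro and the io_kiocb field layout differ):

	/* hedged sketch: the extra argument lets the macro verify at compile
	 * time that the per-opcode command struct fits in the io_kiocb
	 * scratch area, instead of performing an unchecked cast */
	#define io_kiocb_to_cmd(req, cmd_type) \
		((void)BUILD_BUG_ON_ZERO(sizeof(cmd_type) > sizeof((req)->cmd)), \
		 (cmd_type *)&(req)->cmd)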
@@ -88,7 +82,7 @@ int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
{
- struct io_shutdown *shutdown = io_kiocb_to_cmd(req);
+ struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
struct socket *sock;
int ret;
@@ -115,7 +109,7 @@ static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_async_msghdr *hdr = req->async_data;
- if (!hdr || issue_flags & IO_URING_F_UNLOCKED)
+ if (!req_has_async_data(req) || issue_flags & IO_URING_F_UNLOCKED)
return;
/* Let normal cleanup path reap it if we fail adding to the cache */
@@ -125,47 +119,58 @@ static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
}
}
-static struct io_async_msghdr *io_recvmsg_alloc_async(struct io_kiocb *req,
- unsigned int issue_flags)
+static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req,
+ unsigned int issue_flags)
{
struct io_ring_ctx *ctx = req->ctx;
struct io_cache_entry *entry;
+ struct io_async_msghdr *hdr;
if (!(issue_flags & IO_URING_F_UNLOCKED) &&
(entry = io_alloc_cache_get(&ctx->netmsg_cache)) != NULL) {
- struct io_async_msghdr *hdr;
-
hdr = container_of(entry, struct io_async_msghdr, cache);
+ hdr->free_iov = NULL;
req->flags |= REQ_F_ASYNC_DATA;
req->async_data = hdr;
return hdr;
}
- if (!io_alloc_async_data(req))
- return req->async_data;
-
+ if (!io_alloc_async_data(req)) {
+ hdr = req->async_data;
+ hdr->free_iov = NULL;
+ return hdr;
+ }
return NULL;
}
+static inline struct io_async_msghdr *io_msg_alloc_async_prep(struct io_kiocb *req)
+{
+ /* ->prep_async is always called from the submission context */
+ return io_msg_alloc_async(req, 0);
+}
+
static int io_setup_async_msg(struct io_kiocb *req,
struct io_async_msghdr *kmsg,
unsigned int issue_flags)
{
- struct io_async_msghdr *async_msg = req->async_data;
+ struct io_async_msghdr *async_msg;
- if (async_msg)
+ if (req_has_async_data(req))
return -EAGAIN;
- async_msg = io_recvmsg_alloc_async(req, issue_flags);
+ async_msg = io_msg_alloc_async(req, issue_flags);
if (!async_msg) {
kfree(kmsg->free_iov);
return -ENOMEM;
}
req->flags |= REQ_F_NEED_CLEANUP;
memcpy(async_msg, kmsg, sizeof(*kmsg));
- async_msg->msg.msg_name = &async_msg->addr;
+ if (async_msg->msg.msg_name)
+ async_msg->msg.msg_name = &async_msg->addr;
/* if we're using fast_iov, set it to the new one */
- if (!async_msg->free_iov)
- async_msg->msg.msg_iter.iov = async_msg->fast_iov;
+ if (!kmsg->free_iov) {
+ size_t fast_idx = kmsg->msg.msg_iter.iov - kmsg->fast_iov;
+ async_msg->msg.msg_iter.iov = &async_msg->fast_iov[fast_idx];
+ }
return -EAGAIN;
}
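Why the hunk above rebases by index: memcpy() duplicates the embedded fast_iov array, but an iterator pointer into the old array keeps pointing at the source. A minimal standalone illustration of the pattern, with hypothetical types rather than the io_uring structs:

	#include <stddef.h>
	#include <string.h>
	#include <sys/uio.h>

	struct buf_state {
		struct iovec fast_iov[8];
		const struct iovec *cur;	/* may point into fast_iov */
	};

	static void copy_state(struct buf_state *dst, const struct buf_state *src)
	{
		memcpy(dst, src, sizeof(*dst));
		/* rebase by offset, not to element 0: the iterator may already
		 * have consumed some of the leading entries */
		if (src->cur >= src->fast_iov && src->cur < &src->fast_iov[8]) {
			size_t idx = src->cur - src->fast_iov;
			dst->cur = &dst->fast_iov[idx];
		}
	}

The old code reset the pointer to the start of the array, which goes wrong once the iterator has advanced past the first entry.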
@@ -173,7 +178,7 @@ static int io_setup_async_msg(struct io_kiocb *req,
static int io_sendmsg_copy_hdr(struct io_kiocb *req,
struct io_async_msghdr *iomsg)
{
- struct io_sr_msg *sr = io_kiocb_to_cmd(req);
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
iomsg->msg.msg_name = &iomsg->addr;
iomsg->free_iov = iomsg->fast_iov;
@@ -181,10 +186,43 @@ static int io_sendmsg_copy_hdr(struct io_kiocb *req,
&iomsg->free_iov);
}
+int io_send_prep_async(struct io_kiocb *req)
+{
+ struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
+ struct io_async_msghdr *io;
+ int ret;
+
+ if (!zc->addr || req_has_async_data(req))
+ return 0;
+ io = io_msg_alloc_async_prep(req);
+ if (!io)
+ return -ENOMEM;
+ ret = move_addr_to_kernel(zc->addr, zc->addr_len, &io->addr);
+ return ret;
+}
+
+static int io_setup_async_addr(struct io_kiocb *req,
+ struct sockaddr_storage *addr_storage,
+ unsigned int issue_flags)
+{
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+ struct io_async_msghdr *io;
+
+ if (!sr->addr || req_has_async_data(req))
+ return -EAGAIN;
+ io = io_msg_alloc_async(req, issue_flags);
+ if (!io)
+ return -ENOMEM;
+ memcpy(&io->addr, addr_storage, sizeof(io->addr));
+ return -EAGAIN;
+}
+
int io_sendmsg_prep_async(struct io_kiocb *req)
{
int ret;
+ if (!io_msg_alloc_async_prep(req))
+ return -ENOMEM;
ret = io_sendmsg_copy_hdr(req, req->async_data);
if (!ret)
req->flags |= REQ_F_NEED_CLEANUP;
@@ -200,10 +238,16 @@ void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req)
int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- struct io_sr_msg *sr = io_kiocb_to_cmd(req);
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
- if (unlikely(sqe->file_index || sqe->addr2))
+ if (req->opcode == IORING_OP_SEND) {
+ if (READ_ONCE(sqe->__pad3[0]))
+ return -EINVAL;
+ sr->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
+ sr->addr_len = READ_ONCE(sqe->addr_len);
+ } else if (sqe->addr2 || sqe->file_index) {
return -EINVAL;
+ }
sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
sr->len = READ_ONCE(sqe->len);
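With this, IORING_OP_SEND accepts a destination address through addr2/addr_len and can act like sendto(2). A hedged userspace sketch using liburing helpers (io_uring_prep_send_set_addr() fills sqe->addr2 and sqe->addr_len in recent liburing; error handling omitted):

	#include <liburing.h>
	#include <netinet/in.h>

	static void queue_sendto(struct io_uring *ring, int sockfd,
				 const void *buf, size_t len,
				 const struct sockaddr_in *dst)
	{
		struct io_uring_sqe *sqe = io_uring_get_sqe(ring);

		io_uring_prep_send(sqe, sockfd, buf, len, 0);
		/* maps to the sqe->addr2 / sqe->addr_len reads in the
		 * prep code above */
		io_uring_prep_send_set_addr(sqe, (const struct sockaddr *)dst,
					    sizeof(*dst));
	}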
@@ -224,7 +268,7 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
{
- struct io_sr_msg *sr = io_kiocb_to_cmd(req);
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
struct io_async_msghdr iomsg, *kmsg;
struct socket *sock;
unsigned flags;
@@ -259,13 +303,13 @@ int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
if (ret < min_ret) {
if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
return io_setup_async_msg(req, kmsg, issue_flags);
- if (ret == -ERESTARTSYS)
- ret = -EINTR;
if (ret > 0 && io_net_retry(sock, flags)) {
sr->done_io += ret;
req->flags |= REQ_F_PARTIAL_IO;
return io_setup_async_msg(req, kmsg, issue_flags);
}
+ if (ret == -ERESTARTSYS)
+ ret = -EINTR;
req_set_fail(req);
}
/* fast path, check for non-NULL to avoid function call */
@@ -283,7 +327,8 @@ int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
int io_send(struct io_kiocb *req, unsigned int issue_flags)
{
- struct io_sr_msg *sr = io_kiocb_to_cmd(req);
+ struct sockaddr_storage __address;
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
struct msghdr msg;
struct iovec iov;
struct socket *sock;
@@ -291,9 +336,29 @@ int io_send(struct io_kiocb *req, unsigned int issue_flags)
int min_ret = 0;
int ret;
+ msg.msg_name = NULL;
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_namelen = 0;
+ msg.msg_ubuf = NULL;
+
+ if (sr->addr) {
+ if (req_has_async_data(req)) {
+ struct io_async_msghdr *io = req->async_data;
+
+ msg.msg_name = &io->addr;
+ } else {
+ ret = move_addr_to_kernel(sr->addr, sr->addr_len, &__address);
+ if (unlikely(ret < 0))
+ return ret;
+ msg.msg_name = (struct sockaddr *)&__address;
+ }
+ msg.msg_namelen = sr->addr_len;
+ }
+
if (!(req->flags & REQ_F_POLLED) &&
(sr->flags & IORING_RECVSEND_POLL_FIRST))
- return -EAGAIN;
+ return io_setup_async_addr(req, &__address, issue_flags);
sock = sock_from_file(req->file);
if (unlikely(!sock))
@@ -303,12 +368,6 @@ int io_send(struct io_kiocb *req, unsigned int issue_flags)
if (unlikely(ret))
return ret;
- msg.msg_name = NULL;
- msg.msg_control = NULL;
- msg.msg_controllen = 0;
- msg.msg_namelen = 0;
- msg.msg_ubuf = NULL;
-
flags = sr->msg_flags;
if (issue_flags & IO_URING_F_NONBLOCK)
flags |= MSG_DONTWAIT;
@@ -319,16 +378,17 @@ int io_send(struct io_kiocb *req, unsigned int issue_flags)
ret = sock_sendmsg(sock, &msg);
if (ret < min_ret) {
if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
- return -EAGAIN;
- if (ret == -ERESTARTSYS)
- ret = -EINTR;
+ return io_setup_async_addr(req, &__address, issue_flags);
+
if (ret > 0 && io_net_retry(sock, flags)) {
sr->len -= ret;
sr->buf += ret;
sr->done_io += ret;
req->flags |= REQ_F_PARTIAL_IO;
- return -EAGAIN;
+ return io_setup_async_addr(req, &__address, issue_flags);
}
+ if (ret == -ERESTARTSYS)
+ ret = -EINTR;
req_set_fail(req);
}
if (ret >= 0)
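A worked trace of the done_io accounting above, with illustrative numbers: ask to send 8192 bytes and sock_sendmsg() returns 3000, short of min_ret. The request records sr->done_io = 3000, advances sr->buf, shrinks sr->len to 5192, and re-arms. When the retry completes with 5192, the final "ret += sr->done_io" posts 8192, so the CQE reports the whole transfer rather than only the last call's result.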
@@ -357,7 +417,7 @@ static bool io_recvmsg_multishot_overflow(struct io_async_msghdr *iomsg)
static int __io_recvmsg_copy_hdr(struct io_kiocb *req,
struct io_async_msghdr *iomsg)
{
- struct io_sr_msg *sr = io_kiocb_to_cmd(req);
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
struct user_msghdr msg;
int ret;
@@ -404,7 +464,7 @@ static int __io_recvmsg_copy_hdr(struct io_kiocb *req,
static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
struct io_async_msghdr *iomsg)
{
- struct io_sr_msg *sr = io_kiocb_to_cmd(req);
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
struct compat_msghdr msg;
struct compat_iovec __user *uiov;
int ret;
@@ -422,7 +482,6 @@ static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
if (msg.msg_iovlen == 0) {
sr->len = 0;
- iomsg->free_iov = NULL;
} else if (msg.msg_iovlen > 1) {
return -EINVAL;
} else {
@@ -433,7 +492,6 @@ static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
if (clen < 0)
return -EINVAL;
sr->len = clen;
- iomsg->free_iov = NULL;
}
if (req->flags & REQ_F_APOLL_MULTISHOT) {
@@ -472,6 +530,8 @@ int io_recvmsg_prep_async(struct io_kiocb *req)
{
int ret;
+ if (!io_msg_alloc_async_prep(req))
+ return -ENOMEM;
ret = io_recvmsg_copy_hdr(req, req->async_data);
if (!ret)
req->flags |= REQ_F_NEED_CLEANUP;
@@ -482,7 +542,7 @@ int io_recvmsg_prep_async(struct io_kiocb *req)
int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- struct io_sr_msg *sr = io_kiocb_to_cmd(req);
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
if (unlikely(sqe->file_index || sqe->addr2))
return -EINVAL;
@@ -517,7 +577,7 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
static inline void io_recv_prep_retry(struct io_kiocb *req)
{
- struct io_sr_msg *sr = io_kiocb_to_cmd(req);
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
sr->done_io = 0;
sr->len = 0; /* get from the provided buffer */
@@ -575,12 +635,12 @@ static int io_recvmsg_prep_multishot(struct io_async_msghdr *kmsg,
if (kmsg->controllen) {
unsigned long control = ubuf + hdr - kmsg->controllen;
- kmsg->msg.msg_control_user = (void *) control;
+ kmsg->msg.msg_control_user = (void __user *) control;
kmsg->msg.msg_controllen = kmsg->controllen;
}
sr->buf = *buf; /* stash for later copy */
- *buf = (void *) (ubuf + hdr);
+ *buf = (void __user *) (ubuf + hdr);
kmsg->payloadlen = *len = *len - hdr;
return 0;
}
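The pointer arithmetic above carves every part of a multishot recvmsg result out of a single provided buffer. The layout, as far as it can be inferred from this hunk and the io_uring_recvmsg_out UAPI struct (hedged; hdr = sizeof(struct io_uring_recvmsg_out) + namelen + controllen):

	/*
	 *   ubuf                    -> struct io_uring_recvmsg_out
	 *                              (namelen/controllen/payloadlen/flags)
	 *   ubuf + sizeof(out)      -> msg_name  (kmsg->namelen bytes)
	 *   ubuf + hdr - controllen -> control   (kmsg->controllen bytes)
	 *   ubuf + hdr              -> payload   (*len - hdr bytes)
	 */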
@@ -646,7 +706,7 @@ static int io_recvmsg_multishot(struct socket *sock, struct io_sr_msg *io,
int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
{
- struct io_sr_msg *sr = io_kiocb_to_cmd(req);
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
struct io_async_msghdr iomsg, *kmsg;
struct socket *sock;
unsigned int cflags;
@@ -719,13 +779,13 @@ retry_multishot:
}
return ret;
}
- if (ret == -ERESTARTSYS)
- ret = -EINTR;
if (ret > 0 && io_net_retry(sock, flags)) {
sr->done_io += ret;
req->flags |= REQ_F_PARTIAL_IO;
return io_setup_async_msg(req, kmsg, issue_flags);
}
+ if (ret == -ERESTARTSYS)
+ ret = -EINTR;
req_set_fail(req);
} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
req_set_fail(req);
@@ -758,7 +818,7 @@ retry_multishot:
int io_recv(struct io_kiocb *req, unsigned int issue_flags)
{
- struct io_sr_msg *sr = io_kiocb_to_cmd(req);
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
struct msghdr msg;
struct socket *sock;
struct iovec iov;
@@ -815,8 +875,6 @@ retry_multishot:
return -EAGAIN;
}
- if (ret == -ERESTARTSYS)
- ret = -EINTR;
if (ret > 0 && io_net_retry(sock, flags)) {
sr->len -= ret;
sr->buf += ret;
@@ -824,6 +882,8 @@ retry_multishot:
req->flags |= REQ_F_PARTIAL_IO;
return -EAGAIN;
}
+ if (ret == -ERESTARTSYS)
+ ret = -EINTR;
req_set_fail(req);
} else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
out_free:
@@ -847,18 +907,46 @@ out_free:
return ret;
}
-int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+void io_send_zc_cleanup(struct io_kiocb *req)
{
- struct io_sendzc *zc = io_kiocb_to_cmd(req);
+ struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
+ struct io_async_msghdr *io;
+
+ if (req_has_async_data(req)) {
+ io = req->async_data;
+ /* might be ->fast_iov if *msg_copy_hdr failed */
+ if (io->free_iov != io->fast_iov)
+ kfree(io->free_iov);
+ }
+ if (zc->notif) {
+ io_notif_flush(zc->notif);
+ zc->notif = NULL;
+ }
+}
+
+int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+ struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
struct io_ring_ctx *ctx = req->ctx;
+ struct io_kiocb *notif;
- if (READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3))
+ if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
+ return -EINVAL;
+ /* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */
+ if (req->flags & REQ_F_CQE_SKIP)
return -EINVAL;
zc->flags = READ_ONCE(sqe->ioprio);
if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST |
- IORING_RECVSEND_FIXED_BUF | IORING_RECVSEND_NOTIF_FLUSH))
+ IORING_RECVSEND_FIXED_BUF))
return -EINVAL;
+ notif = zc->notif = io_alloc_notif(ctx);
+ if (!notif)
+ return -ENOMEM;
+ notif->cqe.user_data = req->cqe.user_data;
+ notif->cqe.res = 0;
+ notif->cqe.flags = IORING_CQE_F_NOTIF;
+ req->flags |= REQ_F_NEED_CLEANUP;
if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
unsigned idx = READ_ONCE(sqe->buf_index);
@@ -866,18 +954,28 @@ int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return -EFAULT;
idx = array_index_nospec(idx, ctx->nr_user_bufs);
req->imu = READ_ONCE(ctx->user_bufs[idx]);
- io_req_set_rsrc_node(req, ctx, 0);
+ io_req_set_rsrc_node(notif, ctx, 0);
+ }
+
+ if (req->opcode == IORING_OP_SEND_ZC) {
+ if (READ_ONCE(sqe->__pad3[0]))
+ return -EINVAL;
+ zc->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
+ zc->addr_len = READ_ONCE(sqe->addr_len);
+ } else {
+ if (unlikely(sqe->addr2 || sqe->file_index))
+ return -EINVAL;
+ if (unlikely(zc->flags & IORING_RECVSEND_FIXED_BUF))
+ return -EINVAL;
}
zc->buf = u64_to_user_ptr(READ_ONCE(sqe->addr));
zc->len = READ_ONCE(sqe->len);
zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
- zc->slot_idx = READ_ONCE(sqe->notification_idx);
if (zc->msg_flags & MSG_DONTWAIT)
req->flags |= REQ_F_NOWAIT;
- zc->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
- zc->addr_len = READ_ONCE(sqe->addr_len);
+ zc->done_io = 0;
#ifdef CONFIG_COMPAT
if (req->ctx->compat)
@@ -886,6 +984,13 @@ int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return 0;
}
+static int io_sg_from_iter_iovec(struct sock *sk, struct sk_buff *skb,
+ struct iov_iter *from, size_t length)
+{
+ skb_zcopy_downgrade_managed(skb);
+ return __zerocopy_sg_from_iter(NULL, sk, skb, from, length);
+}
+
static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb,
struct iov_iter *from, size_t length)
{
@@ -896,13 +1001,10 @@ static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb,
ssize_t copied = 0;
unsigned long truesize = 0;
- if (!shinfo->nr_frags)
+ if (!frag)
shinfo->flags |= SKBFL_MANAGED_FRAG_REFS;
-
- if (!skb_zcopy_managed(skb) || !iov_iter_is_bvec(from)) {
- skb_zcopy_downgrade_managed(skb);
+ else if (unlikely(!skb_zcopy_managed(skb)))
return __zerocopy_sg_from_iter(NULL, sk, skb, from, length);
- }
bi.bi_size = min(from->count, length);
bi.bi_bvec_done = from->iov_offset;
@@ -923,7 +1025,7 @@ static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb,
shinfo->nr_frags = frag;
from->bvec += bi.bi_idx;
from->nr_segs -= bi.bi_idx;
- from->count = bi.bi_size;
+ from->count -= copied;
from->iov_offset = bi.bi_bvec_done;
skb->data_len += copied;
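The from->count change above matters whenever the skb clamps the copy. A hedged numeric trace: suppose the iterator holds 40960 bytes but this skb only accepts length = 4096. Then bi.bi_size = min(40960, 4096) = 4096 and the loop copies all of it, leaving bi.bi_size == 0. The old "from->count = bi.bi_size" would set count to 0, silently dropping the 36864 bytes never offered to this skb; "from->count -= copied" leaves 36864, so the next skb resumes where this one stopped.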
@@ -940,62 +1042,58 @@ static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb,
return ret;
}
-int io_sendzc(struct io_kiocb *req, unsigned int issue_flags)
+int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
{
- struct sockaddr_storage address;
- struct io_ring_ctx *ctx = req->ctx;
- struct io_sendzc *zc = io_kiocb_to_cmd(req);
- struct io_notif_slot *notif_slot;
- struct io_kiocb *notif;
+ struct sockaddr_storage __address;
+ struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
struct msghdr msg;
struct iovec iov;
struct socket *sock;
unsigned msg_flags;
int ret, min_ret = 0;
- if (!(req->flags & REQ_F_POLLED) &&
- (zc->flags & IORING_RECVSEND_POLL_FIRST))
- return -EAGAIN;
-
- if (issue_flags & IO_URING_F_UNLOCKED)
- return -EAGAIN;
sock = sock_from_file(req->file);
if (unlikely(!sock))
return -ENOTSOCK;
- notif_slot = io_get_notif_slot(ctx, zc->slot_idx);
- if (!notif_slot)
- return -EINVAL;
- notif = io_get_notif(ctx, notif_slot);
- if (!notif)
- return -ENOMEM;
-
msg.msg_name = NULL;
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_namelen = 0;
+ if (zc->addr) {
+ if (req_has_async_data(req)) {
+ struct io_async_msghdr *io = req->async_data;
+
+ msg.msg_name = &io->addr;
+ } else {
+ ret = move_addr_to_kernel(zc->addr, zc->addr_len, &__address);
+ if (unlikely(ret < 0))
+ return ret;
+ msg.msg_name = (struct sockaddr *)&__address;
+ }
+ msg.msg_namelen = zc->addr_len;
+ }
+
+ if (!(req->flags & REQ_F_POLLED) &&
+ (zc->flags & IORING_RECVSEND_POLL_FIRST))
+ return io_setup_async_addr(req, &__address, issue_flags);
+
if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
ret = io_import_fixed(WRITE, &msg.msg_iter, req->imu,
(u64)(uintptr_t)zc->buf, zc->len);
if (unlikely(ret))
- return ret;
+ return ret;
+ msg.sg_from_iter = io_sg_from_iter;
} else {
ret = import_single_range(WRITE, zc->buf, zc->len, &iov,
&msg.msg_iter);
if (unlikely(ret))
return ret;
- ret = io_notif_account_mem(notif, zc->len);
+ ret = io_notif_account_mem(zc->notif, zc->len);
if (unlikely(ret))
return ret;
- }
-
- if (zc->addr) {
- ret = move_addr_to_kernel(zc->addr, zc->addr_len, &address);
- if (unlikely(ret < 0))
- return ret;
- msg.msg_name = (struct sockaddr *)&address;
- msg.msg_namelen = zc->addr_len;
+ msg.sg_from_iter = io_sg_from_iter_iovec;
}
msg_flags = zc->msg_flags | MSG_ZEROCOPY;
@@ -1005,25 +1103,129 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags)
min_ret = iov_iter_count(&msg.msg_iter);
msg.msg_flags = msg_flags;
- msg.msg_ubuf = &io_notif_to_data(notif)->uarg;
- msg.sg_from_iter = io_sg_from_iter;
+ msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg;
ret = sock_sendmsg(sock, &msg);
if (unlikely(ret < min_ret)) {
if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
- return -EAGAIN;
- return ret == -ERESTARTSYS ? -EINTR : ret;
+ return io_setup_async_addr(req, &__address, issue_flags);
+
+ if (ret > 0 && io_net_retry(sock, msg.msg_flags)) {
+ zc->len -= ret;
+ zc->buf += ret;
+ zc->done_io += ret;
+ req->flags |= REQ_F_PARTIAL_IO;
+ return io_setup_async_addr(req, &__address, issue_flags);
+ }
+ if (ret == -ERESTARTSYS)
+ ret = -EINTR;
+ req_set_fail(req);
}
- if (zc->flags & IORING_RECVSEND_NOTIF_FLUSH)
- io_notif_slot_flush_submit(notif_slot, 0);
- io_req_set_res(req, ret, 0);
+ if (ret >= 0)
+ ret += zc->done_io;
+ else if (zc->done_io)
+ ret = zc->done_io;
+
+ /*
+ * If we're in io-wq we can't rely on tw ordering guarantees, defer
+ * flushing notif to io_send_zc_cleanup()
+ */
+ if (!(issue_flags & IO_URING_F_UNLOCKED)) {
+ io_notif_flush(zc->notif);
+ req->flags &= ~REQ_F_NEED_CLEANUP;
+ }
+ io_req_set_res(req, ret, IORING_CQE_F_MORE);
+ return IOU_OK;
+}
+
+int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
+{
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+ struct io_async_msghdr iomsg, *kmsg;
+ struct socket *sock;
+ unsigned flags;
+ int ret, min_ret = 0;
+
+ sock = sock_from_file(req->file);
+ if (unlikely(!sock))
+ return -ENOTSOCK;
+
+ if (req_has_async_data(req)) {
+ kmsg = req->async_data;
+ } else {
+ ret = io_sendmsg_copy_hdr(req, &iomsg);
+ if (ret)
+ return ret;
+ kmsg = &iomsg;
+ }
+
+ if (!(req->flags & REQ_F_POLLED) &&
+ (sr->flags & IORING_RECVSEND_POLL_FIRST))
+ return io_setup_async_msg(req, kmsg, issue_flags);
+
+ flags = sr->msg_flags | MSG_ZEROCOPY;
+ if (issue_flags & IO_URING_F_NONBLOCK)
+ flags |= MSG_DONTWAIT;
+ if (flags & MSG_WAITALL)
+ min_ret = iov_iter_count(&kmsg->msg.msg_iter);
+
+ kmsg->msg.msg_ubuf = &io_notif_to_data(sr->notif)->uarg;
+ kmsg->msg.sg_from_iter = io_sg_from_iter_iovec;
+ ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
+
+ if (unlikely(ret < min_ret)) {
+ if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
+ return io_setup_async_msg(req, kmsg, issue_flags);
+
+ if (ret > 0 && io_net_retry(sock, flags)) {
+ sr->done_io += ret;
+ req->flags |= REQ_F_PARTIAL_IO;
+ return io_setup_async_msg(req, kmsg, issue_flags);
+ }
+ if (ret == -ERESTARTSYS)
+ ret = -EINTR;
+ req_set_fail(req);
+ }
+ /* fast path, check for non-NULL to avoid function call */
+ if (kmsg->free_iov) {
+ kfree(kmsg->free_iov);
+ kmsg->free_iov = NULL;
+ }
+
+ io_netmsg_recycle(req, issue_flags);
+ if (ret >= 0)
+ ret += sr->done_io;
+ else if (sr->done_io)
+ ret = sr->done_io;
+
+ /*
+ * If we're in io-wq we can't rely on tw ordering guarantees, defer
+ * flushing notif to io_send_zc_cleanup()
+ */
+ if (!(issue_flags & IO_URING_F_UNLOCKED)) {
+ io_notif_flush(sr->notif);
+ req->flags &= ~REQ_F_NEED_CLEANUP;
+ }
+ io_req_set_res(req, ret, IORING_CQE_F_MORE);
return IOU_OK;
}
+void io_sendrecv_fail(struct io_kiocb *req)
+{
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+
+ if (req->flags & REQ_F_PARTIAL_IO)
+ req->cqe.res = sr->done_io;
+
+ if ((req->flags & REQ_F_NEED_CLEANUP) &&
+ (req->opcode == IORING_OP_SEND_ZC || req->opcode == IORING_OP_SENDMSG_ZC))
+ req->cqe.flags |= IORING_CQE_F_MORE;
+}
+
int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- struct io_accept *accept = io_kiocb_to_cmd(req);
+ struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
unsigned flags;
if (sqe->len || sqe->buf_index)
@@ -1057,7 +1259,7 @@ int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
int io_accept(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_ring_ctx *ctx = req->ctx;
- struct io_accept *accept = io_kiocb_to_cmd(req);
+ struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0;
bool fixed = !!accept->file_slot;
@@ -1115,7 +1317,7 @@ retry:
int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- struct io_socket *sock = io_kiocb_to_cmd(req);
+ struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
if (sqe->addr || sqe->rw_flags || sqe->buf_index)
return -EINVAL;
@@ -1136,7 +1338,7 @@ int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
int io_socket(struct io_kiocb *req, unsigned int issue_flags)
{
- struct io_socket *sock = io_kiocb_to_cmd(req);
+ struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
bool fixed = !!sock->file_slot;
struct file *file;
int ret, fd;
@@ -1170,14 +1372,14 @@ int io_socket(struct io_kiocb *req, unsigned int issue_flags)
int io_connect_prep_async(struct io_kiocb *req)
{
struct io_async_connect *io = req->async_data;
- struct io_connect *conn = io_kiocb_to_cmd(req);
+ struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);
return move_addr_to_kernel(conn->addr, conn->addr_len, &io->address);
}
int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- struct io_connect *conn = io_kiocb_to_cmd(req);
+ struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);
if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)
return -EINVAL;
@@ -1189,7 +1391,7 @@ int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
int io_connect(struct io_kiocb *req, unsigned int issue_flags)
{
- struct io_connect *connect = io_kiocb_to_cmd(req);
+ struct io_connect *connect = io_kiocb_to_cmd(req, struct io_connect);
struct io_async_connect __io, *io;
unsigned file_flags;
int ret;
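Taken together, a zerocopy send now posts two CQEs: the completion, flagged IORING_CQE_F_MORE by io_req_set_res() above, and a later notification flagged IORING_CQE_F_NOTIF once the kernel no longer touches the pages. A hedged userspace sketch of reaping both with liburing (error handling omitted):

	#include <liburing.h>

	static void send_zc_sync(struct io_uring *ring, int sockfd,
				 const void *buf, size_t len)
	{
		struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
		struct io_uring_cqe *cqe;
		int pending = 2;

		io_uring_prep_send_zc(sqe, sockfd, buf, len, 0, 0);
		io_uring_submit(ring);

		while (pending > 0) {
			io_uring_wait_cqe(ring, &cqe);
			if (cqe->flags & IORING_CQE_F_NOTIF)
				pending--;	/* buffer reusable now */
			else if (cqe->flags & IORING_CQE_F_MORE)
				pending--;	/* send result; notif follows */
			else
				pending = 0;	/* failed early, no notif */
			io_uring_cqe_seen(ring, cqe);
		}
	}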