Diffstat (limited to 'io_uring/net.c')
-rw-r--r--  io_uring/net.c  511
1 file changed, 284 insertions(+), 227 deletions(-)
diff --git a/io_uring/net.c b/io_uring/net.c
index 5d0b56ff50ee..8944eb679024 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -16,6 +16,7 @@
#include "net.h"
#include "notif.h"
#include "rsrc.h"
+#include "zcrx.h"
#if defined(CONFIG_NET)
struct io_shutdown {
@@ -75,7 +76,7 @@ struct io_sr_msg {
u16 flags;
/* initialised and used only by !msg send variants */
u16 buf_group;
- u16 buf_index;
+ bool retry;
void __user *msg_control;
/* used only for send zerocopy */
struct io_kiocb *notif;
@@ -88,6 +89,14 @@ struct io_sr_msg {
*/
#define MULTISHOT_MAX_RETRY 32
+struct io_recvzc {
+ struct file *file;
+ unsigned msg_flags;
+ u16 flags;
+ u32 len;
+ struct io_zcrx_ifq *ifq;
+};
+
int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
@@ -127,11 +136,8 @@ static bool io_net_retry(struct socket *sock, int flags)
static void io_netmsg_iovec_free(struct io_async_msghdr *kmsg)
{
- if (kmsg->free_iov) {
- kfree(kmsg->free_iov);
- kmsg->free_iov_nr = 0;
- kmsg->free_iov = NULL;
- }
+ if (kmsg->vec.iovec)
+ io_vec_free(&kmsg->vec);
}
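/*
 * Sketch (not part of the patch): io_vec_free() is assumed, from its
 * call sites in this file, to look roughly like the below, with
 * struct iou_vec { struct iovec *iovec; unsigned nr; } inferred from
 * the kmsg->vec field accesses rather than confirmed by this diff:
 *
 *	void io_vec_free(struct iou_vec *iv)
 *	{
 *		kfree(iv->iovec);
 *		iv->iovec = NULL;
 *		iv->nr = 0;
 *	}
 *
 * so the kmsg->vec.iovec check above only skips a redundant call.
 */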
static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
@@ -145,10 +151,13 @@ static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
}
/* Let normal cleanup path reap it if we fail adding to the cache */
- io_alloc_cache_kasan(&hdr->free_iov, &hdr->free_iov_nr);
+ io_alloc_cache_vec_kasan(&hdr->vec);
+ if (hdr->vec.nr > IO_VEC_CACHE_SOFT_CAP)
+ io_vec_free(&hdr->vec);
+
if (io_alloc_cache_put(&req->ctx->netmsg_cache, hdr)) {
req->async_data = NULL;
- req->flags &= ~REQ_F_ASYNC_DATA;
+ req->flags &= ~(REQ_F_ASYNC_DATA|REQ_F_NEED_CLEANUP);
}
}
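/*
 * IO_VEC_CACHE_SOFT_CAP is not visible in these hunks; the value below
 * is an assumption. The recycle path then reads: KASAN-poison the
 * cached iovec, free it outright if it grew past the cap, and park the
 * msghdr in the per-ctx netmsg_cache, clearing REQ_F_NEED_CLEANUP
 * since the cache now owns the allocation:
 *
 *	#define IO_VEC_CACHE_SOFT_CAP	256	(assumed value)
 */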
@@ -162,7 +171,7 @@ static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req)
return NULL;
/* If the async data was cached, we might have an iov cached inside. */
- if (hdr->free_iov)
+ if (hdr->vec.iovec)
req->flags |= REQ_F_NEED_CLEANUP;
return hdr;
}
@@ -173,10 +182,7 @@ static void io_net_vec_assign(struct io_kiocb *req, struct io_async_msghdr *kmsg
{
if (iov) {
req->flags |= REQ_F_NEED_CLEANUP;
- kmsg->free_iov_nr = kmsg->msg.msg_iter.nr_segs;
- if (kmsg->free_iov)
- kfree(kmsg->free_iov);
- kmsg->free_iov = iov;
+ io_vec_reset_iovec(&kmsg->vec, iov, kmsg->msg.msg_iter.nr_segs);
}
}
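/*
 * io_vec_reset_iovec() is likewise assumed from this call site: it
 * must drop any previously cached iovec before adopting the freshly
 * imported one, roughly:
 *
 *	static inline void io_vec_reset_iovec(struct iou_vec *iv,
 *					      struct iovec *iovec,
 *					      unsigned nr)
 *	{
 *		io_vec_free(iv);
 *		iv->iovec = iovec;
 *		iv->nr = nr;
 *	}
 */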
@@ -187,120 +193,135 @@ static inline void io_mshot_prep_retry(struct io_kiocb *req,
req->flags &= ~REQ_F_BL_EMPTY;
sr->done_io = 0;
+ sr->retry = false;
sr->len = 0; /* get from the provided buffer */
req->buf_index = sr->buf_group;
}
-#ifdef CONFIG_COMPAT
-static int io_compat_msg_copy_hdr(struct io_kiocb *req,
- struct io_async_msghdr *iomsg,
- struct compat_msghdr *msg, int ddir)
+static int io_net_import_vec(struct io_kiocb *req, struct io_async_msghdr *iomsg,
+ const struct iovec __user *uiov, unsigned uvec_seg,
+ int ddir)
{
- struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
- struct compat_iovec __user *uiov;
struct iovec *iov;
int ret, nr_segs;
- if (iomsg->free_iov) {
- nr_segs = iomsg->free_iov_nr;
- iov = iomsg->free_iov;
+ if (iomsg->vec.iovec) {
+ nr_segs = iomsg->vec.nr;
+ iov = iomsg->vec.iovec;
} else {
- iov = &iomsg->fast_iov;
nr_segs = 1;
+ iov = &iomsg->fast_iov;
}
+ ret = __import_iovec(ddir, uiov, uvec_seg, nr_segs, &iov,
+ &iomsg->msg.msg_iter, io_is_compat(req->ctx));
+ if (unlikely(ret < 0))
+ return ret;
+ io_net_vec_assign(req, iomsg, iov);
+ return 0;
+}
+
+static int io_compat_msg_copy_hdr(struct io_kiocb *req,
+ struct io_async_msghdr *iomsg,
+ struct compat_msghdr *msg, int ddir,
+ struct sockaddr __user **save_addr)
+{
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+ struct compat_iovec __user *uiov;
+ int ret;
+
if (copy_from_user(msg, sr->umsg_compat, sizeof(*msg)))
return -EFAULT;
+ ret = __get_compat_msghdr(&iomsg->msg, msg, save_addr);
+ if (ret)
+ return ret;
+
uiov = compat_ptr(msg->msg_iov);
if (req->flags & REQ_F_BUFFER_SELECT) {
- compat_ssize_t clen;
-
if (msg->msg_iovlen == 0) {
- sr->len = iov->iov_len = 0;
- iov->iov_base = NULL;
+ sr->len = 0;
} else if (msg->msg_iovlen > 1) {
return -EINVAL;
} else {
- if (!access_ok(uiov, sizeof(*uiov)))
- return -EFAULT;
- if (__get_user(clen, &uiov->iov_len))
+ struct compat_iovec tmp_iov;
+
+ if (copy_from_user(&tmp_iov, uiov, sizeof(tmp_iov)))
return -EFAULT;
- if (clen < 0)
- return -EINVAL;
- sr->len = clen;
+ sr->len = tmp_iov.iov_len;
}
-
- return 0;
}
-
- ret = __import_iovec(ddir, (struct iovec __user *)uiov, msg->msg_iovlen,
- nr_segs, &iov, &iomsg->msg.msg_iter, true);
- if (unlikely(ret < 0))
- return ret;
-
- io_net_vec_assign(req, iomsg, iov);
return 0;
}
-#endif
-static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg,
- struct user_msghdr *msg, int ddir)
+static int io_copy_msghdr_from_user(struct user_msghdr *msg,
+ struct user_msghdr __user *umsg)
{
- struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
- struct user_msghdr __user *umsg = sr->umsg;
- struct iovec *iov;
- int ret, nr_segs;
-
- if (iomsg->free_iov) {
- nr_segs = iomsg->free_iov_nr;
- iov = iomsg->free_iov;
- } else {
- iov = &iomsg->fast_iov;
- nr_segs = 1;
- }
-
if (!user_access_begin(umsg, sizeof(*umsg)))
return -EFAULT;
-
- ret = -EFAULT;
unsafe_get_user(msg->msg_name, &umsg->msg_name, ua_end);
unsafe_get_user(msg->msg_namelen, &umsg->msg_namelen, ua_end);
unsafe_get_user(msg->msg_iov, &umsg->msg_iov, ua_end);
unsafe_get_user(msg->msg_iovlen, &umsg->msg_iovlen, ua_end);
unsafe_get_user(msg->msg_control, &umsg->msg_control, ua_end);
unsafe_get_user(msg->msg_controllen, &umsg->msg_controllen, ua_end);
+ user_access_end();
+ return 0;
+ua_end:
+ user_access_end();
+ return -EFAULT;
+}
+
+static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg,
+ struct user_msghdr *msg, int ddir,
+ struct sockaddr __user **save_addr)
+{
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+ struct user_msghdr __user *umsg = sr->umsg;
+ int ret;
+
+ iomsg->msg.msg_name = &iomsg->addr;
+ iomsg->msg.msg_iter.nr_segs = 0;
+
+ if (io_is_compat(req->ctx)) {
+ struct compat_msghdr cmsg;
+
+ ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ddir, save_addr);
+ if (ret)
+ return ret;
+
+ memset(msg, 0, sizeof(*msg));
+ msg->msg_namelen = cmsg.msg_namelen;
+ msg->msg_controllen = cmsg.msg_controllen;
+ msg->msg_iov = compat_ptr(cmsg.msg_iov);
+ msg->msg_iovlen = cmsg.msg_iovlen;
+ return 0;
+ }
+
+ ret = io_copy_msghdr_from_user(msg, umsg);
+ if (unlikely(ret))
+ return ret;
+
msg->msg_flags = 0;
+ ret = __copy_msghdr(&iomsg->msg, msg, save_addr);
+ if (ret)
+ return ret;
+
if (req->flags & REQ_F_BUFFER_SELECT) {
if (msg->msg_iovlen == 0) {
- sr->len = iov->iov_len = 0;
- iov->iov_base = NULL;
+ sr->len = 0;
} else if (msg->msg_iovlen > 1) {
- ret = -EINVAL;
- goto ua_end;
+ return -EINVAL;
} else {
struct iovec __user *uiov = msg->msg_iov;
+ struct iovec tmp_iov;
- /* we only need the length for provided buffers */
- if (!access_ok(&uiov->iov_len, sizeof(uiov->iov_len)))
- goto ua_end;
- unsafe_get_user(iov->iov_len, &uiov->iov_len, ua_end);
- sr->len = iov->iov_len;
+ if (copy_from_user(&tmp_iov, uiov, sizeof(tmp_iov)))
+ return -EFAULT;
+ sr->len = tmp_iov.iov_len;
}
- ret = 0;
-ua_end:
- user_access_end();
- return ret;
}
-
- user_access_end();
- ret = __import_iovec(ddir, msg->msg_iov, msg->msg_iovlen, nr_segs,
- &iov, &iomsg->msg.msg_iter, false);
- if (unlikely(ret < 0))
- return ret;
-
- io_net_vec_assign(req, iomsg, iov);
return 0;
}
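/*
 * The io_is_compat() calls replacing the old #ifdef CONFIG_COMPAT
 * blocks presumably evaluate to constant false on !CONFIG_COMPAT
 * kernels, letting the compiler discard the compat branch above; a
 * sketch of the assumed helper:
 *
 *	static inline bool io_is_compat(struct io_ring_ctx *ctx)
 *	{
 *		return IS_ENABLED(CONFIG_COMPAT) && unlikely(ctx->compat);
 *	}
 */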
@@ -311,29 +332,13 @@ static int io_sendmsg_copy_hdr(struct io_kiocb *req,
struct user_msghdr msg;
int ret;
- iomsg->msg.msg_name = &iomsg->addr;
- iomsg->msg.msg_iter.nr_segs = 0;
-
-#ifdef CONFIG_COMPAT
- if (unlikely(req->ctx->compat)) {
- struct compat_msghdr cmsg;
-
- ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_SOURCE);
- if (unlikely(ret))
- return ret;
-
- ret = __get_compat_msghdr(&iomsg->msg, &cmsg, NULL);
- sr->msg_control = iomsg->msg.msg_control_user;
- return ret;
- }
-#endif
-
- ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_SOURCE);
+ ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_SOURCE, NULL);
if (unlikely(ret))
return ret;
- ret = __copy_msghdr(&iomsg->msg, &msg, NULL);
-
+ if (!(req->flags & REQ_F_BUFFER_SELECT))
+ ret = io_net_import_vec(req, iomsg, msg.msg_iov, msg.msg_iovlen,
+ ITER_SOURCE);
/* save msg_control as sys_sendmsg() overwrites it */
sr->msg_control = iomsg->msg.msg_control_user;
return ret;
@@ -387,14 +392,31 @@ static int io_sendmsg_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe
{
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
struct io_async_msghdr *kmsg = req->async_data;
+
+ sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
+
+ return io_sendmsg_copy_hdr(req, kmsg);
+}
+
+static int io_sendmsg_zc_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+ struct io_async_msghdr *kmsg = req->async_data;
+ struct user_msghdr msg;
int ret;
+ if (!(sr->flags & IORING_RECVSEND_FIXED_BUF))
+ return io_sendmsg_setup(req, sqe);
+
sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
- ret = io_sendmsg_copy_hdr(req, kmsg);
- if (!ret)
- req->flags |= REQ_F_NEED_CLEANUP;
- return ret;
+ ret = io_msg_copy_hdr(req, kmsg, &msg, ITER_SOURCE, NULL);
+ if (unlikely(ret))
+ return ret;
+ sr->msg_control = kmsg->msg.msg_control_user;
+ kmsg->msg.msg_iter.nr_segs = msg.msg_iovlen;
+
+ return io_prep_reg_iovec(req, &kmsg->vec, msg.msg_iov, msg.msg_iovlen);
}
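/*
 * For the FIXED_BUF sendmsg-zc variant the iovec is no longer imported
 * at prep time: io_prep_reg_iovec() (assumed declaration below) only
 * stashes the user iovec array in kmsg->vec, and the registered-buffer
 * import is deferred to issue time via REQ_F_IMPORT_BUFFER (see the
 * io_sendmsg_zc() hunk further down):
 *
 *	int io_prep_reg_iovec(struct io_kiocb *req, struct iou_vec *iv,
 *			      const struct iovec __user *uvec,
 *			      size_t uvec_segs);
 */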
#define SENDMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_BUNDLE)
@@ -404,6 +426,7 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
sr->done_io = 0;
+ sr->retry = false;
if (req->opcode != IORING_OP_SEND) {
if (sqe->addr2 || sqe->file_index)
@@ -425,12 +448,12 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
sr->msg_flags |= MSG_WAITALL;
sr->buf_group = req->buf_index;
req->buf_list = NULL;
+ req->flags |= REQ_F_MULTISHOT;
}
-#ifdef CONFIG_COMPAT
- if (req->ctx->compat)
+ if (io_is_compat(req->ctx))
sr->msg_flags |= MSG_CMSG_COMPAT;
-#endif
+
if (unlikely(!io_msg_alloc_async(req)))
return -ENOMEM;
if (req->opcode != IORING_OP_SENDMSG)
@@ -441,7 +464,6 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
static void io_req_msg_cleanup(struct io_kiocb *req,
unsigned int issue_flags)
{
- req->flags &= ~REQ_F_NEED_CLEANUP;
io_netmsg_recycle(req, issue_flags);
}
@@ -464,7 +486,7 @@ static int io_bundle_nbufs(struct io_async_msghdr *kmsg, int ret)
if (iter_is_ubuf(&kmsg->msg.msg_iter))
return 1;
- iov = kmsg->free_iov;
+ iov = kmsg->vec.iovec;
if (!iov)
iov = &kmsg->fast_iov;
@@ -580,9 +602,9 @@ static int io_send_select_buffer(struct io_kiocb *req, unsigned int issue_flags,
.nr_iovs = 1,
};
- if (kmsg->free_iov) {
- arg.nr_iovs = kmsg->free_iov_nr;
- arg.iovs = kmsg->free_iov;
+ if (kmsg->vec.iovec) {
+ arg.nr_iovs = kmsg->vec.nr;
+ arg.iovs = kmsg->vec.iovec;
arg.mode = KBUF_MODE_FREE;
}
@@ -595,9 +617,9 @@ static int io_send_select_buffer(struct io_kiocb *req, unsigned int issue_flags,
if (unlikely(ret < 0))
return ret;
- if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) {
- kmsg->free_iov_nr = ret;
- kmsg->free_iov = arg.iovs;
+ if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->vec.iovec) {
+ kmsg->vec.nr = ret;
+ kmsg->vec.iovec = arg.iovs;
req->flags |= REQ_F_NEED_CLEANUP;
}
sr->len = arg.out_len;
@@ -712,34 +734,16 @@ static int io_recvmsg_copy_hdr(struct io_kiocb *req,
struct user_msghdr msg;
int ret;
- iomsg->msg.msg_name = &iomsg->addr;
- iomsg->msg.msg_iter.nr_segs = 0;
-
-#ifdef CONFIG_COMPAT
- if (unlikely(req->ctx->compat)) {
- struct compat_msghdr cmsg;
-
- ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_DEST);
- if (unlikely(ret))
- return ret;
+ ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_DEST, &iomsg->uaddr);
+ if (unlikely(ret))
+ return ret;
- ret = __get_compat_msghdr(&iomsg->msg, &cmsg, &iomsg->uaddr);
+ if (!(req->flags & REQ_F_BUFFER_SELECT)) {
+ ret = io_net_import_vec(req, iomsg, msg.msg_iov, msg.msg_iovlen,
+ ITER_DEST);
if (unlikely(ret))
return ret;
-
- return io_recvmsg_mshot_prep(req, iomsg, cmsg.msg_namelen,
- cmsg.msg_controllen);
}
-#endif
-
- ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_DEST);
- if (unlikely(ret))
- return ret;
-
- ret = __copy_msghdr(&iomsg->msg, &msg, &iomsg->uaddr);
- if (unlikely(ret))
- return ret;
-
return io_recvmsg_mshot_prep(req, iomsg, msg.msg_namelen,
msg.msg_controllen);
}
@@ -773,10 +777,7 @@ static int io_recvmsg_prep_setup(struct io_kiocb *req)
return 0;
}
- ret = io_recvmsg_copy_hdr(req, kmsg);
- if (!ret)
- req->flags |= REQ_F_NEED_CLEANUP;
- return ret;
+ return io_recvmsg_copy_hdr(req, kmsg);
}
#define RECVMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT | \
@@ -787,6 +788,7 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
sr->done_io = 0;
+ sr->retry = false;
if (unlikely(sqe->file_index || sqe->addr2))
return -EINVAL;
@@ -827,14 +829,16 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return -EINVAL;
}
-#ifdef CONFIG_COMPAT
- if (req->ctx->compat)
+ if (io_is_compat(req->ctx))
sr->msg_flags |= MSG_CMSG_COMPAT;
-#endif
+
sr->nr_multishot_loops = 0;
return io_recvmsg_prep_setup(req);
}
+/* bits to clear in old and inherit in new cflags on bundle retry */
+#define CQE_F_MASK (IORING_CQE_F_SOCK_NONEMPTY|IORING_CQE_F_MORE)
+
/*
* Finishes io_recv and io_recvmsg.
*
@@ -854,9 +858,19 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
if (sr->flags & IORING_RECVSEND_BUNDLE) {
cflags |= io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, *ret),
issue_flags);
+ if (sr->retry)
+ cflags = req->cqe.flags | (cflags & CQE_F_MASK);
/* bundle with no more immediate buffers, we're done */
if (req->flags & REQ_F_BL_EMPTY)
goto finish;
+ /* if more is available, retry and append to this one */
+ if (!sr->retry && kmsg->msg.msg_inq > 0 && *ret > 0) {
+ req->cqe.flags = cflags & ~CQE_F_MASK;
+ sr->len = kmsg->msg.msg_inq;
+ sr->done_io += *ret;
+ sr->retry = true;
+ return false;
+ }
} else {
cflags |= io_put_kbuf(req, *ret, issue_flags);
}
@@ -867,8 +881,7 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
*/
if ((req->flags & REQ_F_APOLL_MULTISHOT) && !mshot_finished &&
io_req_post_cqe(req, *ret, cflags | IORING_CQE_F_MORE)) {
- int mshot_retry_ret = IOU_ISSUE_SKIP_COMPLETE;
-
+ *ret = IOU_RETRY;
io_mshot_prep_retry(req, kmsg);
/* Known not-empty or unknown state, retry */
if (cflags & IORING_CQE_F_SOCK_NONEMPTY || kmsg->msg.msg_inq < 0) {
@@ -876,23 +889,16 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
return false;
/* mshot retries exceeded, force a requeue */
sr->nr_multishot_loops = 0;
- mshot_retry_ret = IOU_REQUEUE;
+ if (issue_flags & IO_URING_F_MULTISHOT)
+ *ret = IOU_REQUEUE;
}
- if (issue_flags & IO_URING_F_MULTISHOT)
- *ret = mshot_retry_ret;
- else
- *ret = -EAGAIN;
return true;
}
/* Finish the request / stop multishot. */
finish:
io_req_set_res(req, *ret, cflags);
-
- if (issue_flags & IO_URING_F_MULTISHOT)
- *ret = IOU_STOP_MULTISHOT;
- else
- *ret = IOU_OK;
+ *ret = IOU_COMPLETE;
io_req_msg_cleanup(req, issue_flags);
return true;
}
@@ -1039,16 +1045,15 @@ retry_multishot:
if (ret < min_ret) {
if (ret == -EAGAIN && force_nonblock) {
- if (issue_flags & IO_URING_F_MULTISHOT) {
+ if (issue_flags & IO_URING_F_MULTISHOT)
io_kbuf_recycle(req, issue_flags);
- return IOU_ISSUE_SKIP_COMPLETE;
- }
- return -EAGAIN;
+
+ return IOU_RETRY;
}
if (ret > 0 && io_net_retry(sock, flags)) {
sr->done_io += ret;
req->flags |= REQ_F_BL_NO_RECYCLE;
- return -EAGAIN;
+ return IOU_RETRY;
}
if (ret == -ERESTARTSYS)
ret = -EINTR;
@@ -1089,9 +1094,9 @@ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg
.mode = KBUF_MODE_EXPAND,
};
- if (kmsg->free_iov) {
- arg.nr_iovs = kmsg->free_iov_nr;
- arg.iovs = kmsg->free_iov;
+ if (kmsg->vec.iovec) {
+ arg.nr_iovs = kmsg->vec.nr;
+ arg.iovs = kmsg->vec.iovec;
arg.mode |= KBUF_MODE_FREE;
}
@@ -1110,9 +1115,9 @@ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg
}
iov_iter_init(&kmsg->msg.msg_iter, ITER_DEST, arg.iovs, ret,
arg.out_len);
- if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) {
- kmsg->free_iov_nr = ret;
- kmsg->free_iov = arg.iovs;
+ if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->vec.iovec) {
+ kmsg->vec.nr = ret;
+ kmsg->vec.iovec = arg.iovs;
req->flags |= REQ_F_NEED_CLEANUP;
}
} else {
@@ -1176,12 +1181,10 @@ retry_multishot:
ret = sock_recvmsg(sock, &kmsg->msg, flags);
if (ret < min_ret) {
if (ret == -EAGAIN && force_nonblock) {
- if (issue_flags & IO_URING_F_MULTISHOT) {
+ if (issue_flags & IO_URING_F_MULTISHOT)
io_kbuf_recycle(req, issue_flags);
- return IOU_ISSUE_SKIP_COMPLETE;
- }
- return -EAGAIN;
+ return IOU_RETRY;
}
if (ret > 0 && io_net_retry(sock, flags)) {
sr->len -= ret;
@@ -1212,6 +1215,73 @@ out_free:
return ret;
}
+int io_recvzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+ struct io_recvzc *zc = io_kiocb_to_cmd(req, struct io_recvzc);
+ unsigned ifq_idx;
+
+ if (unlikely(sqe->file_index || sqe->addr2 || sqe->addr ||
+ sqe->addr3))
+ return -EINVAL;
+
+ ifq_idx = READ_ONCE(sqe->zcrx_ifq_idx);
+ if (ifq_idx != 0)
+ return -EINVAL;
+ zc->ifq = req->ctx->ifq;
+ if (!zc->ifq)
+ return -EINVAL;
+ zc->len = READ_ONCE(sqe->len);
+ zc->flags = READ_ONCE(sqe->ioprio);
+ zc->msg_flags = READ_ONCE(sqe->msg_flags);
+ if (zc->msg_flags)
+ return -EINVAL;
+ if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT))
+ return -EINVAL;
+ /* multishot required */
+ if (!(zc->flags & IORING_RECV_MULTISHOT))
+ return -EINVAL;
+ /* All data completions are posted as aux CQEs. */
+ req->flags |= REQ_F_APOLL_MULTISHOT;
+
+ return 0;
+}
+
+int io_recvzc(struct io_kiocb *req, unsigned int issue_flags)
+{
+ struct io_recvzc *zc = io_kiocb_to_cmd(req, struct io_recvzc);
+ struct socket *sock;
+ unsigned int len;
+ int ret;
+
+ if (!(req->flags & REQ_F_POLLED) &&
+ (zc->flags & IORING_RECVSEND_POLL_FIRST))
+ return -EAGAIN;
+
+ sock = sock_from_file(req->file);
+ if (unlikely(!sock))
+ return -ENOTSOCK;
+
+ len = zc->len;
+ ret = io_zcrx_recv(req, zc->ifq, sock, zc->msg_flags | MSG_DONTWAIT,
+ issue_flags, &zc->len);
+ if (len && zc->len == 0) {
+ io_req_set_res(req, 0, 0);
+
+ return IOU_COMPLETE;
+ }
+ if (unlikely(ret <= 0) && ret != -EAGAIN) {
+ if (ret == -ERESTARTSYS)
+ ret = -EINTR;
+ if (ret == IOU_REQUEUE)
+ return IOU_REQUEUE;
+
+ req_set_fail(req);
+ io_req_set_res(req, ret, 0);
+ return IOU_COMPLETE;
+ }
+ return IOU_RETRY;
+}
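/*
 * Userspace sketch (assumptions: liburing's io_uring_prep_rw() helper
 * and the IORING_OP_RECV_ZC opcode name, plus a zcrx ifq already
 * registered at index 0 — only the sqe->zcrx_ifq_idx field is
 * confirmed by this patch):
 *
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *
 *	io_uring_prep_rw(IORING_OP_RECV_ZC, sqe, sockfd, NULL, 0, 0);
 *	sqe->ioprio = IORING_RECV_MULTISHOT;	// required by io_recvzc_prep()
 *	sqe->zcrx_ifq_idx = 0;			// only ifq index 0 is accepted
 *
 * Data completions then arrive as aux CQEs carrying IORING_CQE_F_MORE
 * until the request terminates.
 */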
+
void io_send_zc_cleanup(struct io_kiocb *req)
{
struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
@@ -1235,6 +1305,7 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
struct io_kiocb *notif;
zc->done_io = 0;
+ zc->retry = false;
req->flags |= REQ_F_POLL_NO_LAZY;
if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
@@ -1267,25 +1338,24 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (req->opcode != IORING_OP_SEND_ZC) {
if (unlikely(sqe->addr2 || sqe->file_index))
return -EINVAL;
- if (unlikely(zc->flags & IORING_RECVSEND_FIXED_BUF))
- return -EINVAL;
}
zc->len = READ_ONCE(sqe->len);
zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL | MSG_ZEROCOPY;
- zc->buf_index = READ_ONCE(sqe->buf_index);
+ req->buf_index = READ_ONCE(sqe->buf_index);
if (zc->msg_flags & MSG_DONTWAIT)
req->flags |= REQ_F_NOWAIT;
-#ifdef CONFIG_COMPAT
- if (req->ctx->compat)
+ if (io_is_compat(req->ctx))
zc->msg_flags |= MSG_CMSG_COMPAT;
-#endif
+
if (unlikely(!io_msg_alloc_async(req)))
return -ENOMEM;
- if (req->opcode != IORING_OP_SENDMSG_ZC)
+ if (req->opcode == IORING_OP_SEND_ZC) {
+ req->flags |= REQ_F_IMPORT_BUFFER;
return io_send_setup(req, sqe);
- return io_sendmsg_setup(req, sqe);
+ }
+ return io_sendmsg_zc_setup(req, sqe);
}
static int io_sg_from_iter_iovec(struct sk_buff *skb,
@@ -1345,24 +1415,10 @@ static int io_send_zc_import(struct io_kiocb *req, unsigned int issue_flags)
int ret;
if (sr->flags & IORING_RECVSEND_FIXED_BUF) {
- struct io_ring_ctx *ctx = req->ctx;
- struct io_rsrc_node *node;
-
- ret = -EFAULT;
- io_ring_submit_lock(ctx, issue_flags);
- node = io_rsrc_node_lookup(&ctx->buf_table, sr->buf_index);
- if (node) {
- io_req_assign_buf_node(sr->notif, node);
- ret = 0;
- }
- io_ring_submit_unlock(ctx, issue_flags);
-
- if (unlikely(ret))
- return ret;
-
- ret = io_import_fixed(ITER_SOURCE, &kmsg->msg.msg_iter,
- node->buf, (u64)(uintptr_t)sr->buf,
- sr->len);
+ sr->notif->buf_index = req->buf_index;
+ ret = io_import_reg_buf(sr->notif, &kmsg->msg.msg_iter,
+ (u64)(uintptr_t)sr->buf, sr->len,
+ ITER_SOURCE, issue_flags);
if (unlikely(ret))
return ret;
kmsg->msg.sg_from_iter = io_sg_from_iter;
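/*
 * io_import_reg_buf() consolidates the removed node-lookup +
 * io_import_fixed() pair; its assumed declaration, per this call site:
 *
 *	int io_import_reg_buf(struct io_kiocb *req, struct iov_iter *iter,
 *			      u64 buf_addr, size_t len, int ddir,
 *			      unsigned issue_flags);
 *
 * with the registered-buffer index taken from req->buf_index, here
 * forwarded to the notif request.
 */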
@@ -1397,7 +1453,8 @@ int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
(zc->flags & IORING_RECVSEND_POLL_FIRST))
return -EAGAIN;
- if (!zc->done_io) {
+ if (req->flags & REQ_F_IMPORT_BUFFER) {
+ req->flags &= ~REQ_F_IMPORT_BUFFER;
ret = io_send_zc_import(req, issue_flags);
if (unlikely(ret))
return ret;
@@ -1441,6 +1498,7 @@ int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
*/
if (!(issue_flags & IO_URING_F_UNLOCKED)) {
io_notif_flush(zc->notif);
+ zc->notif = NULL;
io_req_msg_cleanup(req, 0);
}
io_req_set_res(req, ret, IORING_CQE_F_MORE);
@@ -1455,6 +1513,20 @@ int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
unsigned flags;
int ret, min_ret = 0;
+ kmsg->msg.sg_from_iter = io_sg_from_iter_iovec;
+
+ if (req->flags & REQ_F_IMPORT_BUFFER) {
+ unsigned uvec_segs = kmsg->msg.msg_iter.nr_segs;
+ int ret;
+
+ ret = io_import_reg_vec(ITER_SOURCE, &kmsg->msg.msg_iter, req,
+ &kmsg->vec, uvec_segs, issue_flags);
+ if (unlikely(ret))
+ return ret;
+ kmsg->msg.sg_from_iter = io_sg_from_iter;
+ req->flags &= ~REQ_F_IMPORT_BUFFER;
+ }
+
sock = sock_from_file(req->file);
if (unlikely(!sock))
return -ENOTSOCK;
@@ -1473,7 +1545,6 @@ int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
kmsg->msg.msg_control_user = sr->msg_control;
kmsg->msg.msg_ubuf = &io_notif_to_data(sr->notif)->uarg;
- kmsg->msg.sg_from_iter = io_sg_from_iter_iovec;
ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
if (unlikely(ret < min_ret)) {
@@ -1501,6 +1572,7 @@ int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
*/
if (!(issue_flags & IO_URING_F_UNLOCKED)) {
io_notif_flush(sr->notif);
+ sr->notif = NULL;
io_req_msg_cleanup(req, 0);
}
io_req_set_res(req, ret, IORING_CQE_F_MORE);
@@ -1587,19 +1659,11 @@ retry:
put_unused_fd(fd);
ret = PTR_ERR(file);
if (ret == -EAGAIN && force_nonblock &&
- !(accept->iou_flags & IORING_ACCEPT_DONTWAIT)) {
- /*
- * if it's multishot and polled, we don't need to
- * return EAGAIN to arm the poll infra since it
- * has already been done
- */
- if (issue_flags & IO_URING_F_MULTISHOT)
- return IOU_ISSUE_SKIP_COMPLETE;
- return ret;
- }
+ !(accept->iou_flags & IORING_ACCEPT_DONTWAIT))
+ return IOU_RETRY;
+
if (ret == -ERESTARTSYS)
ret = -EINTR;
- req_set_fail(req);
} else if (!fixed) {
fd_install(fd, file);
ret = fd;
@@ -1612,23 +1676,17 @@ retry:
if (!arg.is_empty)
cflags |= IORING_CQE_F_SOCK_NONEMPTY;
- if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
- io_req_set_res(req, ret, cflags);
- return IOU_OK;
- }
-
- if (ret < 0)
- return ret;
- if (io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE)) {
+ if (ret >= 0 && (req->flags & REQ_F_APOLL_MULTISHOT) &&
+ io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE)) {
if (cflags & IORING_CQE_F_SOCK_NONEMPTY || arg.is_empty == -1)
goto retry;
- if (issue_flags & IO_URING_F_MULTISHOT)
- return IOU_ISSUE_SKIP_COMPLETE;
- return -EAGAIN;
+ return IOU_RETRY;
}
io_req_set_res(req, ret, cflags);
- return IOU_STOP_MULTISHOT;
+ if (ret < 0)
+ req_set_fail(req);
+ return IOU_COMPLETE;
}
int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
@@ -1820,8 +1878,7 @@ void io_netmsg_cache_free(const void *entry)
{
struct io_async_msghdr *kmsg = (struct io_async_msghdr *) entry;
- if (kmsg->free_iov)
- io_netmsg_iovec_free(kmsg);
+ io_vec_free(&kmsg->vec);
kfree(kmsg);
}
#endif