diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-10-07 08:52:43 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-10-07 08:52:43 -0700 |
commit | 0a78a376ef3c2f3d397df48909f00cd75f92137a (patch) | |
tree | 49a642a5ada33bf234247baae3ef9648de323caf /include | |
parent | 188943a15638ceb91f960e072ed7609b2d7f2a55 (diff) | |
parent | 108893ddcc4d3aa0a4a02aeb02d478e997001227 (diff) | |
download | lwn-0a78a376ef3c2f3d397df48909f00cd75f92137a.tar.gz lwn-0a78a376ef3c2f3d397df48909f00cd75f92137a.zip |
Merge tag 'for-6.1/io_uring-2022-10-03' of git://git.kernel.dk/linux
Pull io_uring updates from Jens Axboe:
- Add supported for more directly managed task_work running.
This is beneficial for real world applications that end up issuing
lots of system calls as part of handling work. Normal task_work will
always execute as we transition in and out of the kernel, even for
"unrelated" system calls. It's more efficient to defer the handling
of io_uring's deferred work until the application wants it to be run,
generally in batches.
As part of ongoing work to write an io_uring network backend for
Thrift, this has been shown to greatly improve performance. (Dylan)
- Add IOPOLL support for passthrough (Kanchan)
- Improvements and fixes to the send zero-copy support (Pavel)
- Partial IO handling fixes (Pavel)
- CQE ordering fixes around CQ ring overflow (Pavel)
- Support sendto() for non-zc as well (Pavel)
- Support sendmsg for zerocopy (Pavel)
- Networking iov_iter fix (Stefan)
- Misc fixes and cleanups (Pavel, me)
* tag 'for-6.1/io_uring-2022-10-03' of git://git.kernel.dk/linux: (56 commits)
io_uring/net: fix notif cqe reordering
io_uring/net: don't update msg_name if not provided
io_uring: don't gate task_work run on TIF_NOTIFY_SIGNAL
io_uring/rw: defer fsnotify calls to task context
io_uring/net: fix fast_iov assignment in io_setup_async_msg()
io_uring/net: fix non-zc send with address
io_uring/net: don't skip notifs for failed requests
io_uring/rw: don't lose short results on io_setup_async_rw()
io_uring/rw: fix unexpected link breakage
io_uring/net: fix cleanup double free free_iov init
io_uring: fix CQE reordering
io_uring/net: fix UAF in io_sendrecv_fail()
selftest/net: adjust io_uring sendzc notif handling
io_uring: ensure local task_work marks task as running
io_uring/net: zerocopy sendmsg
io_uring/net: combine fail handlers
io_uring/net: rename io_sendzc()
io_uring/net: support non-zerocopy sendto
io_uring/net: refactor io_setup_async_addr
io_uring/net: don't lose partial send_zc on fail
...
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/blk-mq.h | 1 | ||||
-rw-r--r-- | include/linux/eventfd.h | 2 | ||||
-rw-r--r-- | include/linux/fs.h | 2 | ||||
-rw-r--r-- | include/linux/io_uring.h | 8 | ||||
-rw-r--r-- | include/linux/io_uring_types.h | 4 | ||||
-rw-r--r-- | include/linux/sched.h | 2 | ||||
-rw-r--r-- | include/trace/events/io_uring.h | 29 | ||||
-rw-r--r-- | include/uapi/linux/io_uring.h | 8 |
8 files changed, 52 insertions, 4 deletions
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 92294a5fb083..de384f5d2c37 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -980,6 +980,7 @@ int blk_rq_map_kern(struct request_queue *, struct request *, void *, int blk_rq_append_bio(struct request *rq, struct bio *bio); void blk_execute_rq_nowait(struct request *rq, bool at_head); blk_status_t blk_execute_rq(struct request *rq, bool at_head); +bool blk_rq_is_poll(struct request *rq); struct req_iterator { struct bvec_iter iter; diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h index 305d5f19093b..30eb30d6909b 100644 --- a/include/linux/eventfd.h +++ b/include/linux/eventfd.h @@ -46,7 +46,7 @@ void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt); static inline bool eventfd_signal_allowed(void) { - return !current->in_eventfd_signal; + return !current->in_eventfd; } #else /* CONFIG_EVENTFD */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 7098f085d32d..619d683eb5fd 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2133,6 +2133,8 @@ struct file_operations { loff_t len, unsigned int remap_flags); int (*fadvise)(struct file *, loff_t, loff_t, int); int (*uring_cmd)(struct io_uring_cmd *ioucmd, unsigned int issue_flags); + int (*uring_cmd_iopoll)(struct io_uring_cmd *, struct io_comp_batch *, + unsigned int poll_flags); } __randomize_layout; struct inode_operations { diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 4a2f6cc5a492..58676c0a398f 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -20,8 +20,12 @@ enum io_uring_cmd_flags { struct io_uring_cmd { struct file *file; const void *cmd; - /* callback to defer completions to task context */ - void (*task_work_cb)(struct io_uring_cmd *cmd); + union { + /* callback to defer completions to task context */ + void (*task_work_cb)(struct io_uring_cmd *cmd); + /* used for polled completion */ + void *cookie; + }; u32 cmd_op; u32 pad; u8 pdu[32]; /* available inline for free use */ diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 677a25d44d7f..aa4d90a53866 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -184,6 +184,8 @@ struct io_ev_fd { struct eventfd_ctx *cq_ev_fd; unsigned int eventfd_async: 1; struct rcu_head rcu; + atomic_t refs; + atomic_t ops; }; struct io_alloc_cache { @@ -301,6 +303,8 @@ struct io_ring_ctx { struct io_hash_table cancel_table; bool poll_multi_queue; + struct llist_head work_llist; + struct list_head io_buffers_comp; } ____cacheline_aligned_in_smp; diff --git a/include/linux/sched.h b/include/linux/sched.h index e7b2f8a5c711..8d82d6d32670 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -936,7 +936,7 @@ struct task_struct { #endif #ifdef CONFIG_EVENTFD /* Recursion prevention for eventfd_signal() */ - unsigned in_eventfd_signal:1; + unsigned in_eventfd:1; #endif #ifdef CONFIG_IOMMU_SVA unsigned pasid_activated:1; diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h index c5b21ff0ac85..936fd41bf147 100644 --- a/include/trace/events/io_uring.h +++ b/include/trace/events/io_uring.h @@ -655,6 +655,35 @@ TRACE_EVENT(io_uring_short_write, __entry->wanted, __entry->got) ); +/* + * io_uring_local_work_run - ran ring local task work + * + * @tctx: pointer to a io_uring_ctx + * @count: how many functions it ran + * @loops: how many loops it ran + * + */ +TRACE_EVENT(io_uring_local_work_run, + + TP_PROTO(void *ctx, int count, unsigned int loops), + + TP_ARGS(ctx, count, loops), + + TP_STRUCT__entry ( + __field(void *, ctx ) + __field(int, count ) + __field(unsigned int, loops ) + ), + + TP_fast_assign( + __entry->ctx = ctx; + __entry->count = count; + __entry->loops = loops; + ), + + TP_printk("ring %p, count %d, loops %u", __entry->ctx, __entry->count, __entry->loops) +); + #endif /* _TRACE_IO_URING_H */ /* This part must be outside protection */ diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 6b83177fd41d..92f29d9505a6 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -157,6 +157,13 @@ enum { */ #define IORING_SETUP_SINGLE_ISSUER (1U << 12) +/* + * Defer running task work to get events. + * Rather than running bits of task work whenever the task transitions + * try to do it just before it is needed. + */ +#define IORING_SETUP_DEFER_TASKRUN (1U << 13) + enum io_uring_op { IORING_OP_NOP, IORING_OP_READV, @@ -206,6 +213,7 @@ enum io_uring_op { IORING_OP_SOCKET, IORING_OP_URING_CMD, IORING_OP_SEND_ZC, + IORING_OP_SENDMSG_ZC, /* this goes last, obviously */ IORING_OP_LAST, |