diff options
Diffstat (limited to 'include')
38 files changed, 873 insertions, 194 deletions
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index ff3e82553a76..492dce43236e 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -158,9 +158,24 @@ * Useful if your architecture doesn't use IPIs for remote TLB invalidates * and therefore doesn't naturally serialize with software page-table walkers. * + * MMU_GATHER_NO_FLUSH_CACHE + * + * Indicates the architecture has flush_cache_range() but it needs *NOT* be called + * before unmapping a VMA. + * + * NOTE: strictly speaking we shouldn't have this knob and instead rely on + * flush_cache_range() being a NOP, except Sparc64 seems to be + * different here. + * + * MMU_GATHER_MERGE_VMAS + * + * Indicates the architecture wants to merge ranges over VMAs; typical when + * multiple range invalidates are more expensive than a full invalidate. + * * MMU_GATHER_NO_RANGE * - * Use this if your architecture lacks an efficient flush_tlb_range(). + * Use this if your architecture lacks an efficient flush_tlb_range(). This + * option implies MMU_GATHER_MERGE_VMAS above. * * MMU_GATHER_NO_GATHER * @@ -288,6 +303,7 @@ struct mmu_gather { */ unsigned int vma_exec : 1; unsigned int vma_huge : 1; + unsigned int vma_pfn : 1; unsigned int batch_count; @@ -334,8 +350,8 @@ static inline void __tlb_reset_range(struct mmu_gather *tlb) #ifdef CONFIG_MMU_GATHER_NO_RANGE -#if defined(tlb_flush) || defined(tlb_start_vma) || defined(tlb_end_vma) -#error MMU_GATHER_NO_RANGE relies on default tlb_flush(), tlb_start_vma() and tlb_end_vma() +#if defined(tlb_flush) +#error MMU_GATHER_NO_RANGE relies on default tlb_flush() #endif /* @@ -352,20 +368,9 @@ static inline void tlb_flush(struct mmu_gather *tlb) flush_tlb_mm(tlb->mm); } -static inline void -tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) { } - -#define tlb_end_vma tlb_end_vma -static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) { } - #else /* CONFIG_MMU_GATHER_NO_RANGE */ #ifndef tlb_flush - -#if defined(tlb_start_vma) || defined(tlb_end_vma) -#error Default tlb_flush() relies on default tlb_start_vma() and tlb_end_vma() -#endif - /* * When an architecture does not provide its own tlb_flush() implementation * but does have a reasonably efficient flush_vma_range() implementation @@ -385,6 +390,9 @@ static inline void tlb_flush(struct mmu_gather *tlb) flush_tlb_range(&vma, tlb->start, tlb->end); } } +#endif + +#endif /* CONFIG_MMU_GATHER_NO_RANGE */ static inline void tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) @@ -402,17 +410,9 @@ tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) */ tlb->vma_huge = is_vm_hugetlb_page(vma); tlb->vma_exec = !!(vma->vm_flags & VM_EXEC); + tlb->vma_pfn = !!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)); } -#else - -static inline void -tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) { } - -#endif - -#endif /* CONFIG_MMU_GATHER_NO_RANGE */ - static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb) { /* @@ -486,32 +486,36 @@ static inline unsigned long tlb_get_unmap_size(struct mmu_gather *tlb) * case where we're doing a full MM flush. When we're doing a munmap, * the vmas are adjusted to only cover the region to be torn down. */ -#ifndef tlb_start_vma static inline void tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) { if (tlb->fullmm) return; tlb_update_vma_flags(tlb, vma); +#ifndef CONFIG_MMU_GATHER_NO_FLUSH_CACHE flush_cache_range(vma, vma->vm_start, vma->vm_end); -} #endif +} -#ifndef tlb_end_vma static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) { if (tlb->fullmm) return; /* - * Do a TLB flush and reset the range at VMA boundaries; this avoids - * the ranges growing with the unused space between consecutive VMAs, - * but also the mmu_gather::vma_* flags from tlb_start_vma() rely on - * this. + * VM_PFNMAP is more fragile because the core mm will not track the + * page mapcount -- there might not be page-frames for these PFNs after + * all. Force flush TLBs for such ranges to avoid munmap() vs + * unmap_mapping_range() races. */ - tlb_flush_mmu_tlbonly(tlb); + if (tlb->vma_pfn || !IS_ENABLED(CONFIG_MMU_GATHER_MERGE_VMAS)) { + /* + * Do a TLB flush and reset the range at VMA boundaries; this avoids + * the ranges growing with the unused space between consecutive VMAs. + */ + tlb_flush_mmu_tlbonly(tlb); + } } -#endif /* * tlb_flush_{pte|pmd|pud|p4d}_range() adjust the tlb->start and tlb->end, diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 0fca8f38bee4..addb135eeea6 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -28,7 +28,7 @@ #include <linux/dma-fence.h> #include <linux/completion.h> #include <linux/xarray.h> -#include <linux/irq_work.h> +#include <linux/workqueue.h> #define MAX_WAIT_SCHED_ENTITY_Q_EMPTY msecs_to_jiffies(1000) @@ -295,7 +295,7 @@ struct drm_sched_job { */ union { struct dma_fence_cb finish_cb; - struct irq_work work; + struct work_struct work; }; uint64_t id; diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 1bfcfb1af352..d4427d0a0e18 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -264,7 +264,8 @@ struct css_set { * List of csets participating in the on-going migration either as * source or destination. Protected by cgroup_mutex. */ - struct list_head mg_preload_node; + struct list_head mg_src_preload_node; + struct list_head mg_dst_preload_node; struct list_head mg_node; /* diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 2c7477354744..314802f98b9d 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -68,6 +68,8 @@ extern ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, extern ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_retbleed(struct device *dev, + struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 3af34de54330..56d6a0196534 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -149,19 +149,19 @@ static inline void *kmap_local_folio(struct folio *folio, size_t offset); * It is used in atomic context when code wants to access the contents of a * page that might be allocated from high memory (see __GFP_HIGHMEM), for * example a page in the pagecache. The API has two functions, and they - * can be used in a manner similar to the following: + * can be used in a manner similar to the following:: * - * -- Find the page of interest. -- - * struct page *page = find_get_page(mapping, offset); + * // Find the page of interest. + * struct page *page = find_get_page(mapping, offset); * - * -- Gain access to the contents of that page. -- - * void *vaddr = kmap_atomic(page); + * // Gain access to the contents of that page. + * void *vaddr = kmap_atomic(page); * - * -- Do something to the contents of that page. -- - * memset(vaddr, 0, PAGE_SIZE); + * // Do something to the contents of that page. + * memset(vaddr, 0, PAGE_SIZE); * - * -- Unmap that page. -- - * kunmap_atomic(vaddr); + * // Unmap that page. + * kunmap_atomic(vaddr); * * Note that the kunmap_atomic() call takes the result of the kmap_atomic() * call, not the argument. diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h new file mode 100644 index 000000000000..d54b8b7e0746 --- /dev/null +++ b/include/linux/io_uring_types.h @@ -0,0 +1,544 @@ +#ifndef IO_URING_TYPES_H +#define IO_URING_TYPES_H + +#include <linux/blkdev.h> +#include <linux/task_work.h> +#include <linux/bitmap.h> +#include <uapi/linux/io_uring.h> + +struct io_wq_work_node { + struct io_wq_work_node *next; +}; + +struct io_wq_work_list { + struct io_wq_work_node *first; + struct io_wq_work_node *last; +}; + +struct io_wq_work { + struct io_wq_work_node list; + unsigned flags; + /* place it here instead of io_kiocb as it fills padding and saves 4B */ + int cancel_seq; +}; + +struct io_fixed_file { + /* file * with additional FFS_* flags */ + unsigned long file_ptr; +}; + +struct io_file_table { + struct io_fixed_file *files; + unsigned long *bitmap; + unsigned int alloc_hint; +}; + +struct io_hash_bucket { + spinlock_t lock; + struct hlist_head list; +} ____cacheline_aligned_in_smp; + +struct io_hash_table { + struct io_hash_bucket *hbs; + unsigned hash_bits; +}; + +struct io_uring { + u32 head ____cacheline_aligned_in_smp; + u32 tail ____cacheline_aligned_in_smp; +}; + +/* + * This data is shared with the application through the mmap at offsets + * IORING_OFF_SQ_RING and IORING_OFF_CQ_RING. + * + * The offsets to the member fields are published through struct + * io_sqring_offsets when calling io_uring_setup. + */ +struct io_rings { + /* + * Head and tail offsets into the ring; the offsets need to be + * masked to get valid indices. + * + * The kernel controls head of the sq ring and the tail of the cq ring, + * and the application controls tail of the sq ring and the head of the + * cq ring. + */ + struct io_uring sq, cq; + /* + * Bitmasks to apply to head and tail offsets (constant, equals + * ring_entries - 1) + */ + u32 sq_ring_mask, cq_ring_mask; + /* Ring sizes (constant, power of 2) */ + u32 sq_ring_entries, cq_ring_entries; + /* + * Number of invalid entries dropped by the kernel due to + * invalid index stored in array + * + * Written by the kernel, shouldn't be modified by the + * application (i.e. get number of "new events" by comparing to + * cached value). + * + * After a new SQ head value was read by the application this + * counter includes all submissions that were dropped reaching + * the new SQ head (and possibly more). + */ + u32 sq_dropped; + /* + * Runtime SQ flags + * + * Written by the kernel, shouldn't be modified by the + * application. + * + * The application needs a full memory barrier before checking + * for IORING_SQ_NEED_WAKEUP after updating the sq tail. + */ + atomic_t sq_flags; + /* + * Runtime CQ flags + * + * Written by the application, shouldn't be modified by the + * kernel. + */ + u32 cq_flags; + /* + * Number of completion events lost because the queue was full; + * this should be avoided by the application by making sure + * there are not more requests pending than there is space in + * the completion queue. + * + * Written by the kernel, shouldn't be modified by the + * application (i.e. get number of "new events" by comparing to + * cached value). + * + * As completion events come in out of order this counter is not + * ordered with any other data. + */ + u32 cq_overflow; + /* + * Ring buffer of completion events. + * + * The kernel writes completion events fresh every time they are + * produced, so the application is allowed to modify pending + * entries. + */ + struct io_uring_cqe cqes[] ____cacheline_aligned_in_smp; +}; + +struct io_restriction { + DECLARE_BITMAP(register_op, IORING_REGISTER_LAST); + DECLARE_BITMAP(sqe_op, IORING_OP_LAST); + u8 sqe_flags_allowed; + u8 sqe_flags_required; + bool registered; +}; + +struct io_submit_link { + struct io_kiocb *head; + struct io_kiocb *last; +}; + +struct io_submit_state { + /* inline/task_work completion list, under ->uring_lock */ + struct io_wq_work_node free_list; + /* batch completion logic */ + struct io_wq_work_list compl_reqs; + struct io_submit_link link; + + bool plug_started; + bool need_plug; + unsigned short submit_nr; + struct blk_plug plug; +}; + +struct io_ev_fd { + struct eventfd_ctx *cq_ev_fd; + unsigned int eventfd_async: 1; + struct rcu_head rcu; +}; + +struct io_alloc_cache { + struct hlist_head list; + unsigned int nr_cached; +}; + +struct io_ring_ctx { + /* const or read-mostly hot data */ + struct { + struct percpu_ref refs; + + struct io_rings *rings; + unsigned int flags; + enum task_work_notify_mode notify_method; + unsigned int compat: 1; + unsigned int drain_next: 1; + unsigned int restricted: 1; + unsigned int off_timeout_used: 1; + unsigned int drain_active: 1; + unsigned int drain_disabled: 1; + unsigned int has_evfd: 1; + unsigned int syscall_iopoll: 1; + } ____cacheline_aligned_in_smp; + + /* submission data */ + struct { + struct mutex uring_lock; + + /* + * Ring buffer of indices into array of io_uring_sqe, which is + * mmapped by the application using the IORING_OFF_SQES offset. + * + * This indirection could e.g. be used to assign fixed + * io_uring_sqe entries to operations and only submit them to + * the queue when needed. + * + * The kernel modifies neither the indices array nor the entries + * array. + */ + u32 *sq_array; + struct io_uring_sqe *sq_sqes; + unsigned cached_sq_head; + unsigned sq_entries; + + /* + * Fixed resources fast path, should be accessed only under + * uring_lock, and updated through io_uring_register(2) + */ + struct io_rsrc_node *rsrc_node; + int rsrc_cached_refs; + atomic_t cancel_seq; + struct io_file_table file_table; + unsigned nr_user_files; + unsigned nr_user_bufs; + struct io_mapped_ubuf **user_bufs; + + struct io_submit_state submit_state; + + struct io_buffer_list *io_bl; + struct xarray io_bl_xa; + struct list_head io_buffers_cache; + + struct io_hash_table cancel_table_locked; + struct list_head cq_overflow_list; + struct io_alloc_cache apoll_cache; + struct io_alloc_cache netmsg_cache; + } ____cacheline_aligned_in_smp; + + /* IRQ completion list, under ->completion_lock */ + struct io_wq_work_list locked_free_list; + unsigned int locked_free_nr; + + const struct cred *sq_creds; /* cred used for __io_sq_thread() */ + struct io_sq_data *sq_data; /* if using sq thread polling */ + + struct wait_queue_head sqo_sq_wait; + struct list_head sqd_list; + + unsigned long check_cq; + + unsigned int file_alloc_start; + unsigned int file_alloc_end; + + struct xarray personalities; + u32 pers_next; + + struct { + /* + * We cache a range of free CQEs we can use, once exhausted it + * should go through a slower range setup, see __io_get_cqe() + */ + struct io_uring_cqe *cqe_cached; + struct io_uring_cqe *cqe_sentinel; + + unsigned cached_cq_tail; + unsigned cq_entries; + struct io_ev_fd __rcu *io_ev_fd; + struct wait_queue_head cq_wait; + unsigned cq_extra; + } ____cacheline_aligned_in_smp; + + struct { + spinlock_t completion_lock; + + /* + * ->iopoll_list is protected by the ctx->uring_lock for + * io_uring instances that don't use IORING_SETUP_SQPOLL. + * For SQPOLL, only the single threaded io_sq_thread() will + * manipulate the list, hence no extra locking is needed there. + */ + struct io_wq_work_list iopoll_list; + struct io_hash_table cancel_table; + bool poll_multi_queue; + + struct list_head io_buffers_comp; + } ____cacheline_aligned_in_smp; + + /* timeouts */ + struct { + spinlock_t timeout_lock; + atomic_t cq_timeouts; + struct list_head timeout_list; + struct list_head ltimeout_list; + unsigned cq_last_tm_flush; + } ____cacheline_aligned_in_smp; + + /* Keep this last, we don't need it for the fast path */ + + struct io_restriction restrictions; + struct task_struct *submitter_task; + + /* slow path rsrc auxilary data, used by update/register */ + struct io_rsrc_node *rsrc_backup_node; + struct io_mapped_ubuf *dummy_ubuf; + struct io_rsrc_data *file_data; + struct io_rsrc_data *buf_data; + + struct delayed_work rsrc_put_work; + struct llist_head rsrc_put_llist; + struct list_head rsrc_ref_list; + spinlock_t rsrc_ref_lock; + + struct list_head io_buffers_pages; + + #if defined(CONFIG_UNIX) + struct socket *ring_sock; + #endif + /* hashed buffered write serialization */ + struct io_wq_hash *hash_map; + + /* Only used for accounting purposes */ + struct user_struct *user; + struct mm_struct *mm_account; + + /* ctx exit and cancelation */ + struct llist_head fallback_llist; + struct delayed_work fallback_work; + struct work_struct exit_work; + struct list_head tctx_list; + struct completion ref_comp; + + /* io-wq management, e.g. thread count */ + u32 iowq_limits[2]; + bool iowq_limits_set; + + struct list_head defer_list; + unsigned sq_thread_idle; + /* protected by ->completion_lock */ + unsigned evfd_last_cq_tail; +}; + +enum { + REQ_F_FIXED_FILE_BIT = IOSQE_FIXED_FILE_BIT, + REQ_F_IO_DRAIN_BIT = IOSQE_IO_DRAIN_BIT, + REQ_F_LINK_BIT = IOSQE_IO_LINK_BIT, + REQ_F_HARDLINK_BIT = IOSQE_IO_HARDLINK_BIT, + REQ_F_FORCE_ASYNC_BIT = IOSQE_ASYNC_BIT, + REQ_F_BUFFER_SELECT_BIT = IOSQE_BUFFER_SELECT_BIT, + REQ_F_CQE_SKIP_BIT = IOSQE_CQE_SKIP_SUCCESS_BIT, + + /* first byte is taken by user flags, shift it to not overlap */ + REQ_F_FAIL_BIT = 8, + REQ_F_INFLIGHT_BIT, + REQ_F_CUR_POS_BIT, + REQ_F_NOWAIT_BIT, + REQ_F_LINK_TIMEOUT_BIT, + REQ_F_NEED_CLEANUP_BIT, + REQ_F_POLLED_BIT, + REQ_F_BUFFER_SELECTED_BIT, + REQ_F_BUFFER_RING_BIT, + REQ_F_REISSUE_BIT, + REQ_F_CREDS_BIT, + REQ_F_REFCOUNT_BIT, + REQ_F_ARM_LTIMEOUT_BIT, + REQ_F_ASYNC_DATA_BIT, + REQ_F_SKIP_LINK_CQES_BIT, + REQ_F_SINGLE_POLL_BIT, + REQ_F_DOUBLE_POLL_BIT, + REQ_F_PARTIAL_IO_BIT, + REQ_F_CQE32_INIT_BIT, + REQ_F_APOLL_MULTISHOT_BIT, + REQ_F_CLEAR_POLLIN_BIT, + REQ_F_HASH_LOCKED_BIT, + /* keep async read/write and isreg together and in order */ + REQ_F_SUPPORT_NOWAIT_BIT, + REQ_F_ISREG_BIT, + + /* not a real bit, just to check we're not overflowing the space */ + __REQ_F_LAST_BIT, +}; + +enum { + /* ctx owns file */ + REQ_F_FIXED_FILE = BIT(REQ_F_FIXED_FILE_BIT), + /* drain existing IO first */ + REQ_F_IO_DRAIN = BIT(REQ_F_IO_DRAIN_BIT), + /* linked sqes */ + REQ_F_LINK = BIT(REQ_F_LINK_BIT), + /* doesn't sever on completion < 0 */ + REQ_F_HARDLINK = BIT(REQ_F_HARDLINK_BIT), + /* IOSQE_ASYNC */ + REQ_F_FORCE_ASYNC = BIT(REQ_F_FORCE_ASYNC_BIT), + /* IOSQE_BUFFER_SELECT */ + REQ_F_BUFFER_SELECT = BIT(REQ_F_BUFFER_SELECT_BIT), + /* IOSQE_CQE_SKIP_SUCCESS */ + REQ_F_CQE_SKIP = BIT(REQ_F_CQE_SKIP_BIT), + + /* fail rest of links */ + REQ_F_FAIL = BIT(REQ_F_FAIL_BIT), + /* on inflight list, should be cancelled and waited on exit reliably */ + REQ_F_INFLIGHT = BIT(REQ_F_INFLIGHT_BIT), + /* read/write uses file position */ + REQ_F_CUR_POS = BIT(REQ_F_CUR_POS_BIT), + /* must not punt to workers */ + REQ_F_NOWAIT = BIT(REQ_F_NOWAIT_BIT), + /* has or had linked timeout */ + REQ_F_LINK_TIMEOUT = BIT(REQ_F_LINK_TIMEOUT_BIT), + /* needs cleanup */ + REQ_F_NEED_CLEANUP = BIT(REQ_F_NEED_CLEANUP_BIT), + /* already went through poll handler */ + REQ_F_POLLED = BIT(REQ_F_POLLED_BIT), + /* buffer already selected */ + REQ_F_BUFFER_SELECTED = BIT(REQ_F_BUFFER_SELECTED_BIT), + /* buffer selected from ring, needs commit */ + REQ_F_BUFFER_RING = BIT(REQ_F_BUFFER_RING_BIT), + /* caller should reissue async */ + REQ_F_REISSUE = BIT(REQ_F_REISSUE_BIT), + /* supports async reads/writes */ + REQ_F_SUPPORT_NOWAIT = BIT(REQ_F_SUPPORT_NOWAIT_BIT), + /* regular file */ + REQ_F_ISREG = BIT(REQ_F_ISREG_BIT), + /* has creds assigned */ + REQ_F_CREDS = BIT(REQ_F_CREDS_BIT), + /* skip refcounting if not set */ + REQ_F_REFCOUNT = BIT(REQ_F_REFCOUNT_BIT), + /* there is a linked timeout that has to be armed */ + REQ_F_ARM_LTIMEOUT = BIT(REQ_F_ARM_LTIMEOUT_BIT), + /* ->async_data allocated */ + REQ_F_ASYNC_DATA = BIT(REQ_F_ASYNC_DATA_BIT), + /* don't post CQEs while failing linked requests */ + REQ_F_SKIP_LINK_CQES = BIT(REQ_F_SKIP_LINK_CQES_BIT), + /* single poll may be active */ + REQ_F_SINGLE_POLL = BIT(REQ_F_SINGLE_POLL_BIT), + /* double poll may active */ + REQ_F_DOUBLE_POLL = BIT(REQ_F_DOUBLE_POLL_BIT), + /* request has already done partial IO */ + REQ_F_PARTIAL_IO = BIT(REQ_F_PARTIAL_IO_BIT), + /* fast poll multishot mode */ + REQ_F_APOLL_MULTISHOT = BIT(REQ_F_APOLL_MULTISHOT_BIT), + /* ->extra1 and ->extra2 are initialised */ + REQ_F_CQE32_INIT = BIT(REQ_F_CQE32_INIT_BIT), + /* recvmsg special flag, clear EPOLLIN */ + REQ_F_CLEAR_POLLIN = BIT(REQ_F_CLEAR_POLLIN_BIT), + /* hashed into ->cancel_hash_locked, protected by ->uring_lock */ + REQ_F_HASH_LOCKED = BIT(REQ_F_HASH_LOCKED_BIT), +}; + +typedef void (*io_req_tw_func_t)(struct io_kiocb *req, bool *locked); + +struct io_task_work { + struct llist_node node; + io_req_tw_func_t func; +}; + +struct io_cqe { + __u64 user_data; + __s32 res; + /* fd initially, then cflags for completion */ + union { + __u32 flags; + int fd; + }; +}; + +/* + * Each request type overlays its private data structure on top of this one. + * They must not exceed this one in size. + */ +struct io_cmd_data { + struct file *file; + /* each command gets 56 bytes of data */ + __u8 data[56]; +}; + +#define io_kiocb_to_cmd(req) ((void *) &(req)->cmd) +#define cmd_to_io_kiocb(ptr) ((struct io_kiocb *) ptr) + +struct io_kiocb { + union { + /* + * NOTE! Each of the io_kiocb union members has the file pointer + * as the first entry in their struct definition. So you can + * access the file pointer through any of the sub-structs, + * or directly as just 'file' in this struct. + */ + struct file *file; + struct io_cmd_data cmd; + }; + + u8 opcode; + /* polled IO has completed */ + u8 iopoll_completed; + /* + * Can be either a fixed buffer index, or used with provided buffers. + * For the latter, before issue it points to the buffer group ID, + * and after selection it points to the buffer ID itself. + */ + u16 buf_index; + unsigned int flags; + + struct io_cqe cqe; + + struct io_ring_ctx *ctx; + struct task_struct *task; + + struct io_rsrc_node *rsrc_node; + + union { + /* store used ubuf, so we can prevent reloading */ + struct io_mapped_ubuf *imu; + + /* stores selected buf, valid IFF REQ_F_BUFFER_SELECTED is set */ + struct io_buffer *kbuf; + + /* + * stores buffer ID for ring provided buffers, valid IFF + * REQ_F_BUFFER_RING is set. + */ + struct io_buffer_list *buf_list; + }; + + union { + /* used by request caches, completion batching and iopoll */ + struct io_wq_work_node comp_list; + /* cache ->apoll->events */ + __poll_t apoll_events; + }; + atomic_t refs; + atomic_t poll_refs; + struct io_task_work io_task_work; + /* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */ + union { + struct hlist_node hash_node; + struct { + u64 extra1; + u64 extra2; + }; + }; + /* internal polling, see IORING_FEAT_FAST_POLL */ + struct async_poll *apoll; + /* opcode allocated if it needs to store data for async defer */ + void *async_data; + /* linked requests, IFF REQ_F_HARDLINK or REQ_F_LINK are set */ + struct io_kiocb *link; + /* custom credentials, valid IFF REQ_F_CREDS is set */ + const struct cred *creds; + struct io_wq_work work; +}; + +struct io_overflow_cqe { + struct list_head list; + struct io_uring_cqe cqe; +}; + +#endif diff --git a/include/linux/kexec.h b/include/linux/kexec.h index ce6536f1d269..475683cd67f1 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -452,6 +452,12 @@ static inline int kexec_crash_loaded(void) { return 0; } #define kexec_in_progress false #endif /* CONFIG_KEXEC_CORE */ +#ifdef CONFIG_KEXEC_SIG +void set_kexec_sig_enforced(void); +#else +static inline void set_kexec_sig_enforced(void) {} +#endif + #endif /* !defined(__ASSEBMLY__) */ #endif /* LINUX_KEXEC_H */ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c20f2d55840c..90a45ef7203b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1513,7 +1513,7 @@ static inline void kvm_arch_end_assignment(struct kvm *kvm) { } -static inline bool kvm_arch_has_assigned_device(struct kvm *kvm) +static __always_inline bool kvm_arch_has_assigned_device(struct kvm *kvm) { return false; } @@ -1822,6 +1822,15 @@ struct _kvm_stats_desc { STATS_DESC_PEAK(SCOPE, name, KVM_STATS_UNIT_NONE, \ KVM_STATS_BASE_POW10, 0) +/* Instantaneous boolean value, read only */ +#define STATS_DESC_IBOOLEAN(SCOPE, name) \ + STATS_DESC_INSTANT(SCOPE, name, KVM_STATS_UNIT_BOOLEAN, \ + KVM_STATS_BASE_POW10, 0) +/* Peak (sticky) boolean value, read/write */ +#define STATS_DESC_PBOOLEAN(SCOPE, name) \ + STATS_DESC_PEAK(SCOPE, name, KVM_STATS_UNIT_BOOLEAN, \ + KVM_STATS_BASE_POW10, 0) + /* Cumulative time in nanosecond */ #define STATS_DESC_TIME_NSEC(SCOPE, name) \ STATS_DESC_CUMULATIVE(SCOPE, name, KVM_STATS_UNIT_SECONDS, \ @@ -1853,7 +1862,7 @@ struct _kvm_stats_desc { HALT_POLL_HIST_COUNT), \ STATS_DESC_LOGHIST_TIME_NSEC(VCPU_GENERIC, halt_wait_hist, \ HALT_POLL_HIST_COUNT), \ - STATS_DESC_ICOUNTER(VCPU_GENERIC, blocking) + STATS_DESC_IBOOLEAN(VCPU_GENERIC, blocking) extern struct dentry *kvm_debugfs_dir; diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 1773e5df8e65..1b18dfa52e48 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -214,7 +214,7 @@ struct netfs_request_ops { void (*issue_read)(struct netfs_io_subrequest *subreq); bool (*is_still_valid)(struct netfs_io_request *rreq); int (*check_write_begin)(struct file *file, loff_t pos, unsigned len, - struct folio *folio, void **_fsdata); + struct folio **foliop, void **_fsdata); void (*done)(struct netfs_io_request *rreq); }; diff --git a/include/linux/objtool.h b/include/linux/objtool.h index 15b940ec1eac..10bc88cc3bf6 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -32,11 +32,16 @@ struct unwind_hint { * * UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function. * Useful for code which doesn't have an ELF function annotation. + * + * UNWIND_HINT_ENTRY: machine entry without stack, SYSCALL/SYSENTER etc. */ #define UNWIND_HINT_TYPE_CALL 0 #define UNWIND_HINT_TYPE_REGS 1 #define UNWIND_HINT_TYPE_REGS_PARTIAL 2 #define UNWIND_HINT_TYPE_FUNC 3 +#define UNWIND_HINT_TYPE_ENTRY 4 +#define UNWIND_HINT_TYPE_SAVE 5 +#define UNWIND_HINT_TYPE_RESTORE 6 #ifdef CONFIG_OBJTOOL @@ -124,7 +129,7 @@ struct unwind_hint { * the debuginfo as necessary. It will also warn if it sees any * inconsistencies. */ -.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 +.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 .Lunwind_hint_ip_\@: .pushsection .discard.unwind_hints /* struct unwind_hint */ @@ -177,7 +182,7 @@ struct unwind_hint { #define ASM_REACHABLE #else #define ANNOTATE_INTRA_FUNCTION_CALL -.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 +.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 .endm .macro STACK_FRAME_NON_STANDARD func:req .endm diff --git a/include/linux/reset.h b/include/linux/reset.h index 8a21b5756c3e..514ddf003efc 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -731,7 +731,7 @@ static inline int __must_check devm_reset_control_bulk_get_optional_exclusive(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs) { - return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, true, false, true); + return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, false, true, true); } /** diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 505aaf9fe477..81cab4b01edc 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -85,7 +85,7 @@ static inline void exit_thread(struct task_struct *tsk) extern __noreturn void do_group_exit(int); extern void exit_files(struct task_struct *); -extern void exit_itimers(struct signal_struct *); +extern void exit_itimers(struct task_struct *); extern pid_t kernel_clone(struct kernel_clone_args *kargs); struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node); diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 657a0fc68a3f..fde258b3decd 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -390,6 +390,11 @@ static const bool earlycon_acpi_spcr_enable EARLYCON_USED_OR_UNUSED; static inline int setup_earlycon(char *buf) { return 0; } #endif +static inline bool uart_console_enabled(struct uart_port *port) +{ + return uart_console(port) && (port->cons->flags & CON_ENABLED); +} + struct uart_port *uart_get_console(struct uart_port *ports, int nr, struct console *c); int uart_parse_earlycon(char *p, unsigned char *iotype, resource_size_t *addr, diff --git a/include/linux/socket.h b/include/linux/socket.h index 3c11ef18a9cf..d4523974efbd 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -421,10 +421,9 @@ extern int recvmsg_copy_msghdr(struct msghdr *msg, struct user_msghdr __user *umsg, unsigned flags, struct sockaddr __user **uaddr, struct iovec **iov); -extern int __copy_msghdr_from_user(struct msghdr *kmsg, - struct user_msghdr __user *umsg, - struct sockaddr __user **save_addr, - struct iovec __user **uiov, size_t *nsegs); +extern int __copy_msghdr(struct msghdr *kmsg, + struct user_msghdr *umsg, + struct sockaddr __user **save_addr); /* helpers which do the actual work for syscalls */ extern int __sys_recvfrom(int fd, void __user *ubuf, size_t size, diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 29917850f079..8df475db88c0 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -260,6 +260,7 @@ struct plat_stmmacenet_data { bool has_crossts; int int_snapshot_num; int ext_snapshot_num; + bool int_snapshot_en; bool ext_snapshot_en; bool multi_msi_en; int msi_mac_vec; diff --git a/include/net/amt.h b/include/net/amt.h index 0e40c3d64fcf..08fc30cf2f34 100644 --- a/include/net/amt.h +++ b/include/net/amt.h @@ -78,6 +78,15 @@ enum amt_status { #define AMT_STATUS_MAX (__AMT_STATUS_MAX - 1) +/* Gateway events only */ +enum amt_event { + AMT_EVENT_NONE, + AMT_EVENT_RECEIVE, + AMT_EVENT_SEND_DISCOVERY, + AMT_EVENT_SEND_REQUEST, + __AMT_EVENT_MAX, +}; + struct amt_header { #if defined(__LITTLE_ENDIAN_BITFIELD) u8 type:4, @@ -292,6 +301,12 @@ struct amt_group_node { struct hlist_head sources[]; }; +#define AMT_MAX_EVENTS 16 +struct amt_events { + enum amt_event event; + struct sk_buff *skb; +}; + struct amt_dev { struct net_device *dev; struct net_device *stream_dev; @@ -308,6 +323,7 @@ struct amt_dev { struct delayed_work req_wq; /* Protected by RTNL */ struct delayed_work secret_wq; + struct work_struct event_wq; /* AMT status */ enum amt_status status; /* Generated key */ @@ -345,6 +361,10 @@ struct amt_dev { /* Used only in gateway mode */ u64 mac:48, reserved:16; + /* AMT gateway side message handler queue */ + struct amt_events events[AMT_MAX_EVENTS]; + u8 event_idx; + u8 nr_events; }; #define AMT_TOS 0xc0 diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 6d02e12e4702..80f41446b1f0 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -8462,11 +8462,12 @@ int cfg80211_bss_color_notify(struct net_device *dev, gfp_t gfp, * cfg80211_obss_color_collision_notify - notify about bss color collision * @dev: network device * @color_bitmap: representations of the colors that the local BSS is aware of + * @gfp: allocation flags */ static inline int cfg80211_obss_color_collision_notify(struct net_device *dev, - u64 color_bitmap) + u64 color_bitmap, gfp_t gfp) { - return cfg80211_bss_color_notify(dev, GFP_KERNEL, + return cfg80211_bss_color_notify(dev, gfp, NL80211_CMD_OBSS_COLOR_COLLISION, 0, color_bitmap); } diff --git a/include/net/compat.h b/include/net/compat.h index 595fee069b82..84c163f40f38 100644 --- a/include/net/compat.h +++ b/include/net/compat.h @@ -46,9 +46,8 @@ struct compat_rtentry { unsigned short rt_irtt; /* Initial RTT */ }; -int __get_compat_msghdr(struct msghdr *kmsg, struct compat_msghdr __user *umsg, - struct sockaddr __user **save_addr, compat_uptr_t *ptr, - compat_size_t *len); +int __get_compat_msghdr(struct msghdr *kmsg, struct compat_msghdr *msg, + struct sockaddr __user **save_addr); int get_compat_msghdr(struct msghdr *, struct compat_msghdr __user *, struct sockaddr __user **, struct iovec **); int put_cmsg_compat(struct msghdr*, int, int, int, void *); diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index ebfa3df6f8dc..fd6b510d114b 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -179,7 +179,7 @@ static inline bool inet_sk_bound_dev_eq(struct net *net, int bound_dev_if, int dif, int sdif) { #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) - return inet_bound_dev_eq(!!net->ipv4.sysctl_tcp_l3mdev_accept, + return inet_bound_dev_eq(!!READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept), bound_dev_if, dif, sdif); #else return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index daead5fb389a..6395f6b9a5d2 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -107,7 +107,8 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb) { - if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept) + if (!sk->sk_mark && + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept)) return skb->mark; return sk->sk_mark; @@ -120,7 +121,7 @@ static inline int inet_request_bound_dev_if(const struct sock *sk, #ifdef CONFIG_NET_L3_MASTER_DEV struct net *net = sock_net(sk); - if (!bound_dev_if && net->ipv4.sysctl_tcp_l3mdev_accept) + if (!bound_dev_if && READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept)) return l3mdev_master_ifindex_by_index(net, skb->skb_iif); #endif @@ -132,7 +133,7 @@ static inline int inet_sk_bound_l3mdev(const struct sock *sk) #ifdef CONFIG_NET_L3_MASTER_DEV struct net *net = sock_net(sk); - if (!net->ipv4.sysctl_tcp_l3mdev_accept) + if (!READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept)) return l3mdev_master_ifindex_by_index(net, sk->sk_bound_dev_if); #endif @@ -374,7 +375,7 @@ static inline bool inet_get_convert_csum(struct sock *sk) static inline bool inet_can_nonlocal_bind(struct net *net, struct inet_sock *inet) { - return net->ipv4.sysctl_ip_nonlocal_bind || + return READ_ONCE(net->ipv4.sysctl_ip_nonlocal_bind) || inet->freebind || inet->transparent; } diff --git a/include/net/ip.h b/include/net/ip.h index 26fffda78cca..1c979fd1904c 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -357,7 +357,7 @@ static inline bool sysctl_dev_name_is_allowed(const char *name) static inline bool inet_port_requires_bind_service(struct net *net, unsigned short port) { - return port < net->ipv4.sysctl_ip_prot_sock; + return port < READ_ONCE(net->ipv4.sysctl_ip_prot_sock); } #else @@ -384,7 +384,7 @@ void ipfrag_init(void); void ip_static_sysctl_init(void); #define IP4_REPLY_MARK(net, mark) \ - ((net)->ipv4.sysctl_fwmark_reflect ? (mark) : 0) + (READ_ONCE((net)->ipv4.sysctl_fwmark_reflect) ? (mark) : 0) static inline bool ip_is_fragment(const struct iphdr *iph) { @@ -446,7 +446,7 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, struct net *net = dev_net(dst->dev); unsigned int mtu; - if (net->ipv4.sysctl_ip_fwd_use_pmtu || + if (READ_ONCE(net->ipv4.sysctl_ip_fwd_use_pmtu) || ip_mtu_locked(dst) || !forwarding) { mtu = rt->rt_pmtu; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index ebadb2103968..47642b020706 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -6960,10 +6960,11 @@ ieee80211_get_unsol_bcast_probe_resp_tmpl(struct ieee80211_hw *hw, * @vif: &struct ieee80211_vif pointer from the add_interface callback. * @color_bitmap: a 64 bit bitmap representing the colors that the local BSS is * aware of. + * @gfp: allocation flags */ void ieeee80211_obss_color_collision_notify(struct ieee80211_vif *vif, - u64 color_bitmap); + u64 color_bitmap, gfp_t gfp); /** * ieee80211_is_tx_data - check if frame is a data frame diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 5c4e5a96a984..64cf655c818c 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -657,18 +657,22 @@ static inline void nft_set_ext_prepare(struct nft_set_ext_tmpl *tmpl) tmpl->len = sizeof(struct nft_set_ext); } -static inline void nft_set_ext_add_length(struct nft_set_ext_tmpl *tmpl, u8 id, - unsigned int len) +static inline int nft_set_ext_add_length(struct nft_set_ext_tmpl *tmpl, u8 id, + unsigned int len) { tmpl->len = ALIGN(tmpl->len, nft_set_ext_types[id].align); - BUG_ON(tmpl->len > U8_MAX); + if (tmpl->len > U8_MAX) + return -EINVAL; + tmpl->offset[id] = tmpl->len; tmpl->len += nft_set_ext_types[id].len + len; + + return 0; } -static inline void nft_set_ext_add(struct nft_set_ext_tmpl *tmpl, u8 id) +static inline int nft_set_ext_add(struct nft_set_ext_tmpl *tmpl, u8 id) { - nft_set_ext_add_length(tmpl, id, 0); + return nft_set_ext_add_length(tmpl, id, 0); } static inline void nft_set_ext_init(struct nft_set_ext *ext, diff --git a/include/net/protocol.h b/include/net/protocol.h index f51c06ae365f..6aef8cb11cc8 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -35,8 +35,6 @@ /* This is used to register protocols. */ struct net_protocol { - int (*early_demux)(struct sk_buff *skb); - int (*early_demux_handler)(struct sk_buff *skb); int (*handler)(struct sk_buff *skb); /* This returns an error if we weren't able to handle the error. */ @@ -52,8 +50,6 @@ struct net_protocol { #if IS_ENABLED(CONFIG_IPV6) struct inet6_protocol { - void (*early_demux)(struct sk_buff *skb); - void (*early_demux_handler)(struct sk_buff *skb); int (*handler)(struct sk_buff *skb); /* This returns an error if we weren't able to handle the error. */ diff --git a/include/net/raw.h b/include/net/raw.h index 8ad8df594853..c51a635671a7 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -75,7 +75,7 @@ static inline bool raw_sk_bound_dev_eq(struct net *net, int bound_dev_if, int dif, int sdif) { #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) - return inet_bound_dev_eq(!!net->ipv4.sysctl_raw_l3mdev_accept, + return inet_bound_dev_eq(READ_ONCE(net->ipv4.sysctl_raw_l3mdev_accept), bound_dev_if, dif, sdif); #else return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); diff --git a/include/net/route.h b/include/net/route.h index 991a3985712d..bbcf2aba149f 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -373,7 +373,7 @@ static inline int ip4_dst_hoplimit(const struct dst_entry *dst) struct net *net = dev_net(dst->dev); if (hoplimit == 0) - hoplimit = net->ipv4.sysctl_ip_default_ttl; + hoplimit = READ_ONCE(net->ipv4.sysctl_ip_default_ttl); return hoplimit; } diff --git a/include/net/sock.h b/include/net/sock.h index 72ca97ccb460..9fa54762e077 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1529,7 +1529,7 @@ void __sk_mem_reclaim(struct sock *sk, int amount); /* sysctl_mem values are in pages, we convert them in SK_MEM_QUANTUM units */ static inline long sk_prot_mem_limits(const struct sock *sk, int index) { - long val = sk->sk_prot->sysctl_mem[index]; + long val = READ_ONCE(sk->sk_prot->sysctl_mem[index]); #if PAGE_SIZE > SK_MEM_QUANTUM val <<= PAGE_SHIFT - SK_MEM_QUANTUM_SHIFT; diff --git a/include/net/tcp.h b/include/net/tcp.h index 1e99f5c61f84..071735e10872 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -932,7 +932,7 @@ extern const struct inet_connection_sock_af_ops ipv6_specific; INDIRECT_CALLABLE_DECLARE(void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)); INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *skb)); -INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *skb)); +void tcp_v6_early_demux(struct sk_buff *skb); #endif @@ -1403,8 +1403,8 @@ static inline void tcp_slow_start_after_idle_check(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); s32 delta; - if (!sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle || tp->packets_out || - ca_ops->cong_control) + if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle) || + tp->packets_out || ca_ops->cong_control) return; delta = tcp_jiffies32 - tp->lsndtime; if (delta > inet_csk(sk)->icsk_rto) @@ -1493,21 +1493,24 @@ static inline int keepalive_intvl_when(const struct tcp_sock *tp) { struct net *net = sock_net((struct sock *)tp); - return tp->keepalive_intvl ? : net->ipv4.sysctl_tcp_keepalive_intvl; + return tp->keepalive_intvl ? : + READ_ONCE(net->ipv4.sysctl_tcp_keepalive_intvl); } static inline int keepalive_time_when(const struct tcp_sock *tp) { struct net *net = sock_net((struct sock *)tp); - return tp->keepalive_time ? : net->ipv4.sysctl_tcp_keepalive_time; + return tp->keepalive_time ? : + READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time); } static inline int keepalive_probes(const struct tcp_sock *tp) { struct net *net = sock_net((struct sock *)tp); - return tp->keepalive_probes ? : net->ipv4.sysctl_tcp_keepalive_probes; + return tp->keepalive_probes ? : + READ_ONCE(net->ipv4.sysctl_tcp_keepalive_probes); } static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp) @@ -1520,7 +1523,8 @@ static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp) static inline int tcp_fin_time(const struct sock *sk) { - int fin_timeout = tcp_sk(sk)->linger2 ? : sock_net(sk)->ipv4.sysctl_tcp_fin_timeout; + int fin_timeout = tcp_sk(sk)->linger2 ? : + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fin_timeout); const int rto = inet_csk(sk)->icsk_rto; if (fin_timeout < (rto << 2) - (rto >> 1)) @@ -2023,7 +2027,7 @@ void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr); static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp) { struct net *net = sock_net((struct sock *)tp); - return tp->notsent_lowat ?: net->ipv4.sysctl_tcp_notsent_lowat; + return tp->notsent_lowat ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat); } bool tcp_stream_memory_free(const struct sock *sk, int wake); diff --git a/include/net/tls.h b/include/net/tls.h index 8017f1703447..8bd938f98bdd 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -704,7 +704,7 @@ int tls_sw_fallback_init(struct sock *sk, struct tls_crypto_info *crypto_info); #ifdef CONFIG_TLS_DEVICE -void tls_device_init(void); +int tls_device_init(void); void tls_device_cleanup(void); void tls_device_sk_destruct(struct sock *sk); int tls_set_device_offload(struct sock *sk, struct tls_context *ctx); @@ -724,7 +724,7 @@ static inline bool tls_is_sk_rx_device_offloaded(struct sock *sk) return tls_get_ctx(sk)->rx_conf == TLS_HW; } #else -static inline void tls_device_init(void) {} +static inline int tls_device_init(void) { return 0; } static inline void tls_device_cleanup(void) {} static inline int diff --git a/include/net/udp.h b/include/net/udp.h index b83a00330566..8dd4aa1485a6 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -167,7 +167,7 @@ static inline void udp_csum_pull_header(struct sk_buff *skb) typedef struct sock *(*udp_lookup_t)(const struct sk_buff *skb, __be16 sport, __be16 dport); -INDIRECT_CALLABLE_DECLARE(void udp_v6_early_demux(struct sk_buff *)); +void udp_v6_early_demux(struct sk_buff *skb); INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *)); struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, @@ -238,7 +238,7 @@ static inline bool udp_sk_bound_dev_eq(struct net *net, int bound_dev_if, int dif, int sdif) { #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) - return inet_bound_dev_eq(!!net->ipv4.sysctl_udp_l3mdev_accept, + return inet_bound_dev_eq(!!READ_ONCE(net->ipv4.sysctl_udp_l3mdev_accept), bound_dev_if, dif, sdif); #else return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h index aa2f951b07cd..95a8cfaad15a 100644 --- a/include/trace/events/io_uring.h +++ b/include/trace/events/io_uring.h @@ -7,6 +7,7 @@ #include <linux/tracepoint.h> #include <uapi/linux/io_uring.h> +#include <linux/io_uring_types.h> #include <linux/io_uring.h> struct io_wq_work; @@ -97,9 +98,7 @@ TRACE_EVENT(io_uring_register, /** * io_uring_file_get - called before getting references to an SQE file * - * @ctx: pointer to a ring context structure * @req: pointer to a submitted request - * @user_data: user data associated with the request * @fd: SQE file descriptor * * Allows to trace out how often an SQE file reference is obtained, which can @@ -108,9 +107,9 @@ TRACE_EVENT(io_uring_register, */ TRACE_EVENT(io_uring_file_get, - TP_PROTO(void *ctx, void *req, unsigned long long user_data, int fd), + TP_PROTO(struct io_kiocb *req, int fd), - TP_ARGS(ctx, req, user_data, fd), + TP_ARGS(req, fd), TP_STRUCT__entry ( __field( void *, ctx ) @@ -120,9 +119,9 @@ TRACE_EVENT(io_uring_file_get, ), TP_fast_assign( - __entry->ctx = ctx; + __entry->ctx = req->ctx; __entry->req = req; - __entry->user_data = user_data; + __entry->user_data = req->cqe.user_data; __entry->fd = fd; ), @@ -133,22 +132,16 @@ TRACE_EVENT(io_uring_file_get, /** * io_uring_queue_async_work - called before submitting a new async work * - * @ctx: pointer to a ring context structure * @req: pointer to a submitted request - * @user_data: user data associated with the request - * @opcode: opcode of request - * @flags request flags - * @work: pointer to a submitted io_wq_work * @rw: type of workqueue, hashed or normal * * Allows to trace asynchronous work submission. */ TRACE_EVENT(io_uring_queue_async_work, - TP_PROTO(void *ctx, void * req, unsigned long long user_data, u8 opcode, - unsigned int flags, struct io_wq_work *work, int rw), + TP_PROTO(struct io_kiocb *req, int rw), - TP_ARGS(ctx, req, user_data, opcode, flags, work, rw), + TP_ARGS(req, rw), TP_STRUCT__entry ( __field( void *, ctx ) @@ -159,19 +152,19 @@ TRACE_EVENT(io_uring_queue_async_work, __field( struct io_wq_work *, work ) __field( int, rw ) - __string( op_str, io_uring_get_opcode(opcode) ) + __string( op_str, io_uring_get_opcode(req->opcode) ) ), TP_fast_assign( - __entry->ctx = ctx; + __entry->ctx = req->ctx; __entry->req = req; - __entry->user_data = user_data; - __entry->flags = flags; - __entry->opcode = opcode; - __entry->work = work; + __entry->user_data = req->cqe.user_data; + __entry->flags = req->flags; + __entry->opcode = req->opcode; + __entry->work = &req->work; __entry->rw = rw; - __assign_str(op_str, io_uring_get_opcode(opcode)); + __assign_str(op_str, io_uring_get_opcode(req->opcode)); ), TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s, flags 0x%x, %s queue, work %p", @@ -183,19 +176,16 @@ TRACE_EVENT(io_uring_queue_async_work, /** * io_uring_defer - called when an io_uring request is deferred * - * @ctx: pointer to a ring context structure * @req: pointer to a deferred request - * @user_data: user data associated with the request - * @opcode: opcode of request * * Allows to track deferred requests, to get an insight about what requests are * not started immediately. */ TRACE_EVENT(io_uring_defer, - TP_PROTO(void *ctx, void *req, unsigned long long user_data, u8 opcode), + TP_PROTO(struct io_kiocb *req), - TP_ARGS(ctx, req, user_data, opcode), + TP_ARGS(req), TP_STRUCT__entry ( __field( void *, ctx ) @@ -203,16 +193,16 @@ TRACE_EVENT(io_uring_defer, __field( unsigned long long, data ) __field( u8, opcode ) - __string( op_str, io_uring_get_opcode(opcode) ) + __string( op_str, io_uring_get_opcode(req->opcode) ) ), TP_fast_assign( - __entry->ctx = ctx; + __entry->ctx = req->ctx; __entry->req = req; - __entry->data = user_data; - __entry->opcode = opcode; + __entry->data = req->cqe.user_data; + __entry->opcode = req->opcode; - __assign_str(op_str, io_uring_get_opcode(opcode)); + __assign_str(op_str, io_uring_get_opcode(req->opcode)); ), TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s", @@ -224,7 +214,6 @@ TRACE_EVENT(io_uring_defer, * io_uring_link - called before the io_uring request added into link_list of * another request * - * @ctx: pointer to a ring context structure * @req: pointer to a linked request * @target_req: pointer to a previous request, that would contain @req * @@ -233,9 +222,9 @@ TRACE_EVENT(io_uring_defer, */ TRACE_EVENT(io_uring_link, - TP_PROTO(void *ctx, void *req, void *target_req), + TP_PROTO(struct io_kiocb *req, struct io_kiocb *target_req), - TP_ARGS(ctx, req, target_req), + TP_ARGS(req, target_req), TP_STRUCT__entry ( __field( void *, ctx ) @@ -244,7 +233,7 @@ TRACE_EVENT(io_uring_link, ), TP_fast_assign( - __entry->ctx = ctx; + __entry->ctx = req->ctx; __entry->req = req; __entry->target_req = target_req; ), @@ -285,10 +274,7 @@ TRACE_EVENT(io_uring_cqring_wait, /** * io_uring_fail_link - called before failing a linked request * - * @ctx: pointer to a ring context structure * @req: request, which links were cancelled - * @user_data: user data associated with the request - * @opcode: opcode of request * @link: cancelled link * * Allows to track linked requests cancellation, to see not only that some work @@ -296,9 +282,9 @@ TRACE_EVENT(io_uring_cqring_wait, */ TRACE_EVENT(io_uring_fail_link, - TP_PROTO(void *ctx, void *req, unsigned long long user_data, u8 opcode, void *link), + TP_PROTO(struct io_kiocb *req, struct io_kiocb *link), - TP_ARGS(ctx, req, user_data, opcode, link), + TP_ARGS(req, link), TP_STRUCT__entry ( __field( void *, ctx ) @@ -307,17 +293,17 @@ TRACE_EVENT(io_uring_fail_link, __field( u8, opcode ) __field( void *, link ) - __string( op_str, io_uring_get_opcode(opcode) ) + __string( op_str, io_uring_get_opcode(req->opcode) ) ), TP_fast_assign( - __entry->ctx = ctx; + __entry->ctx = req->ctx; __entry->req = req; - __entry->user_data = user_data; - __entry->opcode = opcode; + __entry->user_data = req->cqe.user_data; + __entry->opcode = req->opcode; __entry->link = link; - __assign_str(op_str, io_uring_get_opcode(opcode)); + __assign_str(op_str, io_uring_get_opcode(req->opcode)); ), TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s, link %p", @@ -376,23 +362,17 @@ TRACE_EVENT(io_uring_complete, /** * io_uring_submit_sqe - called before submitting one SQE * - * @ctx: pointer to a ring context structure * @req: pointer to a submitted request - * @user_data: user data associated with the request - * @opcode: opcode of request - * @flags request flags * @force_nonblock: whether a context blocking or not - * @sq_thread: true if sq_thread has submitted this SQE * * Allows to track SQE submitting, to understand what was the source of it, SQ * thread or io_uring_enter call. */ TRACE_EVENT(io_uring_submit_sqe, - TP_PROTO(void *ctx, void *req, unsigned long long user_data, u8 opcode, u32 flags, - bool force_nonblock, bool sq_thread), + TP_PROTO(struct io_kiocb *req, bool force_nonblock), - TP_ARGS(ctx, req, user_data, opcode, flags, force_nonblock, sq_thread), + TP_ARGS(req, force_nonblock), TP_STRUCT__entry ( __field( void *, ctx ) @@ -403,19 +383,19 @@ TRACE_EVENT(io_uring_submit_sqe, __field( bool, force_nonblock ) __field( bool, sq_thread ) - __string( op_str, io_uring_get_opcode(opcode) ) + __string( op_str, io_uring_get_opcode(req->opcode) ) ), TP_fast_assign( - __entry->ctx = ctx; + __entry->ctx = req->ctx; __entry->req = req; - __entry->user_data = user_data; - __entry->opcode = opcode; - __entry->flags = flags; + __entry->user_data = req->cqe.user_data; + __entry->opcode = req->opcode; + __entry->flags = req->flags; __entry->force_nonblock = force_nonblock; - __entry->sq_thread = sq_thread; + __entry->sq_thread = req->ctx->flags & IORING_SETUP_SQPOLL; - __assign_str(op_str, io_uring_get_opcode(opcode)); + __assign_str(op_str, io_uring_get_opcode(req->opcode)); ), TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, flags 0x%x, " @@ -427,10 +407,7 @@ TRACE_EVENT(io_uring_submit_sqe, /* * io_uring_poll_arm - called after arming a poll wait if successful * - * @ctx: pointer to a ring context structure * @req: pointer to the armed request - * @user_data: user data associated with the request - * @opcode: opcode of request * @mask: request poll events mask * @events: registered events of interest * @@ -439,10 +416,9 @@ TRACE_EVENT(io_uring_submit_sqe, */ TRACE_EVENT(io_uring_poll_arm, - TP_PROTO(void *ctx, void *req, u64 user_data, u8 opcode, - int mask, int events), + TP_PROTO(struct io_kiocb *req, int mask, int events), - TP_ARGS(ctx, req, user_data, opcode, mask, events), + TP_ARGS(req, mask, events), TP_STRUCT__entry ( __field( void *, ctx ) @@ -452,18 +428,18 @@ TRACE_EVENT(io_uring_poll_arm, __field( int, mask ) __field( int, events ) - __string( op_str, io_uring_get_opcode(opcode) ) + __string( op_str, io_uring_get_opcode(req->opcode) ) ), TP_fast_assign( - __entry->ctx = ctx; + __entry->ctx = req->ctx; __entry->req = req; - __entry->user_data = user_data; - __entry->opcode = opcode; + __entry->user_data = req->cqe.user_data; + __entry->opcode = req->opcode; __entry->mask = mask; __entry->events = events; - __assign_str(op_str, io_uring_get_opcode(opcode)); + __assign_str(op_str, io_uring_get_opcode(req->opcode)); ), TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, mask 0x%x, events 0x%x", @@ -475,18 +451,15 @@ TRACE_EVENT(io_uring_poll_arm, /* * io_uring_task_add - called after adding a task * - * @ctx: pointer to a ring context structure * @req: pointer to request - * @user_data: user data associated with the request - * @opcode: opcode of request * @mask: request poll events mask * */ TRACE_EVENT(io_uring_task_add, - TP_PROTO(void *ctx, void *req, unsigned long long user_data, u8 opcode, int mask), + TP_PROTO(struct io_kiocb *req, int mask), - TP_ARGS(ctx, req, user_data, opcode, mask), + TP_ARGS(req, mask), TP_STRUCT__entry ( __field( void *, ctx ) @@ -495,17 +468,17 @@ TRACE_EVENT(io_uring_task_add, __field( u8, opcode ) __field( int, mask ) - __string( op_str, io_uring_get_opcode(opcode) ) + __string( op_str, io_uring_get_opcode(req->opcode) ) ), TP_fast_assign( - __entry->ctx = ctx; + __entry->ctx = req->ctx; __entry->req = req; - __entry->user_data = user_data; - __entry->opcode = opcode; + __entry->user_data = req->cqe.user_data; + __entry->opcode = req->opcode; __entry->mask = mask; - __assign_str(op_str, io_uring_get_opcode(opcode)); + __assign_str(op_str, io_uring_get_opcode(req->opcode)); ), TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, mask %x", @@ -518,7 +491,6 @@ TRACE_EVENT(io_uring_task_add, * io_uring_req_failed - called when an sqe is errored dring submission * * @sqe: pointer to the io_uring_sqe that failed - * @ctx: pointer to a ring context structure * @req: pointer to request * @error: error it failed with * @@ -526,9 +498,9 @@ TRACE_EVENT(io_uring_task_add, */ TRACE_EVENT(io_uring_req_failed, - TP_PROTO(const struct io_uring_sqe *sqe, void *ctx, void *req, int error), + TP_PROTO(const struct io_uring_sqe *sqe, struct io_kiocb *req, int error), - TP_ARGS(sqe, ctx, req, error), + TP_ARGS(sqe, req, error), TP_STRUCT__entry ( __field( void *, ctx ) @@ -552,7 +524,7 @@ TRACE_EVENT(io_uring_req_failed, ), TP_fast_assign( - __entry->ctx = ctx; + __entry->ctx = req->ctx; __entry->req = req; __entry->user_data = sqe->user_data; __entry->opcode = sqe->opcode; @@ -622,12 +594,42 @@ TRACE_EVENT(io_uring_cqe_overflow, __entry->ocqe = ocqe; ), - TP_printk("ring %p, user_data 0x%llx, res %d, flags %x, " + TP_printk("ring %p, user_data 0x%llx, res %d, cflags 0x%x, " "overflow_cqe %p", __entry->ctx, __entry->user_data, __entry->res, __entry->cflags, __entry->ocqe) ); +/* + * io_uring_task_work_run - ran task work + * + * @tctx: pointer to a io_uring_task + * @count: how many functions it ran + * @loops: how many loops it ran + * + */ +TRACE_EVENT(io_uring_task_work_run, + + TP_PROTO(void *tctx, unsigned int count, unsigned int loops), + + TP_ARGS(tctx, count, loops), + + TP_STRUCT__entry ( + __field( void *, tctx ) + __field( unsigned int, count ) + __field( unsigned int, loops ) + ), + + TP_fast_assign( + __entry->tctx = tctx; + __entry->count = count; + __entry->loops = loops; + ), + + TP_printk("tctx %p, count %u, loops %u", + __entry->tctx, __entry->count, __entry->loops) +); + #endif /* _TRACE_IO_URING_H */ /* This part must be outside protection */ diff --git a/include/trace/events/iocost.h b/include/trace/events/iocost.h index e282ce02fa2d..6d1626e7a4ce 100644 --- a/include/trace/events/iocost.h +++ b/include/trace/events/iocost.h @@ -160,7 +160,7 @@ TRACE_EVENT(iocost_ioc_vrate_adj, TP_fast_assign( __assign_str(devname, ioc_name(ioc)); - __entry->old_vrate = atomic64_read(&ioc->vtime_rate);; + __entry->old_vrate = atomic64_read(&ioc->vtime_rate); __entry->new_vrate = new_vrate; __entry->busy_level = ioc->busy_level; __entry->read_missed_ppm = missed_ppm[READ]; diff --git a/include/trace/events/sock.h b/include/trace/events/sock.h index 12c315782766..777ee6cbe933 100644 --- a/include/trace/events/sock.h +++ b/include/trace/events/sock.h @@ -98,7 +98,7 @@ TRACE_EVENT(sock_exceed_buf_limit, TP_STRUCT__entry( __array(char, name, 32) - __field(long *, sysctl_mem) + __array(long, sysctl_mem, 3) __field(long, allocated) __field(int, sysctl_rmem) __field(int, rmem_alloc) @@ -110,7 +110,9 @@ TRACE_EVENT(sock_exceed_buf_limit, TP_fast_assign( strncpy(__entry->name, prot->name, 32); - __entry->sysctl_mem = prot->sysctl_mem; + __entry->sysctl_mem[0] = READ_ONCE(prot->sysctl_mem[0]); + __entry->sysctl_mem[1] = READ_ONCE(prot->sysctl_mem[1]); + __entry->sysctl_mem[2] = READ_ONCE(prot->sysctl_mem[2]); __entry->allocated = allocated; __entry->sysctl_rmem = sk_get_rmem0(sk, prot); __entry->rmem_alloc = atomic_read(&sk->sk_rmem_alloc); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f4009dbdf62d..ef78e0e1a754 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5222,22 +5222,25 @@ union bpf_attr { * Return * Nothing. Always succeeds. * - * long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset) + * long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset, u64 flags) * Description * Read *len* bytes from *src* into *dst*, starting from *offset* * into *src*. + * *flags* is currently unused. * Return * 0 on success, -E2BIG if *offset* + *len* exceeds the length - * of *src*'s data, -EINVAL if *src* is an invalid dynptr. + * of *src*'s data, -EINVAL if *src* is an invalid dynptr or if + * *flags* is not 0. * - * long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len) + * long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags) * Description * Write *len* bytes from *src* into *dst*, starting from *offset* * into *dst*. + * *flags* is currently unused. * Return * 0 on success, -E2BIG if *offset* + *len* exceeds the length * of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst* - * is a read-only dynptr. + * is a read-only dynptr or if *flags* is not 0. * * void *bpf_dynptr_data(struct bpf_dynptr *ptr, u32 offset, u32 len) * Description diff --git a/include/uapi/linux/input.h b/include/uapi/linux/input.h index ef4257ab3026..2557eb7b0561 100644 --- a/include/uapi/linux/input.h +++ b/include/uapi/linux/input.h @@ -78,10 +78,13 @@ struct input_id { * Note that input core does not clamp reported values to the * [minimum, maximum] limits, such task is left to userspace. * - * The default resolution for main axes (ABS_X, ABS_Y, ABS_Z) - * is reported in units per millimeter (units/mm), resolution - * for rotational axes (ABS_RX, ABS_RY, ABS_RZ) is reported - * in units per radian. + * The default resolution for main axes (ABS_X, ABS_Y, ABS_Z, + * ABS_MT_POSITION_X, ABS_MT_POSITION_Y) is reported in units + * per millimeter (units/mm), resolution for rotational axes + * (ABS_RX, ABS_RY, ABS_RZ) is reported in units per radian. + * The resolution for the size axes (ABS_MT_TOUCH_MAJOR, + * ABS_MT_TOUCH_MINOR, ABS_MT_WIDTH_MAJOR, ABS_MT_WIDTH_MINOR) + * is reported in units per millimeter (units/mm). * When INPUT_PROP_ACCELEROMETER is set the resolution changes. * The main axes (ABS_X, ABS_Y, ABS_Z) are then reported in * units per g (units/g) and in units per degree per second diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 0ad3da28d2fc..4c9b11e2e991 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -10,6 +10,7 @@ #include <linux/fs.h> #include <linux/types.h> +#include <linux/time_types.h> /* * IO submission data structure (Submission Queue Entry) @@ -50,6 +51,7 @@ struct io_uring_sqe { __u32 unlink_flags; __u32 hardlink_flags; __u32 xattr_flags; + __u32 msg_ring_flags; }; __u64 user_data; /* data to be passed back at completion time */ /* pack this to avoid bogus arm OABI complaints */ @@ -140,9 +142,12 @@ enum { * IORING_SQ_TASKRUN in the sq ring flags. Not valid with COOP_TASKRUN. */ #define IORING_SETUP_TASKRUN_FLAG (1U << 9) - #define IORING_SETUP_SQE128 (1U << 10) /* SQEs are 128 byte */ #define IORING_SETUP_CQE32 (1U << 11) /* CQEs are 32 byte */ +/* + * Only one task is allowed to submit requests + */ +#define IORING_SETUP_SINGLE_ISSUER (1U << 12) enum io_uring_op { IORING_OP_NOP, @@ -229,10 +234,13 @@ enum io_uring_op { * * IORING_POLL_UPDATE Update existing poll request, matching * sqe->addr as the old user_data field. + * + * IORING_POLL_LEVEL Level triggered poll. */ #define IORING_POLL_ADD_MULTI (1U << 0) #define IORING_POLL_UPDATE_EVENTS (1U << 1) #define IORING_POLL_UPDATE_USER_DATA (1U << 2) +#define IORING_POLL_ADD_LEVEL (1U << 3) /* * ASYNC_CANCEL flags. @@ -241,10 +249,12 @@ enum io_uring_op { * IORING_ASYNC_CANCEL_FD Key off 'fd' for cancelation rather than the * request 'user_data' * IORING_ASYNC_CANCEL_ANY Match any request + * IORING_ASYNC_CANCEL_FD_FIXED 'fd' passed in is a fixed descriptor */ #define IORING_ASYNC_CANCEL_ALL (1U << 0) #define IORING_ASYNC_CANCEL_FD (1U << 1) #define IORING_ASYNC_CANCEL_ANY (1U << 2) +#define IORING_ASYNC_CANCEL_FD_FIXED (1U << 3) /* * send/sendmsg and recv/recvmsg flags (sqe->ioprio) @@ -253,8 +263,13 @@ enum io_uring_op { * or receive and arm poll if that yields an * -EAGAIN result, arm poll upfront and skip * the initial transfer attempt. + * + * IORING_RECV_MULTISHOT Multishot recv. Sets IORING_CQE_F_MORE if + * the handler will continue to report + * CQEs on behalf of the same SQE. */ #define IORING_RECVSEND_POLL_FIRST (1U << 0) +#define IORING_RECV_MULTISHOT (1U << 1) /* * accept flags stored in sqe->ioprio @@ -262,6 +277,22 @@ enum io_uring_op { #define IORING_ACCEPT_MULTISHOT (1U << 0) /* + * IORING_OP_MSG_RING command types, stored in sqe->addr + */ +enum { + IORING_MSG_DATA, /* pass sqe->len as 'res' and off as user_data */ + IORING_MSG_SEND_FD, /* send a registered fd to another ring */ +}; + +/* + * IORING_OP_MSG_RING flags (sqe->msg_ring_flags) + * + * IORING_MSG_RING_CQE_SKIP Don't post a CQE to the target ring. Not + * applicable for IORING_MSG_DATA, obviously. + */ +#define IORING_MSG_RING_CQE_SKIP (1U << 0) + +/* * IO completion data structure (Completion Queue Entry) */ struct io_uring_cqe { @@ -420,6 +451,12 @@ enum { IORING_REGISTER_PBUF_RING = 22, IORING_UNREGISTER_PBUF_RING = 23, + /* sync cancelation API */ + IORING_REGISTER_SYNC_CANCEL = 24, + + /* register a range of fixed file slots for automatic slot allocation */ + IORING_REGISTER_FILE_ALLOC_RANGE = 25, + /* this goes last */ IORING_REGISTER_LAST }; @@ -483,7 +520,7 @@ struct io_uring_probe { __u8 ops_len; /* length of ops[] array below */ __u16 resv; __u32 resv2[3]; - struct io_uring_probe_op ops[0]; + struct io_uring_probe_op ops[]; }; struct io_uring_restriction { @@ -555,4 +592,32 @@ struct io_uring_getevents_arg { __u64 ts; }; +/* + * Argument for IORING_REGISTER_SYNC_CANCEL + */ +struct io_uring_sync_cancel_reg { + __u64 addr; + __s32 fd; + __u32 flags; + struct __kernel_timespec timeout; + __u64 pad[4]; +}; + +/* + * Argument for IORING_REGISTER_FILE_ALLOC_RANGE + * The range is specified as [off, off + len) + */ +struct io_uring_file_index_range { + __u32 off; + __u32 len; + __u64 resv; +}; + +struct io_uring_recvmsg_out { + __u32 namelen; + __u32 controllen; + __u32 payloadlen; + __u32 flags; +}; + #endif diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 5088bd9f1922..860f867c50c0 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -2083,7 +2083,8 @@ struct kvm_stats_header { #define KVM_STATS_UNIT_BYTES (0x1 << KVM_STATS_UNIT_SHIFT) #define KVM_STATS_UNIT_SECONDS (0x2 << KVM_STATS_UNIT_SHIFT) #define KVM_STATS_UNIT_CYCLES (0x3 << KVM_STATS_UNIT_SHIFT) -#define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_CYCLES +#define KVM_STATS_UNIT_BOOLEAN (0x4 << KVM_STATS_UNIT_SHIFT) +#define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_BOOLEAN #define KVM_STATS_BASE_SHIFT 8 #define KVM_STATS_BASE_MASK (0xF << KVM_STATS_BASE_SHIFT) diff --git a/include/uapi/linux/tty.h b/include/uapi/linux/tty.h index 9d0f06bfbac3..68aeae2addec 100644 --- a/include/uapi/linux/tty.h +++ b/include/uapi/linux/tty.h @@ -38,8 +38,9 @@ #define N_NULL 27 /* Null ldisc used for error handling */ #define N_MCTP 28 /* MCTP-over-serial */ #define N_DEVELOPMENT 29 /* Manual out-of-tree testing */ +#define N_CAN327 30 /* ELM327 based OBD-II interfaces */ /* Always the newest line discipline + 1 */ -#define NR_LDISCS 30 +#define NR_LDISCS 31 #endif /* _UAPI_LINUX_TTY_H */ |